npm - euparliamentmonitor - Versions diffs - 0.9.20 → 0.9.22 - Mend

euparliamentmonitor 0.9.20 → 0.9.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/README.md +2 -2
package/package.json +6 -3
package/scripts/aggregator/editorial-brief-resolver.d.ts +38 -0
package/scripts/aggregator/editorial-brief-resolver.js +32 -0
package/scripts/aggregator/generator/render-one.js +35 -0
package/scripts/aggregator/html/localize-body.d.ts +32 -0
package/scripts/aggregator/html/localize-body.js +69 -0
package/scripts/aggregator/html/shell.d.ts +10 -0
package/scripts/aggregator/html/shell.js +11 -1
package/scripts/aggregator/markdown-renderer.d.ts +23 -24
package/scripts/aggregator/markdown-renderer.js +39 -25
package/scripts/aggregator/metadata/artifact-walker.js +2 -2
package/scripts/aggregator/metadata/heading-rules.js +1 -0
package/scripts/aggregator/metadata/resolve-helpers.js +9 -3
package/scripts/aggregator/reader-guide/builder.js +3 -1
package/scripts/aggregator/reader-guide/labels.d.ts +7 -0
package/scripts/aggregator/reader-guide/labels.js +22 -0
package/scripts/aggregator/reader-intelligence-guide.d.ts +1 -1
package/scripts/aggregator/reader-intelligence-guide.js +1 -1
package/scripts/aggregator/seo-entity-extractor.d.ts +45 -0
package/scripts/aggregator/seo-entity-extractor.js +211 -0
package/scripts/copy-vendor.js +84 -112
package/scripts/discover-untranslated-briefs.js +123 -4
package/scripts/dump-article-seo.js +567 -0
package/scripts/generators/news-indexes/backfill.d.ts +6 -1
package/scripts/generators/news-indexes/backfill.js +71 -4
package/scripts/generators/news-indexes/per-language.js +21 -7
package/scripts/generators/political-intelligence/html.js +39 -8
package/scripts/generators/sitemap/html.js +25 -7
package/scripts/mcp/ep/error-classifier.d.ts +2 -2
package/scripts/mcp/ep/error-classifier.js +2 -2
package/scripts/validate-brief-translations.js +119 -5

package/README.md CHANGED Viewed

@@ -136,7 +136,7 @@ The published site is the audience-facing companion to this npm/TypeScript packa
 **MCP Server Integration**: The project uses the
 [European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server)
-v1.3.9 for accessing real EU Parliament data via the Model Context Protocol.
+v1.3.10 for accessing real EU Parliament data via the Model Context Protocol.
 - **MCP Server Status**: ✅ Fully operational — 60+ EP data tools available
   (feeds, direct lookups, analytical tools, intelligence correlation)
@@ -432,7 +432,7 @@ import type { ArticleCategory, LanguageCode } from 'euparliamentmonitor/types';
 ## 🔌 Data Sources
-**Primary — European Parliament MCP Server** ([Hack23/European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server) v1.3.9+, fully operational):
+**Primary — European Parliament MCP Server** ([Hack23/European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server) v1.3.10+, fully operational):
 - 🗳️ Plenary sessions, voting records, roll-call votes
 - 📜 Adopted texts, motions, resolutions, urgency files

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "euparliamentmonitor",
-  "version": "0.9.20",
+  "version": "0.9.22",
   "type": "module",
   "description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
   "main": "scripts/index.js",
@@ -71,6 +71,7 @@
     "prior-run-diff": "node scripts/aggregator/prior-run-diff.js",
     "generate-article": "node scripts/aggregator/article-generator.js",
     "generate-article:all": "node scripts/aggregator/article-generator.js --all",
+    "dump:article-seo": "node scripts/dump-article-seo.js",
     "generate-news-indexes": "node scripts/generators/news-indexes.js",
     "generate-sitemap": "node scripts/generators/sitemap.js",
     "image:generate": "node scripts/generate-responsive-images.js",
@@ -164,6 +165,7 @@
     "chartjs-plugin-annotation": "3.1.0",
     "clean-css": "^5.3.3",
     "d3": "7.9.0",
+    "esbuild": "0.28.0",
     "eslint": "10.4.0",
     "eslint-config-prettier": "10.1.8",
     "eslint-plugin-jsdoc": "63.0.0",
@@ -194,7 +196,7 @@
     "node": ">=26"
   },
   "dependencies": {
-    "european-parliament-mcp-server": "1.3.9",
+    "european-parliament-mcp-server": "1.3.10",
     "markdown-it": "^14.1.1",
     "markdown-it-anchor": "^9.2.0",
     "markdown-it-attrs": "^4.3.1",
@@ -208,6 +210,7 @@
     "flatted": ">=3.4.2",
     "path-to-regexp": ">=8.4.0",
     "ip-address": ">=10.1.1",
-    "uuid": ">=11.1.1"
+    "uuid": ">=11.1.1",
+    "qs": "6.15.2"
   }
 }

package/scripts/aggregator/editorial-brief-resolver.d.ts CHANGED Viewed

@@ -73,4 +73,42 @@ export declare function resolveLocalizedBriefHighlight(runDir: string, lang: Lan
  * brief candidate file exists
  */
 export declare function discoverLocalizedBriefs(runDir: string, languages: readonly LanguageCode[]): readonly LanguageCode[];
+/**
+ * Localized brief body suitable for HTML rendering, as returned by
+ * {@link readLocalizedBriefBody}.
+ *
+ * Unlike {@link resolveLocalizedBriefHighlight} — which extracts a few
+ * short SEO/metadata fields (headline, summary) for `<meta>` tags and
+ * JSON-LD — this shape carries the **full body** of the translated
+ * executive brief, with the SPDX preamble stripped, so the caller can
+ * render it through {@link renderMarkdown} and splice the resulting
+ * HTML into the per-language article variant.
+ *
+ * Used by the article-generator HTML pipeline (`render-one.ts`) to
+ * replace the English Executive Brief section of a non-English variant
+ * with the translated one whenever a translated
+ * `executive-brief_<lang>.md` exists in the run directory. The other
+ * sections of the aggregated body are not affected by this value.
+ */
+export interface LocalizedBriefBody {
+    /** Markdown body of the localized brief (post-SPDX strip). */
+    readonly markdown: string;
+    /** Run-relative path of the file that produced {@link markdown}. */
+    readonly sourceFile: string;
+}
+/**
+ * Read the **full markdown body** of a translated executive brief for
+ * `lang` from `runDir`, searching the standard candidate paths
+ * (`executive-brief_<lang>.md` → `extended/executive-brief_<lang>.md`).
+ * SPDX HTML-comment preambles are stripped using the same logic as the
+ * SEO-metadata path, so the returned markdown starts at the first real
+ * content line (`# Headline` or similar).
+ *
+ * Returns `null` when `runDir` is empty or does not exist on disk, when
+ * `lang` is `'en'`, or when no candidate file with a non-blank body
+ * exists (a candidate whose body is empty or whitespace-only is skipped
+ * and the next candidate is tried). The caller is expected to fall back
+ * to the English aggregated body in that case — see `render-one.ts`.
+ *
+ * @param runDir - Absolute run directory
+ * @param lang - Target language code; `'en'` always yields `null`
+ *               because English has no translated brief to read
+ * @returns Localized brief body + run-relative source file of the first
+ *          usable candidate, or `null` when absent
+ */
+export declare function readLocalizedBriefBody(runDir: string, lang: LanguageCode): LocalizedBriefBody | null;
 //# sourceMappingURL=editorial-brief-resolver.d.ts.map

package/scripts/aggregator/editorial-brief-resolver.js CHANGED Viewed

@@ -217,4 +217,36 @@ export function discoverLocalizedBriefs(runDir, languages) {
     }
     return out;
 }
+/**
+ * Read the **full markdown body** of a translated executive brief for
+ * `lang` from `runDir`, searching the standard candidate paths
+ * (`executive-brief_<lang>.md` → `extended/executive-brief_<lang>.md`).
+ * SPDX HTML-comment preambles are stripped using the same logic as the
+ * SEO-metadata path, so the returned markdown starts at the first real
+ * content line (`# Headline` or similar).
+ *
+ * Returns `null` when `runDir` is empty or does not exist on disk, when
+ * `lang` is `'en'`, or when no candidate file with a non-blank body
+ * exists (a candidate whose body is empty or whitespace-only is skipped
+ * and the next candidate is tried). The caller is expected to fall back
+ * to the English aggregated body in that case — see `render-one.ts`.
+ *
+ * @param runDir - Absolute run directory
+ * @param lang - Target language code; `'en'` always yields `null`
+ *               because English has no translated brief to read
+ * @returns Localized brief body + run-relative source file of the first
+ *          usable candidate, or `null` when absent
+ */
+export function readLocalizedBriefBody(runDir, lang) {
+    if (!runDir || lang === 'en')
+        return null;
+    if (!fs.existsSync(runDir))
+        return null;
+    for (const rel of localizedBriefCandidates(lang)) {
+        const abs = path.join(runDir, rel);
+        if (!fs.existsSync(abs))
+            continue;
+        const body = readArtefactBody(abs);
+        if (body.trim().length === 0)
+            continue;
+        return { markdown: body, sourceFile: rel };
+    }
+    return null;
+}
 //# sourceMappingURL=editorial-brief-resolver.js.map

package/scripts/aggregator/generator/render-one.js CHANGED Viewed

@@ -16,6 +16,11 @@ import { resolveArticleMetadata, extractStrongProseLine, } from '../article-meta
 import { buildArticleMeta, serializeArticleMeta } from '../article-meta.js';
 import { renderMarkdown } from '../markdown-renderer.js';
 import { wrapArticleHtml, getArticleFilename, localizeArticleBody, enhanceTradecraftCards, enhanceAnalysisIndexCards, } from '../article-html.js';
+import { replaceExecutiveBriefSection } from '../html/localize-body.js';
+import { readLocalizedBriefBody } from '../editorial-brief-resolver.js';
+import { extractRunMentions } from '../seo-entity-extractor.js';
+import { SECTION_TITLE_LABELS } from '../../constants/ui/related-analysis.js';
+import { getLocalizedString } from '../../constants/language-core.js';
 import { buildReaderIntelligenceGuideHtml, stripInlineReaderGuide, } from '../reader-intelligence-guide.js';
 import { ALL_LANGUAGES } from '../../constants/language-core.js';
 import { blobUrl } from '../infra/github-urls.js';
@@ -84,6 +89,10 @@ function buildJekyllArticleMarkdown(aggregated, metadata, slug, sourceFolder) {
  *        canonical English Markdown source written by the same run
  * @param chromeOptions.articleCount - Total article count surfaced in the
  *        site footer's `<p class="footer-stats">…</p>` line
+ * @param chromeOptions.mentions - SEO `mentions` list (organization names
+ *        extracted from `intelligence/stakeholder-map.md` and
+ *        `extended/media-framing-analysis.md`) emitted into JSON-LD on
+ *        every language variant
  * @param opts - CLI options (needed for `outDir`)
  * @returns Relative filename of the HTML file written
  */
@@ -96,6 +105,30 @@ function writeLanguageVariant(lang, slug, aggregated, englishHtml, chromeOptions
         metaSource = fs.readFileSync(langMdAbs, 'utf8');
         bodyHtml = renderMarkdown(metaSource).html;
     }
+    else if (lang !== 'en') {
+        // No full per-language source markdown — but the run may still
+        // ship a translated `executive-brief_<lang>.md`. When present,
+        // splice its rendered HTML into the `#section-executive-brief`
+        // block so non-English readers see localized BLUF + key findings
+        // instead of English fallback prose. SEO metadata (`<title>`,
+        // `<meta description>`, JSON-LD `headline`) is already localized
+        // via `resolveLocalizedBriefHighlight` upstream, so this hook
+        // exclusively touches the rendered article body.
+        const localized = opts.runDir !== null ? readLocalizedBriefBody(opts.runDir, lang) : null;
+        if (localized) {
+            const localizedRendered = renderMarkdown(localized.markdown).html;
+            // Strip the first H1 from the translated brief —
+            // `replaceExecutiveBriefSection` re-emits the canonical
+            // `<h2 id="section-executive-brief">…</h2>` heading itself,
+            // and the brief's own `# Headline` is duplicate chrome.
+            const briefBodyHtml = localizedRendered.replace(/<h1[^>]*>[\s\S]*?<\/h1>\s*/, '');
+            const briefHeadingMap = SECTION_TITLE_LABELS['executive-brief'];
+            const localizedHeading = briefHeadingMap
+                ? getLocalizedString(briefHeadingMap, lang)
+                : 'Executive Brief';
+            bodyHtml = replaceExecutiveBriefSection(bodyHtml, localizedHeading, briefBodyHtml);
+        }
+    }
     bodyHtml = stripInlineReaderGuide(bodyHtml);
     bodyHtml = bodyHtml.replace(/<h1[^>]*>[\s\S]*?<\/h1>\s*/, '');
     const guideHtml = buildReaderIntelligenceGuideHtml(lang, aggregated.sectionToc, aggregated.includedArtifacts);
@@ -123,6 +156,7 @@ function writeLanguageVariant(lang, slug, aggregated, englishHtml, chromeOptions
         toc: aggregated.sectionToc,
         articleCount: chromeOptions.articleCount,
         isBasedOn: aggregated.includedArtifacts.map((a) => blobUrl(a.repoRelPath)),
+        mentions: chromeOptions.mentions,
     });
     const filename = getArticleFilename(slug, lang);
     fs.writeFileSync(path.join(opts.outDir, filename), html, 'utf8');
@@ -236,6 +270,7 @@ export function generateArticle(opts, runSuffix, articleCountOverride) {
             metadata: effectiveMetadata,
             sourceMarkdownRelPath: runArticleMdRelPath,
             articleCount: articleCountOverride ?? countPublishedArticles(opts.repoRoot),
+            mentions: opts.runDir ? extractRunMentions(opts.runDir) : [],
         };
         for (const lang of opts.langs) {
             const filename = writeLanguageVariant(lang, slug, aggregated, rendered.html, chromeOptions, opts);

package/scripts/aggregator/html/localize-body.d.ts CHANGED Viewed

@@ -22,6 +22,38 @@ export declare function localizeArticleBody(bodyHtml: string, lang: LanguageCode
  * @returns Modified string, or `haystack` unchanged when `needle` is absent
  */
 export declare function replaceFirstStringIn(haystack: string, needle: string, replacement: string): string;
+/**
+ * Replace the Executive Brief section — the
+ * `<h2 id="section-executive-brief">…</h2>` heading and everything that
+ * follows it up to, but not including, the next `<h2 id="section-…">`
+ * sibling — with a freshly emitted heading followed by the supplied
+ * replacement HTML. The heading is re-emitted as
+ * `<h2 id="section-executive-brief">` carrying `localizedHeading`, so
+ * the in-page anchor (`#section-executive-brief`) and the
+ * table-of-contents link continue to work. Only the `id` is kept; any
+ * other attributes on the original heading are not carried over.
+ *
+ * Used by the article-generator HTML pipeline to inject the rendered
+ * markdown of a translated `executive-brief_<lang>.md` into the
+ * non-English language variants without forking the whole aggregated
+ * article into 14 source-language copies — see
+ * `editorial-brief-resolver.readLocalizedBriefBody` and
+ * `render-one.writeLanguageVariant`.
+ *
+ * Implementation uses `indexOf`/slice exclusively to stay within
+ * CodeQL's safe-regex envelope. Returns `html` unchanged when the
+ * Executive Brief heading is absent or malformed, and also when no
+ * later `<h2>` carrying an `id="section-…"` attribute follows it: the
+ * helper deliberately refuses to replace through end-of-body, because
+ * that would drop trailing content as well.
+ *
+ * @param html - Full article body HTML
+ * @param localizedHeading - Localized text for the Executive Brief H2
+ *                           (e.g. `"Sammanfattning"` for `sv`). Pass
+ *                           plain, unescaped text: this helper runs it
+ *                           through `escapeHTML` itself, so callers
+ *                           must not pre-escape it.
+ * @param replacementBodyHtml - HTML to splice in **after** the heading.
+ *                              Should not contain its own `<h2>` for
+ *                              the Executive Brief — the heading is
+ *                              re-emitted by this helper. A trailing
+ *                              newline is appended when missing.
+ * @returns Updated HTML with the localized brief body in place, or the
+ *          original `html` when any of the bail-out conditions applies.
+ */
+export declare function replaceExecutiveBriefSection(html: string, localizedHeading: string, replacementBodyHtml: string): string;
 /**
  * Replace an H2 heading's text content by locating it via its `id` attribute.
  * Uses indexOf-based search to avoid polynomial regex backtracking (CodeQL).

package/scripts/aggregator/html/localize-body.js CHANGED Viewed

@@ -102,6 +102,75 @@ export function replaceFirstStringIn(haystack, needle, replacement) {
         return haystack;
     return haystack.slice(0, idx) + replacement + haystack.slice(idx + needle.length);
 }
+/**
+ * Replace the Executive Brief section — the
+ * `<h2 id="section-executive-brief">…</h2>` heading and everything that
+ * follows it up to, but not including, the next `<h2 id="section-…">`
+ * sibling — with a freshly emitted heading followed by the supplied
+ * replacement HTML. The heading is re-emitted as
+ * `<h2 id="section-executive-brief">` carrying `localizedHeading`, so
+ * the in-page anchor (`#section-executive-brief`) and the
+ * table-of-contents link continue to work. Only the `id` is kept; any
+ * other attributes on the original heading are not carried over.
+ *
+ * Used by the article-generator HTML pipeline to inject the rendered
+ * markdown of a translated `executive-brief_<lang>.md` into the
+ * non-English language variants without forking the whole aggregated
+ * article into 14 source-language copies — see
+ * `editorial-brief-resolver.readLocalizedBriefBody` and
+ * `render-one.writeLanguageVariant`.
+ *
+ * Implementation uses `indexOf`/slice exclusively to stay within
+ * CodeQL's safe-regex envelope. Returns `html` unchanged when the
+ * Executive Brief heading is absent or malformed, and also when no
+ * later `<h2>` carrying an `id="section-…"` attribute follows it: the
+ * helper deliberately refuses to replace through end-of-body, because
+ * that would drop trailing content as well.
+ *
+ * @param html - Full article body HTML
+ * @param localizedHeading - Localized text for the Executive Brief H2
+ *                           (e.g. `"Sammanfattning"` for `sv`). Pass
+ *                           plain, unescaped text: this helper runs it
+ *                           through `escapeHTML` itself, so callers
+ *                           must not pre-escape it.
+ * @param replacementBodyHtml - HTML to splice in **after** the heading.
+ *                              Should not contain its own `<h2>` for
+ *                              the Executive Brief — the heading is
+ *                              re-emitted by this helper. A trailing
+ *                              newline is appended when missing.
+ * @returns Updated HTML with the localized brief body in place, or the
+ *          original `html` when any of the bail-out conditions applies.
+ */
+export function replaceExecutiveBriefSection(html, localizedHeading, replacementBodyHtml) {
+    const idMarker = 'id="section-executive-brief"';
+    const idIdx = html.indexOf(idMarker);
+    if (idIdx === -1)
+        return html;
+    // Walk back to the opening `<h2` of the Executive Brief heading.
+    const h2Open = html.lastIndexOf('<h2', idIdx);
+    if (h2Open === -1)
+        return html;
+    // Find the end of the heading element.
+    const h2CloseTagIdx = html.indexOf('</h2>', idIdx);
+    if (h2CloseTagIdx === -1)
+        return html;
+    const afterHeading = h2CloseTagIdx + '</h2>'.length;
+    // Find the next `<h2 id="section-...">` boundary — the start of the
+    // following article section. If there is no further section heading
+    // we conservatively bail out (replacing through end-of-body would
+    // also drop appendix content like Reader Guide / Key Takeaways).
+    const nextSectionId = html.indexOf('id="section-', afterHeading);
+    if (nextSectionId === -1)
+        return html;
+    const nextH2 = html.lastIndexOf('<h2', nextSectionId);
+    if (nextH2 === -1 || nextH2 <= afterHeading)
+        return html;
+    // Find the start of the line containing the next `<h2` so we don't
+    // strip leading whitespace from the next section. We look at most
+    // one newline back.
+    let cutEnd = nextH2;
+    const prevNewline = html.lastIndexOf('\n', nextH2 - 1);
+    if (prevNewline !== -1 && prevNewline >= afterHeading) {
+        cutEnd = prevNewline + 1;
+    }
+    const newHeading = `<h2 id="section-executive-brief">${escapeHTML(localizedHeading)}</h2>\n`;
+    const trimmedReplacement = replacementBodyHtml.endsWith('\n')
+        ? replacementBodyHtml
+        : `${replacementBodyHtml}\n`;
+    return html.slice(0, h2Open) + newHeading + trimmedReplacement + html.slice(cutEnd);
+}
 /**
  * Replace an H2 heading's text content by locating it via its `id` attribute.
  * Uses indexOf-based search to avoid polynomial regex backtracking (CodeQL).

package/scripts/aggregator/html/shell.d.ts CHANGED Viewed

@@ -59,6 +59,16 @@ export interface WrapArticleOptions {
      * Emitted as `isBasedOn` in the JSON-LD `NewsArticle` schema for provenance.
      */
     readonly isBasedOn?: readonly string[];
+    /**
+     * Optional: real-world organizations (political groups, media outlets,
+     * institutions) named in the article's intelligence and media-framing
+     * artifacts. Emitted as JSON-LD `mentions` Organization entries to give
+     * search engines and AI overviews high-precision entity grounding.
+     * Currently only extractable from the English intelligence corpus; the
+     * same list is reused across every language variant because the entities
+     * are language-independent proper nouns.
+     */
+    readonly mentions?: readonly string[];
 }
 /**
  * Render the full article HTML document with the shared chrome.

package/scripts/aggregator/html/shell.js CHANGED Viewed

@@ -23,6 +23,7 @@ import { getSitemapFilename } from '../../generators/sitemap/index.js';
 import { truncateHeadline, getTitleSeparator, getLocalizedArticleType, getLocalizedArticleTypePlain, } from './headline.js';
 import { getArticleFilename, buildArticleHreflangLinks, buildLanguageSwitcher, } from './hreflang.js';
 import { buildArticleToc } from './toc.js';
+import { blobUrl } from '../infra/github-urls.js';
 /** Publisher organization name used in JSON-LD, meta tags. */
 export const PUBLISHER_NAME = 'Hack23 AB';
 /** Site name used across meta tags and structured data. */
@@ -50,8 +51,9 @@ export function wrapArticleHtml(options) {
     const sitemapLabel = getLocalizedString(FOOTER_SITEMAP_LABELS, safeLang);
     const politicalIntelligenceHref = `../${getPoliticalIntelligenceFilename(safeLang)}`;
     const sitemapHref = `../${getSitemapFilename(safeLang)}`;
+    const sourceMdHref = options.sourceMarkdownRelPath ? blobUrl(options.sourceMarkdownRelPath) : '';
     const sourceMdLink = options.sourceMarkdownRelPath
-        ? `<p class="article-source-md"><a href="${BASE_URL}/${options.sourceMarkdownRelPath}" rel="alternate" type="text/markdown"><svg class="icon icon-inline" width="16" height="16" viewBox="0 0 24 24" role="img" aria-hidden="true" focusable="false"><path d="M9 5H7a2 2 0 0 0-2 2v10a2 2 0 0 0 2 2h10a2 2 0 0 0 2-2v-2M12 3h6a2 2 0 0 1 2 2v6M10 14 20 4" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/></svg> ${escapeHTML(sourceMdLabel)}</a></p>`
+        ? `<p class="article-source-md"><a href="${escapeHTML(sourceMdHref)}" rel="alternate" type="text/markdown"><svg class="icon icon-inline" width="16" height="16" viewBox="0 0 24 24" role="img" aria-hidden="true" focusable="false"><path d="M9 5H7a2 2 0 0 0-2 2v10a2 2 0 0 0 2 2h10a2 2 0 0 0 2-2v-2M12 3h6a2 2 0 0 1 2 2v6M10 14 20 4" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/></svg> ${escapeHTML(sourceMdLabel)}</a></p>`
         : '';
     const tocHtml = buildArticleToc(options.toc ?? [], safeLang);
     const articleMainClass = tocHtml.length > 0 ? 'article-main--with-toc' : 'article-main--no-toc';
@@ -128,6 +130,14 @@ export function wrapArticleHtml(options) {
                 isBasedOn: options.isBasedOn.map((url) => ({ '@type': 'CreativeWork', url })),
             }
             : {}),
+        ...(options.mentions && options.mentions.length > 0
+            ? {
+                mentions: options.mentions.map((name) => ({
+                    '@type': 'Organization',
+                    name,
+                })),
+            }
+            : {}),
     };
     const breadcrumbLd = {
         '@context': 'https://schema.org',

package/scripts/aggregator/markdown-renderer.d.ts CHANGED Viewed

@@ -67,6 +67,26 @@ export declare function stripMarkdownFrontMatter(markdown: string): string;
  * @returns Slug of up to 80 ASCII-ish characters, with dashes as separators
  */
 export declare function slugify(text: string): string;
+/**
+ * Decode the small set of HTML entities that Markdown authors (and
+ * upstream generators) occasionally pre-encode inside fenced mermaid
+ * blocks — typically &amp; for & in political-group labels like
+ * S&D or Greens/EFA. Without this decode step, the subsequent
+ * escapeHtml pass would re-escape & to &amp; and emit
+ * S&amp;amp;D into the rendered HTML, which the Mermaid client
+ * library then renders verbatim instead of as S&D.
+ *
+ * Uses indexOf/split/join exclusively (no RegExp) to stay
+ * within CodeQL's safe-regex envelope. Exactly six entity forms are
+ * decoded: `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&apos;` and `&amp;`
+ * (with `&amp;` handled last, so a doubly-encoded `&amp;lt;` comes
+ * back as `&lt;` rather than `<`). Anything more exotic
+ * (e.g. &#x26;) is left alone so we never accidentally swallow a
+ * literal that the author intended to keep encoded.
+ *
+ * @param content - Raw fenced-block content (post-sanitizeMermaidQuadrantChart)
+ * @returns Content with pre-encoded HTML entities normalised back to
+ *          their literal characters, ready for a single escapeHtml.
+ */
+export declare function decodeMermaidPreEncodedEntities(content: string): string;
 /**
  * Auto-quote unquoted `quadrantChart` labels so the Mermaid v11 lexer
  * accepts them. The Mermaid `quadrantChart` grammar treats unquoted
@@ -74,30 +94,9 @@ export declare function slugify(text: string): string;
  * en-dashes (`–`, U+2013), ellipsis (`…`), parentheses, colons, and
  * non-ASCII currency symbols (`€`) all trigger
  * `Lexical error … Unrecognized text` and prevent the diagram from
- * rendering, leaving the raw `<pre>` source visible on the page.
- *
- * The style guide already instructs authors to wrap every quadrant /
- * axis / data-point label in double quotes (see
- * `analysis/methodologies/political-style-guide.md` § Standard
- * `quadrantChart` init block), but AI-generated `article.md` files
- * occasionally drop the quoting. Rather than reject the article at
- * Stage C we sanitize at the renderer boundary so every published
- * HTML page renders, regardless of upstream authoring discipline.
- *
- * Sanitization is deliberately scoped to `quadrantChart` blocks —
- * `flowchart`, `sequenceDiagram`, `mindmap`, `pie`, `gantt`, and
- * `xychart-beta` accept the same Unicode characters in their unquoted
- * labels and are passed through unchanged.
- *
- * Lines normalised:
- *   - `x-axis Left --> Right`     → `x-axis "Left" --> "Right"`
- *   - `y-axis Low --> High`       → `y-axis "Low" --> "High"`
- *   - `quadrant-N Label text`     → `quadrant-N "Label text"`
- *   - `Data Label: [x, y]`        → `"Data Label": [x, y]`
- *
- * Already-quoted operands are preserved byte-for-byte. The `title`
- * line, the `%%{init:…}%%` directive, and any line not matching one
- * of the recognised shapes are also left untouched.
+ * rendering. Sanitization is scoped to `quadrantChart` blocks only;
+ * other diagram types accept those characters in unquoted labels and
+ * are passed through unchanged.
  *
  * @param content - Raw mermaid fence body
  * @returns The same content with `quadrantChart` labels auto-quoted;

package/scripts/aggregator/markdown-renderer.js CHANGED Viewed

@@ -180,6 +180,40 @@ function rewriteQuadrantChartLine(line) {
     }
     return line;
 }
+/**
+ * Decode the small set of HTML entities that Markdown authors (and
+ * upstream generators) occasionally pre-encode inside fenced mermaid
+ * blocks — typically &amp; for & in political-group labels like
+ * S&D or Greens/EFA. Without this decode step, the subsequent
+ * escapeHtml pass would re-escape & to &amp; and emit
+ * S&amp;amp;D into the rendered HTML, which the Mermaid client
+ * library then renders verbatim instead of as S&D.
+ *
+ * Uses indexOf/split/join exclusively (no RegExp) to stay
+ * within CodeQL's safe-regex envelope. Exactly six entity forms are
+ * decoded: `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&apos;` and `&amp;`
+ * (with `&amp;` handled last, so a doubly-encoded `&amp;lt;` comes
+ * back as `&lt;` rather than `<`). Anything more exotic
+ * (e.g. &#x26;) is left alone so we never accidentally swallow a
+ * literal that the author intended to keep encoded.
+ *
+ * @param content - Raw fenced-block content (post-sanitizeMermaidQuadrantChart)
+ * @returns Content with pre-encoded HTML entities normalised back to
+ *          their literal characters, ready for a single escapeHtml.
+ */
+export function decodeMermaidPreEncodedEntities(content) {
+    // Order matters: decode every other entity first and `&amp;` last.
+    // Decoding `&amp;` earlier would turn `&amp;lt;` into `&lt;`, which
+    // a later pass would then decode a second time into `<`.
+    // Each replacement is a plain string `split(needle).join(replacement)`
+    // which is linear and trivially CodeQL-safe.
+    let out = content;
+    out = out.split('&lt;').join('<');
+    out = out.split('&gt;').join('>');
+    out = out.split('&quot;').join('"');
+    out = out.split('&#39;').join("'");
+    out = out.split('&apos;').join("'");
+    out = out.split('&amp;').join('&');
+    return out;
+}
 /**
  * Auto-quote unquoted `quadrantChart` labels so the Mermaid v11 lexer
  * accepts them. The Mermaid `quadrantChart` grammar treats unquoted
@@ -187,30 +221,9 @@ function rewriteQuadrantChartLine(line) {
  * en-dashes (`–`, U+2013), ellipsis (`…`), parentheses, colons, and
  * non-ASCII currency symbols (`€`) all trigger
  * `Lexical error … Unrecognized text` and prevent the diagram from
- * rendering, leaving the raw `<pre>` source visible on the page.
- *
- * The style guide already instructs authors to wrap every quadrant /
- * axis / data-point label in double quotes (see
- * `analysis/methodologies/political-style-guide.md` § Standard
- * `quadrantChart` init block), but AI-generated `article.md` files
- * occasionally drop the quoting. Rather than reject the article at
- * Stage C we sanitize at the renderer boundary so every published
- * HTML page renders, regardless of upstream authoring discipline.
- *
- * Sanitization is deliberately scoped to `quadrantChart` blocks —
- * `flowchart`, `sequenceDiagram`, `mindmap`, `pie`, `gantt`, and
- * `xychart-beta` accept the same Unicode characters in their unquoted
- * labels and are passed through unchanged.
- *
- * Lines normalised:
- *   - `x-axis Left --> Right`     → `x-axis "Left" --> "Right"`
- *   - `y-axis Low --> High`       → `y-axis "Low" --> "High"`
- *   - `quadrant-N Label text`     → `quadrant-N "Label text"`
- *   - `Data Label: [x, y]`        → `"Data Label": [x, y]`
- *
- * Already-quoted operands are preserved byte-for-byte. The `title`
- * line, the `%%{init:…}%%` directive, and any line not matching one
- * of the recognised shapes are also left untouched.
+ * rendering. Sanitization is scoped to `quadrantChart` blocks only;
+ * other diagram types accept those characters in unquoted labels and
+ * are passed through unchanged.
  *
  * @param content - Raw mermaid fence body
  * @returns The same content with `quadrantChart` labels auto-quoted;
@@ -267,7 +280,8 @@ function installMermaidFence(md) {
             const labelFn = env2.mermaidLabel ?? ((n) => `Mermaid diagram ${n + 1}`);
             const label = md.utils.escapeHtml(labelFn(currentIndex, token.content));
             const sanitized = sanitizeMermaidQuadrantChart(token.content);
-            const body = md.utils.escapeHtml(sanitized);
+            const decoded = decodeMermaidPreEncodedEntities(sanitized);
+            const body = md.utils.escapeHtml(decoded);
             return `<figure class="mermaid-figure" role="img" aria-label="${label}">\n<pre class="mermaid">${body}</pre>\n</figure>\n`;
         }
         return defaultFence(tokens, idx, opts, env, self);

package/scripts/aggregator/metadata/artifact-walker.js CHANGED Viewed

@@ -17,7 +17,7 @@ import fs from 'fs';
 import path from 'path';
 import { extractFirstH1 } from './h1-extractor.js';
 import { extractLedeAfterHeading, extractStrongProseLine } from './lede-extractor.js';
-import { isGenericHeading, stripArtifactCategoryAffix } from './heading-rules.js';
+import { isGenericHeading, isArtifactCategoryHeading, stripArtifactCategoryAffix, } from './heading-rules.js';
 import { truncateTitle } from './text-utils.js';
 import { extractPriorityFindingHighlight } from './priority-finding-highlight.js';
 /** Ordered list of artefact filenames that typically carry the editorial H1. */
@@ -132,7 +132,7 @@ function probeCandidateForHighlight(runDir, rel, articleType, date) {
     // distinctive editorial headline ("Digital Markets Act Enforcement",
     // "Ukraine War Accountability") instead of a stripped category noun.
     const priority = extractPriorityFindingHighlight(body);
-    if (priority?.headline) {
+    if (priority?.headline && !isArtifactCategoryHeading(priority.headline)) {
         return {
             cleanHighlight: {
                 headline: truncateTitle(priority.headline),

package/scripts/aggregator/metadata/heading-rules.js CHANGED Viewed

@@ -69,6 +69,7 @@ export const ARTIFACT_CATEGORY_PREFIXES = [
     'commission wp alignment',
     'committee activity report',
     'cross run continuity',
+    'data availability assessment',
     'deep analysis',
     'economic context',
     'executive brief',

package/scripts/aggregator/metadata/resolve-helpers.js CHANGED Viewed

@@ -123,13 +123,19 @@ export function composeContextualTitle(fallbackTitle, editorialHeadline, runId)
  */
 export function composeContextualDescription(lang, baseDescription, editorial, date, _runId) {
     const labels = getLocalizedString(SEO_CONTEXT_LABELS, lang);
-    const parts = [baseDescription.trim()];
-    parts.push(`${labels.date} ${date}.`);
+    const base = baseDescription.trim();
+    const parts = [base];
+    const datePart = `${labels.date} ${date}.`;
+    if (!containsNormalized(base, `${labels.date} ${date}`)) {
+        parts.push(datePart);
+    }
     const context = pickFirstNonEmpty([editorial.summary, editorial.headline]);
     if (context && !containsNormalized(parts[0] ?? '', context)) {
         parts.push(`${labels.context}: ${context}`);
     }
-    parts.push(labels.reader);
+    if (!containsNormalized(parts.join(' '), labels.reader)) {
+        parts.push(labels.reader);
+    }
     return truncateDescription(parts.join(' '));
 }
 /**

package/scripts/aggregator/reader-guide/builder.js CHANGED Viewed

@@ -3,7 +3,7 @@
 import { getLocalizedString, getTextDirection } from '../../constants/language-core.js';
 import { escapeHTML } from '../../utils/file-utils.js';
 import { READER_GUIDE_SECTION_ID } from '../reader-guide-constants.js';
-import { READER_GUIDE_TITLE_LABELS, READER_GUIDE_INTRO_LABELS, READER_GUIDE_COL_NEED_LABELS, READER_GUIDE_COL_VALUE_LABELS, } from './labels.js';
+import { READER_GUIDE_TITLE_LABELS, READER_GUIDE_INTRO_LABELS, READER_GUIDE_TIP_LABELS, READER_GUIDE_COL_NEED_LABELS, READER_GUIDE_COL_VALUE_LABELS, } from './labels.js';
 import { READER_GUIDE_ROWS } from './rows.js';
 import { getReaderGuideSectionIcon } from './icons.js';
 /**
@@ -40,11 +40,13 @@ export function buildReaderIntelligenceGuideHtml(lang, sections, _included = [])
         return '';
     const title = getLocalizedString(READER_GUIDE_TITLE_LABELS, lang);
     const intro = getLocalizedString(READER_GUIDE_INTRO_LABELS, lang);
+    const tip = getLocalizedString(READER_GUIDE_TIP_LABELS, lang);
     const colNeed = getLocalizedString(READER_GUIDE_COL_NEED_LABELS, lang);
     const colValue = getLocalizedString(READER_GUIDE_COL_VALUE_LABELS, lang);
     return `<section id="${READER_GUIDE_SECTION_ID}" data-component="reader-intelligence-guide" aria-label="${escapeHTML(title)}"${dir === 'rtl' ? ' dir="rtl"' : ''}>
 <h2 id="${READER_GUIDE_SECTION_ID}-heading"><span class="guide-icon" aria-hidden="true">🧭</span> ${escapeHTML(title)}</h2>
 <p class="reader-guide-intro">${escapeHTML(intro)}</p>
+<p class="reader-guide-tip"><span class="guide-icon" aria-hidden="true">💡</span> ${escapeHTML(tip)}</p>
 <div class="table-scroll" role="region" tabindex="0" aria-labelledby="${READER_GUIDE_SECTION_ID}-heading">
 <table class="reader-guide-table">
 <caption class="sr-only">${escapeHTML(title)}</caption>

package/scripts/aggregator/reader-guide/labels.d.ts CHANGED Viewed

@@ -10,6 +10,13 @@ import type { LanguageMap } from '../../types/index.js';
 export declare const READER_GUIDE_TITLE_LABELS: LanguageMap;
 /** Introduction text for the Reader Intelligence Guide */
 export declare const READER_GUIDE_INTRO_LABELS: LanguageMap;
+/**
+ * Practical "how to read this article" tip rendered immediately under the
+ * intro. Distinct from the intro so existing snapshot tests continue to
+ * match the intro string verbatim, and so styles can target the two
+ * paragraphs independently.
+ */
+export declare const READER_GUIDE_TIP_LABELS: LanguageMap;
 /** Table header: "Reader need" */
 export declare const READER_GUIDE_COL_NEED_LABELS: LanguageMap;
 /** Table header: "What you'll get" */