euparliamentmonitor 0.9.20 → 0.9.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/README.md +2 -2
  2. package/package.json +6 -3
  3. package/scripts/aggregator/editorial-brief-resolver.d.ts +38 -0
  4. package/scripts/aggregator/editorial-brief-resolver.js +32 -0
  5. package/scripts/aggregator/generator/render-one.js +35 -0
  6. package/scripts/aggregator/html/localize-body.d.ts +32 -0
  7. package/scripts/aggregator/html/localize-body.js +69 -0
  8. package/scripts/aggregator/html/shell.d.ts +10 -0
  9. package/scripts/aggregator/html/shell.js +11 -1
  10. package/scripts/aggregator/markdown-renderer.d.ts +23 -24
  11. package/scripts/aggregator/markdown-renderer.js +39 -25
  12. package/scripts/aggregator/metadata/artifact-walker.js +2 -2
  13. package/scripts/aggregator/metadata/heading-rules.js +1 -0
  14. package/scripts/aggregator/metadata/resolve-helpers.js +9 -3
  15. package/scripts/aggregator/reader-guide/builder.js +3 -1
  16. package/scripts/aggregator/reader-guide/labels.d.ts +7 -0
  17. package/scripts/aggregator/reader-guide/labels.js +22 -0
  18. package/scripts/aggregator/reader-intelligence-guide.d.ts +1 -1
  19. package/scripts/aggregator/reader-intelligence-guide.js +1 -1
  20. package/scripts/aggregator/seo-entity-extractor.d.ts +45 -0
  21. package/scripts/aggregator/seo-entity-extractor.js +211 -0
  22. package/scripts/copy-vendor.js +84 -112
  23. package/scripts/discover-untranslated-briefs.js +123 -4
  24. package/scripts/dump-article-seo.js +567 -0
  25. package/scripts/generators/news-indexes/backfill.d.ts +6 -1
  26. package/scripts/generators/news-indexes/backfill.js +71 -4
  27. package/scripts/generators/news-indexes/per-language.js +21 -7
  28. package/scripts/generators/political-intelligence/html.js +39 -8
  29. package/scripts/generators/sitemap/html.js +25 -7
  30. package/scripts/mcp/ep/error-classifier.d.ts +2 -2
  31. package/scripts/mcp/ep/error-classifier.js +2 -2
  32. package/scripts/validate-brief-translations.js +119 -5
package/README.md CHANGED
@@ -136,7 +136,7 @@ The published site is the audience-facing companion to this npm/TypeScript packa
136
136
 
137
137
  **MCP Server Integration**: The project uses the
138
138
  [European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server)
139
- v1.3.9 for accessing real EU Parliament data via the Model Context Protocol.
139
+ v1.3.10 for accessing real EU Parliament data via the Model Context Protocol.
140
140
 
141
141
  - **MCP Server Status**: ✅ Fully operational — 60+ EP data tools available
142
142
  (feeds, direct lookups, analytical tools, intelligence correlation)
@@ -432,7 +432,7 @@ import type { ArticleCategory, LanguageCode } from 'euparliamentmonitor/types';
432
432
 
433
433
  ## 🔌 Data Sources
434
434
 
435
- **Primary — European Parliament MCP Server** ([Hack23/European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server) v1.3.9+, fully operational):
435
+ **Primary — European Parliament MCP Server** ([Hack23/European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server) v1.3.10+, fully operational):
436
436
 
437
437
  - 🗳️ Plenary sessions, voting records, roll-call votes
438
438
  - 📜 Adopted texts, motions, resolutions, urgency files
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "euparliamentmonitor",
3
- "version": "0.9.20",
3
+ "version": "0.9.22",
4
4
  "type": "module",
5
5
  "description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
6
6
  "main": "scripts/index.js",
@@ -71,6 +71,7 @@
71
71
  "prior-run-diff": "node scripts/aggregator/prior-run-diff.js",
72
72
  "generate-article": "node scripts/aggregator/article-generator.js",
73
73
  "generate-article:all": "node scripts/aggregator/article-generator.js --all",
74
+ "dump:article-seo": "node scripts/dump-article-seo.js",
74
75
  "generate-news-indexes": "node scripts/generators/news-indexes.js",
75
76
  "generate-sitemap": "node scripts/generators/sitemap.js",
76
77
  "image:generate": "node scripts/generate-responsive-images.js",
@@ -164,6 +165,7 @@
164
165
  "chartjs-plugin-annotation": "3.1.0",
165
166
  "clean-css": "^5.3.3",
166
167
  "d3": "7.9.0",
168
+ "esbuild": "0.28.0",
167
169
  "eslint": "10.4.0",
168
170
  "eslint-config-prettier": "10.1.8",
169
171
  "eslint-plugin-jsdoc": "63.0.0",
@@ -194,7 +196,7 @@
194
196
  "node": ">=26"
195
197
  },
196
198
  "dependencies": {
197
- "european-parliament-mcp-server": "1.3.9",
199
+ "european-parliament-mcp-server": "1.3.10",
198
200
  "markdown-it": "^14.1.1",
199
201
  "markdown-it-anchor": "^9.2.0",
200
202
  "markdown-it-attrs": "^4.3.1",
@@ -208,6 +210,7 @@
208
210
  "flatted": ">=3.4.2",
209
211
  "path-to-regexp": ">=8.4.0",
210
212
  "ip-address": ">=10.1.1",
211
- "uuid": ">=11.1.1"
213
+ "uuid": ">=11.1.1",
214
+ "qs": "6.15.2"
212
215
  }
213
216
  }
@@ -73,4 +73,42 @@ export declare function resolveLocalizedBriefHighlight(runDir: string, lang: Lan
73
73
  * brief candidate file exists
74
74
  */
75
75
  export declare function discoverLocalizedBriefs(runDir: string, languages: readonly LanguageCode[]): readonly LanguageCode[];
76
+ /**
77
+ * Localized brief body suitable for HTML rendering.
78
+ *
79
+ * Unlike {@link resolveLocalizedBriefHighlight} — which extracts a few
80
+ * short SEO/metadata fields (headline, summary) for `<meta>` tags and
81
+ * JSON-LD — this helper returns the **full body** of the translated
82
+ * executive brief, with the SPDX preamble stripped, so the caller can
83
+ * render it through {@link renderMarkdown} and splice the resulting
84
+ * HTML into the per-language article variant.
85
+ *
86
+ * Used by the article-generator HTML pipeline (`render-one.ts`) to
87
+ * upgrade non-English variants from the English aggregated body to a
88
+ * truly localized one whenever a translated `executive-brief_<lang>.md`
89
+ * exists in the run directory.
90
+ */
91
+ export interface LocalizedBriefBody {
92
+ /** Markdown body of the localized brief (post-SPDX strip). */
93
+ readonly markdown: string;
94
+ /** Run-relative path of the file that produced {@link markdown}. */
95
+ readonly sourceFile: string;
96
+ }
97
+ /**
98
+ * Read the **full markdown body** of a translated executive brief for
99
+ * `lang` from `runDir`, searching the standard candidate paths
100
+ * (`executive-brief_<lang>.md` → `extended/executive-brief_<lang>.md`).
101
+ * SPDX HTML-comment preambles are stripped using the same logic as the
102
+ * SEO-metadata path, so the returned markdown starts at the first real
103
+ * content line (`# Headline` or similar).
104
+ *
105
+ * Returns `null` when `runDir` is missing, the language is English, or
106
+ * no candidate file exists. The caller is expected to fall back to the
107
+ * English aggregated body in that case — see `render-one.ts`.
108
+ *
109
+ * @param runDir - Absolute run directory
110
+ * @param lang - Target language code (omitted when `lang === 'en'`)
111
+ * @returns Localized brief body + source file, or `null` when absent
112
+ */
113
+ export declare function readLocalizedBriefBody(runDir: string, lang: LanguageCode): LocalizedBriefBody | null;
76
114
  //# sourceMappingURL=editorial-brief-resolver.d.ts.map
@@ -217,4 +217,36 @@ export function discoverLocalizedBriefs(runDir, languages) {
217
217
  }
218
218
  return out;
219
219
  }
220
+ /**
221
+ * Read the **full markdown body** of a translated executive brief for
222
+ * `lang` from `runDir`, searching the standard candidate paths
223
+ * (`executive-brief_<lang>.md` → `extended/executive-brief_<lang>.md`).
224
+ * SPDX HTML-comment preambles are stripped using the same logic as the
225
+ * SEO-metadata path, so the returned markdown starts at the first real
226
+ * content line (`# Headline` or similar).
227
+ *
228
+ * Returns `null` when `runDir` is missing, the language is English, or
229
+ * no candidate file exists. The caller is expected to fall back to the
230
+ * English aggregated body in that case — see `render-one.ts`.
231
+ *
232
+ * @param runDir - Absolute run directory
233
+ * @param lang - Target language code; `'en'` always yields `null`
234
+ * @returns Localized brief body + source file, or `null` when absent
235
+ */
236
+ export function readLocalizedBriefBody(runDir, lang) {
237
+ if (!runDir || lang === 'en')
238
+ return null;
239
+ if (!fs.existsSync(runDir))
240
+ return null;
241
+ for (const rel of localizedBriefCandidates(lang)) {
242
+ const abs = path.join(runDir, rel);
243
+ if (!fs.existsSync(abs))
244
+ continue;
245
+ const body = readArtefactBody(abs);
246
+ if (body.trim().length === 0)
247
+ continue;
248
+ return { markdown: body, sourceFile: rel };
249
+ }
250
+ return null;
251
+ }
220
252
  //# sourceMappingURL=editorial-brief-resolver.js.map
@@ -16,6 +16,11 @@ import { resolveArticleMetadata, extractStrongProseLine, } from '../article-meta
16
16
  import { buildArticleMeta, serializeArticleMeta } from '../article-meta.js';
17
17
  import { renderMarkdown } from '../markdown-renderer.js';
18
18
  import { wrapArticleHtml, getArticleFilename, localizeArticleBody, enhanceTradecraftCards, enhanceAnalysisIndexCards, } from '../article-html.js';
19
+ import { replaceExecutiveBriefSection } from '../html/localize-body.js';
20
+ import { readLocalizedBriefBody } from '../editorial-brief-resolver.js';
21
+ import { extractRunMentions } from '../seo-entity-extractor.js';
22
+ import { SECTION_TITLE_LABELS } from '../../constants/ui/related-analysis.js';
23
+ import { getLocalizedString } from '../../constants/language-core.js';
19
24
  import { buildReaderIntelligenceGuideHtml, stripInlineReaderGuide, } from '../reader-intelligence-guide.js';
20
25
  import { ALL_LANGUAGES } from '../../constants/language-core.js';
21
26
  import { blobUrl } from '../infra/github-urls.js';
@@ -84,6 +89,10 @@ function buildJekyllArticleMarkdown(aggregated, metadata, slug, sourceFolder) {
84
89
  * canonical English Markdown source written by the same run
85
90
  * @param chromeOptions.articleCount - Total article count surfaced in the
86
91
  * site footer's `<p class="footer-stats">…</p>` line
92
+ * @param chromeOptions.mentions - SEO `mentions` list (organization names
93
+ * extracted from `intelligence/stakeholder-map.md` and
94
+ * `extended/media-framing-analysis.md`) emitted into JSON-LD on
95
+ * every language variant
87
96
  * @param opts - CLI options (needed for `outDir`)
88
97
  * @returns Relative filename of the HTML file written
89
98
  */
@@ -96,6 +105,30 @@ function writeLanguageVariant(lang, slug, aggregated, englishHtml, chromeOptions
96
105
  metaSource = fs.readFileSync(langMdAbs, 'utf8');
97
106
  bodyHtml = renderMarkdown(metaSource).html;
98
107
  }
108
+ else if (lang !== 'en') {
109
+ // No full per-language source markdown — but the run may still
110
+ // ship a translated `executive-brief_<lang>.md`. When present,
111
+ // splice its rendered HTML into the `#section-executive-brief`
112
+ // block so non-English readers see localized BLUF + key findings
113
+ // instead of English fallback prose. SEO metadata (`<title>`,
114
+ // `<meta description>`, JSON-LD `headline`) is already localized
115
+ // via `resolveLocalizedBriefHighlight` upstream, so this hook
116
+ // exclusively touches the rendered article body.
117
+ const localized = opts.runDir !== null ? readLocalizedBriefBody(opts.runDir, lang) : null;
118
+ if (localized) {
119
+ const localizedRendered = renderMarkdown(localized.markdown).html;
120
+ // Strip the first H1 from the translated brief —
121
+ // `replaceExecutiveBriefSection` re-emits the canonical
122
+ // `<h2 id="section-executive-brief">…</h2>` heading itself,
123
+ // and the brief's own `# Headline` is duplicate chrome.
124
+ const briefBodyHtml = localizedRendered.replace(/<h1[^>]*>[\s\S]*?<\/h1>\s*/, '');
125
+ const briefHeadingMap = SECTION_TITLE_LABELS['executive-brief'];
126
+ const localizedHeading = briefHeadingMap
127
+ ? getLocalizedString(briefHeadingMap, lang)
128
+ : 'Executive Brief';
129
+ bodyHtml = replaceExecutiveBriefSection(bodyHtml, localizedHeading, briefBodyHtml);
130
+ }
131
+ }
99
132
  bodyHtml = stripInlineReaderGuide(bodyHtml);
100
133
  bodyHtml = bodyHtml.replace(/<h1[^>]*>[\s\S]*?<\/h1>\s*/, '');
101
134
  const guideHtml = buildReaderIntelligenceGuideHtml(lang, aggregated.sectionToc, aggregated.includedArtifacts);
@@ -123,6 +156,7 @@ function writeLanguageVariant(lang, slug, aggregated, englishHtml, chromeOptions
123
156
  toc: aggregated.sectionToc,
124
157
  articleCount: chromeOptions.articleCount,
125
158
  isBasedOn: aggregated.includedArtifacts.map((a) => blobUrl(a.repoRelPath)),
159
+ mentions: chromeOptions.mentions,
126
160
  });
127
161
  const filename = getArticleFilename(slug, lang);
128
162
  fs.writeFileSync(path.join(opts.outDir, filename), html, 'utf8');
@@ -236,6 +270,7 @@ export function generateArticle(opts, runSuffix, articleCountOverride) {
236
270
  metadata: effectiveMetadata,
237
271
  sourceMarkdownRelPath: runArticleMdRelPath,
238
272
  articleCount: articleCountOverride ?? countPublishedArticles(opts.repoRoot),
273
+ mentions: opts.runDir ? extractRunMentions(opts.runDir) : [],
239
274
  };
240
275
  for (const lang of opts.langs) {
241
276
  const filename = writeLanguageVariant(lang, slug, aggregated, rendered.html, chromeOptions, opts);
@@ -22,6 +22,38 @@ export declare function localizeArticleBody(bodyHtml: string, lang: LanguageCode
22
22
  * @returns Modified string, or `haystack` unchanged when `needle` is absent
23
23
  */
24
24
  export declare function replaceFirstStringIn(haystack: string, needle: string, replacement: string): string;
25
+ /**
26
+ * Replace the **inner body** of the Executive Brief section (the
27
+ * `<h2 id="section-executive-brief">…</h2>` heading and everything that
28
+ * follows it up to — but not including — the next `<h2 id="section-…">`
29
+ * sibling) with the supplied replacement HTML. The Executive Brief
30
+ * heading itself is preserved by emitting it inline ahead of the
31
+ * replacement, so the in-page anchor (`#section-executive-brief`) and
32
+ * the table-of-contents link continue to work.
33
+ *
34
+ * Used by the article-generator HTML pipeline to inject the rendered
35
+ * markdown of a translated `executive-brief_<lang>.md` into the
36
+ * non-English language variants without forking the whole aggregated
37
+ * article into 14 source-language copies — see
38
+ * `editorial-brief-resolver.readLocalizedBriefBody` and
39
+ * `render-one.writeLanguageVariant`.
40
+ *
41
+ * Implementation uses `indexOf`/slice exclusively to stay within
42
+ * CodeQL's safe-regex envelope. Returns `html` unchanged when the
43
+ * Executive Brief heading is absent or malformed.
44
+ *
45
+ * @param html - Full article body HTML
46
+ * @param localizedHeading - Localized text for the Executive Brief H2
47
+ * (e.g. `"Sammanfattning"` for `sv`). Must be
48
+ * plain text — do NOT pre-escape it; this helper
49
+ * escapes it itself via `escapeHTML`.
50
+ * @param replacementBodyHtml - HTML to splice in **after** the heading.
51
+ * Should not contain its own `<h2>` for
52
+ * the Executive Brief — the heading is
53
+ * re-emitted by this helper.
54
+ * @returns Updated HTML with the localized brief body in place.
55
+ */
56
+ export declare function replaceExecutiveBriefSection(html: string, localizedHeading: string, replacementBodyHtml: string): string;
25
57
  /**
26
58
  * Replace an H2 heading's text content by locating it via its `id` attribute.
27
59
  * Uses indexOf-based search to avoid polynomial regex backtracking (CodeQL).
@@ -102,6 +102,75 @@ export function replaceFirstStringIn(haystack, needle, replacement) {
102
102
  return haystack;
103
103
  return haystack.slice(0, idx) + replacement + haystack.slice(idx + needle.length);
104
104
  }
105
+ /**
106
+ * Replace the **inner body** of the Executive Brief section (the
107
+ * `<h2 id="section-executive-brief">…</h2>` heading and everything that
108
+ * follows it up to — but not including — the next `<h2 id="section-…">`
109
+ * sibling) with the supplied replacement HTML. The Executive Brief
110
+ * heading itself is preserved by emitting it inline ahead of the
111
+ * replacement, so the in-page anchor (`#section-executive-brief`) and
112
+ * the table-of-contents link continue to work.
113
+ *
114
+ * Used by the article-generator HTML pipeline to inject the rendered
115
+ * markdown of a translated `executive-brief_<lang>.md` into the
116
+ * non-English language variants without forking the whole aggregated
117
+ * article into 14 source-language copies — see
118
+ * `editorial-brief-resolver.readLocalizedBriefBody` and
119
+ * `render-one.writeLanguageVariant`.
120
+ *
121
+ * Implementation uses `indexOf`/slice exclusively to stay within
122
+ * CodeQL's safe-regex envelope. Returns `html` unchanged when the
123
+ * Executive Brief heading is absent or malformed.
124
+ *
125
+ * @param html - Full article body HTML
126
+ * @param localizedHeading - Localized text for the Executive Brief H2
127
+ * (e.g. `"Sammanfattning"` for `sv`). Must be
128
+ * plain text — do NOT pre-escape it; this helper
129
+ * escapes it itself via `escapeHTML`.
130
+ * @param replacementBodyHtml - HTML to splice in **after** the heading.
131
+ * Should not contain its own `<h2>` for
132
+ * the Executive Brief — the heading is
133
+ * re-emitted by this helper.
134
+ * @returns Updated HTML with the localized brief body in place.
135
+ */
136
+ export function replaceExecutiveBriefSection(html, localizedHeading, replacementBodyHtml) {
137
+ const idMarker = 'id="section-executive-brief"';
138
+ const idIdx = html.indexOf(idMarker);
139
+ if (idIdx === -1)
140
+ return html;
141
+ // Walk back to the opening `<h2` of the Executive Brief heading.
142
+ const h2Open = html.lastIndexOf('<h2', idIdx);
143
+ if (h2Open === -1)
144
+ return html;
145
+ // Find the end of the heading element.
146
+ const h2CloseTagIdx = html.indexOf('</h2>', idIdx);
147
+ if (h2CloseTagIdx === -1)
148
+ return html;
149
+ const afterHeading = h2CloseTagIdx + '</h2>'.length;
150
+ // Find the next `<h2 id="section-...">` boundary — the start of the
151
+ // following article section. If there is no further section heading
152
+ // we conservatively bail out (replacing through end-of-body would
153
+ // also drop appendix content like Reader Guide / Key Takeaways).
154
+ const nextSectionId = html.indexOf('id="section-', afterHeading);
155
+ if (nextSectionId === -1)
156
+ return html;
157
+ const nextH2 = html.lastIndexOf('<h2', nextSectionId);
158
+ if (nextH2 === -1 || nextH2 <= afterHeading)
159
+ return html;
160
+ // Find the start of the line containing the next `<h2` so we don't
161
+ // strip leading whitespace from the next section. We look at most
162
+ // one newline back.
163
+ let cutEnd = nextH2;
164
+ const prevNewline = html.lastIndexOf('\n', nextH2 - 1);
165
+ if (prevNewline !== -1 && prevNewline >= afterHeading) {
166
+ cutEnd = prevNewline + 1;
167
+ }
168
+ const newHeading = `<h2 id="section-executive-brief">${escapeHTML(localizedHeading)}</h2>\n`;
169
+ const trimmedReplacement = replacementBodyHtml.endsWith('\n')
170
+ ? replacementBodyHtml
171
+ : `${replacementBodyHtml}\n`;
172
+ return html.slice(0, h2Open) + newHeading + trimmedReplacement + html.slice(cutEnd);
173
+ }
105
174
  /**
106
175
  * Replace an H2 heading's text content by locating it via its `id` attribute.
107
176
  * Uses indexOf-based search to avoid polynomial regex backtracking (CodeQL).
@@ -59,6 +59,16 @@ export interface WrapArticleOptions {
59
59
  * Emitted as `isBasedOn` in the JSON-LD `NewsArticle` schema for provenance.
60
60
  */
61
61
  readonly isBasedOn?: readonly string[];
62
+ /**
63
+ * Optional: real-world organizations (political groups, media outlets,
64
+ * institutions) named in the article's intelligence and media-framing
65
+ * artifacts. Emitted as JSON-LD `mentions` Organization entries to give
66
+ * search engines and AI overviews high-precision entity grounding.
67
+ * Currently only extractable from the English intelligence corpus; the
68
+ * same list is reused across every language variant because the entities
69
+ * are language-independent proper nouns.
70
+ */
71
+ readonly mentions?: readonly string[];
62
72
  }
63
73
  /**
64
74
  * Render the full article HTML document with the shared chrome.
@@ -23,6 +23,7 @@ import { getSitemapFilename } from '../../generators/sitemap/index.js';
23
23
  import { truncateHeadline, getTitleSeparator, getLocalizedArticleType, getLocalizedArticleTypePlain, } from './headline.js';
24
24
  import { getArticleFilename, buildArticleHreflangLinks, buildLanguageSwitcher, } from './hreflang.js';
25
25
  import { buildArticleToc } from './toc.js';
26
+ import { blobUrl } from '../infra/github-urls.js';
26
27
  /** Publisher organization name used in JSON-LD, meta tags. */
27
28
  export const PUBLISHER_NAME = 'Hack23 AB';
28
29
  /** Site name used across meta tags and structured data. */
@@ -50,8 +51,9 @@ export function wrapArticleHtml(options) {
50
51
  const sitemapLabel = getLocalizedString(FOOTER_SITEMAP_LABELS, safeLang);
51
52
  const politicalIntelligenceHref = `../${getPoliticalIntelligenceFilename(safeLang)}`;
52
53
  const sitemapHref = `../${getSitemapFilename(safeLang)}`;
54
+ const sourceMdHref = options.sourceMarkdownRelPath ? blobUrl(options.sourceMarkdownRelPath) : '';
53
55
  const sourceMdLink = options.sourceMarkdownRelPath
54
- ? `<p class="article-source-md"><a href="${BASE_URL}/${options.sourceMarkdownRelPath}" rel="alternate" type="text/markdown"><svg class="icon icon-inline" width="16" height="16" viewBox="0 0 24 24" role="img" aria-hidden="true" focusable="false"><path d="M9 5H7a2 2 0 0 0-2 2v10a2 2 0 0 0 2 2h10a2 2 0 0 0 2-2v-2M12 3h6a2 2 0 0 1 2 2v6M10 14 20 4" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/></svg> ${escapeHTML(sourceMdLabel)}</a></p>`
56
+ ? `<p class="article-source-md"><a href="${escapeHTML(sourceMdHref)}" rel="alternate" type="text/markdown"><svg class="icon icon-inline" width="16" height="16" viewBox="0 0 24 24" role="img" aria-hidden="true" focusable="false"><path d="M9 5H7a2 2 0 0 0-2 2v10a2 2 0 0 0 2 2h10a2 2 0 0 0 2-2v-2M12 3h6a2 2 0 0 1 2 2v6M10 14 20 4" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"/></svg> ${escapeHTML(sourceMdLabel)}</a></p>`
55
57
  : '';
56
58
  const tocHtml = buildArticleToc(options.toc ?? [], safeLang);
57
59
  const articleMainClass = tocHtml.length > 0 ? 'article-main--with-toc' : 'article-main--no-toc';
@@ -128,6 +130,14 @@ export function wrapArticleHtml(options) {
128
130
  isBasedOn: options.isBasedOn.map((url) => ({ '@type': 'CreativeWork', url })),
129
131
  }
130
132
  : {}),
133
+ ...(options.mentions && options.mentions.length > 0
134
+ ? {
135
+ mentions: options.mentions.map((name) => ({
136
+ '@type': 'Organization',
137
+ name,
138
+ })),
139
+ }
140
+ : {}),
131
141
  };
132
142
  const breadcrumbLd = {
133
143
  '@context': 'https://schema.org',
@@ -67,6 +67,26 @@ export declare function stripMarkdownFrontMatter(markdown: string): string;
67
67
  * @returns Slug of up to 80 ASCII-ish characters, with dashes as separators
68
68
  */
69
69
  export declare function slugify(text: string): string;
70
+ /**
71
+ * Decode the small set of HTML entities that Markdown authors (and
72
+ * upstream generators) occasionally pre-encode inside fenced mermaid
73
+ * blocks — typically &amp; for & in political-group labels like
74
+ * S&D or Greens/EFA. Without this decode step, the subsequent
75
+ * escapeHtml pass would re-escape & to &amp; and emit
76
+ * S&amp;amp;D into the rendered HTML, which the Mermaid client
77
+ * library then renders verbatim instead of as S&D.
78
+ *
79
+ * Uses indexOf/split/join exclusively (no RegExp) to stay
80
+ * within CodeQL's safe-regex envelope. Only the canonical entity
81
+ * forms are decoded — anything more exotic (e.g. &#x26;) is left
82
+ * alone so we never accidentally swallow a literal that the author
83
+ * intended to keep encoded.
84
+ *
85
+ * @param content - Raw fenced-block content (post-sanitizeMermaidQuadrantChart)
86
+ * @returns Content with pre-encoded HTML entities normalised back to
87
+ * their literal characters, ready for a single escapeHtml.
88
+ */
89
+ export declare function decodeMermaidPreEncodedEntities(content: string): string;
70
90
  /**
71
91
  * Auto-quote unquoted `quadrantChart` labels so the Mermaid v11 lexer
72
92
  * accepts them. The Mermaid `quadrantChart` grammar treats unquoted
@@ -74,30 +94,9 @@ export declare function slugify(text: string): string;
74
94
  * en-dashes (`–`, U+2013), ellipsis (`…`), parentheses, colons, and
75
95
  * non-ASCII currency symbols (`€`) all trigger
76
96
  * `Lexical error … Unrecognized text` and prevent the diagram from
77
- * rendering, leaving the raw `<pre>` source visible on the page.
78
- *
79
- * The style guide already instructs authors to wrap every quadrant /
80
- * axis / data-point label in double quotes (see
81
- * `analysis/methodologies/political-style-guide.md` § Standard
82
- * `quadrantChart` init block), but AI-generated `article.md` files
83
- * occasionally drop the quoting. Rather than reject the article at
84
- * Stage C we sanitize at the renderer boundary so every published
85
- * HTML page renders, regardless of upstream authoring discipline.
86
- *
87
- * Sanitization is deliberately scoped to `quadrantChart` blocks —
88
- * `flowchart`, `sequenceDiagram`, `mindmap`, `pie`, `gantt`, and
89
- * `xychart-beta` accept the same Unicode characters in their unquoted
90
- * labels and are passed through unchanged.
91
- *
92
- * Lines normalised:
93
- * - `x-axis Left --> Right` → `x-axis "Left" --> "Right"`
94
- * - `y-axis Low --> High` → `y-axis "Low" --> "High"`
95
- * - `quadrant-N Label text` → `quadrant-N "Label text"`
96
- * - `Data Label: [x, y]` → `"Data Label": [x, y]`
97
- *
98
- * Already-quoted operands are preserved byte-for-byte. The `title`
99
- * line, the `%%{init:…}%%` directive, and any line not matching one
100
- * of the recognised shapes are also left untouched.
97
+ * rendering. Sanitization is scoped to `quadrantChart` blocks only;
98
+ * other diagram types accept those characters in unquoted labels and
99
+ * are passed through unchanged.
101
100
  *
102
101
  * @param content - Raw mermaid fence body
103
102
  * @returns The same content with `quadrantChart` labels auto-quoted;
@@ -180,6 +180,40 @@ function rewriteQuadrantChartLine(line) {
180
180
  }
181
181
  return line;
182
182
  }
183
+ /**
184
+ * Decode the small set of HTML entities that Markdown authors (and
185
+ * upstream generators) occasionally pre-encode inside fenced mermaid
186
+ * blocks — typically &amp; for & in political-group labels like
187
+ * S&D or Greens/EFA. Without this decode step, the subsequent
188
+ * escapeHtml pass would re-escape & to &amp; and emit
189
+ * S&amp;amp;D into the rendered HTML, which the Mermaid client
190
+ * library then renders verbatim instead of as S&D.
191
+ *
192
+ * Uses indexOf/split/join exclusively (no RegExp) to stay
193
+ * within CodeQL's safe-regex envelope. Only the canonical entity
194
+ * forms are decoded — anything more exotic (e.g. &#x26;) is left
195
+ * alone so we never accidentally swallow a literal that the author
196
+ * intended to keep encoded.
197
+ *
198
+ * @param content - Raw fenced-block content (post-sanitizeMermaidQuadrantChart)
199
+ * @returns Content with pre-encoded HTML entities normalised back to
200
+ * their literal characters, ready for a single escapeHtml.
201
+ */
202
+ export function decodeMermaidPreEncodedEntities(content) {
203
+ // Order matters: decode every other entity first (`&lt;`, `&gt;`,
204
+ // `&quot;`, `&#39;`, `&apos;`), then finally `&amp;` itself so we
205
+ // don't double-decode `&amp;lt;` -> `<`.
206
+ // Each replacement is a plain string `split(needle).join(replacement)`
207
+ // which is linear and trivially CodeQL-safe.
208
+ let out = content;
209
+ out = out.split('&lt;').join('<');
210
+ out = out.split('&gt;').join('>');
211
+ out = out.split('&quot;').join('"');
212
+ out = out.split('&#39;').join("'");
213
+ out = out.split('&apos;').join("'");
214
+ out = out.split('&amp;').join('&');
215
+ return out;
216
+ }
183
217
  /**
184
218
  * Auto-quote unquoted `quadrantChart` labels so the Mermaid v11 lexer
185
219
  * accepts them. The Mermaid `quadrantChart` grammar treats unquoted
@@ -187,30 +221,9 @@ function rewriteQuadrantChartLine(line) {
187
221
  * en-dashes (`–`, U+2013), ellipsis (`…`), parentheses, colons, and
188
222
  * non-ASCII currency symbols (`€`) all trigger
189
223
  * `Lexical error … Unrecognized text` and prevent the diagram from
190
- * rendering, leaving the raw `<pre>` source visible on the page.
191
- *
192
- * The style guide already instructs authors to wrap every quadrant /
193
- * axis / data-point label in double quotes (see
194
- * `analysis/methodologies/political-style-guide.md` § Standard
195
- * `quadrantChart` init block), but AI-generated `article.md` files
196
- * occasionally drop the quoting. Rather than reject the article at
197
- * Stage C we sanitize at the renderer boundary so every published
198
- * HTML page renders, regardless of upstream authoring discipline.
199
- *
200
- * Sanitization is deliberately scoped to `quadrantChart` blocks —
201
- * `flowchart`, `sequenceDiagram`, `mindmap`, `pie`, `gantt`, and
202
- * `xychart-beta` accept the same Unicode characters in their unquoted
203
- * labels and are passed through unchanged.
204
- *
205
- * Lines normalised:
206
- * - `x-axis Left --> Right` → `x-axis "Left" --> "Right"`
207
- * - `y-axis Low --> High` → `y-axis "Low" --> "High"`
208
- * - `quadrant-N Label text` → `quadrant-N "Label text"`
209
- * - `Data Label: [x, y]` → `"Data Label": [x, y]`
210
- *
211
- * Already-quoted operands are preserved byte-for-byte. The `title`
212
- * line, the `%%{init:…}%%` directive, and any line not matching one
213
- * of the recognised shapes are also left untouched.
224
+ * rendering. Sanitization is scoped to `quadrantChart` blocks only;
225
+ * other diagram types accept those characters in unquoted labels and
226
+ * are passed through unchanged.
214
227
  *
215
228
  * @param content - Raw mermaid fence body
216
229
  * @returns The same content with `quadrantChart` labels auto-quoted;
@@ -267,7 +280,8 @@ function installMermaidFence(md) {
267
280
  const labelFn = env2.mermaidLabel ?? ((n) => `Mermaid diagram ${n + 1}`);
268
281
  const label = md.utils.escapeHtml(labelFn(currentIndex, token.content));
269
282
  const sanitized = sanitizeMermaidQuadrantChart(token.content);
270
- const body = md.utils.escapeHtml(sanitized);
283
+ const decoded = decodeMermaidPreEncodedEntities(sanitized);
284
+ const body = md.utils.escapeHtml(decoded);
271
285
  return `<figure class="mermaid-figure" role="img" aria-label="${label}">\n<pre class="mermaid">${body}</pre>\n</figure>\n`;
272
286
  }
273
287
  return defaultFence(tokens, idx, opts, env, self);
@@ -17,7 +17,7 @@ import fs from 'fs';
17
17
  import path from 'path';
18
18
  import { extractFirstH1 } from './h1-extractor.js';
19
19
  import { extractLedeAfterHeading, extractStrongProseLine } from './lede-extractor.js';
20
- import { isGenericHeading, stripArtifactCategoryAffix } from './heading-rules.js';
20
+ import { isGenericHeading, isArtifactCategoryHeading, stripArtifactCategoryAffix, } from './heading-rules.js';
21
21
  import { truncateTitle } from './text-utils.js';
22
22
  import { extractPriorityFindingHighlight } from './priority-finding-highlight.js';
23
23
  /** Ordered list of artefact filenames that typically carry the editorial H1. */
@@ -132,7 +132,7 @@ function probeCandidateForHighlight(runDir, rel, articleType, date) {
132
132
  // distinctive editorial headline ("Digital Markets Act Enforcement",
133
133
  // "Ukraine War Accountability") instead of a stripped category noun.
134
134
  const priority = extractPriorityFindingHighlight(body);
135
- if (priority?.headline) {
135
+ if (priority?.headline && !isArtifactCategoryHeading(priority.headline)) {
136
136
  return {
137
137
  cleanHighlight: {
138
138
  headline: truncateTitle(priority.headline),
@@ -69,6 +69,7 @@ export const ARTIFACT_CATEGORY_PREFIXES = [
69
69
  'commission wp alignment',
70
70
  'committee activity report',
71
71
  'cross run continuity',
72
+ 'data availability assessment',
72
73
  'deep analysis',
73
74
  'economic context',
74
75
  'executive brief',
@@ -123,13 +123,19 @@ export function composeContextualTitle(fallbackTitle, editorialHeadline, runId)
123
123
  */
124
124
  export function composeContextualDescription(lang, baseDescription, editorial, date, _runId) {
125
125
  const labels = getLocalizedString(SEO_CONTEXT_LABELS, lang);
126
- const parts = [baseDescription.trim()];
127
- parts.push(`${labels.date} ${date}.`);
126
+ const base = baseDescription.trim();
127
+ const parts = [base];
128
+ const datePart = `${labels.date} ${date}.`;
129
+ if (!containsNormalized(base, `${labels.date} ${date}`)) {
130
+ parts.push(datePart);
131
+ }
128
132
  const context = pickFirstNonEmpty([editorial.summary, editorial.headline]);
129
133
  if (context && !containsNormalized(parts[0] ?? '', context)) {
130
134
  parts.push(`${labels.context}: ${context}`);
131
135
  }
132
- parts.push(labels.reader);
136
+ if (!containsNormalized(parts.join(' '), labels.reader)) {
137
+ parts.push(labels.reader);
138
+ }
133
139
  return truncateDescription(parts.join(' '));
134
140
  }
135
141
  /**
@@ -3,7 +3,7 @@
3
3
  import { getLocalizedString, getTextDirection } from '../../constants/language-core.js';
4
4
  import { escapeHTML } from '../../utils/file-utils.js';
5
5
  import { READER_GUIDE_SECTION_ID } from '../reader-guide-constants.js';
6
- import { READER_GUIDE_TITLE_LABELS, READER_GUIDE_INTRO_LABELS, READER_GUIDE_COL_NEED_LABELS, READER_GUIDE_COL_VALUE_LABELS, } from './labels.js';
6
+ import { READER_GUIDE_TITLE_LABELS, READER_GUIDE_INTRO_LABELS, READER_GUIDE_TIP_LABELS, READER_GUIDE_COL_NEED_LABELS, READER_GUIDE_COL_VALUE_LABELS, } from './labels.js';
7
7
  import { READER_GUIDE_ROWS } from './rows.js';
8
8
  import { getReaderGuideSectionIcon } from './icons.js';
9
9
  /**
@@ -40,11 +40,13 @@ export function buildReaderIntelligenceGuideHtml(lang, sections, _included = [])
40
40
  return '';
41
41
  const title = getLocalizedString(READER_GUIDE_TITLE_LABELS, lang);
42
42
  const intro = getLocalizedString(READER_GUIDE_INTRO_LABELS, lang);
43
+ const tip = getLocalizedString(READER_GUIDE_TIP_LABELS, lang);
43
44
  const colNeed = getLocalizedString(READER_GUIDE_COL_NEED_LABELS, lang);
44
45
  const colValue = getLocalizedString(READER_GUIDE_COL_VALUE_LABELS, lang);
45
46
  return `<section id="${READER_GUIDE_SECTION_ID}" data-component="reader-intelligence-guide" aria-label="${escapeHTML(title)}"${dir === 'rtl' ? ' dir="rtl"' : ''}>
46
47
  <h2 id="${READER_GUIDE_SECTION_ID}-heading"><span class="guide-icon" aria-hidden="true">🧭</span> ${escapeHTML(title)}</h2>
47
48
  <p class="reader-guide-intro">${escapeHTML(intro)}</p>
49
+ <p class="reader-guide-tip"><span class="guide-icon" aria-hidden="true">💡</span> ${escapeHTML(tip)}</p>
48
50
  <div class="table-scroll" role="region" tabindex="0" aria-labelledby="${READER_GUIDE_SECTION_ID}-heading">
49
51
  <table class="reader-guide-table">
50
52
  <caption class="sr-only">${escapeHTML(title)}</caption>
@@ -10,6 +10,13 @@ import type { LanguageMap } from '../../types/index.js';
10
10
  export declare const READER_GUIDE_TITLE_LABELS: LanguageMap;
11
11
  /** Introduction text for the Reader Intelligence Guide */
12
12
  export declare const READER_GUIDE_INTRO_LABELS: LanguageMap;
13
+ /**
14
+ * Practical "how to read this article" tip rendered immediately under the
15
+ * intro. Distinct from the intro so existing snapshot tests continue to
16
+ * match the intro string verbatim, and so styles can target the two
17
+ * paragraphs independently.
18
+ */
19
+ export declare const READER_GUIDE_TIP_LABELS: LanguageMap;
13
20
  /** Table header: "Reader need" */
14
21
  export declare const READER_GUIDE_COL_NEED_LABELS: LanguageMap;
15
22
  /** Table header: "What you'll get" */