npm - euparliamentmonitor - Versions diffs - 0.9.20 → 0.9.21 - Mend

euparliamentmonitor 0.9.20 → 0.9.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/README.md +2 -2
package/package.json +2 -2
package/scripts/aggregator/editorial-brief-resolver.d.ts +38 -0
package/scripts/aggregator/editorial-brief-resolver.js +32 -0
package/scripts/aggregator/generator/render-one.js +35 -0
package/scripts/aggregator/html/localize-body.d.ts +32 -0
package/scripts/aggregator/html/localize-body.js +69 -0
package/scripts/aggregator/html/shell.d.ts +10 -0
package/scripts/aggregator/html/shell.js +11 -1
package/scripts/aggregator/markdown-renderer.d.ts +23 -24
package/scripts/aggregator/markdown-renderer.js +39 -25
package/scripts/aggregator/metadata/resolve-helpers.js +9 -3
package/scripts/aggregator/reader-guide/builder.js +3 -1
package/scripts/aggregator/reader-guide/labels.d.ts +7 -0
package/scripts/aggregator/reader-guide/labels.js +22 -0
package/scripts/aggregator/reader-intelligence-guide.d.ts +1 -1
package/scripts/aggregator/reader-intelligence-guide.js +1 -1
package/scripts/aggregator/seo-entity-extractor.d.ts +45 -0
package/scripts/aggregator/seo-entity-extractor.js +211 -0
package/scripts/discover-untranslated-briefs.js +123 -4
package/scripts/generators/news-indexes/per-language.js +21 -7
package/scripts/generators/political-intelligence/html.js +39 -8
package/scripts/generators/sitemap/html.js +25 -7
package/scripts/mcp/ep/error-classifier.d.ts +2 -2
package/scripts/mcp/ep/error-classifier.js +2 -2
package/scripts/validate-brief-translations.js +119 -5

package/scripts/aggregator/seo-entity-extractor.d.ts ADDED Viewed

@@ -0,0 +1,45 @@
+/**
+ * Extract organization names from `intelligence/stakeholder-map.md`'s H3
+ * headings (lines starting with `### `). Stakeholder headings are shaped like:
+ *   `### EPP — Manfred Weber / 185 MEPs (25.73%)`
+ *   `### European Commission — Ursula von der Leyen (EPP)`
+ *   `### Tech Industry (Big Tech Gatekeepers)`
+ *
+ * The entity name is everything before the first em-dash, en-dash, slash,
+ * opening parenthesis, or colon — whichever comes first — trimmed and
+ * de-duplicated case-insensitively (first-seen casing wins). "Risk N: …"
+ * headings are skipped: they describe risk scenarios, not organizations.
+ *
+ * @param markdown - Raw stakeholder-map.md contents
+ * @returns De-duplicated stakeholder names in order of first appearance
+ */
+export declare function extractStakeholderNames(markdown: string): readonly string[];
+/**
+ * Extract media-outlet names from `extended/media-framing-analysis.md`.
+ * Editorial convention is a series of bold "framing buckets":
+ *   `**Centre-Left Media (Le Monde, Der Spiegel, Guardian EU section):**`
+ *   `**Tech-Beat Media (TechCrunch EU, The Verge, Politico Tech):**`
+ *
+ * This function takes the first `Media (…)` parenthetical on each line,
+ * splits it on commas, trims whitespace and trailing asterisks from each
+ * piece, and de-duplicates case-insensitively (first-seen casing wins).
+ *
+ * @param markdown - Raw media-framing-analysis.md contents
+ * @returns De-duplicated media-outlet names in order of first appearance
+ */
+export declare function extractMediaOutletNames(markdown: string): readonly string[];
+/**
+ * Collect SEO `mentions` entities for an analysis run by combining
+ * stakeholder names (`intelligence/stakeholder-map.md`) and media-outlet
+ * names (`extended/media-framing-analysis.md`). A missing or unreadable
+ * file simply contributes no names; nothing is thrown for it.
+ *
+ * Stakeholders are listed first (high-signal political-group / institution
+ * entities), media outlets second. The combined list is de-duplicated
+ * case-insensitively and capped at 30 entries (`MAX_MENTIONS`).
+ *
+ * @param runDir - Absolute analysis run directory path
+ * @returns Ordered, de-duplicated mentions list (may be empty)
+ */
+export declare function extractRunMentions(runDir: string): readonly string[];
+//# sourceMappingURL=seo-entity-extractor.d.ts.map

package/scripts/aggregator/seo-entity-extractor.js ADDED Viewed

@@ -0,0 +1,211 @@
+// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * @module Aggregator/SeoEntityExtractor
+ * @description Extract real-world organizations named in an analysis run's
+ * `intelligence/stakeholder-map.md` and `extended/media-framing-analysis.md`
+ * artifacts for emission as JSON-LD `mentions` entries on every language
+ * variant of the rendered article.
+ *
+ * The same English-extracted list is reused across all 14 language variants
+ * because the entities are language-independent proper nouns (political
+ * groups, EU institutions, media outlets) — search engines and AI overviews
+ * benefit from consistent entity grounding regardless of which language
+ * the surrounding prose is in.
+ */
+import fs from 'fs';
+import path from 'path';
+/**
+ * Maximum number of mentions emitted into the JSON-LD `mentions` array.
+ * Schema.org accepts arbitrarily many entries, but indexers commonly cap
+ * structured-data entity lists at ~30 — staying at or under this avoids
+ * truncation and keeps the rendered JSON-LD blob compact.
+ */
+const MAX_MENTIONS = 30;
+/**
+ * Minimum length for an extracted entity name, in UTF-16 code units. With
+ * the value 2 only single-character candidates are rejected — almost
+ * certainly a fragment (stray letter) rather than a real organization.
+ */
+const MIN_ENTITY_LENGTH = 2;
+/**
+ * Maximum length (UTF-16 code units) for an extracted entity name. Anything
+ * longer is almost certainly a misparsed sentence fragment.
+ */
+const MAX_ENTITY_LENGTH = 80;
+/**
+ * Read a UTF-8 file at `relPath` joined onto `runDir`. Returns `null` when
+ * the path does not exist or reading throws — the extractor treats absent
+ * intelligence artifacts as a soft signal (no mentions) rather than an error.
+ *
+ * @param runDir - Absolute path to the analysis run directory
+ * @param relPath - Forward-slash path under `runDir` (not checked for `..`)
+ * @returns File contents (possibly an empty string) or `null`
+ */
+function readRunFile(runDir, relPath) {
+    const abs = path.join(runDir, relPath);
+    try {
+        if (!fs.existsSync(abs))
+            return null;
+        return fs.readFileSync(abs, 'utf8');
+    }
+    catch {
+        return null;
+    }
+}
+/**
+ * Extract organization names from `intelligence/stakeholder-map.md`'s H3
+ * headings (lines starting with `### `). Stakeholder headings are shaped like:
+ *   `### EPP — Manfred Weber / 185 MEPs (25.73%)`
+ *   `### European Commission — Ursula von der Leyen (EPP)`
+ *   `### Tech Industry (Big Tech Gatekeepers)`
+ *
+ * The entity name is everything before the first em-dash, en-dash, slash,
+ * opening parenthesis, or colon — whichever comes first — trimmed and
+ * de-duplicated case-insensitively (first-seen casing wins). "Risk N: …"
+ * headings are skipped: they describe risk scenarios, not organizations.
+ *
+ * @param markdown - Raw stakeholder-map.md contents
+ * @returns De-duplicated stakeholder names in order of first appearance
+ */
+export function extractStakeholderNames(markdown) {
+    const lines = markdown.split('\n');
+    const names = [];
+    const seen = new Set();
+    for (const rawLine of lines) {
+        if (!rawLine.startsWith('### '))
+            continue;
+        const headingText = rawLine.slice(4).trim();
+        if (!headingText)
+            continue;
+        // Skip risk-scenario headings (case-insensitive): `### Risk N: …` / `### Risk 1: PfE Internal Split…`
+        if (/^risk\s+\d+\s*:/i.test(headingText))
+            continue;
+        // Keep only the text before the first em-dash, en-dash, slash, opening paren, or colon.
+        const splitIdx = findFirstSplitChar(headingText);
+        const candidate = splitIdx >= 0 ? headingText.slice(0, splitIdx) : headingText;
+        const name = candidate.trim().replace(/\*+$/, '').trim();
+        if (!isValidEntityName(name))
+            continue;
+        const key = name.toLowerCase();
+        if (seen.has(key))
+            continue;
+        seen.add(key);
+        names.push(name);
+    }
+    return names;
+}
+/**
+ * Find the index of the earliest heading separator (em-dash, en-dash, `/`,
+ * `(` or `:`). Uses one `indexOf` per separator instead of a regex to
+ * satisfy CodeQL's regex-injection / catastrophic-backtracking lints (cf.
+ * `replaceFirstStringIn` in `html/localize-body.ts`).
+ *
+ * @param text - Heading text (without the leading `### `)
+ * @returns Smallest index at which any separator occurs, or `-1` if none
+ */
+function findFirstSplitChar(text) {
+    const separators = ['—', '–', '/', '(', ':'];
+    let best = -1;
+    for (const sep of separators) {
+        const idx = text.indexOf(sep);
+        if (idx >= 0 && (best < 0 || idx < best))
+            best = idx;
+    }
+    return best;
+}
+/**
+ * Extract media-outlet names from `extended/media-framing-analysis.md`.
+ * Editorial convention is a series of bold "framing buckets":
+ *   `**Centre-Left Media (Le Monde, Der Spiegel, Guardian EU section):**`
+ *   `**Tech-Beat Media (TechCrunch EU, The Verge, Politico Tech):**`
+ *
+ * This function takes the first `Media (…)` parenthetical on each line,
+ * splits it on commas, trims whitespace and trailing asterisks from each
+ * piece, and de-duplicates case-insensitively (first-seen casing wins).
+ *
+ * @param markdown - Raw media-framing-analysis.md contents
+ * @returns De-duplicated media-outlet names in order of first appearance
+ */
+export function extractMediaOutletNames(markdown) {
+    const lines = markdown.split('\n');
+    const names = [];
+    const seen = new Set();
+    for (const rawLine of lines) {
+        // Look for a parenthesised outlet list introduced by `Media (`.
+        // Expected pattern: `**…Media (X, Y, Z):**` (the bold markers are not
+        // checked); anchoring on `Media (` skips unrelated parentheticals.
+        const mediaIdx = rawLine.indexOf('Media (');
+        if (mediaIdx < 0)
+            continue;
+        const openParen = rawLine.indexOf('(', mediaIdx);
+        if (openParen < 0)
+            continue;
+        const closeParen = rawLine.indexOf(')', openParen);
+        if (closeParen < 0)
+            continue;
+        const inner = rawLine.slice(openParen + 1, closeParen);
+        for (const piece of inner.split(',')) {
+            const candidate = piece.trim().replace(/\*+$/, '').trim();
+            if (!isValidEntityName(candidate))
+                continue;
+            const key = candidate.toLowerCase();
+            if (seen.has(key))
+                continue;
+            seen.add(key);
+            names.push(candidate);
+        }
+    }
+    return names;
+}
+/**
+ * Guard for extracted-entity sanity: rejects empty names, names outside
+ * MIN_ENTITY_LENGTH..MAX_ENTITY_LENGTH, and names with no ASCII letter.
+ *
+ * @param name - Candidate entity name
+ * @returns `true` when the name is a plausible organization label
+ */
+function isValidEntityName(name) {
+    if (!name)
+        return false;
+    if (name.length < MIN_ENTITY_LENGTH)
+        return false;
+    if (name.length > MAX_ENTITY_LENGTH)
+        return false;
+    // Require at least one ASCII letter: drops punctuation-, digit- and non-Latin-only candidates.
+    if (!/[A-Za-z]/.test(name))
+        return false;
+    return true;
+}
+/**
+ * Collect SEO `mentions` entities for an analysis run by combining
+ * stakeholder names (`intelligence/stakeholder-map.md`) and media-outlet
+ * names (`extended/media-framing-analysis.md`). A missing, unreadable or
+ * empty file simply contributes no names; nothing is thrown for it.
+ *
+ * Stakeholders are listed first (high-signal political-group / institution
+ * entities), media outlets second. The merged list is de-duplicated
+ * case-insensitively and stops growing at {@link MAX_MENTIONS} entries.
+ *
+ * @param runDir - Absolute analysis run directory path
+ * @returns Ordered, de-duplicated mentions list (may be empty)
+ */
+export function extractRunMentions(runDir) {
+    const stakeholderMd = readRunFile(runDir, 'intelligence/stakeholder-map.md');
+    const mediaMd = readRunFile(runDir, 'extended/media-framing-analysis.md');
+    const stakeholders = stakeholderMd ? extractStakeholderNames(stakeholderMd) : [];
+    const mediaOutlets = mediaMd ? extractMediaOutletNames(mediaMd) : [];
+    const merged = [];
+    const seen = new Set();
+    for (const name of [...stakeholders, ...mediaOutlets]) {
+        const key = name.toLowerCase();
+        if (seen.has(key))
+            continue;
+        seen.add(key);
+        merged.push(name);
+        if (merged.length >= MAX_MENTIONS)
+            break;
+    }
+    return merged;
+}
+//# sourceMappingURL=seo-entity-extractor.js.map

package/scripts/discover-untranslated-briefs.js CHANGED Viewed

@@ -60,6 +60,16 @@
  *                              #   flagged largeSource:true and the agent
  *                              #   switches to a 2-phase skeleton-then-edit
  *                              #   translation strategy (see news-translate.md)
+ *     [--target-brief <id>]   # optional operator override: when set, the
+ *                              #   queue contains ONLY this brief regardless
+ *                              #   of mode / max-briefs / max-age-days.
+ *                              #   Accepted forms:
+ *                              #     YYYY-MM-DD/<slug>
+ *                              #     YYYY-MM-DD/<slug>/extended
+ *                              #     analysis/daily/YYYY-MM-DD/<slug>/executive-brief.md
+ *                              #     analysis/daily/YYYY-MM-DD/<slug>/extended/executive-brief.md
+ *                              #   Used by operator-dispatched runs that need
+ *                              #   to (re)translate one specific brief.
  *     [--output <path>]       # default stdout
  *     [--include-extended]    # also scan extended/executive-brief.md
  *
@@ -144,6 +154,75 @@ export const DISCOVERY_MODES = Object.freeze([
   'newest-first',
 ]);
+/**
+ * Parse a `--target-brief` operator override into a `{ date, slug, isExtended }`
+ * triple. Accepts four operator-friendly forms (1 ≡ 3 and 2 ≡ 4) so the input
+ * works whether the operator copies a path out of the repo, a date/slug pair
+ * out of the discovery JSON, or types the canonical short form by hand:
+ *
+ *   1. `YYYY-MM-DD/<slug>`                                    — short form
+ *   2. `YYYY-MM-DD/<slug>/extended`                           — extended legacy path
+ *   3. `analysis/daily/YYYY-MM-DD/<slug>/executive-brief.md`  — full repo path
+ *   4. `analysis/daily/YYYY-MM-DD/<slug>/extended/executive-brief.md`
+ *
+ * Validation is intentionally strict (no leading `/`, no `..`, `\` or NUL; fixed
+ * date shape, not calendar validity; slug character class) — the value flows
+ * from a workflow_dispatch string input into a filesystem lookup, so a
+ * permissive parser would be a directory-traversal foothold.
+ *
+ * Never returns null: callers must filter out empty input BEFORE calling.
+ *
+ * @param {string} spec — already-trimmed, non-empty operator input
+ * @returns {{ date: string, slug: string, isExtended: boolean }}
+ * @throws {Error} on any malformed spec (message is prefixed `--target-brief:`)
+ */
+export function parseTargetBriefSpec(spec) {
+  // Strip leading "analysis/daily/" prefix and trailing "/executive-brief.md"
+  // so all four accepted forms collapse to "<date>/<slug>" or
+  // "<date>/<slug>/extended".
+  let core = spec;
+  if (core.startsWith('analysis/daily/')) {
+    core = core.slice('analysis/daily/'.length);
+  }
+  if (core.endsWith('/executive-brief.md')) {
+    core = core.slice(0, -'/executive-brief.md'.length);
+  }
+  // Reject path-traversal or absolute-path attempts on the stripped core, before splitting.
+  if (
+    core.startsWith('/') ||
+    core.includes('..') ||
+    core.includes('\\') ||
+    core.includes('\0')
+  ) {
+    throw new Error(
+      `--target-brief: refusing path-traversal or absolute path in "${spec}"`,
+    );
+  }
+  const parts = core.split('/');
+  let isExtended = false;
+  if (parts.length === 3 && parts[2] === 'extended') {
+    isExtended = true;
+  } else if (parts.length !== 2) {
+    throw new Error(
+      `--target-brief: expected "YYYY-MM-DD/<slug>" or "YYYY-MM-DD/<slug>/extended" (got "${spec}")`,
+    );
+  }
+  const [date, slug] = parts;
+  if (!/^\d{4}-\d{2}-\d{2}$/.test(date)) {
+    throw new Error(
+      `--target-brief: date "${date}" is not in YYYY-MM-DD format (from "${spec}")`,
+    );
+  }
+  // Slug character class matches the existing on-disk convention used by
+  // src/config/article-horizons.ts (lowercase, digits, dashes).
+  if (!/^[a-z0-9][a-z0-9-]{0,63}$/.test(slug)) {
+    throw new Error(
+      `--target-brief: slug "${slug}" must match [a-z0-9][a-z0-9-]{0,63} (from "${spec}")`,
+    );
+  }
+  return { date, slug, isExtended };
+}
 /**
  * Parse CLI argv into an options object. Exported for unit tests.
  * @param {string[]} argv
@@ -158,6 +237,7 @@ export function parseArgs(argv) {
     mode: 'fresh-then-backlog',
     runNumber: 0,
     maxSourceLines: DEFAULT_MAX_SOURCE_LINES,
+    targetBrief: null,
   };
   for (let i = 0; i < argv.length; i += 1) {
     const arg = argv[i];
@@ -193,6 +273,21 @@ export function parseArgs(argv) {
         opts.maxSourceLines = Number.parseInt(argv[i + 1], 10);
         i += 1;
         break;
+      case '--target-brief': {
+        const raw = argv[i + 1];
+        i += 1;
+        // Normalize and validate. Empty / whitespace-only / the literal
+        // string "none" is treated as "no override" so the workflow can
+        // wire `TARGET_BRIEF: ${{ inputs.target_brief }}` without having
+        // to special-case the empty-default case in bash.
+        const trimmed = typeof raw === 'string' ? raw.trim() : '';
+        if (trimmed === '' || trimmed === 'none') {
+          opts.targetBrief = null;
+          break;
+        }
+        opts.targetBrief = parseTargetBriefSpec(trimmed);
+        break;
+      }
       case '--help':
       case '-h':
         printHelp();
@@ -232,7 +327,8 @@ function printHelp() {
   process.stdout.write(
     'Usage: discover-untranslated-briefs.js [--repo-root <path>] ' +
       '[--max-briefs <n>] [--max-age-days <n>] [--mode <name>] ' +
-      '[--run-number <n>] [--max-source-lines <n>] [--output <path>] [--include-extended]\n',
+      '[--run-number <n>] [--max-source-lines <n>] [--target-brief <YYYY-MM-DD/slug>] ' +
+      '[--output <path>] [--include-extended]\n',
   );
 }
@@ -402,9 +498,15 @@ export function countFixedTokens(absPath) {
  * Build the prioritised queue. See module docstring for ordering rules.
  *
  * @param {ReturnType<typeof findExecutiveBriefSources>} sources
- * @param {number | { maxBriefs: number, mode?: string, runNumber?: number, maxSourceLines?: number }} options
+ * @param {number | {
+ *   maxBriefs: number,
+ *   mode?: string,
+ *   runNumber?: number,
+ *   maxSourceLines?: number,
+ *   targetBrief?: { date: string, slug: string, isExtended: boolean } | null,
+ * }} options
  *   Numeric form retained for backward compatibility — equivalent to
- *   `{ maxBriefs, mode: 'fresh-then-backlog', runNumber: 0, maxSourceLines: DEFAULT_MAX_SOURCE_LINES }`.
+ *   `{ maxBriefs, mode: 'fresh-then-backlog', runNumber: 0, maxSourceLines: DEFAULT_MAX_SOURCE_LINES, targetBrief: null }`.
  */
 export function buildQueue(sources, options) {
   const opts =
@@ -414,6 +516,7 @@ export function buildQueue(sources, options) {
           mode: 'fresh-then-backlog',
           runNumber: 0,
           maxSourceLines: DEFAULT_MAX_SOURCE_LINES,
+          targetBrief: null,
         }
       : {
           maxBriefs: options.maxBriefs,
@@ -422,6 +525,7 @@ export function buildQueue(sources, options) {
           maxSourceLines: Number.isFinite(options.maxSourceLines)
             ? options.maxSourceLines
             : DEFAULT_MAX_SOURCE_LINES,
+          targetBrief: options.targetBrief || null,
         };
   if (!DISCOVERY_MODES.includes(opts.mode)) {
     throw new Error(
@@ -493,7 +597,20 @@ export function buildQueue(sources, options) {
   };
   let queue;
-  if (opts.mode === 'newest-first') {
+  if (opts.targetBrief) {
+    // Operator override: ignore mode / maxBriefs / parity and queue exactly
+    // the one brief the operator asked for, IF it has any missing languages.
+    // If the targeted brief is fully translated (no gaps), the queue is
+    // empty — the workflow's downstream validator handles the empty-queue
+    // case gracefully (skip with no work to do).
+    const tb = opts.targetBrief;
+    queue = withGaps.filter(
+      (entry) =>
+        entry.date === tb.date &&
+        entry.slug === tb.slug &&
+        entry.isExtended === tb.isExtended,
+    );
+  } else if (opts.mode === 'newest-first') {
     queue = [...withGaps].sort(newestFirst).slice(0, opts.maxBriefs);
   } else if (opts.mode === 'backlog-only') {
     queue = [...withGaps].sort(oldestFirstFinishPartial).slice(0, opts.maxBriefs);
@@ -577,6 +694,7 @@ export function main(argv) {
     mode: opts.mode,
     runNumber: opts.runNumber,
     maxSourceLines: opts.maxSourceLines,
+    targetBrief: opts.targetBrief,
   });
   const payload = {
     generatedAt: new Date().toISOString(),
@@ -587,6 +705,7 @@ export function main(argv) {
       mode: opts.mode,
       runNumber: opts.runNumber,
       maxSourceLines: opts.maxSourceLines,
+      targetBrief: opts.targetBrief,
     },
     totals,
     queue,

package/scripts/generators/news-indexes/per-language.js CHANGED Viewed

@@ -8,7 +8,7 @@
  * the monolithic `news-indexes.ts` so the HTML/SEO surface can be
  * regression-tested independently of discovery and write logic.
  */
-import { APP_VERSION, BUILD_SHORT, BASE_URL } from '../../constants/config.js';
+import { APP_VERSION, BUILD_SHORT, BUILD_TIME, BASE_URL } from '../../constants/config.js';
 import { getNewsIndexSeo } from '../seo-copy.js';
 import { buildHeadFreshnessTags } from '../../constants/build-info-meta.js';
 import { ALL_LANGUAGES, LANGUAGE_NAMES, LANGUAGE_FLAGS, PAGE_TITLES, PAGE_DESCRIPTIONS, SECTION_HEADINGS, NO_ARTICLES_MESSAGES, SKIP_LINK_TEXTS, AI_SECTION_CONTENT, FILTER_LABELS, ARTICLE_TYPE_LABELS, HEADER_SUBTITLE_LABELS, getLocalizedString, getTextDirection, } from '../../constants/languages.js';
@@ -186,6 +186,8 @@ export function generateIndexHTML(lang, articles, metaMap = new Map()) {
         inLanguage: lang,
         isPartOf: { '@type': 'WebSite', name: SITE_NAME, url: BASE_URL },
         publisher: { '@id': `${BASE_URL}/#organization` },
+        datePublished: BUILD_TIME,
+        dateModified: BUILD_TIME,
         breadcrumb: {
             '@type': 'BreadcrumbList',
             itemListElement: [
@@ -201,12 +203,24 @@ export function generateIndexHTML(lang, articles, metaMap = new Map()) {
         mainEntity: {
             '@type': 'ItemList',
             numberOfItems: Math.min(articles.length, 50),
-            itemListElement: articles.slice(0, 50).map((a, idx) => ({
-                '@type': 'ListItem',
-                position: idx + 1,
-                url: `${BASE_URL}/news/${a.filename}`,
-                name: metaMap.get(a.filename)?.title ?? formatSlug(a.slug),
-            })),
+            itemListElement: articles.slice(0, 50).map((a, idx) => {
+                const url = `${BASE_URL}/news/${a.filename}`;
+                const headline = metaMap.get(a.filename)?.title ?? formatSlug(a.slug);
+                return {
+                    '@type': 'ListItem',
+                    position: idx + 1,
+                    url,
+                    item: {
+                        '@type': 'NewsArticle',
+                        '@id': url,
+                        url,
+                        headline,
+                        name: headline,
+                        datePublished: a.date,
+                        inLanguage: a.lang,
+                    },
+                };
+            }),
         },
     }).replace(/</g, '\\u003c');
     const faqJsonLd = JSON.stringify({

package/scripts/generators/political-intelligence/html.js CHANGED Viewed

@@ -12,7 +12,7 @@
  * link points at GitHub blob/tree URLs so readers can audit the raw
  * tradecraft behind every published article.
  */
-import { BASE_URL, BUILD_SHORT, THEME_TOGGLE_SCRIPT } from '../../constants/config.js';
+import { BASE_URL, BUILD_SHORT, BUILD_TIME, THEME_TOGGLE_SCRIPT } from '../../constants/config.js';
 import { buildHeadFreshnessTags } from '../../constants/build-info-meta.js';
 import { ALL_LANGUAGES, LANGUAGE_FLAGS, LANGUAGE_NAMES, PAGE_TITLES, SKIP_LINK_TEXTS, getLocalizedString, getTextDirection, } from '../../constants/languages.js';
 import { buildOgLocaleTags, ORG_SAME_AS, buildTwitterAttributionTags, } from '../../constants/seo/index.js';
@@ -263,6 +263,7 @@ export function generatePoliticalIntelligenceHTML(lang, data) {
             height: 192,
         },
     };
+    const publisherRef = { '@id': `${BASE_URL}/#organization` };
     const jsonLd = {
         '@context': SCHEMA_ORG,
         '@type': 'CollectionPage',
@@ -270,13 +271,15 @@ export function generatePoliticalIntelligenceHTML(lang, data) {
         url: canonicalUrl,
         description: copy.intro,
         inLanguage: safeLang,
-        author: publisher,
-        publisher,
+        author: publisherRef,
+        publisher: publisherRef,
+        datePublished: BUILD_TIME,
+        dateModified: BUILD_TIME,
         isPartOf: {
             '@type': 'WebSite',
             name: SITE_NAME,
             url: BASE_URL,
-            publisher,
+            publisher: publisherRef,
         },
         breadcrumb: {
             '@type': 'BreadcrumbList',
@@ -299,25 +302,53 @@ export function generatePoliticalIntelligenceHTML(lang, data) {
                     '@type': 'ListItem',
                     position: 1,
                     name: copy.methodologiesHeading,
-                    item: `${canonicalUrl}#pi-methodologies`,
+                    url: `${canonicalUrl}#pi-methodologies`,
+                    item: {
+                        '@type': 'WebPageElement',
+                        '@id': `${canonicalUrl}#pi-methodologies`,
+                        url: `${canonicalUrl}#pi-methodologies`,
+                        name: copy.methodologiesHeading,
+                        inLanguage: safeLang,
+                    },
                 },
                 {
                     '@type': 'ListItem',
                     position: 2,
                     name: copy.templatesHeading,
-                    item: `${canonicalUrl}#pi-templates`,
+                    url: `${canonicalUrl}#pi-templates`,
+                    item: {
+                        '@type': 'WebPageElement',
+                        '@id': `${canonicalUrl}#pi-templates`,
+                        url: `${canonicalUrl}#pi-templates`,
+                        name: copy.templatesHeading,
+                        inLanguage: safeLang,
+                    },
                 },
                 {
                     '@type': 'ListItem',
                     position: 3,
                     name: copy.referenceHeading,
-                    item: `${canonicalUrl}#pi-reference`,
+                    url: `${canonicalUrl}#pi-reference`,
+                    item: {
+                        '@type': 'WebPageElement',
+                        '@id': `${canonicalUrl}#pi-reference`,
+                        url: `${canonicalUrl}#pi-reference`,
+                        name: copy.referenceHeading,
+                        inLanguage: safeLang,
+                    },
                 },
                 {
                     '@type': 'ListItem',
                     position: 4,
                     name: copy.dailyHeading,
-                    item: `${canonicalUrl}#pi-daily`,
+                    url: `${canonicalUrl}#pi-daily`,
+                    item: {
+                        '@type': 'WebPageElement',
+                        '@id': `${canonicalUrl}#pi-daily`,
+                        url: `${canonicalUrl}#pi-daily`,
+                        name: copy.dailyHeading,
+                        inLanguage: safeLang,
+                    },
                 },
             ],
         },

package/scripts/generators/sitemap/html.js CHANGED Viewed

@@ -19,7 +19,7 @@
  * `test/unit/sitemap-byte-equality.test.js` (compares against the
  * golden snapshots taken from `npm run prebuild`).
  */
-import { BASE_URL, BUILD_SHORT, THEME_TOGGLE_SCRIPT } from '../../constants/config.js';
+import { BASE_URL, BUILD_SHORT, BUILD_TIME, THEME_TOGGLE_SCRIPT } from '../../constants/config.js';
 import { buildHeadFreshnessTags } from '../../constants/build-info-meta.js';
 import { getSitemapSeo } from '../seo-copy.js';
 import { ALL_LANGUAGES, LANGUAGE_NAMES, LANGUAGE_FLAGS, PAGE_TITLES, PAGE_DESCRIPTIONS, SKIP_LINK_TEXTS, getLocalizedString, getTextDirection, } from '../../constants/languages.js';
@@ -192,6 +192,8 @@ ${items}
             url: BASE_URL,
         },
         publisher: { '@id': `${BASE_URL}/#organization` },
+        datePublished: BUILD_TIME,
+        dateModified: BUILD_TIME,
         breadcrumb: {
             '@type': 'BreadcrumbList',
             itemListElement: [
@@ -213,12 +215,28 @@ ${items}
             '@type': 'ItemList',
             numberOfItems: Math.min(articleInfos.length, 50),
             name: sections.news,
-            itemListElement: articleInfos.slice(0, 50).map((info, idx) => ({
-                '@type': 'ListItem',
-                position: idx + 1,
-                url: `${BASE_URL}/news/${info.filename}`,
-                name: info.title,
-            })),
+            itemListElement: articleInfos.slice(0, 50).map((info, idx) => {
+                const url = `${BASE_URL}/news/${info.filename}`;
+                // Extract per-article language from filename suffix (e.g. `…-foo-en.html` → `en`).
+                // Mirrors the canonical `<slug>-<lang>.html` convention enforced by
+                // `ARTICLE_FILENAME_PATTERN` / `getArticleFilename()`.
+                const langMatch = /-([a-z]{2})\.html$/.exec(info.filename);
+                const articleLang = langMatch ? langMatch[1] : lang;
+                return {
+                    '@type': 'ListItem',
+                    position: idx + 1,
+                    url,
+                    item: {
+                        '@type': 'NewsArticle',
+                        '@id': url,
+                        url,
+                        headline: info.title,
+                        name: info.title,
+                        datePublished: info.date,
+                        inLanguage: articleLang,
+                    },
+                };
+            }),
         },
     };
     const jsonLdString = JSON.stringify(jsonLd).replace(/</g, '\\u003c');

package/scripts/mcp/ep/error-classifier.d.ts CHANGED Viewed

@@ -6,7 +6,7 @@ import type { MCPToolResult } from '../../types/index.js';
 /**
  * Classify an error message into a diagnostic error category.
  *
- * Maps EP MCP Server v1.3.9 structured error codes and generic HTTP/network
+ * Maps EP MCP Server v1.3.10 structured error codes and generic HTTP/network
  * errors into one of six broad categories used for logging and retry decisions:
  *
  * Returned categories (priority order):
@@ -26,7 +26,7 @@ export declare function classifyToolError(message: string): string;
  * covering the two shapes historically emitted by the EP MCP server.
  *
  * 1. **Uniform envelope** (all feeds as of
- *    `european-parliament-mcp-server@1.3.9`) —
+ *    `european-parliament-mcp-server@1.3.10`) —
  *    `{status:"unavailable", items:[], generatedAt:"..."}` established by
  *    Hack23/European-Parliament-MCP-Server#301 and extended to
  *    `get_events_feed`/`get_procedures_feed` by