euparliamentmonitor 0.9.20 → 0.9.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Extract organization names from `intelligence/stakeholder-map.md`'s H3
3
+ * headings. Each tier-1/tier-2 stakeholder appears as a heading shaped like:
4
+ * `### EPP — Manfred Weber / 185 MEPs (25.73%)`
5
+ * `### European Commission — Ursula von der Leyen (EPP)`
6
+ * `### Tech Industry (Big Tech Gatekeepers)`
7
+ *
8
+ * The entity name is everything before the first em-dash, en-dash, slash,
9
+ * parenthesis, or colon — whichever comes first — trimmed and de-duplicated
10
+ * with case-insensitive equality. "Risk N: …" headings are filtered out
11
+ * because they describe risk scenarios rather than organizations.
12
+ *
13
+ * @param markdown - Raw stakeholder-map.md contents
14
+ * @returns Ordered, de-duplicated stakeholder names
15
+ */
16
+ export declare function extractStakeholderNames(markdown: string): readonly string[];
17
+ /**
18
+ * Extract media-outlet names from `extended/media-framing-analysis.md`.
19
+ * Editorial convention is a series of bold "framing buckets":
20
+ * `**Centre-Left Media (Le Monde, Der Spiegel, Guardian EU section):**`
21
+ * `**Tech-Beat Media (TechCrunch EU, The Verge, Politico Tech):**`
22
+ *
23
+ * This function pulls every comma-separated outlet from each parenthetical
24
+ * list, trims trailing asterisks, and de-duplicates with
25
+ * case-insensitive equality.
26
+ *
27
+ * @param markdown - Raw media-framing-analysis.md contents
28
+ * @returns Ordered, de-duplicated media-outlet names
29
+ */
30
+ export declare function extractMediaOutletNames(markdown: string): readonly string[];
31
+ /**
32
+ * Collect SEO `mentions` entities for an analysis run by combining
33
+ * stakeholder names and media-outlet names from the run's intelligence
34
+ * and extended folders. Returns a single deduplicated, length-capped
35
+ * list ready to feed into JSON-LD `mentions`.
36
+ *
37
+ * Stakeholders are listed first (high-signal political-group / institution
38
+ * entities), media outlets second. The combined list is truncated to
39
+ * {@link MAX_MENTIONS} entries.
40
+ *
41
+ * @param runDir - Absolute analysis run directory path
42
+ * @returns Ordered, de-duplicated mentions list (may be empty)
43
+ */
44
+ export declare function extractRunMentions(runDir: string): readonly string[];
45
+ //# sourceMappingURL=seo-entity-extractor.d.ts.map
@@ -0,0 +1,211 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * @module Aggregator/SeoEntityExtractor
5
+ * @description Extract real-world organizations named in an analysis run's
6
+ * `intelligence/stakeholder-map.md` and `extended/media-framing-analysis.md`
7
+ * artifacts for emission as JSON-LD `mentions` entries on every language
8
+ * variant of the rendered article.
9
+ *
10
+ * The same English-extracted list is reused across all 14 language variants
11
+ * because the entities are language-independent proper nouns (political
12
+ * groups, EU institutions, media outlets) — search engines and AI overviews
13
+ * benefit from consistent entity grounding regardless of which language
14
+ * the surrounding prose is in.
15
+ */
16
+ import fs from 'fs';
17
+ import path from 'path';
18
+ /**
19
+ * Maximum number of mentions emitted into the JSON-LD `mentions` array.
20
+ * Schema.org accepts arbitrarily many entries, but indexers commonly cap
21
+ * structured-data entity lists at ~30 — staying under this avoids
22
+ * truncation and keeps the rendered JSON-LD blob compact.
23
+ */
24
+ const MAX_MENTIONS = 30;
25
+ /**
26
+ * Minimum length for an extracted entity name. Below this, the candidate
27
+ * is almost certainly a fragment (single capital letter, lone particle)
28
+ * rather than a real organization.
29
+ */
30
+ const MIN_ENTITY_LENGTH = 2;
31
+ /**
32
+ * Maximum length for an extracted entity name. Anything longer is almost
33
+ * certainly a misparsed sentence fragment.
34
+ */
35
+ const MAX_ENTITY_LENGTH = 80;
36
+ /**
37
+ * Read a UTF-8 file relative to `runDir`. Returns `null` when the path is
38
+ * missing or unreadable — the extractor treats absent intelligence
39
+ * artifacts as a soft signal (no mentions to emit) rather than an error.
40
+ *
41
+ * @param runDir - Absolute path to the analysis run directory
42
+ * @param relPath - Forward-slash path under `runDir`
43
+ * @returns File contents or `null`
44
+ */
45
+ function readRunFile(runDir, relPath) {
46
+ const abs = path.join(runDir, relPath);
47
+ try {
48
+ if (!fs.existsSync(abs))
49
+ return null;
50
+ return fs.readFileSync(abs, 'utf8');
51
+ }
52
+ catch {
53
+ return null;
54
+ }
55
+ }
56
+ /**
57
+ * Extract organization names from `intelligence/stakeholder-map.md`'s H3
58
+ * headings. Each tier-1/tier-2 stakeholder appears as a heading shaped like:
59
+ * `### EPP — Manfred Weber / 185 MEPs (25.73%)`
60
+ * `### European Commission — Ursula von der Leyen (EPP)`
61
+ * `### Tech Industry (Big Tech Gatekeepers)`
62
+ *
63
+ * The entity name is everything before the first em-dash, en-dash, slash,
64
+ * parenthesis, or colon — whichever comes first — trimmed and de-duplicated
65
+ * with case-insensitive equality. "Risk N: …" headings are filtered out
66
+ * because they describe risk scenarios rather than organizations.
67
+ *
68
+ * @param markdown - Raw stakeholder-map.md contents
69
+ * @returns Ordered, de-duplicated stakeholder names
70
+ */
71
+ export function extractStakeholderNames(markdown) {
72
+ const lines = markdown.split('\n');
73
+ const names = [];
74
+ const seen = new Set();
75
+ for (const rawLine of lines) {
76
+ if (!rawLine.startsWith('### '))
77
+ continue;
78
+ const headingText = rawLine.slice(4).trim();
79
+ if (!headingText)
80
+ continue;
81
+ // Skip risk-scenario headings: `### Risk N: …` / `### Risk 1: PfE Internal Split…`
82
+ if (/^risk\s+\d+\s*:/i.test(headingText))
83
+ continue;
84
+ // Split on the first em-dash, en-dash, slash, opening paren, or colon.
85
+ const splitIdx = findFirstSplitChar(headingText);
86
+ const candidate = splitIdx >= 0 ? headingText.slice(0, splitIdx) : headingText;
87
+ const name = candidate.trim().replace(/\*+$/, '').trim();
88
+ if (!isValidEntityName(name))
89
+ continue;
90
+ const key = name.toLowerCase();
91
+ if (seen.has(key))
92
+ continue;
93
+ seen.add(key);
94
+ names.push(name);
95
+ }
96
+ return names;
97
+ }
98
/**
 * Locate the earliest stakeholder-heading separator in `text`.
 *
 * The candidates are the em-dash, en-dash, slash, opening parenthesis and
 * colon. Each one is probed with a plain `indexOf` rather than a regex so
 * the helper stays clear of CodeQL's regex-injection /
 * catastrophic-backtracking lints (cf. `replaceFirstStringIn` in
 * `html/localize-body.ts`).
 *
 * @param text - Heading text (without the leading `### `)
 * @returns Index of the first separator, or `-1` if none found
 */
function findFirstSplitChar(text) {
    let earliest = -1;
    ['—', '–', '/', '(', ':'].forEach((mark) => {
        const at = text.indexOf(mark);
        if (at === -1)
            return;
        if (earliest === -1 || at < earliest) {
            earliest = at;
        }
    });
    return earliest;
}
117
/**
 * Extract media-outlet names from `extended/media-framing-analysis.md`.
 *
 * The editorial convention is a series of bold "framing buckets":
 *   `**Centre-Left Media (Le Monde, Der Spiegel, Guardian EU section):**`
 *   `**Tech-Beat Media (TechCrunch EU, The Verge, Politico Tech):**`
 *
 * For each line, the first `Media (` occurrence anchors the search (so
 * unrelated parentheticals in surrounding prose are ignored); the text up
 * to the next `)` is split on commas. Every piece is trimmed, stripped of
 * trailing asterisks, validated, and de-duplicated with case-insensitive
 * equality. Lines without the anchor or without a closing parenthesis
 * contribute nothing.
 *
 * @param markdown - Raw media-framing-analysis.md contents
 * @returns Ordered, de-duplicated media-outlet names
 */
export function extractMediaOutletNames(markdown) {
    const ANCHOR = 'Media (';
    const outlets = [];
    const seenKeys = new Set();
    for (const line of markdown.split('\n')) {
        const anchorAt = line.indexOf(ANCHOR);
        if (anchorAt === -1)
            continue;
        // The anchor ends with the opening parenthesis, so the outlet list
        // begins immediately after it.
        const listStart = anchorAt + ANCHOR.length;
        const listEnd = line.indexOf(')', listStart);
        if (listEnd === -1)
            continue;
        const cleaned = line
            .slice(listStart, listEnd)
            .split(',')
            .map((piece) => piece.trim().replace(/\*+$/, '').trim());
        for (const outlet of cleaned) {
            if (!isValidEntityName(outlet))
                continue;
            const dedupeKey = outlet.toLowerCase();
            if (!seenKeys.has(dedupeKey)) {
                seenKeys.add(dedupeKey);
                outlets.push(outlet);
            }
        }
    }
    return outlets;
}
161
/**
 * Guard for extracted-entity sanity: rejects empty strings, candidates
 * shorter than `MIN_ENTITY_LENGTH` or longer than `MAX_ENTITY_LENGTH`, and
 * candidates that contain no letter at all (pure punctuation, digits, or
 * markdown decoration).
 *
 * The letter test uses the Unicode `Letter` property rather than an ASCII
 * range, so names written entirely in a non-Latin script (e.g. Greek or
 * Cyrillic outlet names) are not mistaken for decoration.
 *
 * @param name - Candidate entity name
 * @returns `true` when the name is a plausible organization label
 */
function isValidEntityName(name) {
    if (!name)
        return false;
    if (name.length < MIN_ENTITY_LENGTH || name.length > MAX_ENTITY_LENGTH)
        return false;
    // Require at least one letter in any script.
    return /\p{L}/u.test(name);
}
180
/**
 * Build the SEO `mentions` list for one analysis run.
 *
 * Stakeholder names (from `intelligence/stakeholder-map.md`) come first,
 * followed by media-outlet names (from
 * `extended/media-framing-analysis.md`). An artifact that is missing,
 * unreadable, or empty contributes nothing. The combined list is
 * de-duplicated case-insensitively (first spelling wins) and truncated to
 * {@link MAX_MENTIONS} entries, ready to feed into JSON-LD `mentions`.
 *
 * @param runDir - Absolute analysis run directory path
 * @returns Ordered, de-duplicated mentions list (may be empty)
 */
export function extractRunMentions(runDir) {
    const artifacts = [
        ['intelligence/stakeholder-map.md', extractStakeholderNames],
        ['extended/media-framing-analysis.md', extractMediaOutletNames],
    ];
    // Gather candidates in priority order: stakeholders, then media outlets.
    const candidates = [];
    for (const [relPath, extract] of artifacts) {
        const markdown = readRunFile(runDir, relPath);
        if (markdown) {
            candidates.push(...extract(markdown));
        }
    }
    const mentions = [];
    const seenKeys = new Set();
    for (const candidate of candidates) {
        const dedupeKey = candidate.toLowerCase();
        if (seenKeys.has(dedupeKey))
            continue;
        seenKeys.add(dedupeKey);
        mentions.push(candidate);
        // Stop as soon as the cap is reached.
        if (mentions.length >= MAX_MENTIONS)
            break;
    }
    return mentions;
}
211
+ //# sourceMappingURL=seo-entity-extractor.js.map
@@ -60,6 +60,16 @@
60
60
  * # flagged largeSource:true and the agent
61
61
  * # switches to a 2-phase skeleton-then-edit
62
62
  * # translation strategy (see news-translate.md)
63
+ * [--target-brief <id>] # optional operator override: when set, the
64
+ * # queue contains ONLY this brief regardless
65
+ * # of mode / max-briefs / max-age-days.
66
+ * # Accepted forms:
67
+ * # YYYY-MM-DD/<slug>
68
+ * # YYYY-MM-DD/<slug>/extended
69
+ * # analysis/daily/YYYY-MM-DD/<slug>/executive-brief.md
70
+ * # analysis/daily/YYYY-MM-DD/<slug>/extended/executive-brief.md
71
+ * # Used by operator-dispatched runs that need
72
+ * # to (re)translate one specific brief.
63
73
  * [--output <path>] # default stdout
64
74
  * [--include-extended] # also scan extended/executive-brief.md
65
75
  *
@@ -144,6 +154,75 @@ export const DISCOVERY_MODES = Object.freeze([
144
154
  'newest-first',
145
155
  ]);
146
156
 
157
/**
 * Parse a `--target-brief` operator override into a `{ date, slug, isExtended }`
 * triple. Accepts four equivalent operator-friendly forms so the same input
 * works whether the operator copies a path out of the repo, a date/slug pair
 * out of the discovery JSON, or types the canonical short form by hand:
 *
 *   1. `YYYY-MM-DD/<slug>`                                        — short form
 *   2. `YYYY-MM-DD/<slug>/extended`                               — extended legacy path
 *   3. `analysis/daily/YYYY-MM-DD/<slug>/executive-brief.md`      — full repo path
 *   4. `analysis/daily/YYYY-MM-DD/<slug>/extended/executive-brief.md`
 *
 * Validation is intentionally strict (fixed date format that must also be a
 * real calendar date, whitelisted slug character class) — the value flows
 * from a workflow_dispatch string input into a filesystem lookup, so a
 * permissive parser would be a directory-traversal foothold, and an
 * impossible date such as `2025-02-30` would otherwise silently produce an
 * empty queue instead of telling the operator about the typo.
 *
 * Throws on any malformed spec; never returns null (callers must check for
 * empty input BEFORE calling this helper).
 *
 * @param {string} spec — already-trimmed, non-empty operator input
 * @returns {{ date: string, slug: string, isExtended: boolean }}
 * @throws {Error} when the spec contains traversal / absolute-path markers,
 *   has the wrong number of segments, or carries an invalid date or slug
 */
export function parseTargetBriefSpec(spec) {
  // Strip leading "analysis/daily/" prefix and trailing "/executive-brief.md"
  // so all four accepted forms collapse to "<date>/<slug>" or
  // "<date>/<slug>/extended".
  let core = spec;
  if (core.startsWith('analysis/daily/')) {
    core = core.slice('analysis/daily/'.length);
  }
  if (core.endsWith('/executive-brief.md')) {
    core = core.slice(0, -'/executive-brief.md'.length);
  }
  // Reject any path-traversal or absolute-path attempts up-front.
  if (
    core.startsWith('/') ||
    core.includes('..') ||
    core.includes('\\') ||
    core.includes('\0')
  ) {
    throw new Error(
      `--target-brief: refusing path-traversal or absolute path in "${spec}"`,
    );
  }
  const parts = core.split('/');
  let isExtended = false;
  if (parts.length === 3 && parts[2] === 'extended') {
    isExtended = true;
  } else if (parts.length !== 2) {
    throw new Error(
      `--target-brief: expected "YYYY-MM-DD/<slug>" or "YYYY-MM-DD/<slug>/extended" (got "${spec}")`,
    );
  }
  const [date, slug] = parts;
  if (!/^\d{4}-\d{2}-\d{2}$/.test(date)) {
    throw new Error(
      `--target-brief: date "${date}" is not in YYYY-MM-DD format (from "${spec}")`,
    );
  }
  // The shape check above admits impossible dates (month 13, Feb 30, …).
  // Verify month / day ranges arithmetically so the result does not depend
  // on the host time zone or on Date's silent overflow normalisation.
  const [year, month, day] = date.split('-').map(Number);
  const isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const daysInMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  if (month < 1 || month > 12 || day < 1 || day > daysInMonth[month - 1]) {
    throw new Error(
      `--target-brief: date "${date}" is not a valid calendar date (from "${spec}")`,
    );
  }
  // Slug character class matches the existing on-disk convention used by
  // src/config/article-horizons.ts (lowercase, digits, dashes).
  if (!/^[a-z0-9][a-z0-9-]{0,63}$/.test(slug)) {
    throw new Error(
      `--target-brief: slug "${slug}" must match [a-z0-9][a-z0-9-]{0,63} (from "${spec}")`,
    );
  }
  return { date, slug, isExtended };
}
225
+
147
226
  /**
148
227
  * Parse CLI argv into an options object. Exported for unit tests.
149
228
  * @param {string[]} argv
@@ -158,6 +237,7 @@ export function parseArgs(argv) {
158
237
  mode: 'fresh-then-backlog',
159
238
  runNumber: 0,
160
239
  maxSourceLines: DEFAULT_MAX_SOURCE_LINES,
240
+ targetBrief: null,
161
241
  };
162
242
  for (let i = 0; i < argv.length; i += 1) {
163
243
  const arg = argv[i];
@@ -193,6 +273,21 @@ export function parseArgs(argv) {
193
273
  opts.maxSourceLines = Number.parseInt(argv[i + 1], 10);
194
274
  i += 1;
195
275
  break;
276
+ case '--target-brief': {
277
+ const raw = argv[i + 1];
278
+ i += 1;
279
+ // Normalize and validate. Empty / whitespace-only / the literal
280
+ // string "none" is treated as "no override" so the workflow can
281
+ // wire `TARGET_BRIEF: ${{ inputs.target_brief }}` without having
282
+ // to special-case the empty-default case in bash.
283
+ const trimmed = typeof raw === 'string' ? raw.trim() : '';
284
+ if (trimmed === '' || trimmed === 'none') {
285
+ opts.targetBrief = null;
286
+ break;
287
+ }
288
+ opts.targetBrief = parseTargetBriefSpec(trimmed);
289
+ break;
290
+ }
196
291
  case '--help':
197
292
  case '-h':
198
293
  printHelp();
@@ -232,7 +327,8 @@ function printHelp() {
232
327
  process.stdout.write(
233
328
  'Usage: discover-untranslated-briefs.js [--repo-root <path>] ' +
234
329
  '[--max-briefs <n>] [--max-age-days <n>] [--mode <name>] ' +
235
- '[--run-number <n>] [--max-source-lines <n>] [--output <path>] [--include-extended]\n',
330
+ '[--run-number <n>] [--max-source-lines <n>] [--target-brief <YYYY-MM-DD/slug>] ' +
331
+ '[--output <path>] [--include-extended]\n',
236
332
  );
237
333
  }
238
334
 
@@ -402,9 +498,15 @@ export function countFixedTokens(absPath) {
402
498
  * Build the prioritised queue. See module docstring for ordering rules.
403
499
  *
404
500
  * @param {ReturnType<typeof findExecutiveBriefSources>} sources
405
- * @param {number | { maxBriefs: number, mode?: string, runNumber?: number, maxSourceLines?: number }} options
501
+ * @param {number | {
502
+ * maxBriefs: number,
503
+ * mode?: string,
504
+ * runNumber?: number,
505
+ * maxSourceLines?: number,
506
+ * targetBrief?: { date: string, slug: string, isExtended: boolean } | null,
507
+ * }} options
406
508
  * Numeric form retained for backward compatibility — equivalent to
407
- * `{ maxBriefs, mode: 'fresh-then-backlog', runNumber: 0, maxSourceLines: DEFAULT_MAX_SOURCE_LINES }`.
509
+ * `{ maxBriefs, mode: 'fresh-then-backlog', runNumber: 0, maxSourceLines: DEFAULT_MAX_SOURCE_LINES, targetBrief: null }`.
408
510
  */
409
511
  export function buildQueue(sources, options) {
410
512
  const opts =
@@ -414,6 +516,7 @@ export function buildQueue(sources, options) {
414
516
  mode: 'fresh-then-backlog',
415
517
  runNumber: 0,
416
518
  maxSourceLines: DEFAULT_MAX_SOURCE_LINES,
519
+ targetBrief: null,
417
520
  }
418
521
  : {
419
522
  maxBriefs: options.maxBriefs,
@@ -422,6 +525,7 @@ export function buildQueue(sources, options) {
422
525
  maxSourceLines: Number.isFinite(options.maxSourceLines)
423
526
  ? options.maxSourceLines
424
527
  : DEFAULT_MAX_SOURCE_LINES,
528
+ targetBrief: options.targetBrief || null,
425
529
  };
426
530
  if (!DISCOVERY_MODES.includes(opts.mode)) {
427
531
  throw new Error(
@@ -493,7 +597,20 @@ export function buildQueue(sources, options) {
493
597
  };
494
598
 
495
599
  let queue;
496
- if (opts.mode === 'newest-first') {
600
+ if (opts.targetBrief) {
601
+ // Operator override: ignore mode / maxBriefs / parity and queue exactly
602
+ // the one brief the operator asked for, IF it has any missing languages.
603
+ // If the targeted brief is fully translated (no gaps), the queue is
604
+ // empty — the workflow's downstream validator handles the empty-queue
605
+ // case gracefully (skip with no work to do).
606
+ const tb = opts.targetBrief;
607
+ queue = withGaps.filter(
608
+ (entry) =>
609
+ entry.date === tb.date &&
610
+ entry.slug === tb.slug &&
611
+ entry.isExtended === tb.isExtended,
612
+ );
613
+ } else if (opts.mode === 'newest-first') {
497
614
  queue = [...withGaps].sort(newestFirst).slice(0, opts.maxBriefs);
498
615
  } else if (opts.mode === 'backlog-only') {
499
616
  queue = [...withGaps].sort(oldestFirstFinishPartial).slice(0, opts.maxBriefs);
@@ -577,6 +694,7 @@ export function main(argv) {
577
694
  mode: opts.mode,
578
695
  runNumber: opts.runNumber,
579
696
  maxSourceLines: opts.maxSourceLines,
697
+ targetBrief: opts.targetBrief,
580
698
  });
581
699
  const payload = {
582
700
  generatedAt: new Date().toISOString(),
@@ -587,6 +705,7 @@ export function main(argv) {
587
705
  mode: opts.mode,
588
706
  runNumber: opts.runNumber,
589
707
  maxSourceLines: opts.maxSourceLines,
708
+ targetBrief: opts.targetBrief,
590
709
  },
591
710
  totals,
592
711
  queue,
@@ -8,7 +8,7 @@
8
8
  * the monolithic `news-indexes.ts` so the HTML/SEO surface can be
9
9
  * regression-tested independently of discovery and write logic.
10
10
  */
11
- import { APP_VERSION, BUILD_SHORT, BASE_URL } from '../../constants/config.js';
11
+ import { APP_VERSION, BUILD_SHORT, BUILD_TIME, BASE_URL } from '../../constants/config.js';
12
12
  import { getNewsIndexSeo } from '../seo-copy.js';
13
13
  import { buildHeadFreshnessTags } from '../../constants/build-info-meta.js';
14
14
  import { ALL_LANGUAGES, LANGUAGE_NAMES, LANGUAGE_FLAGS, PAGE_TITLES, PAGE_DESCRIPTIONS, SECTION_HEADINGS, NO_ARTICLES_MESSAGES, SKIP_LINK_TEXTS, AI_SECTION_CONTENT, FILTER_LABELS, ARTICLE_TYPE_LABELS, HEADER_SUBTITLE_LABELS, getLocalizedString, getTextDirection, } from '../../constants/languages.js';
@@ -186,6 +186,8 @@ export function generateIndexHTML(lang, articles, metaMap = new Map()) {
186
186
  inLanguage: lang,
187
187
  isPartOf: { '@type': 'WebSite', name: SITE_NAME, url: BASE_URL },
188
188
  publisher: { '@id': `${BASE_URL}/#organization` },
189
+ datePublished: BUILD_TIME,
190
+ dateModified: BUILD_TIME,
189
191
  breadcrumb: {
190
192
  '@type': 'BreadcrumbList',
191
193
  itemListElement: [
@@ -201,12 +203,24 @@ export function generateIndexHTML(lang, articles, metaMap = new Map()) {
201
203
  mainEntity: {
202
204
  '@type': 'ItemList',
203
205
  numberOfItems: Math.min(articles.length, 50),
204
- itemListElement: articles.slice(0, 50).map((a, idx) => ({
205
- '@type': 'ListItem',
206
- position: idx + 1,
207
- url: `${BASE_URL}/news/${a.filename}`,
208
- name: metaMap.get(a.filename)?.title ?? formatSlug(a.slug),
209
- })),
206
+ itemListElement: articles.slice(0, 50).map((a, idx) => {
207
+ const url = `${BASE_URL}/news/${a.filename}`;
208
+ const headline = metaMap.get(a.filename)?.title ?? formatSlug(a.slug);
209
+ return {
210
+ '@type': 'ListItem',
211
+ position: idx + 1,
212
+ url,
213
+ item: {
214
+ '@type': 'NewsArticle',
215
+ '@id': url,
216
+ url,
217
+ headline,
218
+ name: headline,
219
+ datePublished: a.date,
220
+ inLanguage: a.lang,
221
+ },
222
+ };
223
+ }),
210
224
  },
211
225
  }).replace(/</g, '\\u003c');
212
226
  const faqJsonLd = JSON.stringify({
@@ -12,7 +12,7 @@
12
12
  * link points at GitHub blob/tree URLs so readers can audit the raw
13
13
  * tradecraft behind every published article.
14
14
  */
15
- import { BASE_URL, BUILD_SHORT, THEME_TOGGLE_SCRIPT } from '../../constants/config.js';
15
+ import { BASE_URL, BUILD_SHORT, BUILD_TIME, THEME_TOGGLE_SCRIPT } from '../../constants/config.js';
16
16
  import { buildHeadFreshnessTags } from '../../constants/build-info-meta.js';
17
17
  import { ALL_LANGUAGES, LANGUAGE_FLAGS, LANGUAGE_NAMES, PAGE_TITLES, SKIP_LINK_TEXTS, getLocalizedString, getTextDirection, } from '../../constants/languages.js';
18
18
  import { buildOgLocaleTags, ORG_SAME_AS, buildTwitterAttributionTags, } from '../../constants/seo/index.js';
@@ -263,6 +263,7 @@ export function generatePoliticalIntelligenceHTML(lang, data) {
263
263
  height: 192,
264
264
  },
265
265
  };
266
+ const publisherRef = { '@id': `${BASE_URL}/#organization` };
266
267
  const jsonLd = {
267
268
  '@context': SCHEMA_ORG,
268
269
  '@type': 'CollectionPage',
@@ -270,13 +271,15 @@ export function generatePoliticalIntelligenceHTML(lang, data) {
270
271
  url: canonicalUrl,
271
272
  description: copy.intro,
272
273
  inLanguage: safeLang,
273
- author: publisher,
274
- publisher,
274
+ author: publisherRef,
275
+ publisher: publisherRef,
276
+ datePublished: BUILD_TIME,
277
+ dateModified: BUILD_TIME,
275
278
  isPartOf: {
276
279
  '@type': 'WebSite',
277
280
  name: SITE_NAME,
278
281
  url: BASE_URL,
279
- publisher,
282
+ publisher: publisherRef,
280
283
  },
281
284
  breadcrumb: {
282
285
  '@type': 'BreadcrumbList',
@@ -299,25 +302,53 @@ export function generatePoliticalIntelligenceHTML(lang, data) {
299
302
  '@type': 'ListItem',
300
303
  position: 1,
301
304
  name: copy.methodologiesHeading,
302
- item: `${canonicalUrl}#pi-methodologies`,
305
+ url: `${canonicalUrl}#pi-methodologies`,
306
+ item: {
307
+ '@type': 'WebPageElement',
308
+ '@id': `${canonicalUrl}#pi-methodologies`,
309
+ url: `${canonicalUrl}#pi-methodologies`,
310
+ name: copy.methodologiesHeading,
311
+ inLanguage: safeLang,
312
+ },
303
313
  },
304
314
  {
305
315
  '@type': 'ListItem',
306
316
  position: 2,
307
317
  name: copy.templatesHeading,
308
- item: `${canonicalUrl}#pi-templates`,
318
+ url: `${canonicalUrl}#pi-templates`,
319
+ item: {
320
+ '@type': 'WebPageElement',
321
+ '@id': `${canonicalUrl}#pi-templates`,
322
+ url: `${canonicalUrl}#pi-templates`,
323
+ name: copy.templatesHeading,
324
+ inLanguage: safeLang,
325
+ },
309
326
  },
310
327
  {
311
328
  '@type': 'ListItem',
312
329
  position: 3,
313
330
  name: copy.referenceHeading,
314
- item: `${canonicalUrl}#pi-reference`,
331
+ url: `${canonicalUrl}#pi-reference`,
332
+ item: {
333
+ '@type': 'WebPageElement',
334
+ '@id': `${canonicalUrl}#pi-reference`,
335
+ url: `${canonicalUrl}#pi-reference`,
336
+ name: copy.referenceHeading,
337
+ inLanguage: safeLang,
338
+ },
315
339
  },
316
340
  {
317
341
  '@type': 'ListItem',
318
342
  position: 4,
319
343
  name: copy.dailyHeading,
320
- item: `${canonicalUrl}#pi-daily`,
344
+ url: `${canonicalUrl}#pi-daily`,
345
+ item: {
346
+ '@type': 'WebPageElement',
347
+ '@id': `${canonicalUrl}#pi-daily`,
348
+ url: `${canonicalUrl}#pi-daily`,
349
+ name: copy.dailyHeading,
350
+ inLanguage: safeLang,
351
+ },
321
352
  },
322
353
  ],
323
354
  },
@@ -19,7 +19,7 @@
19
19
  * `test/unit/sitemap-byte-equality.test.js` (compares against the
20
20
  * golden snapshots taken from `npm run prebuild`).
21
21
  */
22
- import { BASE_URL, BUILD_SHORT, THEME_TOGGLE_SCRIPT } from '../../constants/config.js';
22
+ import { BASE_URL, BUILD_SHORT, BUILD_TIME, THEME_TOGGLE_SCRIPT } from '../../constants/config.js';
23
23
  import { buildHeadFreshnessTags } from '../../constants/build-info-meta.js';
24
24
  import { getSitemapSeo } from '../seo-copy.js';
25
25
  import { ALL_LANGUAGES, LANGUAGE_NAMES, LANGUAGE_FLAGS, PAGE_TITLES, PAGE_DESCRIPTIONS, SKIP_LINK_TEXTS, getLocalizedString, getTextDirection, } from '../../constants/languages.js';
@@ -192,6 +192,8 @@ ${items}
192
192
  url: BASE_URL,
193
193
  },
194
194
  publisher: { '@id': `${BASE_URL}/#organization` },
195
+ datePublished: BUILD_TIME,
196
+ dateModified: BUILD_TIME,
195
197
  breadcrumb: {
196
198
  '@type': 'BreadcrumbList',
197
199
  itemListElement: [
@@ -213,12 +215,28 @@ ${items}
213
215
  '@type': 'ItemList',
214
216
  numberOfItems: Math.min(articleInfos.length, 50),
215
217
  name: sections.news,
216
- itemListElement: articleInfos.slice(0, 50).map((info, idx) => ({
217
- '@type': 'ListItem',
218
- position: idx + 1,
219
- url: `${BASE_URL}/news/${info.filename}`,
220
- name: info.title,
221
- })),
218
+ itemListElement: articleInfos.slice(0, 50).map((info, idx) => {
219
+ const url = `${BASE_URL}/news/${info.filename}`;
220
+ // Extract per-article language from filename suffix (e.g. `…-foo-en.html` → `en`).
221
+ // Mirrors the canonical `<slug>-<lang>.html` convention enforced by
222
+ // `ARTICLE_FILENAME_PATTERN` / `getArticleFilename()`.
223
+ const langMatch = /-([a-z]{2})\.html$/.exec(info.filename);
224
+ const articleLang = langMatch ? langMatch[1] : lang;
225
+ return {
226
+ '@type': 'ListItem',
227
+ position: idx + 1,
228
+ url,
229
+ item: {
230
+ '@type': 'NewsArticle',
231
+ '@id': url,
232
+ url,
233
+ headline: info.title,
234
+ name: info.title,
235
+ datePublished: info.date,
236
+ inLanguage: articleLang,
237
+ },
238
+ };
239
+ }),
222
240
  },
223
241
  };
224
242
  const jsonLdString = JSON.stringify(jsonLd).replace(/</g, '\\u003c');
@@ -6,7 +6,7 @@ import type { MCPToolResult } from '../../types/index.js';
6
6
  /**
7
7
  * Classify an error message into a diagnostic error category.
8
8
  *
9
- * Maps EP MCP Server v1.3.9 structured error codes and generic HTTP/network
9
+ * Maps EP MCP Server v1.3.10 structured error codes and generic HTTP/network
10
10
  * errors into one of six broad categories used for logging and retry decisions:
11
11
  *
12
12
  * Returned categories (priority order):
@@ -26,7 +26,7 @@ export declare function classifyToolError(message: string): string;
26
26
  * covering the two shapes historically emitted by the EP MCP server.
27
27
  *
28
28
  * 1. **Uniform envelope** (all feeds as of
29
- * `european-parliament-mcp-server@1.3.9`) —
29
+ * `european-parliament-mcp-server@1.3.10`) —
30
30
  * `{status:"unavailable", items:[], generatedAt:"..."}` established by
31
31
  * Hack23/European-Parliament-MCP-Server#301 and extended to
32
32
  * `get_events_feed`/`get_procedures_feed` by