euparliamentmonitor 0.9.19 → 0.9.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/README.md +2 -2
  2. package/package.json +4 -3
  3. package/scripts/aggregator/editorial-brief-resolver.d.ts +38 -0
  4. package/scripts/aggregator/editorial-brief-resolver.js +32 -0
  5. package/scripts/aggregator/generator/render-one.js +35 -0
  6. package/scripts/aggregator/html/localize-body.d.ts +32 -0
  7. package/scripts/aggregator/html/localize-body.js +69 -0
  8. package/scripts/aggregator/html/shell.d.ts +10 -0
  9. package/scripts/aggregator/html/shell.js +11 -1
  10. package/scripts/aggregator/markdown-renderer.d.ts +23 -24
  11. package/scripts/aggregator/markdown-renderer.js +39 -25
  12. package/scripts/aggregator/metadata/artifact-highlight.d.ts +15 -22
  13. package/scripts/aggregator/metadata/artifact-highlight.js +14 -230
  14. package/scripts/aggregator/metadata/artifact-walker.d.ts +34 -0
  15. package/scripts/aggregator/metadata/artifact-walker.js +177 -0
  16. package/scripts/aggregator/metadata/editorial-highlight.d.ts +15 -0
  17. package/scripts/aggregator/metadata/editorial-highlight.js +53 -0
  18. package/scripts/aggregator/metadata/priority-finding-highlight.js +7 -2
  19. package/scripts/aggregator/metadata/resolve-helpers.js +9 -3
  20. package/scripts/aggregator/metadata/text-utils.js +7 -0
  21. package/scripts/aggregator/metadata/translated-sibling.d.ts +23 -0
  22. package/scripts/aggregator/metadata/translated-sibling.js +39 -0
  23. package/scripts/aggregator/reader-guide/builder.js +3 -1
  24. package/scripts/aggregator/reader-guide/labels.d.ts +7 -0
  25. package/scripts/aggregator/reader-guide/labels.js +22 -0
  26. package/scripts/aggregator/reader-intelligence-guide.d.ts +1 -1
  27. package/scripts/aggregator/reader-intelligence-guide.js +1 -1
  28. package/scripts/aggregator/seo-entity-extractor.d.ts +45 -0
  29. package/scripts/aggregator/seo-entity-extractor.js +211 -0
  30. package/scripts/constants/articles/breaking-strings-central.d.ts +8 -0
  31. package/scripts/constants/articles/breaking-strings-central.js +105 -0
  32. package/scripts/constants/articles/breaking-strings-east.d.ts +8 -0
  33. package/scripts/constants/articles/breaking-strings-east.js +203 -0
  34. package/scripts/constants/articles/breaking-strings-nordic.d.ts +8 -0
  35. package/scripts/constants/articles/breaking-strings-nordic.js +252 -0
  36. package/scripts/constants/articles/breaking-strings-west.d.ts +8 -0
  37. package/scripts/constants/articles/breaking-strings-west.js +154 -0
  38. package/scripts/constants/articles/breaking.d.ts +0 -1
  39. package/scripts/constants/articles/breaking.js +9 -6
  40. package/scripts/constants/articles/dashboard/ar.d.ts +8 -0
  41. package/scripts/constants/articles/dashboard/ar.js +71 -0
  42. package/scripts/constants/articles/dashboard/da.d.ts +8 -0
  43. package/scripts/constants/articles/dashboard/da.js +71 -0
  44. package/scripts/constants/articles/dashboard/de.d.ts +8 -0
  45. package/scripts/constants/articles/dashboard/de.js +71 -0
  46. package/scripts/constants/articles/dashboard/en.d.ts +8 -0
  47. package/scripts/constants/articles/dashboard/en.js +71 -0
  48. package/scripts/constants/articles/dashboard/es.d.ts +8 -0
  49. package/scripts/constants/articles/dashboard/es.js +71 -0
  50. package/scripts/constants/articles/dashboard/fi.d.ts +8 -0
  51. package/scripts/constants/articles/dashboard/fi.js +71 -0
  52. package/scripts/constants/articles/dashboard/fr.d.ts +8 -0
  53. package/scripts/constants/articles/dashboard/fr.js +71 -0
  54. package/scripts/constants/articles/dashboard/he.d.ts +8 -0
  55. package/scripts/constants/articles/dashboard/he.js +71 -0
  56. package/scripts/constants/articles/dashboard/index.d.ts +7 -0
  57. package/scripts/constants/articles/dashboard/index.js +33 -0
  58. package/scripts/constants/articles/dashboard/ja.d.ts +8 -0
  59. package/scripts/constants/articles/dashboard/ja.js +71 -0
  60. package/scripts/constants/articles/dashboard/ko.d.ts +8 -0
  61. package/scripts/constants/articles/dashboard/ko.js +71 -0
  62. package/scripts/constants/articles/dashboard/nl.d.ts +8 -0
  63. package/scripts/constants/articles/dashboard/nl.js +71 -0
  64. package/scripts/constants/articles/dashboard/no.d.ts +8 -0
  65. package/scripts/constants/articles/dashboard/no.js +71 -0
  66. package/scripts/constants/articles/dashboard/sv.d.ts +8 -0
  67. package/scripts/constants/articles/dashboard/sv.js +71 -0
  68. package/scripts/constants/articles/dashboard/zh.d.ts +8 -0
  69. package/scripts/constants/articles/dashboard/zh.js +71 -0
  70. package/scripts/constants/articles/dashboard.d.ts +7 -2
  71. package/scripts/constants/articles/dashboard.js +4 -8
  72. package/scripts/constants/articles/deep-analysis/ar.d.ts +8 -0
  73. package/scripts/constants/articles/deep-analysis/ar.js +75 -0
  74. package/scripts/constants/articles/deep-analysis/da.d.ts +8 -0
  75. package/scripts/constants/articles/deep-analysis/da.js +75 -0
  76. package/scripts/constants/articles/deep-analysis/de.d.ts +8 -0
  77. package/scripts/constants/articles/deep-analysis/de.js +75 -0
  78. package/scripts/constants/articles/deep-analysis/en.d.ts +8 -0
  79. package/scripts/constants/articles/deep-analysis/en.js +75 -0
  80. package/scripts/constants/articles/deep-analysis/es.d.ts +8 -0
  81. package/scripts/constants/articles/deep-analysis/es.js +75 -0
  82. package/scripts/constants/articles/deep-analysis/fi.d.ts +8 -0
  83. package/scripts/constants/articles/deep-analysis/fi.js +75 -0
  84. package/scripts/constants/articles/deep-analysis/fr.d.ts +8 -0
  85. package/scripts/constants/articles/deep-analysis/fr.js +75 -0
  86. package/scripts/constants/articles/deep-analysis/he.d.ts +8 -0
  87. package/scripts/constants/articles/deep-analysis/he.js +75 -0
  88. package/scripts/constants/articles/deep-analysis/index.d.ts +7 -0
  89. package/scripts/constants/articles/deep-analysis/index.js +33 -0
  90. package/scripts/constants/articles/deep-analysis/ja.d.ts +8 -0
  91. package/scripts/constants/articles/deep-analysis/ja.js +75 -0
  92. package/scripts/constants/articles/deep-analysis/ko.d.ts +8 -0
  93. package/scripts/constants/articles/deep-analysis/ko.js +75 -0
  94. package/scripts/constants/articles/deep-analysis/nl.d.ts +8 -0
  95. package/scripts/constants/articles/deep-analysis/nl.js +75 -0
  96. package/scripts/constants/articles/deep-analysis/no.d.ts +8 -0
  97. package/scripts/constants/articles/deep-analysis/no.js +75 -0
  98. package/scripts/constants/articles/deep-analysis/sv.d.ts +8 -0
  99. package/scripts/constants/articles/deep-analysis/sv.js +75 -0
  100. package/scripts/constants/articles/deep-analysis/zh.d.ts +8 -0
  101. package/scripts/constants/articles/deep-analysis/zh.js +75 -0
  102. package/scripts/constants/articles/deep-analysis.d.ts +4 -3
  103. package/scripts/constants/articles/deep-analysis.js +3 -7
  104. package/scripts/constants/articles/localized-keywords-central.d.ts +8 -0
  105. package/scripts/constants/articles/localized-keywords-central.js +118 -0
  106. package/scripts/constants/articles/localized-keywords-nordic.d.ts +8 -0
  107. package/scripts/constants/articles/localized-keywords-nordic.js +303 -0
  108. package/scripts/constants/articles/localized-keywords.js +4 -2
  109. package/scripts/constants/articles/swot-builder-central.d.ts +8 -0
  110. package/scripts/constants/articles/swot-builder-central.js +90 -0
  111. package/scripts/constants/articles/swot-builder-nordic.d.ts +8 -0
  112. package/scripts/constants/articles/swot-builder-nordic.js +216 -0
  113. package/scripts/constants/articles/swot.js +4 -2
  114. package/scripts/constants/articles/week-ahead-eu.d.ts +12 -0
  115. package/scripts/constants/articles/week-ahead-eu.js +278 -0
  116. package/scripts/constants/articles/week-ahead-global.d.ts +12 -0
  117. package/scripts/constants/articles/week-ahead-global.js +278 -0
  118. package/scripts/constants/articles/week-ahead.d.ts +4 -7
  119. package/scripts/constants/articles/week-ahead.js +11 -535
  120. package/scripts/constants/world-bank/category-map-analysis.d.ts +9 -0
  121. package/scripts/constants/world-bank/category-map-analysis.js +204 -0
  122. package/scripts/constants/world-bank/category-map-legislative.d.ts +9 -0
  123. package/scripts/constants/world-bank/category-map-legislative.js +130 -0
  124. package/scripts/constants/world-bank/category-map-periodic.d.ts +9 -0
  125. package/scripts/constants/world-bank/category-map-periodic.js +176 -0
  126. package/scripts/constants/world-bank/category-map.d.ts +3 -26
  127. package/scripts/constants/world-bank/category-map.js +8 -501
  128. package/scripts/discover-untranslated-briefs.js +123 -4
  129. package/scripts/generators/news-indexes/per-language.js +21 -7
  130. package/scripts/generators/political-intelligence/html.js +39 -8
  131. package/scripts/generators/sitemap/html.js +25 -7
  132. package/scripts/mcp/ep/client.d.ts +0 -1
  133. package/scripts/mcp/ep/client.js +0 -65
  134. package/scripts/mcp/ep/error-classifier.d.ts +2 -2
  135. package/scripts/mcp/ep/error-classifier.js +2 -2
  136. package/scripts/mcp/ep/tools-list.d.ts +13 -0
  137. package/scripts/mcp/ep/tools-list.js +79 -0
  138. package/scripts/mcp/ep-mcp-client.d.ts +1 -0
  139. package/scripts/mcp/ep-mcp-client.js +1 -0
  140. package/scripts/mcp/imf/client.d.ts +3 -64
  141. package/scripts/mcp/imf/client.js +18 -207
  142. package/scripts/mcp/imf/http-transport.d.ts +92 -0
  143. package/scripts/mcp/imf/http-transport.js +232 -0
  144. package/scripts/mcp/transport/connection.d.ts +25 -53
  145. package/scripts/mcp/transport/connection.js +90 -250
  146. package/scripts/mcp/transport/process.d.ts +62 -0
  147. package/scripts/mcp/transport/process.js +147 -0
  148. package/scripts/mcp/transport/reconnect.d.ts +73 -0
  149. package/scripts/mcp/transport/reconnect.js +96 -0
  150. package/scripts/validate-brief-translations.js +122 -6
  151. package/scripts/constants/articles/breaking-strings-eu.d.ts +0 -7
  152. package/scripts/constants/articles/breaking-strings-global.d.ts +0 -7
  153. package/scripts/constants/articles/dashboard-builder-eu.d.ts +0 -7
  154. package/scripts/constants/articles/dashboard-builder-global.d.ts +0 -7
  155. package/scripts/constants/articles/deep-analysis-strings-eu.d.ts +0 -7
  156. package/scripts/constants/articles/deep-analysis-strings-global.d.ts +0 -7
  157. package/scripts/constants/articles/localized-keywords-eu.d.ts +0 -7
  158. package/scripts/constants/articles/swot-builder-eu.d.ts +0 -7
@@ -2,238 +2,22 @@
2
2
  // SPDX-License-Identifier: Apache-2.0
3
3
  /**
4
4
  * @module Aggregator/Metadata/ArtifactHighlight
5
- * @description Editorial-artefact highlight resolver extracted from
6
- * `article-metadata.ts`. Walks the canonical list of editorial artefacts
7
- * inside a run directory and returns the best `{headline, summary}`
8
- * pair — either a non-generic H1, a named priority finding, or a
9
- * stripped category-affix core — for use as the article `<title>` and
10
- * `<meta description>`.
5
+ * @description Thin re-export barrel that aggregates the three split
6
+ * highlight modules back into the single public surface expected by
7
+ * `article-metadata.ts`:
11
8
  *
12
- * Pure module — depends only on Node `fs`/`path` plus the leaf metadata
13
- * helpers (h1-extractor, lede-extractor, heading-rules, text-utils) and
14
- * the language-core constants for the translated-sibling filter.
15
- */
16
- import fs from 'fs';
17
- import path from 'path';
18
- import { ALL_LANGUAGES } from '../../constants/language-core.js';
19
- import { extractFirstH1 } from './h1-extractor.js';
20
- import { extractLedeAfterHeading, extractStrongProseLine } from './lede-extractor.js';
21
- import { isGenericHeading, stripArtifactCategoryAffix } from './heading-rules.js';
22
- import { truncateTitle } from './text-utils.js';
23
- /** Ordered list of artefact filenames that typically carry the editorial H1. */
24
- const EDITORIAL_ARTEFACT_CANDIDATES = [
25
- // `executive-brief.md` is the canonical Riksdagsmonitor-aligned editorial
26
- // artefact (see `analysis/methodologies/ai-driven-analysis-guide.md`).
27
- // It always carries the journalist's BLUF and a `## 60-Second Read`
28
- // paragraph that is the lede — preferring it over `synthesis-summary.md`
29
- // keeps Stage-B internal vocabulary ("Purpose: This artifact provides …")
30
- // out of the SEO-critical `<title>` and `<meta description>` surfaces.
31
- 'executive-brief.md',
32
- 'extended/executive-brief.md',
33
- 'intelligence/synthesis-summary.md',
34
- 'intelligence/executive-summary.md',
35
- 'intelligence/intelligence-briefing.md',
36
- 'executive-summary.md',
37
- 'intelligence-briefing.md',
38
- 'synthesis-summary.md',
39
- 'breaking-news-analysis.md',
40
- 'committee-activity-report.md',
41
- 'legislative-pipeline-analysis.md',
42
- 'weekly-outlook.md',
43
- 'monthly-outlook.md',
44
- 'week-in-review.md',
45
- 'month-in-review.md',
46
- 'motions-analysis.md',
47
- 'propositions-analysis.md',
48
- ];
49
- /**
50
- * Attempt to read the first H1 and first prose paragraph from the first
51
- * existing artefact under `EDITORIAL_ARTEFACT_CANDIDATES`. Returns
52
- * `null` when no candidate artefact exists.
53
- *
54
- * @param runDir - Absolute run directory path
55
- * @param articleType - Article type slug (used by {@link isGenericHeading})
56
- * @param date - ISO run date (used by {@link isGenericHeading})
57
- * @returns `{headline, summary}` where either field may be empty
58
- */
59
- export function extractArtifactHighlight(runDir, articleType, date) {
60
- if (!runDir || !fs.existsSync(runDir))
61
- return null;
62
- const direct = scanCandidatesForHighlight(runDir, EDITORIAL_ARTEFACT_CANDIDATES, articleType, date);
63
- if (direct.headline)
64
- return { headline: direct.headline, summary: direct.summary };
65
- // Top-level fallback scan — used only when none of the canonical
66
- // editorial artefacts produced a non-generic H1. We must NOT pick up
67
- // translated sibling briefs (`executive-brief_<lang>.md`,
68
- // `synthesis-summary_<lang>.md`, …) here, because their H1s are
69
- // legitimate localized headlines that the English-only
70
- // {@link isGenericHeading} detector cannot recognise as boilerplate.
71
- // Letting them through poisoned the English `<title>` and
72
- // `<meta description>` for the 2026-05-15 batch with Arabic content
73
- // from `executive-brief_ar.md`. See {@link isTranslatedSiblingBrief}
74
- // and the regression test in `test/unit/article-metadata.test.js`.
75
- const topLevel = safeReaddir(runDir).filter((f) => f.endsWith('.md') && f !== 'manifest.json' && !isTranslatedSiblingBrief(f));
76
- const fallback = scanCandidatesForHighlight(runDir, topLevel, articleType, date);
77
- if (fallback.headline)
78
- return { headline: fallback.headline, summary: fallback.summary };
79
- const summaryOnly = direct.summary || fallback.summary;
80
- if (summaryOnly) {
81
- return { headline: '', summary: summaryOnly };
82
- }
83
- return null;
84
- }
85
- /**
86
- * Filename suffix pattern that identifies a translated sibling brief
87
- * (e.g. `executive-brief_ar.md`, `synthesis-summary_zh.md`). The
88
- * `_<lang>` token is matched against {@link ALL_LANGUAGES} so we never
89
- * exclude a legitimate English artefact whose name happens to end in
90
- * `_<two-letter-suffix>.md`.
91
- */
92
- const TRANSLATED_SIBLING_SUFFIX_RE = new RegExp(`_(${ALL_LANGUAGES.join('|')})\\.md$`, 'i');
93
- /**
94
- * Return `true` when a top-level `.md` filename looks like a translated
95
- * sibling of a canonical editorial artefact (e.g.
96
- * `executive-brief_ar.md`). These files must be excluded from the
97
- * top-level fallback scan in {@link extractArtifactHighlight} because
98
- * their localized H1s evade the English-only generic-heading detector
99
- * and would otherwise hijack the English SEO surfaces.
100
- *
101
- * @param filename - Run-relative `.md` filename (no path separators)
102
- * @returns `true` when the file is a translated sibling brief
103
- */
104
- export function isTranslatedSiblingBrief(filename) {
105
- return TRANSLATED_SIBLING_SUFFIX_RE.test(filename);
106
- }
107
- /**
108
- * Walk a list of candidate artefact paths and return the first
109
- * non-generic headline + summary pair, plus the first usable lede
110
- * summary seen along the way. Extracted from
111
- * {@link extractArtifactHighlight} to keep its cognitive complexity
112
- * within the SonarJS budget.
9
+ * - {@link extractArtifactHighlight} — primary editorial-artefact walker
10
+ * (see `editorial-highlight.ts`)
11
+ * - {@link extractPriorityFindingHighlight} — fallback priority-finding
12
+ * extractor (see `priority-finding-highlight.ts`)
13
+ * - {@link isTranslatedSiblingBrief} — translated-sibling filter predicate
14
+ * (see `translated-sibling.ts`)
113
15
  *
114
- * @param runDir - Absolute run directory path
115
- * @param candidates - Run-relative candidate filenames to probe
116
- * @param articleType - Article-type slug (used by {@link isGenericHeading})
117
- * @param date - ISO run date (used by {@link isGenericHeading})
118
- * @returns `{headline, summary}` where either field may be empty
16
+ * @see editorial-highlight.ts
17
+ * @see priority-finding-highlight.ts
18
+ * @see translated-sibling.ts
119
19
  */
120
- function scanCandidatesForHighlight(runDir, candidates, articleType, date) {
121
- let bestSummaryOnly = '';
122
- for (const rel of candidates) {
123
- const probe = probeCandidateForHighlight(runDir, rel, articleType, date);
124
- if (probe.cleanHighlight)
125
- return probe.cleanHighlight;
126
- if (probe.strippedHeadline) {
127
- return { headline: probe.strippedHeadline, summary: probe.summary ?? bestSummaryOnly };
128
- }
129
- if (!bestSummaryOnly && probe.summary) {
130
- bestSummaryOnly = probe.summary;
131
- }
132
- }
133
- return { headline: '', summary: bestSummaryOnly };
134
- }
135
- /**
136
- * Read a single candidate artefact and classify what it can contribute
137
- * to the highlight resolver. Extracted from
138
- * {@link scanCandidatesForHighlight} to keep its cognitive complexity
139
- * within the SonarJS budget.
140
- *
141
- * @param runDir - Absolute run directory
142
- * @param rel - Run-relative artefact path
143
- * @param articleType - Article-type slug for {@link isGenericHeading}
144
- * @param date - ISO run date for {@link isGenericHeading}
145
- * @returns
146
- * - `cleanHighlight` when the artefact has a non-generic H1 (caller may
147
- * return it directly)
148
- * - `strippedHeadline` when the H1 is generic but yields an editorial
149
- * core after {@link stripArtifactCategoryAffix}
150
- * - `summary` when the artefact carries a usable lede or strong prose
151
- * line (independent of the headline outcome)
152
- */
153
- function probeCandidateForHighlight(runDir, rel, articleType, date) {
154
- const abs = path.join(runDir, rel);
155
- if (!fs.existsSync(abs))
156
- return {};
157
- const body = readArtefactBody(abs);
158
- const headline = extractFirstH1(body);
159
- const lede = extractLedeAfterHeading(body);
160
- const summary = lede || extractStrongProseLine(body);
161
- if (headline && !isGenericHeading(headline, articleType, date)) {
162
- return { cleanHighlight: { headline: truncateTitle(headline), summary } };
163
- }
164
- // The artefact H1 is generic boilerplate (`Executive Brief — EU Parliament
165
- // Breaking News`). Before falling back to a stripped category-core
166
- // headline, try to surface the FIRST NAMED PRIORITY FINDING from the
167
- // brief's `## Key Developments` / `## Priority Dossiers` /
168
- // `## Top Findings` block. This is the canonical Stage-B authoring
169
- // pattern (see `analysis/templates/executive-brief.md`) — every brief
170
- // lists its top dossiers as `**Name** (procedure-code, date) — paragraph`
171
- // or `### N. Name (committee)`. Surfacing that name produces a
172
- // distinctive editorial headline ("Digital Markets Act Enforcement",
173
- // "Ukraine War Accountability") instead of a stripped category noun.
174
- const priority = extractPriorityFindingHighlight(body);
175
- if (priority?.headline) {
176
- return {
177
- cleanHighlight: {
178
- headline: truncateTitle(priority.headline),
179
- summary: priority.summary || summary,
180
- },
181
- };
182
- }
183
- if (headline) {
184
- const stripped = stripArtifactCategoryAffix(headline);
185
- if (stripped && !isGenericHeading(stripped, articleType, date)) {
186
- return { strippedHeadline: truncateTitle(stripped), summary };
187
- }
188
- }
189
- return { summary };
190
- }
20
+ export { extractArtifactHighlight } from './editorial-highlight.js';
191
21
  export { extractPriorityFindingHighlight } from './priority-finding-highlight.js';
192
- import { extractPriorityFindingHighlight } from './priority-finding-highlight.js';
193
- /**
194
- * Read an artefact file, skipping any SPDX HTML-comment header rows so the
195
- * first-H1 / first-prose logic is never derailed by the REUSE preamble.
196
- *
197
- * @param abs - Absolute file path
198
- * @returns File contents with SPDX comment lines dropped
199
- */
200
- function readArtefactBody(abs) {
201
- let text;
202
- try {
203
- text = fs.readFileSync(abs, 'utf8');
204
- }
205
- catch {
206
- return '';
207
- }
208
- const lines = text.split('\n');
209
- let i = 0;
210
- while (i < lines.length) {
211
- const line = (lines[i] ?? '').trim();
212
- if (line === '') {
213
- i++;
214
- continue;
215
- }
216
- if (line.startsWith('<!--') && line.endsWith('-->')) {
217
- i++;
218
- continue;
219
- }
220
- break;
221
- }
222
- return lines.slice(i).join('\n');
223
- }
224
- /**
225
- * `fs.readdirSync` wrapped to never throw for missing or unreadable
226
- * directories.
227
- *
228
- * @param dir - Absolute directory path
229
- * @returns Entries in {@link dir} or `[]` when unreadable
230
- */
231
- function safeReaddir(dir) {
232
- try {
233
- return fs.readdirSync(dir);
234
- }
235
- catch {
236
- return [];
237
- }
238
- }
22
+ export { isTranslatedSiblingBrief } from './translated-sibling.js';
239
23
  //# sourceMappingURL=artifact-highlight.js.map
@@ -0,0 +1,34 @@
1
+ /** Ordered list of artefact filenames that typically carry the editorial H1. */
2
+ export declare const EDITORIAL_ARTEFACT_CANDIDATES: readonly string[];
3
+ /**
4
+ * Read an artefact file, skipping any SPDX HTML-comment header rows so the
5
+ * first-H1 / first-prose logic is never derailed by the REUSE preamble.
6
+ *
7
+ * @param abs - Absolute file path
8
+ * @returns File contents with SPDX comment lines dropped
9
+ */
10
+ export declare function readArtefactBody(abs: string): string;
11
+ /**
12
+ * `fs.readdirSync` wrapped to never throw for missing or unreadable
13
+ * directories.
14
+ *
15
+ * @param dir - Absolute directory path
16
+ * @returns Entries in {@link dir} or `[]` when unreadable
17
+ */
18
+ export declare function safeReaddir(dir: string): string[];
19
+ /**
20
+ * Walk a list of candidate artefact paths and return the first
21
+ * non-generic headline + summary pair, plus the first usable lede
22
+ * summary seen along the way.
23
+ *
24
+ * @param runDir - Absolute run directory path
25
+ * @param candidates - Run-relative candidate filenames to probe
26
+ * @param articleType - Article-type slug (used by {@link isGenericHeading})
27
+ * @param date - ISO run date (used by {@link isGenericHeading})
28
+ * @returns `{headline, summary}` where either field may be empty
29
+ */
30
+ export declare function scanCandidatesForHighlight(runDir: string, candidates: readonly string[], articleType: string, date: string): {
31
+ readonly headline: string;
32
+ readonly summary: string;
33
+ };
34
+ //# sourceMappingURL=artifact-walker.d.ts.map
@@ -0,0 +1,177 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * @module Aggregator/Metadata/ArtifactWalker
5
+ * @description Shared editorial-artefact discovery helpers: candidate-list
6
+ * walker, file-existence guards, lede + H1 helper composition. Extracted
7
+ * from `artifact-highlight.ts` to keep individual file sizes under 400 LOC.
8
+ *
9
+ * Exported surface: `EDITORIAL_ARTEFACT_CANDIDATES` (ordered candidate list),
10
+ * `readArtefactBody` (SPDX-aware file reader), `safeReaddir` (fault-tolerant
11
+ * directory listing), and `scanCandidatesForHighlight` (composed walker).
12
+ * Internal helper `probeCandidateForHighlight` is not exported so that
13
+ * `editorial-highlight.ts` can call the walker without depending on the
14
+ * lower-level per-file probe logic.
15
+ */
16
+ import fs from 'fs';
17
+ import path from 'path';
18
+ import { extractFirstH1 } from './h1-extractor.js';
19
+ import { extractLedeAfterHeading, extractStrongProseLine } from './lede-extractor.js';
20
+ import { isGenericHeading, stripArtifactCategoryAffix } from './heading-rules.js';
21
+ import { truncateTitle } from './text-utils.js';
22
+ import { extractPriorityFindingHighlight } from './priority-finding-highlight.js';
23
+ /** Ordered list of artefact filenames that typically carry the editorial H1. */
24
+ export const EDITORIAL_ARTEFACT_CANDIDATES = [
25
+ // `executive-brief.md` is the canonical Riksdagsmonitor-aligned editorial
26
+ // artefact (see `analysis/methodologies/ai-driven-analysis-guide.md`).
27
+ // It always carries the journalist's BLUF and a `## 60-Second Read`
28
+ // paragraph that is the lede — preferring it over `synthesis-summary.md`
29
+ // keeps Stage-B internal vocabulary ("Purpose: This artifact provides …")
30
+ // out of the SEO-critical `<title>` and `<meta description>` surfaces.
31
+ 'executive-brief.md',
32
+ 'extended/executive-brief.md',
33
+ 'intelligence/synthesis-summary.md',
34
+ 'intelligence/executive-summary.md',
35
+ 'intelligence/intelligence-briefing.md',
36
+ 'executive-summary.md',
37
+ 'intelligence-briefing.md',
38
+ 'synthesis-summary.md',
39
+ 'breaking-news-analysis.md',
40
+ 'committee-activity-report.md',
41
+ 'legislative-pipeline-analysis.md',
42
+ 'weekly-outlook.md',
43
+ 'monthly-outlook.md',
44
+ 'week-in-review.md',
45
+ 'month-in-review.md',
46
+ 'motions-analysis.md',
47
+ 'propositions-analysis.md',
48
+ ];
49
+ /**
50
+ * Read an artefact file, skipping any SPDX HTML-comment header rows so the
51
+ * first-H1 / first-prose logic is never derailed by the REUSE preamble.
52
+ *
53
+ * @param abs - Absolute file path
54
+ * @returns File contents with SPDX comment lines dropped
55
+ */
56
+ export function readArtefactBody(abs) {
57
+ let text;
58
+ try {
59
+ text = fs.readFileSync(abs, 'utf8');
60
+ }
61
+ catch {
62
+ return '';
63
+ }
64
+ const lines = text.split('\n');
65
+ let i = 0;
66
+ while (i < lines.length) {
67
+ const line = (lines[i] ?? '').trim();
68
+ if (line === '') {
69
+ i++;
70
+ continue;
71
+ }
72
+ if (line.startsWith('<!--') && line.endsWith('-->')) {
73
+ i++;
74
+ continue;
75
+ }
76
+ break;
77
+ }
78
+ return lines.slice(i).join('\n');
79
+ }
80
+ /**
81
+ * `fs.readdirSync` wrapped to never throw for missing or unreadable
82
+ * directories.
83
+ *
84
+ * @param dir - Absolute directory path
85
+ * @returns Entries in {@link dir} or `[]` when unreadable
86
+ */
87
+ export function safeReaddir(dir) {
88
+ try {
89
+ return fs.readdirSync(dir);
90
+ }
91
+ catch {
92
+ return [];
93
+ }
94
+ }
95
+ /**
96
+ * Read a single candidate artefact and classify what it can contribute
97
+ * to the highlight resolver. Extracted from
98
+ * {@link scanCandidatesForHighlight} to keep its cognitive complexity
99
+ * within the SonarJS budget.
100
+ *
101
+ * @param runDir - Absolute run directory
102
+ * @param rel - Run-relative artefact path
103
+ * @param articleType - Article-type slug for {@link isGenericHeading}
104
+ * @param date - ISO run date for {@link isGenericHeading}
105
+ * @returns
106
+ * - `cleanHighlight` when the artefact has a non-generic H1 (caller may
107
+ * return it directly)
108
+ * - `strippedHeadline` when the H1 is generic but yields an editorial
109
+ * core after {@link stripArtifactCategoryAffix}
110
+ * - `summary` when the artefact carries a usable lede or strong prose
111
+ * line (independent of the headline outcome)
112
+ */
113
+ function probeCandidateForHighlight(runDir, rel, articleType, date) {
114
+ const abs = path.join(runDir, rel);
115
+ if (!fs.existsSync(abs))
116
+ return {};
117
+ const body = readArtefactBody(abs);
118
+ const headline = extractFirstH1(body);
119
+ const lede = extractLedeAfterHeading(body);
120
+ const summary = lede || extractStrongProseLine(body);
121
+ if (headline && !isGenericHeading(headline, articleType, date)) {
122
+ return { cleanHighlight: { headline: truncateTitle(headline), summary } };
123
+ }
124
+ // The artefact H1 is generic boilerplate (`Executive Brief — EU Parliament
125
+ // Breaking News`). Before falling back to a stripped category-core
126
+ // headline, try to surface the FIRST NAMED PRIORITY FINDING from the
127
+ // brief's `## Key Developments` / `## Priority Dossiers` /
128
+ // `## Top Findings` block. This is the canonical Stage-B authoring
129
+ // pattern (see `analysis/templates/executive-brief.md`) — every brief
130
+ // lists its top dossiers as `**Name** (procedure-code, date) — paragraph`
131
+ // or `### N. Name (committee)`. Surfacing that name produces a
132
+ // distinctive editorial headline ("Digital Markets Act Enforcement",
133
+ // "Ukraine War Accountability") instead of a stripped category noun.
134
+ const priority = extractPriorityFindingHighlight(body);
135
+ if (priority?.headline) {
136
+ return {
137
+ cleanHighlight: {
138
+ headline: truncateTitle(priority.headline),
139
+ summary: priority.summary || summary,
140
+ },
141
+ };
142
+ }
143
+ if (headline) {
144
+ const stripped = stripArtifactCategoryAffix(headline);
145
+ if (stripped && !isGenericHeading(stripped, articleType, date)) {
146
+ return { strippedHeadline: truncateTitle(stripped), summary };
147
+ }
148
+ }
149
+ return { summary };
150
+ }
151
+ /**
152
+ * Walk a list of candidate artefact paths and return the first
153
+ * non-generic headline + summary pair, plus the first usable lede
154
+ * summary seen along the way.
155
+ *
156
+ * @param runDir - Absolute run directory path
157
+ * @param candidates - Run-relative candidate filenames to probe
158
+ * @param articleType - Article-type slug (used by {@link isGenericHeading})
159
+ * @param date - ISO run date (used by {@link isGenericHeading})
160
+ * @returns `{headline, summary}` where either field may be empty
161
+ */
162
+ export function scanCandidatesForHighlight(runDir, candidates, articleType, date) {
163
+ let bestSummaryOnly = '';
164
+ for (const rel of candidates) {
165
+ const probe = probeCandidateForHighlight(runDir, rel, articleType, date);
166
+ if (probe.cleanHighlight)
167
+ return probe.cleanHighlight;
168
+ if (probe.strippedHeadline) {
169
+ return { headline: probe.strippedHeadline, summary: probe.summary ?? bestSummaryOnly };
170
+ }
171
+ if (!bestSummaryOnly && probe.summary) {
172
+ bestSummaryOnly = probe.summary;
173
+ }
174
+ }
175
+ return { headline: '', summary: bestSummaryOnly };
176
+ }
177
+ //# sourceMappingURL=artifact-walker.js.map
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Resolve the editorial `{headline, summary}` pair for a run directory,
3
+ * probing {@link EDITORIAL_ARTEFACT_CANDIDATES} first. Returns `null`
4
+ * when neither a headline nor a summary could be resolved.
5
+ *
6
+ * @param runDir - Absolute run directory path
7
+ * @param articleType - Article type slug (used by {@link isGenericHeading})
8
+ * @param date - ISO run date (used by {@link isGenericHeading})
9
+ * @returns `{headline, summary}` where either field may be empty, or `null`
10
+ */
11
+ export declare function extractArtifactHighlight(runDir: string, articleType: string, date: string): {
12
+ readonly headline: string;
13
+ readonly summary: string;
14
+ } | null;
15
+ //# sourceMappingURL=editorial-highlight.d.ts.map
@@ -0,0 +1,53 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * @module Aggregator/Metadata/EditorialHighlight
5
+ * @description Primary editorial-artefact highlight resolver. Walks the
6
+ * canonical list of editorial artefacts inside a run directory and returns
7
+ * the best `{headline, summary}` pair — either a non-generic H1, a named
8
+ * priority finding, or a stripped category-affix core — for use as the
9
+ * article `<title>` and `<meta description>`.
10
+ *
11
+ * Depends on {@link artifact-walker} for shared discovery helpers and
12
+ * {@link translated-sibling} for the translated-sibling filter.
13
+ */
14
+ import fs from 'fs';
15
+ import { EDITORIAL_ARTEFACT_CANDIDATES, safeReaddir, scanCandidatesForHighlight, } from './artifact-walker.js';
16
+ import { isTranslatedSiblingBrief } from './translated-sibling.js';
17
/**
 * Resolve the best editorial `{headline, summary}` pair for a run
 * directory.
 *
 * Resolution order:
 * 1. Probe {@link EDITORIAL_ARTEFACT_CANDIDATES}; a headline found here wins.
 * 2. Otherwise probe every top-level `.md` file of the run directory that
 *    is not a translated sibling brief.
 * 3. Otherwise fall back to a summary-only result (empty headline) when
 *    either pass produced a summary.
 *
 * @param runDir - Absolute run directory path
 * @param articleType - Article type slug (used by {@link isGenericHeading})
 * @param date - ISO run date (used by {@link isGenericHeading})
 * @returns `{headline, summary}` where either field may be empty, or
 *   `null` when `runDir` is empty or missing, or when no pass produced a
 *   headline or a summary
 */
export function extractArtifactHighlight(runDir, articleType, date) {
    if (!runDir || !fs.existsSync(runDir)) {
        return null;
    }
    const direct = scanCandidatesForHighlight(runDir, EDITORIAL_ARTEFACT_CANDIDATES, articleType, date);
    if (direct.headline) {
        return { headline: direct.headline, summary: direct.summary };
    }
    // Top-level fallback scan — used only when none of the canonical
    // editorial artefacts produced a non-generic H1. We must NOT pick up
    // translated sibling briefs (`executive-brief_<lang>.md`,
    // `synthesis-summary_<lang>.md`, …) here, because their H1s are
    // legitimate localized headlines that the English-only
    // {@link isGenericHeading} detector cannot recognise as boilerplate.
    // Letting them through poisoned the English `<title>` and
    // `<meta description>` for the 2026-05-15 batch with Arabic content
    // from `executive-brief_ar.md`. See {@link isTranslatedSiblingBrief}
    // and the regression test in `test/unit/article-metadata.test.js`.
    //
    // The `.md` suffix check already excludes `manifest.json`, so no
    // separate filename comparison is needed.
    const topLevel = safeReaddir(runDir).filter((f) => f.endsWith('.md') && !isTranslatedSiblingBrief(f));
    const fallback = scanCandidatesForHighlight(runDir, topLevel, articleType, date);
    if (fallback.headline) {
        return { headline: fallback.headline, summary: fallback.summary };
    }
    // No headline anywhere: prefer the canonical artefacts' lede over the
    // fallback scan's lede.
    const summaryOnly = direct.summary || fallback.summary;
    return summaryOnly ? { headline: '', summary: summaryOnly } : null;
}
53
+ //# sourceMappingURL=editorial-highlight.js.map
@@ -9,7 +9,7 @@
9
9
  * artifact-highlight.ts when an artefact has no usable H1.
10
10
  */
11
11
  import { normaliseHeadingText } from './heading-rules.js';
12
- import { DESCRIPTION_MAX_LENGTH, shouldSkipDescriptionLine, stripInlineMarkdown, truncateDescription, } from './text-utils.js';
12
+ import { DESCRIPTION_MAX_LENGTH, shouldSkipDescriptionLine, stripInlineMarkdown, stripLeadingProseLabel, truncateDescription, } from './text-utils.js';
13
13
  /**
14
14
  * Section headings inside the executive brief that introduce the
15
15
  * named-priority-finding block (matched case-insensitively against the
@@ -390,6 +390,11 @@ function collectPrioritySummaryLines(tail, lines, i) {
390
390
  let tailText = stripInlineMarkdown(tail).trim();
391
391
  tailText = tailText.replace(/^\([^()]{3,80}\)\s*/u, '');
392
392
  tailText = stripPriorityTailMetadata(tailText).trim();
393
+ // Strip leading all-caps prose labels (`BLUF:`, `SITUATION:`, `WEP:`,
394
+ // `KEY MOTION:`) that the lede-extractor walker already removes —
395
+ // priority-finding summaries flow into the same `<meta description>`
396
+ // surface and the HTML pipeline test forbids the all-caps opener.
397
+ tailText = stripLeadingProseLabel(tailText);
393
398
  if (tailText)
394
399
  summaryLines.push(tailText);
395
400
  for (let j = i + 1; j < lines.length; j++) {
@@ -405,7 +410,7 @@ function collectPrioritySummaryLines(tail, lines, i) {
405
410
  continue;
406
411
  if (shouldSkipDescriptionLine(next))
407
412
  continue;
408
- summaryLines.push(stripInlineMarkdown(next));
413
+ summaryLines.push(stripLeadingProseLabel(stripInlineMarkdown(next)));
409
414
  if (summaryLines.join(' ').length >= DESCRIPTION_MAX_LENGTH)
410
415
  break;
411
416
  }
@@ -123,13 +123,19 @@ export function composeContextualTitle(fallbackTitle, editorialHeadline, runId)
123
123
  */
124
124
  export function composeContextualDescription(lang, baseDescription, editorial, date, _runId) {
125
125
  const labels = getLocalizedString(SEO_CONTEXT_LABELS, lang);
126
- const parts = [baseDescription.trim()];
127
- parts.push(`${labels.date} ${date}.`);
126
+ const base = baseDescription.trim();
127
+ const parts = [base];
128
+ const datePart = `${labels.date} ${date}.`;
129
+ if (!containsNormalized(base, `${labels.date} ${date}`)) {
130
+ parts.push(datePart);
131
+ }
128
132
  const context = pickFirstNonEmpty([editorial.summary, editorial.headline]);
129
133
  if (context && !containsNormalized(parts[0] ?? '', context)) {
130
134
  parts.push(`${labels.context}: ${context}`);
131
135
  }
132
- parts.push(labels.reader);
136
+ if (!containsNormalized(parts.join(' '), labels.reader)) {
137
+ parts.push(labels.reader);
138
+ }
133
139
  return truncateDescription(parts.join(' '));
134
140
  }
135
141
  /**
@@ -142,6 +142,13 @@ export const METADATA_LINE_PREFIXES = [
142
142
  'SPDX-License-Identifier',
143
143
  'Topic',
144
144
  'Type',
145
+ // Bare `WEP:` (Words of Estimative Probability) lines appear in
146
+ // `intelligence/synthesis-summary.md` between a KJ-N heading and its
147
+ // prose body (e.g. `**WEP: ALMOST CERTAINLY (>95%)** | Admiralty: A1`).
148
+ // The line is grade/confidence metadata, not editorial prose — without
149
+ // this prefix it leaked into `<meta description>` as an all-caps shout
150
+ // (run #26223932441, propositions 2026-05-21).
151
+ 'WEP',
145
152
  'WEP Band',
146
153
  'WEP Grade',
147
154
  'Window',
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Filename suffix pattern that identifies a translated sibling brief
3
+ * (e.g. `executive-brief_ar.md`, `synthesis-summary_zh.md`). The
4
+ * `_<lang>` token is matched against {@link ALL_LANGUAGES} so we never
5
+ * exclude a legitimate English artefact whose name merely ends in a
6
+ * `_<suffix>.md` that is not one of the known language codes.
7
+ *
8
+ * Matching is case-insensitive and anchored to the end of the filename.
9
+ */
10
+ export declare const TRANSLATED_SIBLING_SUFFIX_RE: RegExp;
11
+ /**
12
+ * Return `true` when a top-level `.md` filename looks like a translated
13
+ * sibling of a canonical editorial artefact (e.g.
14
+ * `executive-brief_ar.md`). These files must be excluded from the
15
+ * top-level fallback scan in {@link extractArtifactHighlight} because
16
+ * their localized H1s evade the English-only generic-heading detector
17
+ * and would otherwise hijack the English SEO surfaces.
18
+ *
19
+ * @param filename - Run-relative `.md` filename (no path separators)
20
+ * @returns `true` when `filename` matches {@link TRANSLATED_SIBLING_SUFFIX_RE}
21
+ */
22
+ export declare function isTranslatedSiblingBrief(filename: string): boolean;
23
+ //# sourceMappingURL=translated-sibling.d.ts.map
@@ -0,0 +1,39 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * @module Aggregator/Metadata/TranslatedSibling
5
+ * @description Filter predicate that identifies translated sibling brief
6
+ * files (e.g. `executive-brief_ar.md`) so they can be excluded from the
7
+ * English-only top-level artefact fallback scan in
8
+ * {@link extractArtifactHighlight}.
9
+ *
10
+ * High-reuse module: import this predicate wherever translated siblings
11
+ * must be excluded from discovery (e.g. tradecraft discovery,
12
+ * template-frontmatter sync).
13
+ */
14
+ import { ALL_LANGUAGES } from '../../constants/language-core.js';
15
/**
 * Pattern recognising the filename suffix of a translated sibling brief,
 * such as `executive-brief_ar.md` or `synthesis-summary_zh.md`.
 *
 * The `_<lang>` part is an alternation built from {@link ALL_LANGUAGES},
 * so an English artefact whose name merely ends in some other
 * `_<suffix>.md` is not excluded by accident.
 *
 * The match is anchored to the end of the filename and ignores case, so
 * uppercase variants are recognised too.
 */
export const TRANSLATED_SIBLING_SUFFIX_RE = new RegExp(
    String.raw`_(${ALL_LANGUAGES.join('|')})\.md$`,
    'i',
);
25
/**
 * Tell whether a top-level `.md` filename is a translated sibling of a
 * canonical editorial artefact (for example `executive-brief_ar.md`).
 *
 * Such files are kept out of the top-level fallback scan in
 * {@link extractArtifactHighlight}: their localized H1s slip past the
 * English-only generic-heading detector and would otherwise take over
 * the English SEO surfaces.
 *
 * @param filename - Run-relative `.md` filename (no path separators)
 * @returns `true` when the filename matches {@link TRANSLATED_SIBLING_SUFFIX_RE}
 */
export function isTranslatedSiblingBrief(filename) {
    const hasLanguageSuffix = TRANSLATED_SIBLING_SUFFIX_RE.test(filename);
    return hasLanguageSuffix;
}
39
+ //# sourceMappingURL=translated-sibling.js.map