euparliamentmonitor 0.9.21 → 0.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/package.json +6 -2
  2. package/scripts/aggregator/article-metadata.js +69 -14
  3. package/scripts/aggregator/editorial-brief-resolver.js +23 -0
  4. package/scripts/aggregator/html/headline.d.ts +41 -9
  5. package/scripts/aggregator/html/headline.js +69 -10
  6. package/scripts/aggregator/html/shell.js +73 -17
  7. package/scripts/aggregator/manifest/index.d.ts +1 -1
  8. package/scripts/aggregator/manifest/index.js +1 -1
  9. package/scripts/aggregator/manifest/resolver.d.ts +28 -1
  10. package/scripts/aggregator/manifest/resolver.js +61 -5
  11. package/scripts/aggregator/markdown-renderer.js +11 -0
  12. package/scripts/aggregator/metadata/artifact-category-heading.d.ts +81 -0
  13. package/scripts/aggregator/metadata/artifact-category-heading.js +353 -0
  14. package/scripts/aggregator/metadata/artifact-walker.js +29 -10
  15. package/scripts/aggregator/metadata/brief-body.d.ts +12 -0
  16. package/scripts/aggregator/metadata/brief-body.js +69 -0
  17. package/scripts/aggregator/metadata/briefing-highlight.d.ts +47 -0
  18. package/scripts/aggregator/metadata/briefing-highlight.js +469 -0
  19. package/scripts/aggregator/metadata/editorial-highlight.d.ts +18 -0
  20. package/scripts/aggregator/metadata/editorial-highlight.js +40 -1
  21. package/scripts/aggregator/metadata/heading-rules.d.ts +2 -81
  22. package/scripts/aggregator/metadata/heading-rules.js +78 -269
  23. package/scripts/aggregator/metadata/keyword-filters.d.ts +60 -0
  24. package/scripts/aggregator/metadata/keyword-filters.js +156 -0
  25. package/scripts/aggregator/metadata/lede-extractor.js +11 -2
  26. package/scripts/aggregator/metadata/priority-finding-cleaning.d.ts +22 -0
  27. package/scripts/aggregator/metadata/priority-finding-cleaning.js +181 -0
  28. package/scripts/aggregator/metadata/priority-finding-highlight.js +75 -159
  29. package/scripts/aggregator/metadata/resolve-helpers.d.ts +34 -0
  30. package/scripts/aggregator/metadata/resolve-helpers.js +202 -15
  31. package/scripts/aggregator/metadata/seo-budgets.d.ts +140 -0
  32. package/scripts/aggregator/metadata/seo-budgets.js +202 -0
  33. package/scripts/aggregator/metadata/text-truncate.d.ts +75 -0
  34. package/scripts/aggregator/metadata/text-truncate.js +277 -0
  35. package/scripts/aggregator/metadata/text-utils-constants.d.ts +96 -0
  36. package/scripts/aggregator/metadata/text-utils-constants.js +209 -0
  37. package/scripts/aggregator/metadata/text-utils.d.ts +32 -143
  38. package/scripts/aggregator/metadata/text-utils.js +119 -439
  39. package/scripts/aggregator/metadata/title-rejection.d.ts +37 -0
  40. package/scripts/aggregator/metadata/title-rejection.js +179 -0
  41. package/scripts/copy-vendor.js +84 -112
  42. package/scripts/dump-article-seo.js +640 -0
  43. package/scripts/fix-mermaid-diagrams.js +931 -0
  44. package/scripts/generators/news-indexes/backfill.d.ts +6 -1
  45. package/scripts/generators/news-indexes/backfill.js +71 -4
  46. package/scripts/validate-article-seo.js +534 -0
  47. package/scripts/validate-mermaid-diagrams.js +306 -0
@@ -0,0 +1,306 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * @module ValidateMermaidDiagrams
5
+ * @description
6
+ * Systematic validator for every fenced ```mermaid block under one or
7
+ * more roots (default `analysis/`). Extracts each block, runs the real
8
+ * Mermaid v11 parser against it (via happy-dom) and reports broken
9
+ * diagrams with file, line, diagram type and a one-line error message.
10
+ *
11
+ * Designed to be deterministic, side-effect free, and fast enough to
12
+ * run as a Vitest gate and an `npm` script:
13
+ *
14
+ * node scripts/validate-mermaid-diagrams.js # default: analysis/
15
+ * node scripts/validate-mermaid-diagrams.js path/to/dir # custom root
16
+ * node scripts/validate-mermaid-diagrams.js --json # JSON output
17
+ * node scripts/validate-mermaid-diagrams.js --quiet # exit-code only
18
+ * node scripts/validate-mermaid-diagrams.js --limit 50 # cap files
19
+ *
20
+ * Exit code: 0 = all blocks parse, 1 = one or more blocks failed.
21
+ *
22
+ * The extractor mirrors the markdown-it fence rules used by the
23
+ * aggregator pipeline (triple-backtick or triple-tilde fences of any
24
+ * length ≥ 3, with a `mermaid` info string). Init directives (`%%{ … }%%`)
25
+ * are intentionally **not** stripped because the canonical universal init
26
+ * block is part of every valid diagram in this corpus.
27
+ */
28
+
29
+ import { promises as fs } from 'node:fs';
30
+ import path from 'node:path';
31
+ import process from 'node:process';
32
+ import { fileURLToPath } from 'node:url';
33
+
34
+ const FENCE_OPEN = /^(\s*)(`{3,}|~{3,})\s*mermaid\s*$/i; // Opening fence: optional indent (group 1), a run of 3+ backticks or 3+ tildes (group 2), then an info string that is exactly `mermaid`, case-insensitive, with optional surrounding whitespace.
35
+
36
/**
 * Recursively collect every file whose path ends in `.md` beneath `dir`.
 *
 * Entries are ordered at each level by plain UTF-16 code-unit comparison of
 * their names, so the result does not depend on the host's default locale.
 * Directories named `node_modules` or starting with `.` are not descended
 * into. A directory that does not exist (`ENOENT`) contributes nothing; any
 * other `readdir` failure is rethrown.
 *
 * @param {string} dir Absolute or relative directory path
 * @param {string[]} [out] Accumulator; matching paths are appended to it in
 *   place. A fresh array is used when omitted.
 * @returns {Promise<string[]>} The accumulator that was passed in (or created)
 */
async function walkMarkdownFiles(dir, out = []) {
  let entries;
  try {
    entries = await fs.readdir(dir, { withFileTypes: true });
  } catch (err) {
    if (err.code === 'ENOENT') return out;
    throw err;
  }
  // Code-unit ordering instead of `localeCompare()`: the latter follows the
  // runtime's default locale, which would let the report order vary by machine.
  entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
  for (const entry of entries) {
    const entryPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      if (entry.name === 'node_modules' || entry.name.startsWith('.')) continue;
      await walkMarkdownFiles(entryPath, out);
    } else if (entry.isFile() && entryPath.endsWith('.md')) {
      out.push(entryPath);
    }
  }
  return out;
}
64
+
65
/**
 * Detect the diagram type declared by the first content line of a mermaid
 * block, i.e. the first line that is not blank, not part of a leading YAML
 * frontmatter block (`---` … `---`), not part of a `%%{ … }%%` directive and
 * not a `%%` comment. Returns `unknown` when that line does not start with an
 * identifier-like keyword, or when no content line exists.
 *
 * @param {string} body Raw mermaid block body (no fences)
 * @returns {string} The leading keyword (e.g. `graph`, `stateDiagram-v2`) or `unknown`
 */
export function detectDiagramType(body) {
  let inDirective = false;
  let inFrontmatter = false;
  let atStart = true;
  for (const raw of body.split('\n')) {
    const line = raw.trim();
    if (line === '') continue;
    if (inFrontmatter) {
      if (line === '---') inFrontmatter = false;
      continue;
    }
    if (inDirective) {
      // A directive closes with `}%%`; `}}%%` is just the common case where the
      // directive's JSON object closes immediately before it.
      if (line.endsWith('}%%')) inDirective = false;
      continue;
    }
    const isFirstLine = atStart;
    atStart = false;
    // Frontmatter is only recognised as the very first non-blank line.
    if (isFirstLine && line === '---') {
      inFrontmatter = true;
      continue;
    }
    if (line.startsWith('%%{')) {
      // Single-line directives such as `%%{init: { ... } }%%` (note the space
      // between the braces) must not be mistaken for an open multi-line one.
      if (!line.endsWith('}%%')) inDirective = true;
      continue;
    }
    if (line.startsWith('%%')) continue;
    // First content line — extract the diagram keyword.
    const m = line.match(/^([A-Za-z][A-Za-z0-9_-]*)\b/);
    return m ? m[1] : 'unknown';
  }
  return 'unknown';
}
94
+
95
/**
 * Extract every ```mermaid fenced block from a Markdown document.
 *
 * Every fence opener is tracked, mermaid or not, together with its character
 * (` or ~) and length. A block ends only at a line made of the same character
 * repeated at least as many times, so an inner fence using a different
 * character or a shorter run does not terminate it. Because non-mermaid
 * fenced blocks are consumed as well, a ```mermaid example nested inside
 * another code fence is NOT reported as a diagram. An unterminated fence runs
 * to the end of the document.
 *
 * A mermaid block is omitted when the nearest non-blank line above its opening
 * fence contains `<!-- mermaid:skip [reason] -->`.
 *
 * @param {string} text Markdown source
 * @returns {{ startLine: number, body: string }[]}
 *   `startLine` is 1-based, pointing at the opening fence line itself.
 */
export function extractMermaidBlocks(text) {
  const lines = text.split('\n');
  const blocks = [];
  const skipRe = /<!--\s*mermaid:skip\b[^>]*-->/i;
  // Generic fence opener: a run of 3+ backticks or tildes followed by an
  // arbitrary info string (group 2).
  const anyFenceRe = /^\s*(`{3,}|~{3,})(.*)/;
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    const mermaidOpen = line.match(FENCE_OPEN);
    let fence = mermaidOpen ? mermaidOpen[2] : undefined;
    if (!mermaidOpen) {
      const other = line.match(anyFenceRe);
      // A backtick fence whose info string contains a backtick is inline code,
      // not a fence opener.
      if (other && !(other[1][0] === '`' && other[2].includes('`'))) fence = other[1];
    }
    if (fence === undefined) {
      i++;
      continue;
    }

    // Look back over blank lines to find a skip directive directly preceding
    // the fence. Only relevant for mermaid blocks.
    let skipped = false;
    if (mermaidOpen) {
      for (let j = i - 1; j >= 0; j--) {
        const prev = lines[j].trim();
        if (prev === '') continue;
        if (skipRe.test(prev)) skipped = true;
        break;
      }
    }

    const closeRe = new RegExp(`^\\s*${fence[0]}{${fence.length},}\\s*$`);
    const startLine = i + 1;
    const buf = [];
    i++;
    while (i < lines.length && !closeRe.test(lines[i])) {
      buf.push(lines[i]);
      i++;
    }
    if (mermaidOpen && !skipped) blocks.push({ startLine, body: buf.join('\n') });
    i++; // step past the closing fence (or past the end when unterminated)
  }
  return blocks;
}
142
+
143
let mermaidPromise = null;
/**
 * Return the shared, initialised `mermaid` instance, creating it on the
 * first call and handing back the same promise on every later call.
 *
 * On first use a happy-dom `Window` is created and its `window`,
 * `document`, `DOMParser`, `XMLSerializer`, `HTMLElement` and `SVGElement`
 * are copied onto `globalThis` — each one only when that global is currently
 * undefined, so an already-present DOM (for example one supplied by the test
 * runner) is left untouched. Mermaid is then imported and initialised with
 * `startOnLoad: false`, `securityLevel: 'loose'` and
 * `suppressErrorRendering: true`.
 *
 * @returns {Promise<import('mermaid').default>}
 */
async function loadMermaid() {
  if (!mermaidPromise) {
    const boot = async () => {
      const happyDom = await import('happy-dom');
      const domWindow = new happyDom.Window();
      // Fill in only the globals that are missing; never overwrite.
      if (typeof globalThis.window === 'undefined') globalThis.window = domWindow;
      const borrowed = ['document', 'DOMParser', 'XMLSerializer', 'HTMLElement', 'SVGElement'];
      for (const name of borrowed) {
        if (typeof globalThis[name] === 'undefined') globalThis[name] = domWindow[name];
      }
      const { default: mermaid } = await import('mermaid');
      mermaid.initialize({ startOnLoad: false, securityLevel: 'loose', suppressErrorRendering: true });
      return mermaid;
    };
    mermaidPromise = boot();
  }
  return mermaidPromise;
}
174
+
175
/**
 * Run the Mermaid parser over a single block and report the outcome.
 *
 * The diagram type is detected from the body up front and is included in
 * both outcomes. When `mermaid.parse()` throws or rejects, the error text
 * (its `message` when that is truthy, otherwise the thrown value
 * stringified) is reduced to its first line and capped at 240 characters.
 *
 * @param {string} body Mermaid block body (no fences)
 * @param {import('mermaid').default} mermaid Pre-loaded mermaid instance
 * @returns {Promise<{ ok: true, diagramType: string } | { ok: false, diagramType: string, error: string }>}
 */
export async function validateBlock(body, mermaid) {
  const diagramType = detectDiagramType(body);
  try {
    await mermaid.parse(body);
  } catch (err) {
    const text = err?.message ? String(err.message) : String(err);
    const [firstLine] = text.split('\n');
    return { ok: false, diagramType, error: firstLine.slice(0, 240) };
  }
  return { ok: true, diagramType };
}
194
+
195
/**
 * Validate every mermaid block under one or more roots.
 *
 * Markdown files are collected from each root in order, optionally capped to
 * the first `limit` files, then read and validated one block at a time.
 * Failing blocks are reported with their path relative to the current
 * working directory.
 *
 * @param {object} [opts]
 * @param {string[]} [opts.roots] Directory roots (default `['analysis']`)
 * @param {number} [opts.limit] Stop after this many files (debugging)
 * @returns {Promise<{
 *   filesScanned: number,
 *   filesWithBlocks: number,
 *   totalBlocks: number,
 *   okBlocks: number,
 *   failedBlocks: Array<{ file: string, startLine: number, diagramType: string, error: string }>,
 *   byDiagramType: Record<string, { ok: number, fail: number }>
 * }>}
 */
export async function validateRoots({ roots = ['analysis'], limit = Infinity } = {}) {
  const mermaid = await loadMermaid();
  const allFiles = [];
  for (const root of roots) await walkMarkdownFiles(root, allFiles);
  const files = limit < allFiles.length ? allFiles.slice(0, limit) : allFiles;
  const failedBlocks = [];
  const byDiagramType = {};
  let filesWithBlocks = 0;
  let totalBlocks = 0;
  let okBlocks = 0;
  for (const file of files) {
    const text = await fs.readFile(file, 'utf8');
    const blocks = extractMermaidBlocks(text);
    if (blocks.length > 0) filesWithBlocks++;
    for (const block of blocks) {
      totalBlocks++;
      const result = await validateBlock(block.body, mermaid);
      const type = result.diagramType;
      // The type is taken from diagram text, so it can collide with inherited
      // Object.prototype members (`constructor`, `toString`, ...). Test for an
      // OWN property so such a name gets a real bucket instead of the counters
      // being written onto the inherited function.
      if (!Object.hasOwn(byDiagramType, type)) byDiagramType[type] = { ok: 0, fail: 0 };
      const bucket = byDiagramType[type];
      if (result.ok) {
        okBlocks++;
        bucket.ok++;
      } else {
        bucket.fail++;
        failedBlocks.push({
          file: path.relative(process.cwd(), file),
          startLine: block.startLine,
          diagramType: type,
          error: result.error,
        });
      }
    }
  }
  return {
    filesScanned: files.length,
    filesWithBlocks,
    totalBlocks,
    okBlocks,
    failedBlocks,
    byDiagramType,
  };
}
251
+
252
/**
 * Render a validation report as plain text for the terminal.
 *
 * The output is a fixed header with the overall counters, one line per
 * diagram type (types sorted with the default string ordering) and, only
 * when at least one block failed, a trailing list of failing blocks in the
 * form `file:line [type] error`. Lines are joined with `\n`; no trailing
 * newline is added.
 *
 * @param {Awaited<ReturnType<typeof validateRoots>>} report
 * @returns {string}
 */
export function formatTextReport(report) {
  const { filesScanned, filesWithBlocks, totalBlocks, okBlocks, failedBlocks, byDiagramType } = report;

  const summary = [
    'Mermaid diagram audit',
    ` files scanned : ${filesScanned}`,
    ` files with mermaid : ${filesWithBlocks}`,
    ` total mermaid blocks : ${totalBlocks}`,
    ` parsed OK : ${okBlocks}`,
    ` FAILED : ${failedBlocks.length}`,
    '',
    ' by diagram type:',
  ];

  const perType = Object.keys(byDiagramType)
    .sort()
    .map((type) => {
      const counts = byDiagramType[type];
      const okCol = String(counts.ok).padStart(5);
      const failCol = String(counts.fail).padStart(4);
      return ` ${type.padEnd(22)} ok=${okCol} fail=${failCol}`;
    });

  const failures = failedBlocks.length > 0
    ? [
        '',
        ' failing blocks:',
        ...failedBlocks.map((f) => ` ${f.file}:${f.startLine} [${f.diagramType}] ${f.error}`),
      ]
    : [];

  return [...summary, ...perType, ...failures].join('\n');
}
282
+
283
/**
 * Command-line entry point.
 *
 * Recognised arguments: `--json` (print the report as JSON), `--quiet`
 * (print nothing), `--limit <n>` (cap the number of files) and any number
 * of positional root directories (default `analysis`). A `--limit` value
 * that is not numeric becomes `NaN`, which `validateRoots` treats as "no cap".
 *
 * Terminates the process with exit code 0 when no block failed, 1 otherwise.
 *
 * @returns {Promise<void>}
 */
/* istanbul ignore next */
async function mainCli() {
  const args = process.argv.slice(2);
  const json = args.includes('--json');
  const quiet = args.includes('--quiet');
  const limitIdx = args.indexOf('--limit');
  const limit = limitIdx >= 0 ? Number(args[limitIdx + 1] ?? Infinity) : Infinity;
  // Positional arguments are roots, except the value that follows `--limit`.
  const roots = args.filter((a, i) => !a.startsWith('--') && args[i - 1] !== '--limit');
  const report = await validateRoots({ roots: roots.length ? roots : ['analysis'], limit });

  let output = '';
  if (json) {
    output = `${JSON.stringify(report, null, 2)}\n`;
  } else if (!quiet) {
    output = `${formatTextReport(report)}\n`;
  }
  if (output !== '') {
    // process.exit() does not wait for pending stdout writes, so a large report
    // sent to a pipe could be cut short. Wait for the write callback first; a
    // write error is deliberately ignored here because we are exiting anyway.
    await new Promise((resolve) => {
      process.stdout.write(output, () => resolve());
    });
  }
  process.exit(report.failedBlocks.length === 0 ? 0 : 1);
}
299
+
300
// Run the CLI only when this module is the script Node was started with.
// Both sides are compared as filesystem paths: `import.meta.url` is a
// percent-encoded `file:` URL, so comparing it with a string built from
// `process.argv[1]` fails for paths containing spaces or non-ASCII characters
// and for Windows drive paths.
const isMain = (() => {
  const entry = process.argv[1];
  if (!entry) return false;
  try {
    return path.resolve(entry) === fileURLToPath(import.meta.url);
  } catch {
    // `import.meta.url` is not a `file:` URL — this is not a direct CLI run.
    return false;
  }
})();
if (isMain) {
  mainCli().catch((e) => { console.error(e); process.exit(2); });
}