euparliamentmonitor 0.9.21 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -2
- package/scripts/aggregator/article-metadata.js +69 -14
- package/scripts/aggregator/editorial-brief-resolver.js +23 -0
- package/scripts/aggregator/html/headline.d.ts +41 -9
- package/scripts/aggregator/html/headline.js +69 -10
- package/scripts/aggregator/html/shell.js +73 -17
- package/scripts/aggregator/manifest/index.d.ts +1 -1
- package/scripts/aggregator/manifest/index.js +1 -1
- package/scripts/aggregator/manifest/resolver.d.ts +28 -1
- package/scripts/aggregator/manifest/resolver.js +61 -5
- package/scripts/aggregator/markdown-renderer.js +11 -0
- package/scripts/aggregator/metadata/artifact-category-heading.d.ts +81 -0
- package/scripts/aggregator/metadata/artifact-category-heading.js +353 -0
- package/scripts/aggregator/metadata/artifact-walker.js +29 -10
- package/scripts/aggregator/metadata/brief-body.d.ts +12 -0
- package/scripts/aggregator/metadata/brief-body.js +69 -0
- package/scripts/aggregator/metadata/briefing-highlight.d.ts +47 -0
- package/scripts/aggregator/metadata/briefing-highlight.js +469 -0
- package/scripts/aggregator/metadata/editorial-highlight.d.ts +18 -0
- package/scripts/aggregator/metadata/editorial-highlight.js +40 -1
- package/scripts/aggregator/metadata/heading-rules.d.ts +2 -81
- package/scripts/aggregator/metadata/heading-rules.js +78 -269
- package/scripts/aggregator/metadata/keyword-filters.d.ts +60 -0
- package/scripts/aggregator/metadata/keyword-filters.js +156 -0
- package/scripts/aggregator/metadata/lede-extractor.js +11 -2
- package/scripts/aggregator/metadata/priority-finding-cleaning.d.ts +22 -0
- package/scripts/aggregator/metadata/priority-finding-cleaning.js +181 -0
- package/scripts/aggregator/metadata/priority-finding-highlight.js +75 -159
- package/scripts/aggregator/metadata/resolve-helpers.d.ts +34 -0
- package/scripts/aggregator/metadata/resolve-helpers.js +202 -15
- package/scripts/aggregator/metadata/seo-budgets.d.ts +140 -0
- package/scripts/aggregator/metadata/seo-budgets.js +202 -0
- package/scripts/aggregator/metadata/text-truncate.d.ts +75 -0
- package/scripts/aggregator/metadata/text-truncate.js +277 -0
- package/scripts/aggregator/metadata/text-utils-constants.d.ts +96 -0
- package/scripts/aggregator/metadata/text-utils-constants.js +209 -0
- package/scripts/aggregator/metadata/text-utils.d.ts +32 -143
- package/scripts/aggregator/metadata/text-utils.js +119 -439
- package/scripts/aggregator/metadata/title-rejection.d.ts +37 -0
- package/scripts/aggregator/metadata/title-rejection.js +179 -0
- package/scripts/copy-vendor.js +84 -112
- package/scripts/dump-article-seo.js +640 -0
- package/scripts/fix-mermaid-diagrams.js +931 -0
- package/scripts/generators/news-indexes/backfill.d.ts +6 -1
- package/scripts/generators/news-indexes/backfill.js +71 -4
- package/scripts/validate-article-seo.js +534 -0
- package/scripts/validate-mermaid-diagrams.js +306 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* @module ValidateMermaidDiagrams
|
|
5
|
+
* @description
|
|
6
|
+
* Systematic validator for every fenced ```mermaid block under one or
|
|
7
|
+
* more roots (default `analysis/`). Extracts each block, runs the real
|
|
8
|
+
* Mermaid v11 parser against it (via happy-dom) and reports broken
|
|
9
|
+
* diagrams with file, line, diagram type and a one-line error message.
|
|
10
|
+
*
|
|
11
|
+
* Designed to be deterministic, side-effect free, and fast enough to
|
|
12
|
+
* run as a Vitest gate and an `npm` script:
|
|
13
|
+
*
|
|
14
|
+
* node scripts/validate-mermaid-diagrams.js # default: analysis/
|
|
15
|
+
* node scripts/validate-mermaid-diagrams.js path/to/dir # custom root
|
|
16
|
+
* node scripts/validate-mermaid-diagrams.js --json # JSON output
|
|
17
|
+
* node scripts/validate-mermaid-diagrams.js --quiet # exit-code only
|
|
18
|
+
* node scripts/validate-mermaid-diagrams.js --limit 50 # cap files
|
|
19
|
+
*
|
|
20
|
+
* Exit code: 0 = all blocks parse, 1 = one or more blocks failed.
|
|
21
|
+
*
|
|
22
|
+
* The extractor mirrors the markdown-it fence rules used by the
|
|
23
|
+
* aggregator pipeline (triple-backtick or triple-tilde fences of any
|
|
24
|
+
* length ≥ 3, with a `mermaid` info string). Init directives (`%%{ … }%%`)
|
|
25
|
+
* are intentionally **not** stripped because the canonical universal init
|
|
26
|
+
* block is part of every valid diagram in this corpus.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { promises as fs } from 'node:fs';
|
|
30
|
+
import path from 'node:path';
|
|
31
|
+
import process from 'node:process';
|
|
32
|
+
import { fileURLToPath } from 'node:url';
|
|
33
|
+
|
|
34
|
+
// Opening fence of a mermaid block, matched case-insensitively against one line:
//   group 1 – leading whitespace before the fence,
//   group 2 – the fence itself (three or more backticks, or three or more tildes),
// followed by optional whitespace, the literal info string `mermaid`, and
// nothing but optional whitespace up to the end of the line.
const FENCE_OPEN = /^(\s*)(`{3,}|~{3,})\s*mermaid\s*$/i;
|
|
35
|
+
|
|
36
|
+
/**
 * Recursively walk a directory and collect every file whose path ends in
 * `.md`, appending the paths to `out`.
 *
 * Entries are sorted by name at each level before being visited. The
 * collation locale is pinned to `'en'` so the resulting order does not
 * depend on the host's default locale. Directories named `node_modules`
 * and directories whose name starts with `.` are not descended into.
 * A directory that does not exist (`ENOENT`) contributes nothing; any other
 * `readdir` failure is rethrown.
 *
 * @param {string} dir Absolute or relative directory path
 * @param {string[]} out Accumulator (caller-owned, mutated in place)
 * @returns {Promise<string[]>} The same `out` array
 */
async function walkMarkdownFiles(dir, out) {
  let entries;
  try {
    entries = await fs.readdir(dir, { withFileTypes: true });
  } catch (e) {
    if (e?.code === 'ENOENT') return out;
    throw e;
  }
  // Explicit locale: the default collation varies with the host environment.
  entries.sort((a, b) => a.name.localeCompare(b.name, 'en'));
  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      if (entry.name === 'node_modules' || entry.name.startsWith('.')) continue;
      await walkMarkdownFiles(fullPath, out);
    } else if (entry.isFile() && fullPath.endsWith('.md')) {
      out.push(fullPath);
    }
  }
  return out;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Detect the diagram type declared by the first non-blank, non-comment,
 * non-directive line of a mermaid block.
 *
 * Lines are trimmed before inspection. A directive starts on a line that
 * begins with `%%{` and ends on the first line that ends with `}%%`, so both
 * single-line (`%%{init: {...}}%%`, `%%{init: {...} }%%`) and multi-line
 * directives whose last line is a bare `}%%` are skipped. Any other line
 * starting with `%%` is treated as a comment.
 *
 * @param {string} body Raw mermaid block body (no fences)
 * @returns {string} The leading keyword of the first content line (letters,
 *   digits, `_` and `-`, starting with a letter), or `unknown` when there is
 *   no content line or it does not start with such a keyword.
 */
export function detectDiagramType(body) {
  let inDirective = false;
  for (const raw of body.split('\n')) {
    const line = raw.trim();
    if (line === '') continue;
    if (inDirective) {
      // The directive terminator is `}%%`; requiring `}}%%` would miss a
      // closing brace on its own line or one preceded by whitespace.
      if (line.endsWith('}%%')) inDirective = false;
      continue;
    }
    if (line.startsWith('%%{')) {
      if (!line.endsWith('}%%')) inDirective = true;
      continue;
    }
    if (line.startsWith('%%')) continue;
    // First content line — extract the diagram keyword.
    const keyword = line.match(/^([A-Za-z][A-Za-z0-9_-]*)\b/);
    return keyword ? keyword[1] : 'unknown';
  }
  return 'unknown';
}
|
|
94
|
+
|
|
95
|
+
/**
 * Collect every fenced block whose opening line matches `FENCE_OPEN`.
 *
 * The closing fence must use the same character as the opener (backtick or
 * tilde) and be at least as long, so a shorter fence or one made of the other
 * character inside the block is kept as body text. A block that is never
 * closed runs to the end of the document.
 *
 * A block is omitted from the result when the nearest non-blank line above
 * its opening fence contains a `<!-- mermaid:skip [reason] -->` comment.
 *
 * @param {string} text Markdown source
 * @returns {{ startLine: number, body: string }[]}
 *   `startLine` is 1-based and points at the opening fence line itself;
 *   `body` is the text between the fences joined with `\n`.
 */
export function extractMermaidBlocks(text) {
  const skipDirective = /<!--\s*mermaid:skip\b[^>]*-->/i;
  const rows = text.split('\n');
  const found = [];

  // True when the closest non-blank line above `fenceIdx` is a skip directive.
  const isSkipped = (fenceIdx) => {
    let k = fenceIdx - 1;
    while (k >= 0 && rows[k].trim() === '') k -= 1;
    return k >= 0 && skipDirective.test(rows[k].trim());
  };

  for (let cursor = 0; cursor < rows.length; cursor += 1) {
    const opener = FENCE_OPEN.exec(rows[cursor]);
    if (opener === null) continue;

    const marker = opener[2];
    const fenceChar = marker[0] === '`' ? '`' : '~';
    const closer = new RegExp(`^\\s*${fenceChar}{${marker.length},}\\s*$`);
    const skip = isSkipped(cursor);

    const firstBodyIdx = cursor + 1;
    let end = firstBodyIdx;
    while (end < rows.length && !closer.test(rows[end])) end += 1;

    if (!skip) {
      found.push({ startLine: cursor + 1, body: rows.slice(firstBodyIdx, end).join('\n') });
    }
    // Resume after the closing fence (the loop increment steps past it).
    cursor = end;
  }
  return found;
}
|
|
142
|
+
|
|
143
|
+
// Memoised result of the first loadMermaid() call; null until then.
let mermaidPromise = null;

/**
 * Return the shared, initialised `mermaid` instance, creating it on first use.
 *
 * On the first call this imports `happy-dom`, creates a `Window`, and copies
 * `window`, `document`, `DOMParser`, `XMLSerializer`, `HTMLElement` and
 * `SVGElement` onto `globalThis` — each one only when that global is currently
 * undefined, so an already-present DOM is left alone. It then imports
 * `mermaid` and initialises it with `startOnLoad: false`,
 * `securityLevel: 'loose'` and `suppressErrorRendering: true`.
 *
 * Every later call returns the same promise.
 *
 * @returns {Promise<import('mermaid').default>}
 */
async function loadMermaid() {
  mermaidPromise ??= (async () => {
    const { Window } = await import('happy-dom');
    const win = new Window();
    const shimNames = ['window', 'document', 'DOMParser', 'XMLSerializer', 'HTMLElement', 'SVGElement'];
    for (const name of shimNames) {
      // Skip globals that already exist (e.g. a DOM provided by the test runner).
      if (typeof globalThis[name] !== 'undefined') continue;
      globalThis[name] = name === 'window' ? win : win[name];
    }
    const { default: mermaid } = await import('mermaid');
    mermaid.initialize({ startOnLoad: false, securityLevel: 'loose', suppressErrorRendering: true });
    return mermaid;
  })();
  return mermaidPromise;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Run `mermaid.parse()` on a single block body and report the outcome.
 *
 * The diagram type is detected up front with `detectDiagramType` and included
 * in both result shapes. When `parse` throws or rejects, the error text is
 * the thrown value's `message` (or the stringified value when there is no
 * message), cut down to its first line and at most 240 characters.
 *
 * @param {string} body Mermaid block body (no fences)
 * @param {import('mermaid').default} mermaid Pre-loaded mermaid instance
 * @returns {Promise<{ ok: true, diagramType: string } | { ok: false, diagramType: string, error: string }>}
 */
export async function validateBlock(body, mermaid) {
  const diagramType = detectDiagramType(body);
  let errorText;
  try {
    await mermaid.parse(body);
  } catch (thrown) {
    const fullMessage = thrown && thrown.message ? String(thrown.message) : String(thrown);
    const [firstLine] = fullMessage.split('\n');
    errorText = firstLine.slice(0, 240);
  }
  if (errorText === undefined) {
    return { ok: true, diagramType };
  }
  return { ok: false, diagramType, error: errorText };
}
|
|
194
|
+
|
|
195
|
+
/**
 * Validate every mermaid block under one or more roots.
 *
 * Files are collected with `walkMarkdownFiles` (roots in the given order),
 * read as UTF-8, split into blocks with `extractMermaidBlocks`, and each
 * block is checked with `validateBlock`. Everything runs sequentially.
 *
 * `limit` is coerced to a number and truncated to an integer. Negative
 * values are treated as 0; `NaN` and `Infinity` mean "no limit".
 *
 * @param {object} opts
 * @param {string[]} opts.roots Directory roots (default `['analysis']`)
 * @param {number} [opts.limit] Stop after this many files (debugging)
 * @returns {Promise<{
 *   filesScanned: number,
 *   filesWithBlocks: number,
 *   totalBlocks: number,
 *   okBlocks: number,
 *   failedBlocks: Array<{ file: string, startLine: number, diagramType: string, error: string }>,
 *   byDiagramType: Record<string, { ok: number, fail: number }>
 * }>} `failedBlocks[].file` is relative to the current working directory.
 */
export async function validateRoots({ roots = ['analysis'], limit = Infinity } = {}) {
  const mermaid = await loadMermaid();
  const allFiles = [];
  for (const root of roots) await walkMarkdownFiles(root, allFiles);

  // Clamp the limit: a negative value passed straight to slice() would drop
  // files from the END of the list instead of capping the count.
  const requested = Number(limit);
  const cap = Number.isNaN(requested) ? Infinity : Math.max(0, Math.trunc(requested));
  const files = cap < allFiles.length ? allFiles.slice(0, cap) : allFiles;

  const failedBlocks = [];
  const byDiagramType = {};
  let filesWithBlocks = 0;
  let totalBlocks = 0;
  let okBlocks = 0;
  for (const file of files) {
    const text = await fs.readFile(file, 'utf8');
    const blocks = extractMermaidBlocks(text);
    if (blocks.length > 0) filesWithBlocks++;
    for (const b of blocks) {
      totalBlocks++;
      const result = await validateBlock(b.body, mermaid);
      // Own-property check: a type named like an inherited member
      // (`constructor`, `toString`, …) must get its own bucket rather than
      // resolving to the value on Object.prototype.
      if (!Object.hasOwn(byDiagramType, result.diagramType)) {
        byDiagramType[result.diagramType] = { ok: 0, fail: 0 };
      }
      const bucket = byDiagramType[result.diagramType];
      if (result.ok) {
        okBlocks++;
        bucket.ok++;
      } else {
        bucket.fail++;
        failedBlocks.push({
          file: path.relative(process.cwd(), file),
          startLine: b.startLine,
          diagramType: result.diagramType,
          error: result.error,
        });
      }
    }
  }
  return {
    filesScanned: files.length,
    filesWithBlocks,
    totalBlocks,
    okBlocks,
    failedBlocks,
    byDiagramType,
  };
}
|
|
251
|
+
|
|
252
|
+
/**
 * Render a validation report as plain text.
 *
 * The output has a header with the five summary counters, a per-type table
 * (types sorted alphabetically), and — only when at least one block failed —
 * a list with one `file:line [type] error` entry per failure. Lines are
 * joined with `\n` and there is no trailing newline.
 *
 * @param {Awaited<ReturnType<typeof validateRoots>>} report
 * @returns {string}
 */
export function formatTextReport(report) {
  const { filesScanned, filesWithBlocks, totalBlocks, okBlocks, failedBlocks, byDiagramType } = report;

  const header = [
    `Mermaid diagram audit`,
    ` files scanned : ${filesScanned}`,
    ` files with mermaid : ${filesWithBlocks}`,
    ` total mermaid blocks : ${totalBlocks}`,
    ` parsed OK : ${okBlocks}`,
    ` FAILED : ${failedBlocks.length}`,
    '',
    ' by diagram type:',
  ];

  const typeRows = Object.keys(byDiagramType)
    .sort()
    .map((type) => {
      const { ok, fail } = byDiagramType[type];
      return ` ${type.padEnd(22)} ok=${String(ok).padStart(5)} fail=${String(fail).padStart(4)}`;
    });

  const failureRows = [];
  if (failedBlocks.length > 0) {
    failureRows.push('', ' failing blocks:');
    for (const failure of failedBlocks) {
      failureRows.push(` ${failure.file}:${failure.startLine} [${failure.diagramType}] ${failure.error}`);
    }
  }

  return [...header, ...typeRows, ...failureRows].join('\n');
}
|
|
282
|
+
|
|
283
|
+
/**
 * Command-line entry point.
 *
 * Recognised arguments: `--json` (print the report as JSON), `--quiet`
 * (print nothing), `--limit <n>` (cap the number of files) and any number of
 * positional roots (default `analysis`). `--json` wins over `--quiet`.
 *
 * A `--limit` that is given a value must be a non-negative integer; anything
 * else throws instead of being silently ignored. A trailing `--limit` with
 * no value means "no limit".
 *
 * The process exits with 0 when every block parsed and 1 otherwise. When
 * there is output, the exit happens in the `stdout.write` callback so the
 * report is flushed before the process terminates.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the `--limit` value is not a non-negative integer.
 */
/* istanbul ignore next */
async function mainCli() {
  const args = process.argv.slice(2);
  const json = args.includes('--json');
  const quiet = args.includes('--quiet');

  let limit = Infinity;
  const limitIdx = args.indexOf('--limit');
  const rawLimit = limitIdx >= 0 ? args[limitIdx + 1] : undefined;
  if (rawLimit !== undefined) {
    limit = Number(rawLimit);
    // Number('') is 0, so an empty value has to be rejected explicitly.
    if (rawLimit.trim() === '' || !Number.isInteger(limit) || limit < 0) {
      throw new Error(`Invalid --limit value "${rawLimit}": expected a non-negative integer`);
    }
  }

  // Positional arguments are roots, except the value that follows --limit.
  const roots = args.filter((a, i) => !a.startsWith('--') && args[i - 1] !== '--limit');
  const report = await validateRoots({ roots: roots.length ? roots : ['analysis'], limit });
  const exitCode = report.failedBlocks.length === 0 ? 0 : 1;

  let output = null;
  if (json) {
    output = `${JSON.stringify(report, null, 2)}\n`;
  } else if (!quiet) {
    output = `${formatTextReport(report)}\n`;
  }

  if (output === null) {
    process.exit(exitCode);
  } else {
    // process.exit() does not wait for pending asynchronous stdout writes,
    // so exit only once the write has completed.
    process.stdout.write(output, () => process.exit(exitCode));
  }
}
|
|
299
|
+
|
|
300
|
+
// True when this module is the script Node was started with. Both sides are
// compared as filesystem paths: import.meta.url is a percent-encoded file URL
// (`file:///C:/…` on Windows), so comparing it with a hand-built
// `file://${argv[1]}` string fails for Windows paths and for paths containing
// characters that need encoding.
const isMain = (() => {
  const entry = process.argv[1];
  if (!entry) return false;
  try {
    return fileURLToPath(import.meta.url) === path.resolve(entry);
  } catch {
    return false;
  }
})();
if (isMain) {
  // Unexpected failures exit with 2, distinct from 1 ("some diagrams failed").
  mainCli().catch((e) => { console.error(e); process.exit(2); });
}
|