euparliamentmonitor 0.9.21 → 0.9.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/package.json +6 -2
  2. package/scripts/aggregator/article-metadata.js +69 -14
  3. package/scripts/aggregator/editorial-brief-resolver.js +23 -0
  4. package/scripts/aggregator/html/headline.d.ts +41 -9
  5. package/scripts/aggregator/html/headline.js +69 -10
  6. package/scripts/aggregator/html/shell.js +73 -17
  7. package/scripts/aggregator/manifest/index.d.ts +1 -1
  8. package/scripts/aggregator/manifest/index.js +1 -1
  9. package/scripts/aggregator/manifest/resolver.d.ts +28 -1
  10. package/scripts/aggregator/manifest/resolver.js +61 -5
  11. package/scripts/aggregator/markdown-renderer.js +11 -0
  12. package/scripts/aggregator/metadata/artifact-category-heading.d.ts +81 -0
  13. package/scripts/aggregator/metadata/artifact-category-heading.js +353 -0
  14. package/scripts/aggregator/metadata/artifact-walker.js +29 -10
  15. package/scripts/aggregator/metadata/brief-body.d.ts +12 -0
  16. package/scripts/aggregator/metadata/brief-body.js +69 -0
  17. package/scripts/aggregator/metadata/briefing-highlight.d.ts +47 -0
  18. package/scripts/aggregator/metadata/briefing-highlight.js +469 -0
  19. package/scripts/aggregator/metadata/editorial-highlight.d.ts +18 -0
  20. package/scripts/aggregator/metadata/editorial-highlight.js +40 -1
  21. package/scripts/aggregator/metadata/heading-rules.d.ts +2 -81
  22. package/scripts/aggregator/metadata/heading-rules.js +78 -269
  23. package/scripts/aggregator/metadata/keyword-filters.d.ts +60 -0
  24. package/scripts/aggregator/metadata/keyword-filters.js +156 -0
  25. package/scripts/aggregator/metadata/lede-extractor.js +11 -2
  26. package/scripts/aggregator/metadata/priority-finding-cleaning.d.ts +22 -0
  27. package/scripts/aggregator/metadata/priority-finding-cleaning.js +181 -0
  28. package/scripts/aggregator/metadata/priority-finding-highlight.js +75 -159
  29. package/scripts/aggregator/metadata/resolve-helpers.d.ts +34 -0
  30. package/scripts/aggregator/metadata/resolve-helpers.js +202 -15
  31. package/scripts/aggregator/metadata/seo-budgets.d.ts +140 -0
  32. package/scripts/aggregator/metadata/seo-budgets.js +202 -0
  33. package/scripts/aggregator/metadata/text-truncate.d.ts +75 -0
  34. package/scripts/aggregator/metadata/text-truncate.js +277 -0
  35. package/scripts/aggregator/metadata/text-utils-constants.d.ts +96 -0
  36. package/scripts/aggregator/metadata/text-utils-constants.js +209 -0
  37. package/scripts/aggregator/metadata/text-utils.d.ts +32 -143
  38. package/scripts/aggregator/metadata/text-utils.js +119 -439
  39. package/scripts/aggregator/metadata/title-rejection.d.ts +37 -0
  40. package/scripts/aggregator/metadata/title-rejection.js +179 -0
  41. package/scripts/copy-vendor.js +84 -112
  42. package/scripts/dump-article-seo.js +640 -0
  43. package/scripts/fix-mermaid-diagrams.js +931 -0
  44. package/scripts/generators/news-indexes/backfill.d.ts +6 -1
  45. package/scripts/generators/news-indexes/backfill.js +71 -4
  46. package/scripts/validate-article-seo.js +534 -0
  47. package/scripts/validate-mermaid-diagrams.js +306 -0
@@ -0,0 +1,156 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * @module Aggregator/Metadata/KeywordFilters
5
+ * @description Cross-site keyword catalogue and noise-token filter used
6
+ * by {@link buildSeoKeywords} in `resolve-helpers.ts`.
7
+ *
8
+ * Two responsibilities:
9
+ *
10
+ * 1. **Always-on cross-site keywords** ({@link CROSS_SITE_KEYWORDS})
11
+ * are prepended to every article's `<meta name="keywords">` list
12
+ * regardless of language, so search-engine discovery of the
13
+ * Hack23 civic-tech portfolio (EU Parliament Monitor +
14
+ * Riksdagsmonitor + CIA) is consistent across all 14 localized
15
+ * surfaces. The user explicitly requested
16
+ * `riksdagsmonitor, political intelligence, riksdag, regeringen`
17
+ * (the sister Swedish-Parliament project) plus EP analogues.
18
+ *
19
+ * 2. **Noise-token rejection** ({@link isNoiseKeywordToken}) drops
20
+ * the UUID-fragment tokens (`77fc920c`, `3a76`, `9db5`, …) and
21
+ * synthetic run-id slugs (`propositions-run261-1779431162`) that
22
+ * the previous keyword extractor leaked into `<head>` when a
23
+ * brief mentioned its own run id editorially (e.g.
24
+ * `Analysis run 77fc920c-3a76-4813-9db5-43a7e9acc25e returned
25
+ * 0 classified actors`).
26
+ *
27
+ * Pure leaf module — no imports.
28
+ */
29
+ /**
30
+ * Cross-site SEO keywords prepended to every article in every
31
+ * language. Order is meaningful: stronger civic-tech-portfolio terms
32
+ * first so they appear ahead of the per-article-type keywords when
33
+ * the 16-entry budget is exceeded.
34
+ */
35
+ export const CROSS_SITE_KEYWORDS = [
36
+ 'EU Parliament Monitor',
37
+ 'European Parliament',
38
+ 'European Commission',
39
+ 'political intelligence',
40
+ 'Riksdagsmonitor',
41
+ 'Riksdag',
42
+ 'Regeringen',
43
+ ];
44
+ /**
45
+ * Lower-case allowlist of common English words that the noise filter
46
+ * must always keep, even when their shape would otherwise match the
47
+ * "looks like a hex token" heuristic (e.g. `face`, `dead`, `beef`).
48
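+ * Kept intentionally tiny to avoid lexicon drift. NOTE: as written, {@link isNoiseKeywordToken} rejects every token shorter than 4 characters before this set is consulted (so `fed`, `add`, `dad`, `bad` are dropped), and it keeps every digit-free hex-shaped token of 4–7 characters whether or not it is listed, so membership does not currently change the result.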
49
+ */
50
+ const HEX_ALPHABETIC_ALLOWLIST = new Set([
51
+ 'face',
52
+ 'fade',
53
+ 'dead',
54
+ 'beef',
55
+ 'cafe',
56
+ 'feed',
57
+ 'deed',
58
+ 'fed',
59
+ 'add',
60
+ 'dad',
61
+ 'bad',
62
+ ]);
63
+ /**
64
+ * Detect run-id slug chains of the form
65
+ * `<letters>(-<letters>)*-run<digits>(-<digits>)*` — e.g.
66
+ * `propositions-run261-1779431162` or
67
+ * `breaking-news-run17-1234567890`. Implemented as a split-and-scan
68
+ * walker (instead of a single backtracking regex) to satisfy the
69
+ * `security/detect-unsafe-regex` lint rule.
70
+ *
71
+ * @param lower - Lower-case candidate token
72
+ * @returns `true` when the token matches the run-id slug shape
73
+ */
74
+ function isRunSlugChain(lower) {
75
+ const parts = lower.split('-');
76
+ if (parts.length < 2)
77
+ return false;
78
+ let runIndex = -1;
79
+ for (let i = 0; i < parts.length; i++) {
80
+ if (/^run\d+$/u.test(parts[i] ?? '')) {
81
+ runIndex = i;
82
+ break;
83
+ }
84
+ }
85
+ if (runIndex <= 0)
86
+ return false;
87
+ // Every segment before `run<digits>` must be all-letters; every
88
+ // segment after must be all-digits.
89
+ for (let i = 0; i < runIndex; i++) {
90
+ if (!/^[a-z]+$/u.test(parts[i] ?? ''))
91
+ return false;
92
+ }
93
+ for (let i = runIndex + 1; i < parts.length; i++) {
94
+ if (!/^\d+$/u.test(parts[i] ?? ''))
95
+ return false;
96
+ }
97
+ return true;
98
+ }
99
+ /**
100
+ * Decide whether a single keyword token should be discarded as noise.
101
+ *
102
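+ * Empty tokens and tokens shorter than 4 characters (after trimming) are always rejected. Beyond that, the current rules reject tokens that: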
103
+ *
104
+ * - Look like a UUID hex chunk: ≥4 chars and consist solely of the
105
+ * `[0-9a-f]` alphabet **and** contain at least one digit (so
106
+ * real English words like `dead` / `face` survive). Tokens of
107
+ * length ≥8 are always rejected (a real English word of that
108
+ * length composed exclusively of hex letters is vanishingly rare;
109
+ * the allowlist guards the short cases).
110
+ * - Are mostly digits (≥80 % digit characters) — runtime epoch
111
+ * suffixes such as `1779431162` and committee-codeoid mashes like
112
+ * `2024k1234`.
113
+ * - Start with `run` and end with all-digits (`run261`, `run17`),
114
+ * the per-run slug suffix the aggregator stamps onto run ids.
115
+ * - Match the full opaque-runId shape `<type>-run<digits>-<digits>`
116
+ * after a strip / normalization round-trip.
117
+ *
118
+ * Returns `false` for normal vocabulary so the keyword list stays
119
+ * useful — every reject path is intentionally narrow.
120
+ *
121
+ * @param token - Single token candidate
122
+ * @returns `true` when the token should be dropped from keywords
123
+ */
124
+ export function isNoiseKeywordToken(token) {
125
+ if (!token)
126
+ return true;
127
+ const trimmed = token.trim();
128
+ if (trimmed.length < 4)
129
+ return true;
130
+ const lower = trimmed.toLowerCase();
131
+ // Reject pure-digit and digit-dominated tokens.
132
+ if (/^\d+$/u.test(lower))
133
+ return true;
134
+ const digitCount = (lower.match(/\d/gu) ?? []).length;
135
+ if (digitCount > 0 && digitCount / lower.length >= 0.8)
136
+ return true;
137
+ // Reject `run<digits>` slugs and `…-run<digits>-<digits>` chains.
138
+ if (/^run\d+$/u.test(lower))
139
+ return true;
140
+ if (isRunSlugChain(lower))
141
+ return true;
142
+ // Reject hex-shaped tokens unless they are common English words.
143
+ const isHex = /^[0-9a-f]+$/u.test(lower);
144
+ if (isHex) {
145
+ if (lower.length >= 8)
146
+ return true;
147
+ if (digitCount > 0)
148
+ return true;
149
+ if (HEX_ALPHABETIC_ALLOWLIST.has(lower))
150
+ return false;
151
+ // Short all-letter hex words: keep (avoids overfitting).
152
+ return false;
153
+ }
154
+ return false;
155
+ }
156
+ //# sourceMappingURL=keyword-filters.js.map
@@ -13,7 +13,7 @@
13
13
  * helpers) and heading-rules (for the editorial-lede whitelist and the
14
14
  * heading-text normaliser).
15
15
  */
16
- import { DESCRIPTION_MAX_LENGTH, EXTENDED_DESCRIPTION_MAX_LENGTH, shouldSkipDescriptionLine, stripInlineMarkdown, stripLeadingProseLabel, truncateDescription, truncateExtendedDescription, } from './text-utils.js';
16
+ import { DESCRIPTION_MAX_LENGTH, EXTENDED_DESCRIPTION_MAX_LENGTH, shouldSkipDescriptionLine, stripInlineMarkdown, stripLeadingBoldLabel, stripLeadingProseLabel, truncateDescription, truncateExtendedDescription, } from './text-utils.js';
17
17
  import { EDITORIAL_LEDE_HEADINGS, isLedeHeadingMatch, normaliseHeadingText, } from './heading-rules.js';
18
18
  /**
19
19
  * Process one Markdown line against the in-progress paragraph buffer.
@@ -35,7 +35,16 @@ function collectProseLine(line, buf) {
35
35
  return 'continue';
36
36
  if (shouldSkipDescriptionLine(line))
37
37
  return hasBuffer ? 'break' : 'continue';
38
- const plain = stripLeadingProseLabel(stripInlineMarkdown(line));
38
+ // Strip the leading `**Label:**` opener (any language) *before*
39
+ // running the inline-markdown stripper, so localized BLUF labels
40
+ // like `**Fråga:**` / `**主題:**` / `**الموضوع:**` are removed
41
+ // structurally rather than leaking into the description as plain
42
+ // text (`"Fråga: …"`). The English `**Issue:**` line is already
43
+ // skipped earlier by METADATA_LINE_PREFIXES; this code path covers
44
+ // the 13 non-English locales for which the label vocabulary is
45
+ // open-ended.
46
+ const stripped = stripLeadingBoldLabel(line);
47
+ const plain = stripLeadingProseLabel(stripInlineMarkdown(stripped));
39
48
  if (!hasBuffer && plain.length < 40)
40
49
  return 'continue';
41
50
  buf.lines.push(plain);
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Strip the trailing parenthesised metadata that briefs append to every
3
+ * priority-finding name — procedure codes, dates, committee tags. The
4
+ * regex is anchored to the end of the text and excludes nested
5
+ * parentheses, so it removes only the LAST parenthesised group on the line.
6
+ *
7
+ * @param text - Headline or paragraph text
8
+ * @returns Text with the trailing `(…)` stripped
9
+ */
10
+ export declare function stripPriorityTailMetadata(text: string): string;
11
+ /**
12
+ * Normalise a priority-finding headline: drop the
13
+ * `Trigger N:` / `Dossier N:` / leading-numeric prefix, strip trailing
14
+ * parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
15
+ * `(ITRE/ENVI)`), and trim residual punctuation. The result is a
16
+ * headline-shaped string suitable for `<title>` use.
17
+ *
18
+ * @param raw - Raw bold-title or heading text
19
+ * @returns Cleaned headline (may be empty after stripping)
20
+ */
21
+ export declare function cleanPriorityHeadline(raw: string): string;
22
+ //# sourceMappingURL=priority-finding-cleaning.d.ts.map
@@ -0,0 +1,181 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * @module Aggregator/Metadata/PriorityFindingCleaning
5
+ * @description Headline-cleaning helpers extracted from
6
+ * `priority-finding-highlight.ts` to keep that module under the
7
+ * 600-line drift-guard budget enforced by
8
+ * `test/unit/source-file-size.test.js`.
9
+ *
10
+ * Public entry point: {@link cleanPriorityHeadline} — normalises a
11
+ * raw bold-title / heading string by stripping priority decorations
12
+ * (`🔴 CRITICAL — `), editorial prefixes (`Trigger 1: `), trailing
13
+ * confidence markers (`🔴 CRITICAL`), and parenthesised tail metadata
14
+ * (`(TA-10-2026-0160, 2026-04-30)`). {@link stripPriorityTailMetadata} is exported as well.
15
+ *
16
+ * Bounded-context rules:
17
+ * - **Pure helpers** — no I/O, no globals.
18
+ * - **Deterministic** — same input always produces same output.
19
+ * - **Reusable** — every helper accepts a plain string and returns a
20
+ * plain string; suitable for property-tests.
21
+ */
22
+ import { stripInlineMarkdown } from './text-utils.js';
23
+ /**
24
+ * Leading priority-label tokens stripped by {@link cleanPriorityHeadline}
25
+ * (`🔴 CRITICAL — Title` → `Title`). Kept as a list to bypass the
26
+ * unsafe-regex lint by avoiding deep alternation in a single pattern.
27
+ */
28
+ const PRIORITY_LABEL_TOKENS = [
29
+ 'CRITICAL',
30
+ 'HIGH PRIORITY',
31
+ 'HIGH',
32
+ 'MEDIUM PRIORITY',
33
+ 'MEDIUM',
34
+ 'LOW PRIORITY',
35
+ 'LOW',
36
+ 'URGENT',
37
+ 'ALERT',
38
+ 'PRIORITY',
39
+ ];
40
+ /**
41
+ * Trailing confidence-marker tokens stripped by
42
+ * {@link cleanPriorityHeadline}. Same rationale as
43
+ * {@link PRIORITY_LABEL_TOKENS}.
44
+ */
45
+ const PRIORITY_TRAILING_TOKENS = [
46
+ 'CRITICAL',
47
+ 'HIGH PRIORITY',
48
+ 'HIGH',
49
+ 'MEDIUM PRIORITY',
50
+ 'MEDIUM',
51
+ 'LOW PRIORITY',
52
+ 'LOW',
53
+ ];
54
+ /**
55
+ * Leading editorial-prefix tokens stripped by
56
+ * {@link cleanPriorityHeadline} (`Trigger 1: Title` → `Title`).
57
+ */
58
+ const PRIORITY_LEADING_PREFIX_TOKENS = [
59
+ 'Trigger',
60
+ 'Dossier',
61
+ 'Priority',
62
+ 'Finding',
63
+ 'Item',
64
+ 'Highlight',
65
+ 'Top',
66
+ 'Story',
67
+ 'Alert',
68
+ 'Judgement',
69
+ 'Judgment',
70
+ ];
71
+ /**
72
+ * Strip a leading priority decoration (`🔴 `, `CRITICAL — `) from a
73
+ * candidate headline. Extracted from {@link cleanPriorityHeadline} to
74
+ * keep cognitive complexity within budget.
75
+ *
76
+ * @param text - Candidate headline (already trimmed)
77
+ * @returns Headline with the leading decoration removed
78
+ */
79
+ function stripPriorityLeadingDecoration(text) {
80
+ let out = text;
81
+ for (let pass = 0; pass < 2; pass++) {
82
+ out = out.replace(/^[^\p{L}\p{N}]+/u, '').trim();
83
+ for (const token of PRIORITY_LABEL_TOKENS) {
84
+ if (out.toLowerCase().startsWith(token.toLowerCase())) {
85
+ const rest = out.slice(token.length).trim();
86
+ const sep = rest.match(/^[:—–-]\s*(.+)$/u);
87
+ if (sep?.[1]) {
88
+ out = sep[1].trim();
89
+ break;
90
+ }
91
+ }
92
+ }
93
+ }
94
+ return out;
95
+ }
96
+ /**
97
+ * Strip a leading editorial prefix (`Trigger 1: `, `Dossier 2: `) and a
98
+ * stray leading ordinal (`1. `, `2) `) from a candidate headline.
99
+ *
100
+ * @param text - Candidate headline
101
+ * @returns Headline with the leading editorial decoration removed
102
+ */
103
+ function stripPriorityLeadingPrefix(text) {
104
+ let out = text;
105
+ for (const token of PRIORITY_LEADING_PREFIX_TOKENS) {
106
+ if (!out.toLowerCase().startsWith(token.toLowerCase()))
107
+ continue;
108
+ const rest = out.slice(token.length);
109
+ const match = rest.match(/^\s+\d+\s*[:–—-]\s*(.+)$/u);
110
+ if (match?.[1]) {
111
+ out = match[1];
112
+ break;
113
+ }
114
+ }
115
+ // Drop a stray leading "1. " / "2) " ordinal.
116
+ out = out.replace(/^\d+[.):·\s]\s*/u, '');
117
+ return out;
118
+ }
119
+ /**
120
+ * Strip a trailing confidence marker (`🔴 CRITICAL`, `🟡 MEDIUM`) from a
121
+ * candidate headline. Single pass — caller invokes inside a fixed-point
122
+ * loop.
123
+ *
124
+ * @param text - Candidate headline
125
+ * @returns Headline with the trailing confidence marker removed
126
+ */
127
+ function stripPriorityTrailingMarker(text) {
128
+ let out = text;
129
+ for (const token of PRIORITY_TRAILING_TOKENS) {
130
+ const pattern = new RegExp(`\\s+[^\\p{L}\\p{N}\\s]?\\s*${token}\\s*$`, 'iu');
131
+ const next = out.replace(pattern, '');
132
+ if (next !== out) {
133
+ out = next;
134
+ break;
135
+ }
136
+ }
137
+ return out;
138
+ }
139
+ /**
140
+ * Strip the trailing parenthesised metadata that briefs append to every
141
+ * priority-finding name — procedure codes, dates, committee tags. The
142
+ * regex is anchored to the end of the text and excludes nested
143
+ * parentheses, so it removes only the LAST parenthesised group on the line.
144
+ *
145
+ * @param text - Headline or paragraph text
146
+ * @returns Text with the trailing `(…)` stripped
147
+ */
148
+ export function stripPriorityTailMetadata(text) {
149
+ return text.replace(/\s*\([^()]{3,80}\)\s*$/u, '').trim();
150
+ }
151
+ /**
152
+ * Normalise a priority-finding headline: drop the
153
+ * `Trigger N:` / `Dossier N:` / leading-numeric prefix, strip trailing
154
+ * parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
155
+ * `(ITRE/ENVI)`), and trim residual punctuation. The result is a
156
+ * headline-shaped string suitable for `<title>` use.
157
+ *
158
+ * @param raw - Raw bold-title or heading text
159
+ * @returns Cleaned headline (may be empty after stripping)
160
+ */
161
+ export function cleanPriorityHeadline(raw) {
162
+ let text = stripInlineMarkdown(raw).trim();
163
+ text = stripPriorityLeadingDecoration(text);
164
+ text = stripPriorityLeadingPrefix(text);
165
+ // Trailing cleanup runs in a fixed-point loop so combined patterns
166
+ // like "Title (Confidence, 80%): 🔴" collapse all the way down to
167
+ // "Title".
168
+ let previous = '';
169
+ while (previous !== text) {
170
+ previous = text;
171
+ text = stripPriorityTrailingMarker(text);
172
+ text = stripPriorityTailMetadata(text);
173
+ // Drop a single trailing emoji left after metadata stripping.
174
+ text = text.replace(/\s+[^\p{L}\p{N}\s]+\s*$/u, '');
175
+ // Drop trailing colons / dashes left over.
176
+ text = text.replace(/[\s:—–-]+$/u, '');
177
+ text = text.trim();
178
+ }
179
+ return text;
180
+ }
181
+ //# sourceMappingURL=priority-finding-cleaning.js.map
@@ -9,7 +9,9 @@
9
9
  * artifact-highlight.ts when an artefact has no usable H1.
10
10
  */
11
11
  import { normaliseHeadingText } from './heading-rules.js';
12
+ import { cleanPriorityHeadline, stripPriorityTailMetadata } from './priority-finding-cleaning.js';
12
13
  import { DESCRIPTION_MAX_LENGTH, shouldSkipDescriptionLine, stripInlineMarkdown, stripLeadingProseLabel, truncateDescription, } from './text-utils.js';
14
+ import { findTitleRejectionReason } from './title-rejection.js';
13
15
  /**
14
16
  * Section headings inside the executive brief that introduce the
15
17
  * named-priority-finding block (matched case-insensitively against the
@@ -290,21 +292,69 @@ function extractPriorityFindingItem(lines, i) {
290
292
  */
291
293
  const PRIORITY_METADATA_BOLD_PREFIXES = [
292
294
  'admiralty',
295
+ 'admiralty scale',
296
+ 'admiralty scale used',
297
+ 'analysis owner',
298
+ 'analyst note',
299
+ 'analytical quality',
300
+ 'bluf',
301
+ 'bottom line up front',
302
+ 'caveats and gaps',
293
303
  'classification',
304
+ 'composition layer',
294
305
  'confidence',
295
- 'data sources',
306
+ 'confidence summary',
296
307
  'data quality',
308
+ 'data sources',
297
309
  'date',
310
+ 'emerging patterns',
311
+ 'forward indicators',
312
+ 'gate target',
298
313
  'generated',
314
+ 'headline judgement',
315
+ 'headline judgment',
316
+ 'horizon',
317
+ 'imf status',
318
+ 'issue',
319
+ 'key assumptions',
320
+ 'key assumptions check',
321
+ 'key intelligence',
322
+ 'key risk indicators',
299
323
  'lead author',
324
+ 'master assumptions',
325
+ 'master narrative',
300
326
  'methodology',
327
+ 'parliamentary status',
328
+ 'period',
329
+ 'prepared',
330
+ 'purpose',
331
+ 'quality of information check',
332
+ 'reporting',
301
333
  'reporting window',
302
334
  'run',
335
+ 'sat documentation',
336
+ 'sat documentation below',
337
+ 'scope',
303
338
  'session',
339
+ 'signal assessment',
304
340
  'source',
305
341
  'sources',
342
+ 'threat level',
343
+ 'tier 1 priority issues',
344
+ 'tier 2 priority issues',
345
+ 'tier 3 priority issues',
346
+ 'tier 1 priority',
347
+ 'tier 2 priority',
348
+ 'tier 3 priority',
306
349
  'time horizon',
350
+ 'top line',
351
+ 'top-line judgement',
352
+ 'top-line judgment',
307
353
  'wep',
354
+ 'wep band',
355
+ 'wep bands',
356
+ 'wep bands applied',
357
+ 'window',
308
358
  ];
309
359
  /**
310
360
  * Recognise a metadata-banner bold line (`**Admiralty Grade: B/2**`,
@@ -331,6 +381,22 @@ function isMetadataBoldLine(line) {
331
381
  if (inner.startsWith(`${prefix}—`) || inner.startsWith(`${prefix} —`))
332
382
  return true;
333
383
  }
384
+ // Pipe-banner shape: three or more non-empty `|`-separated segments inside the
385
+ // bold body indicate a methodology / SAT-tag banner row, never an
386
+ // editorial headline (e.g.
387
+ // `**WEP Bands Applied | Admiralty Scale Used | SAT Documentation**`).
388
+ // A single `|` is allowed because it occurs in legitimate headlines
389
+ // ("Brexit | A Decade On"). Three or more segments is the threshold.
390
+ const pipeSegments = inner
391
+ .split('|')
392
+ .map((s) => s.trim())
393
+ .filter((s) => s.length > 0);
394
+ if (pipeSegments.length >= 3)
395
+ return true;
396
+ // Trailing-ellipsis bold: `**Some long banner line…**` was clipped by
397
+ // the brief author and is not a usable editorial headline.
398
+ if (inner.endsWith('…') || inner.endsWith('...'))
399
+ return true;
334
400
  return false;
335
401
  }
336
402
  /**
@@ -351,6 +417,14 @@ function buildPriorityResult(rawHeadline, tail, lines, i) {
351
417
  const cleaned = cleanPriorityHeadline(rawHeadline);
352
418
  if (cleaned.length < 5)
353
419
  return null;
420
+ // Reject bold-prose section labels (`Strategic significance`,
421
+ // `Threat Level`, `Convergence themes`, …) and other denylisted
422
+ // tokens. Without this, the priority-finding loop would surface a
423
+ // `**Strategic significance:** …` line — which the executive-brief
424
+ // template uses inside every dossier subsection — as the article
425
+ // title. See `title-rejection.ts` for the full denylist.
426
+ if (findTitleRejectionReason(cleaned))
427
+ return null;
354
428
  const summaryLines = collectPrioritySummaryLines(tail, lines, i);
355
429
  const summary = truncateDescription(summaryLines.join(' '));
356
430
  return { headline: cleaned, summary };
@@ -416,162 +490,4 @@ function collectPrioritySummaryLines(tail, lines, i) {
416
490
  }
417
491
  return summaryLines;
418
492
  }
419
- /**
420
- * Leading priority-label tokens stripped by {@link cleanPriorityHeadline}
421
- * (`🔴 CRITICAL — Title` → `Title`). Kept as a list to bypass the
422
- * unsafe-regex lint by avoiding deep alternation in a single pattern.
423
- */
424
- const PRIORITY_LABEL_TOKENS = [
425
- 'CRITICAL',
426
- 'HIGH PRIORITY',
427
- 'HIGH',
428
- 'MEDIUM PRIORITY',
429
- 'MEDIUM',
430
- 'LOW PRIORITY',
431
- 'LOW',
432
- 'URGENT',
433
- 'ALERT',
434
- 'PRIORITY',
435
- ];
436
- /**
437
- * Trailing confidence-marker tokens stripped by
438
- * {@link cleanPriorityHeadline}. Same rationale as
439
- * {@link PRIORITY_LABEL_TOKENS}.
440
- */
441
- const PRIORITY_TRAILING_TOKENS = [
442
- 'CRITICAL',
443
- 'HIGH PRIORITY',
444
- 'HIGH',
445
- 'MEDIUM PRIORITY',
446
- 'MEDIUM',
447
- 'LOW PRIORITY',
448
- 'LOW',
449
- ];
450
- /**
451
- * Leading editorial-prefix tokens stripped by
452
- * {@link cleanPriorityHeadline} (`Trigger 1: Title` → `Title`).
453
- */
454
- const PRIORITY_LEADING_PREFIX_TOKENS = [
455
- 'Trigger',
456
- 'Dossier',
457
- 'Priority',
458
- 'Finding',
459
- 'Item',
460
- 'Highlight',
461
- 'Top',
462
- 'Story',
463
- 'Alert',
464
- 'Judgement',
465
- 'Judgment',
466
- ];
467
- /**
468
- * Strip a leading priority decoration (`🔴 `, `CRITICAL — `) from a
469
- * candidate headline. Extracted from {@link cleanPriorityHeadline} to
470
- * keep cognitive complexity within budget.
471
- *
472
- * @param text - Candidate headline (already trimmed)
473
- * @returns Headline with the leading decoration removed
474
- */
475
- function stripPriorityLeadingDecoration(text) {
476
- let out = text;
477
- for (let pass = 0; pass < 2; pass++) {
478
- out = out.replace(/^[^\p{L}\p{N}]+/u, '').trim();
479
- for (const token of PRIORITY_LABEL_TOKENS) {
480
- if (out.toLowerCase().startsWith(token.toLowerCase())) {
481
- const rest = out.slice(token.length).trim();
482
- const sep = rest.match(/^[:—–-]\s*(.+)$/u);
483
- if (sep?.[1]) {
484
- out = sep[1].trim();
485
- break;
486
- }
487
- }
488
- }
489
- }
490
- return out;
491
- }
492
- /**
493
- * Strip a leading editorial prefix (`Trigger 1: `, `Dossier 2: `) and a
494
- * stray leading ordinal (`1. `, `2.1 `) from a candidate headline.
495
- *
496
- * @param text - Candidate headline
497
- * @returns Headline with the leading editorial decoration removed
498
- */
499
- function stripPriorityLeadingPrefix(text) {
500
- let out = text;
501
- for (const token of PRIORITY_LEADING_PREFIX_TOKENS) {
502
- if (!out.toLowerCase().startsWith(token.toLowerCase()))
503
- continue;
504
- const rest = out.slice(token.length);
505
- const match = rest.match(/^\s+\d+\s*[:–—-]\s*(.+)$/u);
506
- if (match?.[1]) {
507
- out = match[1];
508
- break;
509
- }
510
- }
511
- // Drop a stray leading "1. " / "2) " ordinal.
512
- out = out.replace(/^\d+[.):·\s]\s*/u, '');
513
- return out;
514
- }
515
- /**
516
- * Strip a trailing confidence marker (`🔴 CRITICAL`, `🟡 MEDIUM`) from a
517
- * candidate headline. Single pass — caller invokes inside a fixed-point
518
- * loop.
519
- *
520
- * @param text - Candidate headline
521
- * @returns Headline with the trailing confidence marker removed
522
- */
523
- function stripPriorityTrailingMarker(text) {
524
- let out = text;
525
- for (const token of PRIORITY_TRAILING_TOKENS) {
526
- const pattern = new RegExp(`\\s+[^\\p{L}\\p{N}\\s]?\\s*${token}\\s*$`, 'iu');
527
- const next = out.replace(pattern, '');
528
- if (next !== out) {
529
- out = next;
530
- break;
531
- }
532
- }
533
- return out;
534
- }
535
- /**
536
- * Normalise a priority-finding headline: drop the
537
- * `Trigger N:` / `Dossier N:` / leading-numeric prefix, strip trailing
538
- * parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
539
- * `(ITRE/ENVI)`), and trim residual punctuation. The result is a
540
- * headline-shaped string suitable for `<title>` use.
541
- *
542
- * @param raw - Raw bold-title or heading text
543
- * @returns Cleaned headline (may be empty after stripping)
544
- */
545
- function cleanPriorityHeadline(raw) {
546
- let text = stripInlineMarkdown(raw).trim();
547
- text = stripPriorityLeadingDecoration(text);
548
- text = stripPriorityLeadingPrefix(text);
549
- // Trailing cleanup runs in a fixed-point loop so combined patterns
550
- // like "Title (Confidence, 80%): 🔴" collapse all the way down to
551
- // "Title".
552
- let previous = '';
553
- while (previous !== text) {
554
- previous = text;
555
- text = stripPriorityTrailingMarker(text);
556
- text = stripPriorityTailMetadata(text);
557
- // Drop a single trailing emoji left after metadata stripping.
558
- text = text.replace(/\s+[^\p{L}\p{N}\s]+\s*$/u, '');
559
- // Drop trailing colons / dashes left over.
560
- text = text.replace(/[\s:—–-]+$/u, '');
561
- text = text.trim();
562
- }
563
- return text;
564
- }
565
- /**
566
- * Strip the trailing parenthesised metadata that briefs append to every
567
- * priority-finding name — procedure codes, dates, committee tags. The
568
- * regex is intentionally non-greedy so it removes only the LAST
569
- * parenthesised group on the line.
570
- *
571
- * @param text - Headline or paragraph text
572
- * @returns Text with the trailing `(…)` stripped
573
- */
574
- function stripPriorityTailMetadata(text) {
575
- return text.replace(/\s*\([^()]{3,80}\)\s*$/u, '').trim();
576
- }
577
493
  //# sourceMappingURL=priority-finding-highlight.js.map