euparliamentmonitor 0.9.18 → 0.9.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/scripts/aggregator/metadata/artifact-highlight.d.ts +1 -30
- package/scripts/aggregator/metadata/artifact-highlight.js +4 -561
- package/scripts/aggregator/metadata/priority-finding-highlight.d.ts +31 -0
- package/scripts/aggregator/metadata/priority-finding-highlight.js +572 -0
- package/scripts/constants/articles/breaking-strings-eu.d.ts +7 -0
- package/scripts/constants/articles/breaking-strings-eu.js +349 -0
- package/scripts/constants/articles/breaking-strings-global.d.ts +7 -0
- package/scripts/constants/articles/breaking-strings-global.js +349 -0
- package/scripts/constants/articles/breaking.d.ts +0 -1
- package/scripts/constants/articles/breaking.js +4 -687
- package/scripts/constants/articles/committee-reports-content-eu.d.ts +7 -0
- package/scripts/constants/articles/committee-reports-content-eu.js +272 -0
- package/scripts/constants/articles/committee-reports-content-global.d.ts +7 -0
- package/scripts/constants/articles/committee-reports-content-global.js +272 -0
- package/scripts/constants/articles/committee-reports.d.ts +0 -1
- package/scripts/constants/articles/committee-reports.js +4 -533
- package/scripts/constants/articles/dashboard-builder-eu.d.ts +7 -0
- package/scripts/constants/articles/dashboard-builder-eu.js +474 -0
- package/scripts/constants/articles/dashboard-builder-global.d.ts +7 -0
- package/scripts/constants/articles/dashboard-builder-global.js +474 -0
- package/scripts/constants/articles/dashboard.d.ts +0 -4
- package/scripts/constants/articles/dashboard.js +4 -938
- package/scripts/constants/articles/deep-analysis-strings-eu.d.ts +7 -0
- package/scripts/constants/articles/deep-analysis-strings-eu.js +502 -0
- package/scripts/constants/articles/deep-analysis-strings-global.d.ts +7 -0
- package/scripts/constants/articles/deep-analysis-strings-global.js +502 -0
- package/scripts/constants/articles/deep-analysis.js +4 -994
- package/scripts/constants/articles/localized-keywords-eu.d.ts +7 -0
- package/scripts/constants/articles/localized-keywords-eu.js +411 -0
- package/scripts/constants/articles/localized-keywords-global.d.ts +7 -0
- package/scripts/constants/articles/localized-keywords-global.js +236 -0
- package/scripts/constants/articles/localized-keywords.js +4 -637
- package/scripts/constants/articles/swot-builder-eu.d.ts +7 -0
- package/scripts/constants/articles/swot-builder-eu.js +299 -0
- package/scripts/constants/articles/swot-builder-global.d.ts +7 -0
- package/scripts/constants/articles/swot-builder-global.js +299 -0
- package/scripts/constants/articles/swot.d.ts +0 -5
- package/scripts/constants/articles/swot.js +4 -588
- package/scripts/constants/world-bank/committee-map-part1.d.ts +3 -0
- package/scripts/constants/world-bank/committee-map-part1.js +367 -0
- package/scripts/constants/world-bank/committee-map-part2.d.ts +3 -0
- package/scripts/constants/world-bank/committee-map-part2.js +313 -0
- package/scripts/constants/world-bank/committee-map-types.d.ts +38 -0
- package/scripts/constants/world-bank/committee-map-types.js +4 -0
- package/scripts/constants/world-bank/committee-map.d.ts +7 -46
- package/scripts/constants/world-bank/committee-map.js +5 -675
- package/scripts/discover-untranslated-briefs.js +85 -6
- package/scripts/mcp/ep/client.d.ts +33 -694
- package/scripts/mcp/ep/client.js +37 -1057
- package/scripts/mcp/ep/tools-data.d.ts +28 -0
- package/scripts/mcp/ep/tools-data.js +124 -0
- package/scripts/mcp/ep/tools-documents.d.ts +30 -0
- package/scripts/mcp/ep/tools-documents.js +119 -0
- package/scripts/mcp/ep/tools-feeds.d.ts +23 -0
- package/scripts/mcp/ep/tools-feeds.js +153 -0
- package/scripts/mcp/ep/tools-procedures.d.ts +13 -0
- package/scripts/mcp/ep/tools-procedures.js +118 -0
- package/scripts/mcp/ep-mcp-client.d.ts +4 -0
- package/scripts/mcp/ep-mcp-client.js +5 -0
- package/scripts/mcp/imf/client.d.ts +1 -11
- package/scripts/mcp/imf/client.js +4 -31
- package/scripts/mcp/imf/lifecycle.d.ts +23 -0
- package/scripts/mcp/imf/lifecycle.js +35 -0
- package/scripts/mcp/mcp-connection.d.ts +2 -0
- package/scripts/mcp/mcp-connection.js +2 -0
- package/scripts/mcp/transport/connection.d.ts +9 -30
- package/scripts/mcp/transport/connection.js +27 -192
- package/scripts/mcp/transport/gateway.d.ts +68 -0
- package/scripts/mcp/transport/gateway.js +228 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "euparliamentmonitor",
|
|
3
|
-
"version": "0.9.18",
|
|
3
|
+
"version": "0.9.19",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
|
|
6
6
|
"main": "scripts/index.js",
|
|
@@ -158,15 +158,15 @@
|
|
|
158
158
|
"@types/papaparse": "5.5.2",
|
|
159
159
|
"@typescript-eslint/eslint-plugin": "8.59.4",
|
|
160
160
|
"@typescript-eslint/parser": "8.59.4",
|
|
161
|
-
"@vitest/coverage-v8": "4.1.
|
|
162
|
-
"@vitest/ui": "4.1.
|
|
161
|
+
"@vitest/coverage-v8": "4.1.7",
|
|
162
|
+
"@vitest/ui": "4.1.7",
|
|
163
163
|
"chart.js": "4.5.1",
|
|
164
164
|
"chartjs-plugin-annotation": "3.1.0",
|
|
165
165
|
"clean-css": "^5.3.3",
|
|
166
166
|
"d3": "7.9.0",
|
|
167
167
|
"eslint": "10.4.0",
|
|
168
168
|
"eslint-config-prettier": "10.1.8",
|
|
169
|
-
"eslint-plugin-jsdoc": "
|
|
169
|
+
"eslint-plugin-jsdoc": "63.0.0",
|
|
170
170
|
"eslint-plugin-security": "4.0.0",
|
|
171
171
|
"eslint-plugin-sonarjs": "4.0.3",
|
|
172
172
|
"fast-check": "^4.8.0",
|
|
@@ -187,7 +187,7 @@
|
|
|
187
187
|
"tsx": "4.22.3",
|
|
188
188
|
"typedoc": "0.28.19",
|
|
189
189
|
"typescript": "6.0.3",
|
|
190
|
-
"vitest": "4.1.
|
|
190
|
+
"vitest": "4.1.7"
|
|
191
191
|
},
|
|
192
192
|
"engines": {
|
|
193
193
|
"node": ">=26"
|
|
@@ -24,34 +24,5 @@ export declare function extractArtifactHighlight(runDir: string, articleType: st
|
|
|
24
24
|
* @returns `true` when the file is a translated sibling brief
|
|
25
25
|
*/
|
|
26
26
|
export declare function isTranslatedSiblingBrief(filename: string): boolean;
|
|
27
|
-
|
|
28
|
-
* Mine the FIRST named priority finding from an executive-brief-style
|
|
29
|
-
* artefact body. Looks for a section heading from
|
|
30
|
-
* {@link PRIORITY_FINDING_SECTION_HEADINGS} and returns the first dossier
|
|
31
|
-
* name + descriptive paragraph found inside it. Supports the three
|
|
32
|
-
* canonical Stage-B authoring patterns:
|
|
33
|
-
*
|
|
34
|
-
* 1. **Bold-in-numbered-list** (breaking briefs):
|
|
35
|
-
* `1. **Digital Markets Act Enforcement** (TA-10-2026-0160, 2026-04-30)`
|
|
36
|
-
*      `   Parliament adopted a resolution …`
|
|
37
|
-
* 2. **Numbered subheading** (committee briefs):
|
|
38
|
-
* `### 1. Clean Industrial Deal Implementation (ITRE/ENVI)`
|
|
39
|
-
*      `The Clean Industrial Deal framework …`
|
|
40
|
-
* 3. **Bold-leading paragraph** (synthesis variants):
|
|
41
|
-
* `**Trigger 1: DMA Enforcement Resolution** (TA-10-2026-0160)`
|
|
42
|
-
* `- Significance: π’ HIGH IMPACT β¦`
|
|
43
|
-
*
|
|
44
|
-
* Trailing parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
|
|
45
|
-
* `(ITRE/ENVI)`) is stripped from the headline so it stays headline-shaped
|
|
46
|
-
* (`Digital Markets Act Enforcement`) rather than boilerplate
|
|
47
|
-
* (`Digital Markets Act Enforcement (TA-10-2026-0160, 2026-04-30)`).
|
|
48
|
-
*
|
|
49
|
-
* @param body - Editorial artefact body
|
|
50
|
-
* @returns `{headline, summary}` when a priority finding was identified;
|
|
51
|
-
* `null` when the body has no priority section or no usable item inside
|
|
52
|
-
*/
|
|
53
|
-
export declare function extractPriorityFindingHighlight(body: string): {
|
|
54
|
-
readonly headline: string;
|
|
55
|
-
readonly summary: string;
|
|
56
|
-
} | null;
|
|
27
|
+
export { extractPriorityFindingHighlight } from './priority-finding-highlight.js';
|
|
57
28
|
//# sourceMappingURL=artifact-highlight.d.ts.map
|
|
@@ -18,8 +18,8 @@ import path from 'path';
|
|
|
18
18
|
import { ALL_LANGUAGES } from '../../constants/language-core.js';
|
|
19
19
|
import { extractFirstH1 } from './h1-extractor.js';
|
|
20
20
|
import { extractLedeAfterHeading, extractStrongProseLine } from './lede-extractor.js';
|
|
21
|
-
import { isGenericHeading,
|
|
22
|
-
import {
|
|
21
|
+
import { isGenericHeading, stripArtifactCategoryAffix } from './heading-rules.js';
|
|
22
|
+
import { truncateTitle } from './text-utils.js';
|
|
23
23
|
/** Ordered list of artefact filenames that typically carry the editorial H1. */
|
|
24
24
|
const EDITORIAL_ARTEFACT_CANDIDATES = [
|
|
25
25
|
// `executive-brief.md` is the canonical Riksdagsmonitor-aligned editorial
|
|
@@ -188,565 +188,8 @@ function probeCandidateForHighlight(runDir, rel, articleType, date) {
|
|
|
188
188
|
}
|
|
189
189
|
return { summary };
|
|
190
190
|
}
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
* named-priority-finding block (matched case-insensitively against the
|
|
194
|
-
* decoration-stripped heading text, see {@link normaliseHeadingText}).
|
|
195
|
-
*/
|
|
196
|
-
const PRIORITY_FINDING_SECTION_HEADINGS = [
|
|
197
|
-
'key developments',
|
|
198
|
-
'key findings',
|
|
199
|
-
'key intelligence summary',
|
|
200
|
-
'key judgements',
|
|
201
|
-
'key judgments',
|
|
202
|
-
'headline intelligence',
|
|
203
|
-
'headline judgements',
|
|
204
|
-
'headline judgments',
|
|
205
|
-
'lead story',
|
|
206
|
-
'policy intelligence alerts',
|
|
207
|
-
'priority dossiers',
|
|
208
|
-
'priority dossiers under committee scrutiny',
|
|
209
|
-
'priority findings',
|
|
210
|
-
'priority intelligence assessment',
|
|
211
|
-
'priority items',
|
|
212
|
-
'top findings',
|
|
213
|
-
'top developments',
|
|
214
|
-
'top dossiers',
|
|
215
|
-
'top trigger events',
|
|
216
|
-
'top triggers',
|
|
217
|
-
'trigger events',
|
|
218
|
-
'top documents',
|
|
219
|
-
'top procedures',
|
|
220
|
-
'top 3 triggers',
|
|
221
|
-
'wep assessment',
|
|
222
|
-
'high priority',
|
|
223
|
-
'highest priority',
|
|
224
|
-
];
|
|
225
|
-
/**
|
|
226
|
-
* Mine the FIRST named priority finding from an executive-brief-style
|
|
227
|
-
* artefact body. Looks for a section heading from
|
|
228
|
-
* {@link PRIORITY_FINDING_SECTION_HEADINGS} and returns the first dossier
|
|
229
|
-
* name + descriptive paragraph found inside it. Supports the three
|
|
230
|
-
* canonical Stage-B authoring patterns:
|
|
231
|
-
*
|
|
232
|
-
* 1. **Bold-in-numbered-list** (breaking briefs):
|
|
233
|
-
* `1. **Digital Markets Act Enforcement** (TA-10-2026-0160, 2026-04-30)`
|
|
234
|
-
*      `   Parliament adopted a resolution …`
|
|
235
|
-
* 2. **Numbered subheading** (committee briefs):
|
|
236
|
-
* `### 1. Clean Industrial Deal Implementation (ITRE/ENVI)`
|
|
237
|
-
*      `The Clean Industrial Deal framework …`
|
|
238
|
-
* 3. **Bold-leading paragraph** (synthesis variants):
|
|
239
|
-
* `**Trigger 1: DMA Enforcement Resolution** (TA-10-2026-0160)`
|
|
240
|
-
* `- Significance: π’ HIGH IMPACT β¦`
|
|
241
|
-
*
|
|
242
|
-
* Trailing parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
|
|
243
|
-
* `(ITRE/ENVI)`) is stripped from the headline so it stays headline-shaped
|
|
244
|
-
* (`Digital Markets Act Enforcement`) rather than boilerplate
|
|
245
|
-
* (`Digital Markets Act Enforcement (TA-10-2026-0160, 2026-04-30)`).
|
|
246
|
-
*
|
|
247
|
-
* @param body - Editorial artefact body
|
|
248
|
-
* @returns `{headline, summary}` when a priority finding was identified;
|
|
249
|
-
* `null` when the body has no priority section or no usable item inside
|
|
250
|
-
*/
|
|
251
|
-
export function extractPriorityFindingHighlight(body) {
|
|
252
|
-
if (!body)
|
|
253
|
-
return null;
|
|
254
|
-
const lines = body.split('\n');
|
|
255
|
-
return scanPrioritySection(lines) ?? scanH2StoryHeadings(lines);
|
|
256
|
-
}
|
|
257
|
-
/**
|
|
258
|
-
* Strategy 1 — scan inside the first recognised priority-finding
|
|
259
|
-
* section heading for a usable item (Pattern A/B/C/D). Returns `null`
|
|
260
|
-
* when the section is absent or contains no matchable item.
|
|
261
|
-
*
|
|
262
|
-
* @param lines - Body lines (already split on `\n`)
|
|
263
|
-
* @returns `{headline, summary}` when an item was identified
|
|
264
|
-
*/
|
|
265
|
-
function scanPrioritySection(lines) {
|
|
266
|
-
const sectionStart = findPrioritySectionStart(lines);
|
|
267
|
-
if (sectionStart < 0)
|
|
268
|
-
return null;
|
|
269
|
-
for (let i = sectionStart + 1; i < lines.length; i++) {
|
|
270
|
-
const line = (lines[i] ?? '').trim();
|
|
271
|
-
if (!line)
|
|
272
|
-
continue;
|
|
273
|
-
// Stop at the next H2 (sibling section) but allow `### …` and
|
|
274
|
-
// `#### …` subheadings inside (e.g. `### 🔴 HIGH PRIORITY` between
|
|
275
|
-
// the section header and the first list item).
|
|
276
|
-
if (/^##(?!#)/.test(line))
|
|
277
|
-
return null;
|
|
278
|
-
const candidate = extractPriorityFindingItem(lines, i);
|
|
279
|
-
if (candidate)
|
|
280
|
-
return candidate;
|
|
281
|
-
}
|
|
282
|
-
return null;
|
|
283
|
-
}
|
|
284
|
-
/**
|
|
285
|
-
* Story-keyword tokens used by `## Lead Story:` / `## Story N:` /
|
|
286
|
-
* `## Trigger N:` H2 heading detection. Kept as a runtime list so the
|
|
287
|
-
* regex stays bounded and bypasses the unsafe-regex lint by avoiding
|
|
288
|
-
* deep alternation.
|
|
289
|
-
*/
|
|
290
|
-
const H2_STORY_TOKENS = [
|
|
291
|
-
'Lead Story',
|
|
292
|
-
'Story',
|
|
293
|
-
'Trigger',
|
|
294
|
-
'Alert',
|
|
295
|
-
'Judgement',
|
|
296
|
-
'Judgment',
|
|
297
|
-
];
|
|
298
|
-
/**
|
|
299
|
-
* Strategy 2 — walk every `## …` H2 heading and try to recognise a
|
|
300
|
-
* story-style heading (`## π Lead Story: Russia Accountability`,
|
|
301
|
-
* `## Story 1 β DMA Enforcement`). Used as a fallback when no priority
|
|
302
|
-
* section was found, because motions briefs publish each lead story as
|
|
303
|
-
* its own H2 without a parent section.
|
|
304
|
-
*
|
|
305
|
-
* @param lines - Body lines (already split on `\n`)
|
|
306
|
-
* @returns `{headline, summary}` when a story heading was identified
|
|
307
|
-
*/
|
|
308
|
-
function scanH2StoryHeadings(lines) {
|
|
309
|
-
for (let i = 0; i < lines.length; i++) {
|
|
310
|
-
const line = (lines[i] ?? '').trim();
|
|
311
|
-
if (!line.startsWith('## '))
|
|
312
|
-
continue;
|
|
313
|
-
const headingText = line.replace(/^##\s+/u, '');
|
|
314
|
-
const storyHeadline = extractH2StoryHeadline(headingText);
|
|
315
|
-
if (!storyHeadline)
|
|
316
|
-
continue;
|
|
317
|
-
const result = buildPriorityResult(storyHeadline, '', lines, i);
|
|
318
|
-
if (result?.headline)
|
|
319
|
-
return result;
|
|
320
|
-
}
|
|
321
|
-
return null;
|
|
322
|
-
}
|
|
323
|
-
/**
|
|
324
|
-
* Recognise the H2-story shape (`π Lead Story: Title`, `Story 1 β
|
|
325
|
-
* Title`, `Trigger 2: Title`) and return the residual headline portion.
|
|
326
|
-
* Returns an empty string when the heading does not match a story
|
|
327
|
-
* keyword. Implemented as discrete string operations (rather than one
|
|
328
|
-
* dense regex) to keep the function under the unsafe-regex linter and
|
|
329
|
-
* cognitive-complexity budgets.
|
|
330
|
-
*
|
|
331
|
-
* @param headingText - Heading text with the leading `## ` already removed
|
|
332
|
-
* @returns Residual headline or empty string
|
|
333
|
-
*/
|
|
334
|
-
function extractH2StoryHeadline(headingText) {
|
|
335
|
-
// Strip a short leading decoration / emoji block (up to 4 non-alphanumerics).
|
|
336
|
-
const stripped = headingText.replace(/^[^A-Za-z0-9]{0,4}\s*/u, '');
|
|
337
|
-
for (const token of H2_STORY_TOKENS) {
|
|
338
|
-
if (!stripped.toLowerCase().startsWith(token.toLowerCase()))
|
|
339
|
-
continue;
|
|
340
|
-
let rest = stripped.slice(token.length).trim();
|
|
341
|
-
// `Story 1` / `Trigger 2` β accept and consume the trailing digit.
|
|
342
|
-
if (token !== 'Lead Story') {
|
|
343
|
-
const digit = rest.match(/^\d+\b/u);
|
|
344
|
-
if (!digit)
|
|
345
|
-
continue;
|
|
346
|
-
rest = rest.slice(digit[0].length).trim();
|
|
347
|
-
}
|
|
348
|
-
// Require an explicit `:` / `β` / `β` / `-` / `.` separator before
|
|
349
|
-
// the residual headline so plain prose H2s never match.
|
|
350
|
-
const sep = rest.match(/^[:ββ\-.]\s+(.+)$/u);
|
|
351
|
-
if (sep?.[1])
|
|
352
|
-
return sep[1].trim();
|
|
353
|
-
}
|
|
354
|
-
return '';
|
|
355
|
-
}
|
|
356
|
-
/**
|
|
357
|
-
* Locate the line index of the first priority-finding section heading
|
|
358
|
-
* inside an artefact body. Returns `-1` when no such heading exists.
|
|
359
|
-
*
|
|
360
|
-
* @param lines - Body lines (already split on `\n`)
|
|
361
|
-
* @returns Line index of the `## β¦` heading, or `-1`
|
|
362
|
-
*/
|
|
363
|
-
function findPrioritySectionStart(lines) {
|
|
364
|
-
for (let i = 0; i < lines.length; i++) {
|
|
365
|
-
const line = (lines[i] ?? '').trim();
|
|
366
|
-
const match = line.match(/^#{2,4}\s+(.+)$/u);
|
|
367
|
-
if (!match)
|
|
368
|
-
continue;
|
|
369
|
-
const text = normaliseHeadingText(match[1] ?? '');
|
|
370
|
-
if (!text)
|
|
371
|
-
continue;
|
|
372
|
-
if (headingMatchesPriorityProbe(text))
|
|
373
|
-
return i;
|
|
374
|
-
}
|
|
375
|
-
return -1;
|
|
376
|
-
}
|
|
377
|
-
/**
|
|
378
|
-
* Word-boundary substring matcher for the priority-finding section
|
|
379
|
-
* detector. Extracted from {@link findPrioritySectionStart} to keep its
|
|
380
|
-
* cognitive complexity within budget.
|
|
381
|
-
*
|
|
382
|
-
* @param text - Heading text already normalised by {@link normaliseHeadingText}
|
|
383
|
-
* @returns `true` when one of {@link PRIORITY_FINDING_SECTION_HEADINGS}
|
|
384
|
-
* appears as a word-bounded substring of {@link text}
|
|
385
|
-
*/
|
|
386
|
-
function headingMatchesPriorityProbe(text) {
|
|
387
|
-
for (const probe of PRIORITY_FINDING_SECTION_HEADINGS) {
|
|
388
|
-
if (text === probe)
|
|
389
|
-
return true;
|
|
390
|
-
const idx = text.indexOf(probe);
|
|
391
|
-
if (idx < 0)
|
|
392
|
-
continue;
|
|
393
|
-
const before = idx === 0 ? ' ' : (text[idx - 1] ?? ' ');
|
|
394
|
-
const after = text[idx + probe.length] ?? ' ';
|
|
395
|
-
if (!/[A-Za-z0-9]/.test(before) && !/[A-Za-z0-9]/.test(after))
|
|
396
|
-
return true;
|
|
397
|
-
}
|
|
398
|
-
return false;
|
|
399
|
-
}
|
|
400
|
-
/**
|
|
401
|
-
* Try to recognise a priority-finding item starting at {@link i}. Returns
|
|
402
|
-
* the resolved `{headline, summary}` pair when the item matches one of the
|
|
403
|
-
* three authoring patterns; returns `null` otherwise so the caller can
|
|
404
|
-
* advance to the next line.
|
|
405
|
-
*
|
|
406
|
-
* @param lines - Body lines (already split on `\n`)
|
|
407
|
-
* @param i - Index of the candidate line
|
|
408
|
-
* @returns Priority-finding pair when matched, `null` otherwise
|
|
409
|
-
*/
|
|
410
|
-
function extractPriorityFindingItem(lines, i) {
|
|
411
|
-
const line = (lines[i] ?? '').trim();
|
|
412
|
-
// Pattern A — numbered list item with bold title:
|
|
413
|
-
// `1. **Digital Markets Act Enforcement** (TA-10-2026-0160, 2026-04-30)`
|
|
414
|
-
const numberedBold = line.match(/^\d+\.\s+\*\*([^*]+?)\*\*\s*(.*)$/u);
|
|
415
|
-
if (numberedBold) {
|
|
416
|
-
return buildPriorityResult(numberedBold[1] ?? '', numberedBold[2] ?? '', lines, i);
|
|
417
|
-
}
|
|
418
|
-
// Pattern B β numbered subheading. Requires an explicit separator
|
|
419
|
-
// (`:` / `.` / `)` / `Β·` / `β` / `β` / `-`) after the number so
|
|
420
|
-
// dotted decimal section labels like `### 2.1 Close to Adoption`
|
|
421
|
-
// do NOT leak into the headline. Examples:
|
|
422
|
-
// `### 1. Clean Industrial Deal Implementation (ITRE/ENVI)`
|
|
423
|
-
// `### 1 Β· Headline Judgements` (middle dot)
|
|
424
|
-
// `### KJ-1: Digital Regulation Enforcement β¦`
|
|
425
|
-
// `### KF-3: Banking Union Completion`
|
|
426
|
-
// `### T-2: DMA Enforcement Resolution`
|
|
427
|
-
// Two narrow patterns instead of one wide alternation to keep the
|
|
428
|
-
// pattern within the unsafe-regex linter's complexity budget.
|
|
429
|
-
const numericHeading = line.match(/^#{3,4}\s+\d+[:.)Β·ββ\s-]\s*(.+)$/u);
|
|
430
|
-
if (numericHeading) {
|
|
431
|
-
return buildPriorityResult(numericHeading[1] ?? '', '', lines, i);
|
|
432
|
-
}
|
|
433
|
-
const tagHeading = line.match(/^#{3,4}\s+[A-Z]{1,3}-?\d+[:.)Β·ββ\s-]\s*(.+)$/u);
|
|
434
|
-
if (tagHeading) {
|
|
435
|
-
return buildPriorityResult(tagHeading[1] ?? '', '', lines, i);
|
|
436
|
-
}
|
|
437
|
-
// Pattern D β word-prefixed subheading (`### Alert 1 β Title π΄`,
|
|
438
|
-
// `### Judgement 1 β Title`, `### Trigger 1: DMA Enforcement`):
|
|
439
|
-
const wordTaggedHeading = line.match(/^#{3,4}\s+(?:Alert|Judgement|Judgment|Finding|Story|Item|Trigger|Highlight|Dossier|Priority|Top)\s+\d+\s*[:.)Β·ββ\s-]+(.+)$/iu);
|
|
440
|
-
if (wordTaggedHeading) {
|
|
441
|
-
return buildPriorityResult(wordTaggedHeading[1] ?? '', '', lines, i);
|
|
442
|
-
}
|
|
443
|
-
// Pattern C β bold-leading paragraph trigger:
|
|
444
|
-
// `**Trigger 1: DMA Enforcement Resolution** (TA-10-2026-0160)`
|
|
445
|
-
// `**Digital Markets Act Enforcement**`
|
|
446
|
-
// Rejected when:
|
|
447
|
-
// - the bold body is longer than a plausible headline (>110 chars) β
|
|
448
|
-
// that's a bold paragraph lede masquerading as a headline (e.g.
|
|
449
|
-
// `**This period captures the April 2026 Strasbourg β¦**`)
|
|
450
|
-
// - the bold body is a metadata key (`**Admiralty Grade: B/2**`,
|
|
451
|
-
// `**Reporting Window:** β¦`, `**Date:** β¦`) β these are banner
|
|
452
|
-
// rows, not editorial headlines
|
|
453
|
-
const boldOnly = line.match(/^\*\*([^*]+?)\*\*\s*(.*)$/u);
|
|
454
|
-
if (boldOnly && !line.startsWith('**Confidence') && !isMetadataBoldLine(line)) {
|
|
455
|
-
const candidate = (boldOnly[1] ?? '').trim();
|
|
456
|
-
if (candidate.length > 0 && candidate.length <= 110) {
|
|
457
|
-
return buildPriorityResult(candidate, boldOnly[2] ?? '', lines, i);
|
|
458
|
-
}
|
|
459
|
-
}
|
|
460
|
-
return null;
|
|
461
|
-
}
|
|
462
|
-
/**
|
|
463
|
-
* Bold prefix tokens that indicate a metadata banner row rather than an
|
|
464
|
-
* editorial headline. The Stage-B brief template uses these consistently
|
|
465
|
-
* as the lede block (`**Reporting Window:** 3 Apr β 1 May 2026`,
|
|
466
|
-
* `**Admiralty Grade:** B/2`, `**Date:** 2026-05-15`); they must never
|
|
467
|
-
* leak into `<title>`.
|
|
468
|
-
*/
|
|
469
|
-
const PRIORITY_METADATA_BOLD_PREFIXES = [
|
|
470
|
-
'admiralty',
|
|
471
|
-
'classification',
|
|
472
|
-
'confidence',
|
|
473
|
-
'data sources',
|
|
474
|
-
'data quality',
|
|
475
|
-
'date',
|
|
476
|
-
'generated',
|
|
477
|
-
'lead author',
|
|
478
|
-
'methodology',
|
|
479
|
-
'reporting window',
|
|
480
|
-
'run',
|
|
481
|
-
'session',
|
|
482
|
-
'source',
|
|
483
|
-
'sources',
|
|
484
|
-
'time horizon',
|
|
485
|
-
'wep',
|
|
486
|
-
];
|
|
487
|
-
/**
|
|
488
|
-
* Recognise a metadata-banner bold line (`**Admiralty Grade: B/2**`,
|
|
489
|
-
* `**Reporting Window:** 3 Apr β 1 May 2026`). The check is
|
|
490
|
-
* deliberately case-insensitive and tolerant of trailing colons inside
|
|
491
|
-
* or outside the bold delimiters.
|
|
492
|
-
*
|
|
493
|
-
* @param line - Trimmed source line (already known to start with `**`)
|
|
494
|
-
* @returns `true` when the line is a metadata banner that must be
|
|
495
|
-
* skipped by Pattern C
|
|
496
|
-
*/
|
|
497
|
-
function isMetadataBoldLine(line) {
|
|
498
|
-
const inner = line
|
|
499
|
-
.replace(/^\*\*([^*]+?)\*\*.*$/u, '$1')
|
|
500
|
-
.trim()
|
|
501
|
-
.toLowerCase();
|
|
502
|
-
for (const prefix of PRIORITY_METADATA_BOLD_PREFIXES) {
|
|
503
|
-
if (inner === prefix)
|
|
504
|
-
return true;
|
|
505
|
-
if (inner.startsWith(`${prefix}:`))
|
|
506
|
-
return true;
|
|
507
|
-
if (inner.startsWith(`${prefix} `) && inner.includes(':'))
|
|
508
|
-
return true;
|
|
509
|
-
if (inner.startsWith(`${prefix}β`) || inner.startsWith(`${prefix} β`))
|
|
510
|
-
return true;
|
|
511
|
-
}
|
|
512
|
-
return false;
|
|
513
|
-
}
|
|
514
|
-
/**
|
|
515
|
-
* Compose the `{headline, summary}` pair for one matched priority-finding
|
|
516
|
-
* item. Cleans `Trigger N:` / `N.` prefixes off the headline, strips the
|
|
517
|
-
* trailing `(TA-10-β¦, β¦)` / `(ITRE/ENVI)` metadata, and gathers the
|
|
518
|
-
* following prose lines as the summary.
|
|
519
|
-
*
|
|
520
|
-
* @param rawHeadline - Raw bold title or numbered-heading text
|
|
521
|
-
* @param tail - Same-line trailing text (after the bold close / heading)
|
|
522
|
-
* @param lines - Body lines (already split on `\n`)
|
|
523
|
-
* @param i - Index of the matched line
|
|
524
|
-
* @returns Cleaned `{headline, summary}` β headline may be empty when
|
|
525
|
-
* cleaning collapses it below a minimum length, in which case the
|
|
526
|
-
* caller falls through
|
|
527
|
-
*/
|
|
528
|
-
function buildPriorityResult(rawHeadline, tail, lines, i) {
|
|
529
|
-
const cleaned = cleanPriorityHeadline(rawHeadline);
|
|
530
|
-
if (cleaned.length < 5)
|
|
531
|
-
return null;
|
|
532
|
-
const summaryLines = collectPrioritySummaryLines(tail, lines, i);
|
|
533
|
-
const summary = truncateDescription(summaryLines.join(' '));
|
|
534
|
-
return { headline: cleaned, summary };
|
|
535
|
-
}
|
|
536
|
-
/**
|
|
537
|
-
* Decide whether a follow-up line is a hard stop for priority-finding
|
|
538
|
-
* summary gathering (next heading / next list item) β collapses three
|
|
539
|
-
* boolean checks out of {@link buildPriorityResult}'s main loop.
|
|
540
|
-
*
|
|
541
|
-
* @param line - Trimmed follow-up line
|
|
542
|
-
* @returns `true` when the gathering loop must break
|
|
543
|
-
*/
|
|
544
|
-
function isPrioritySummaryStopper(line) {
|
|
545
|
-
if (/^#{1,6}\s/.test(line))
|
|
546
|
-
return true;
|
|
547
|
-
if (/^\d+\.\s/.test(line))
|
|
548
|
-
return true;
|
|
549
|
-
if (/^[-*]\s/.test(line))
|
|
550
|
-
return true;
|
|
551
|
-
return false;
|
|
552
|
-
}
|
|
553
|
-
/**
|
|
554
|
-
* Gather the summary prose for a priority-finding item β the same-line
|
|
555
|
-
* tail (with leading procedure-code parens stripped) plus subsequent
|
|
556
|
-
* prose lines until a blank line / new heading / new bullet is hit.
|
|
557
|
-
*
|
|
558
|
-
* @param tail - Same-line text that trails the bold/heading
|
|
559
|
-
* @param lines - Full body lines
|
|
560
|
-
* @param i - Index of the matched headline line
|
|
561
|
-
* @returns Ordered list of summary segments (already clean)
|
|
562
|
-
*/
|
|
563
|
-
function collectPrioritySummaryLines(tail, lines, i) {
|
|
564
|
-
const summaryLines = [];
|
|
565
|
-
// Strip leading parens-metadata (`(TA-10-2026-0160, 2026-04-30)`) and
|
|
566
|
-
// trailing parens-metadata from the tail so the summary starts with
|
|
567
|
-
// editorial prose, not a procedure-code citation.
|
|
568
|
-
let tailText = stripInlineMarkdown(tail).trim();
|
|
569
|
-
tailText = tailText.replace(/^\([^()]{3,80}\)\s*/u, '');
|
|
570
|
-
tailText = stripPriorityTailMetadata(tailText).trim();
|
|
571
|
-
if (tailText)
|
|
572
|
-
summaryLines.push(tailText);
|
|
573
|
-
for (let j = i + 1; j < lines.length; j++) {
|
|
574
|
-
const next = (lines[j] ?? '').trim();
|
|
575
|
-
if (!next) {
|
|
576
|
-
if (summaryLines.length > 0)
|
|
577
|
-
break;
|
|
578
|
-
continue;
|
|
579
|
-
}
|
|
580
|
-
if (isPrioritySummaryStopper(next))
|
|
581
|
-
break;
|
|
582
|
-
if (next.startsWith('**Confidence') || next.startsWith('- **Confidence'))
|
|
583
|
-
continue;
|
|
584
|
-
if (shouldSkipDescriptionLine(next))
|
|
585
|
-
continue;
|
|
586
|
-
summaryLines.push(stripInlineMarkdown(next));
|
|
587
|
-
if (summaryLines.join(' ').length >= DESCRIPTION_MAX_LENGTH)
|
|
588
|
-
break;
|
|
589
|
-
}
|
|
590
|
-
return summaryLines;
|
|
591
|
-
}
|
|
592
|
-
/**
|
|
593
|
-
* Leading priority-label tokens stripped by {@link cleanPriorityHeadline}
|
|
594
|
-
* (`🔴 CRITICAL — Title` → `Title`). Kept as a list to bypass the
|
|
595
|
-
* unsafe-regex lint by avoiding deep alternation in a single pattern.
|
|
596
|
-
*/
|
|
597
|
-
const PRIORITY_LABEL_TOKENS = [
|
|
598
|
-
'CRITICAL',
|
|
599
|
-
'HIGH PRIORITY',
|
|
600
|
-
'HIGH',
|
|
601
|
-
'MEDIUM PRIORITY',
|
|
602
|
-
'MEDIUM',
|
|
603
|
-
'LOW PRIORITY',
|
|
604
|
-
'LOW',
|
|
605
|
-
'URGENT',
|
|
606
|
-
'ALERT',
|
|
607
|
-
'PRIORITY',
|
|
608
|
-
];
|
|
609
|
-
/**
|
|
610
|
-
* Trailing confidence-marker tokens stripped by
|
|
611
|
-
* {@link cleanPriorityHeadline}. Same rationale as
|
|
612
|
-
* {@link PRIORITY_LABEL_TOKENS}.
|
|
613
|
-
*/
|
|
614
|
-
const PRIORITY_TRAILING_TOKENS = [
|
|
615
|
-
'CRITICAL',
|
|
616
|
-
'HIGH PRIORITY',
|
|
617
|
-
'HIGH',
|
|
618
|
-
'MEDIUM PRIORITY',
|
|
619
|
-
'MEDIUM',
|
|
620
|
-
'LOW PRIORITY',
|
|
621
|
-
'LOW',
|
|
622
|
-
];
|
|
623
|
-
/**
|
|
624
|
-
* Leading editorial-prefix tokens stripped by
|
|
625
|
-
* {@link cleanPriorityHeadline} (`Trigger 1: Title` → `Title`).
|
|
626
|
-
*/
|
|
627
|
-
const PRIORITY_LEADING_PREFIX_TOKENS = [
|
|
628
|
-
'Trigger',
|
|
629
|
-
'Dossier',
|
|
630
|
-
'Priority',
|
|
631
|
-
'Finding',
|
|
632
|
-
'Item',
|
|
633
|
-
'Highlight',
|
|
634
|
-
'Top',
|
|
635
|
-
'Story',
|
|
636
|
-
'Alert',
|
|
637
|
-
'Judgement',
|
|
638
|
-
'Judgment',
|
|
639
|
-
];
|
|
640
|
-
/**
|
|
641
|
-
* Strip a leading priority decoration (`π΄ `, `CRITICAL β `) from a
|
|
642
|
-
* candidate headline. Extracted from {@link cleanPriorityHeadline} to
|
|
643
|
-
* keep cognitive complexity within budget.
|
|
644
|
-
*
|
|
645
|
-
* @param text - Candidate headline (already trimmed)
|
|
646
|
-
* @returns Headline with the leading decoration removed
|
|
647
|
-
*/
|
|
648
|
-
function stripPriorityLeadingDecoration(text) {
|
|
649
|
-
let out = text;
|
|
650
|
-
for (let pass = 0; pass < 2; pass++) {
|
|
651
|
-
out = out.replace(/^[^\p{L}\p{N}]+/u, '').trim();
|
|
652
|
-
for (const token of PRIORITY_LABEL_TOKENS) {
|
|
653
|
-
if (out.toLowerCase().startsWith(token.toLowerCase())) {
|
|
654
|
-
const rest = out.slice(token.length).trim();
|
|
655
|
-
const sep = rest.match(/^[:ββ-]\s*(.+)$/u);
|
|
656
|
-
if (sep?.[1]) {
|
|
657
|
-
out = sep[1].trim();
|
|
658
|
-
break;
|
|
659
|
-
}
|
|
660
|
-
}
|
|
661
|
-
}
|
|
662
|
-
}
|
|
663
|
-
return out;
|
|
664
|
-
}
|
|
665
|
-
/**
|
|
666
|
-
* Strip a leading editorial prefix (`Trigger 1: `, `Dossier 2: `) and a
|
|
667
|
-
* stray leading ordinal (`1. `, `2.1 `) from a candidate headline.
|
|
668
|
-
*
|
|
669
|
-
* @param text - Candidate headline
|
|
670
|
-
* @returns Headline with the leading editorial decoration removed
|
|
671
|
-
*/
|
|
672
|
-
function stripPriorityLeadingPrefix(text) {
|
|
673
|
-
let out = text;
|
|
674
|
-
for (const token of PRIORITY_LEADING_PREFIX_TOKENS) {
|
|
675
|
-
if (!out.toLowerCase().startsWith(token.toLowerCase()))
|
|
676
|
-
continue;
|
|
677
|
-
const rest = out.slice(token.length);
|
|
678
|
-
const match = rest.match(/^\s+\d+\s*[:ββ-]\s*(.+)$/u);
|
|
679
|
-
if (match?.[1]) {
|
|
680
|
-
out = match[1];
|
|
681
|
-
break;
|
|
682
|
-
}
|
|
683
|
-
}
|
|
684
|
-
// Drop a stray leading "1. " / "2) " ordinal.
|
|
685
|
-
out = out.replace(/^\d+[.):Β·\s]\s*/u, '');
|
|
686
|
-
return out;
|
|
687
|
-
}
|
|
688
|
-
/**
|
|
689
|
-
* Strip a trailing confidence marker (`🔴 CRITICAL`, `🟡 MEDIUM`) from a
|
|
690
|
-
* candidate headline. Single pass β caller invokes inside a fixed-point
|
|
691
|
-
* loop.
|
|
692
|
-
*
|
|
693
|
-
* @param text - Candidate headline
|
|
694
|
-
* @returns Headline with the trailing confidence marker removed
|
|
695
|
-
*/
|
|
696
|
-
function stripPriorityTrailingMarker(text) {
|
|
697
|
-
let out = text;
|
|
698
|
-
for (const token of PRIORITY_TRAILING_TOKENS) {
|
|
699
|
-
const pattern = new RegExp(`\\s+[^\\p{L}\\p{N}\\s]?\\s*${token}\\s*$`, 'iu');
|
|
700
|
-
const next = out.replace(pattern, '');
|
|
701
|
-
if (next !== out) {
|
|
702
|
-
out = next;
|
|
703
|
-
break;
|
|
704
|
-
}
|
|
705
|
-
}
|
|
706
|
-
return out;
|
|
707
|
-
}
|
|
708
|
-
/**
 * Normalise a priority-finding headline: drop the
 * `Trigger N:` / `Dossier N:` / leading-numeric prefix, strip trailing
 * parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
 * `(ITRE/ENVI)`), and trim residual punctuation. The result is a
 * headline-shaped string suitable for `<title>` use.
 *
 * @param raw - Raw bold-title or heading text
 * @returns Cleaned headline (may be empty after stripping)
 */
function cleanPriorityHeadline(raw) {
    let text = stripInlineMarkdown(raw).trim();
    text = stripPriorityLeadingDecoration(text);
    text = stripPriorityLeadingPrefix(text);
    // Trailing cleanup runs in a fixed-point loop so combined patterns
    // like "Title (Confidence, 80%): 🔴" collapse all the way down to
    // "Title". The loop ends once a full pass leaves the text unchanged.
    let previous = '';
    while (previous !== text) {
        previous = text;
        text = stripPriorityTrailingMarker(text);
        text = stripPriorityTailMetadata(text);
        // Drop a trailing whitespace-separated run of non-letter, non-digit
        // characters (typically an emoji left after metadata stripping).
        text = text.replace(/\s+[^\p{L}\p{N}\s]+\s*$/u, '');
        // Drop trailing colons / dashes left over. U+2014 is the em dash and
        // U+2013 the en dash; escapes keep the class encoding-proof.
        text = text.replace(/[\s:\u2014\u2013-]+$/u, '');
        text = text.trim();
    }
    return text;
}
|
|
738
|
-
/**
 * Remove the parenthesised metadata that briefs append to a
 * priority-finding name — procedure codes, dates, committee tags.
 *
 * Only a group sitting at the very end of the text is removed: the pattern
 * is anchored to the end of the string, its content may not contain
 * parentheses, and it must be 3 to 80 characters long. Earlier groups and
 * nested groups are left untouched. The result is trimmed.
 *
 * @param text - Headline or paragraph text
 * @returns Text with the trailing `(…)` stripped
 */
function stripPriorityTailMetadata(text) {
    const trailingParenGroup = /\s*\([^()]{3,80}\)\s*$/u;
    const withoutGroup = text.replace(trailingParenGroup, '');
    return withoutGroup.trim();
}
|
|
191
|
+
export { extractPriorityFindingHighlight } from './priority-finding-highlight.js';
|
|
192
|
+
import { extractPriorityFindingHighlight } from './priority-finding-highlight.js';
|
|
750
193
|
/**
|
|
751
194
|
* Read an artefact file, skipping any SPDX HTML-comment header rows so the
|
|
752
195
|
* first-H1 / first-prose logic is never derailed by the REUSE preamble.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mine the FIRST named priority finding from an executive-brief-style
|
|
3
|
+
* artefact body. Looks for a section heading from
|
|
4
|
+
* {@link PRIORITY_FINDING_SECTION_HEADINGS} and returns the first dossier
|
|
5
|
+
* name + descriptive paragraph found inside it. Supports the three
|
|
6
|
+
* canonical Stage-B authoring patterns:
|
|
7
|
+
*
|
|
8
|
+
* 1. **Bold-in-numbered-list** (breaking briefs):
|
|
9
|
+
* `1. **Digital Markets Act Enforcement** (TA-10-2026-0160, 2026-04-30)`
|
|
10
|
+
*    `   Parliament adopted a resolution …`
|
|
11
|
+
* 2. **Numbered subheading** (committee briefs):
|
|
12
|
+
* `### 1. Clean Industrial Deal Implementation (ITRE/ENVI)`
|
|
13
|
+
*    `The Clean Industrial Deal framework …`
|
|
14
|
+
* 3. **Bold-leading paragraph** (synthesis variants):
|
|
15
|
+
* `**Trigger 1: DMA Enforcement Resolution** (TA-10-2026-0160)`
|
|
16
|
+
*    `- Significance: 🟢 HIGH IMPACT …`
|
|
17
|
+
*
|
|
18
|
+
* Trailing parenthesised metadata (`(TA-10-2026-0160, 2026-04-30)`,
|
|
19
|
+
* `(ITRE/ENVI)`) is stripped from the headline so it stays headline-shaped
|
|
20
|
+
* (`Digital Markets Act Enforcement`) rather than boilerplate
|
|
21
|
+
* (`Digital Markets Act Enforcement (TA-10-2026-0160, 2026-04-30)`).
|
|
22
|
+
*
|
|
23
|
+
* @param body - Editorial artefact body
|
|
24
|
+
* @returns `{headline, summary}` when a priority finding was identified;
|
|
25
|
+
* `null` when the body has no priority section or no usable item inside
|
|
26
|
+
*/
|
|
27
|
+
export declare function extractPriorityFindingHighlight(body: string): {
|
|
28
|
+
readonly headline: string;
|
|
29
|
+
readonly summary: string;
|
|
30
|
+
} | null;
|
|
31
|
+
//# sourceMappingURL=priority-finding-highlight.d.ts.map
|