euparliamentmonitor 0.9.21 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -2
- package/scripts/aggregator/article-metadata.js +69 -14
- package/scripts/aggregator/editorial-brief-resolver.js +23 -0
- package/scripts/aggregator/html/headline.d.ts +41 -9
- package/scripts/aggregator/html/headline.js +69 -10
- package/scripts/aggregator/html/shell.js +73 -17
- package/scripts/aggregator/manifest/index.d.ts +1 -1
- package/scripts/aggregator/manifest/index.js +1 -1
- package/scripts/aggregator/manifest/resolver.d.ts +28 -1
- package/scripts/aggregator/manifest/resolver.js +61 -5
- package/scripts/aggregator/markdown-renderer.js +11 -0
- package/scripts/aggregator/metadata/artifact-category-heading.d.ts +81 -0
- package/scripts/aggregator/metadata/artifact-category-heading.js +353 -0
- package/scripts/aggregator/metadata/artifact-walker.js +29 -10
- package/scripts/aggregator/metadata/brief-body.d.ts +12 -0
- package/scripts/aggregator/metadata/brief-body.js +69 -0
- package/scripts/aggregator/metadata/briefing-highlight.d.ts +47 -0
- package/scripts/aggregator/metadata/briefing-highlight.js +469 -0
- package/scripts/aggregator/metadata/editorial-highlight.d.ts +18 -0
- package/scripts/aggregator/metadata/editorial-highlight.js +40 -1
- package/scripts/aggregator/metadata/heading-rules.d.ts +2 -81
- package/scripts/aggregator/metadata/heading-rules.js +78 -269
- package/scripts/aggregator/metadata/keyword-filters.d.ts +60 -0
- package/scripts/aggregator/metadata/keyword-filters.js +156 -0
- package/scripts/aggregator/metadata/lede-extractor.js +11 -2
- package/scripts/aggregator/metadata/priority-finding-cleaning.d.ts +22 -0
- package/scripts/aggregator/metadata/priority-finding-cleaning.js +181 -0
- package/scripts/aggregator/metadata/priority-finding-highlight.js +75 -159
- package/scripts/aggregator/metadata/resolve-helpers.d.ts +34 -0
- package/scripts/aggregator/metadata/resolve-helpers.js +202 -15
- package/scripts/aggregator/metadata/seo-budgets.d.ts +140 -0
- package/scripts/aggregator/metadata/seo-budgets.js +202 -0
- package/scripts/aggregator/metadata/text-truncate.d.ts +75 -0
- package/scripts/aggregator/metadata/text-truncate.js +277 -0
- package/scripts/aggregator/metadata/text-utils-constants.d.ts +96 -0
- package/scripts/aggregator/metadata/text-utils-constants.js +209 -0
- package/scripts/aggregator/metadata/text-utils.d.ts +32 -143
- package/scripts/aggregator/metadata/text-utils.js +119 -439
- package/scripts/aggregator/metadata/title-rejection.d.ts +37 -0
- package/scripts/aggregator/metadata/title-rejection.js +179 -0
- package/scripts/copy-vendor.js +84 -112
- package/scripts/dump-article-seo.js +640 -0
- package/scripts/fix-mermaid-diagrams.js +931 -0
- package/scripts/generators/news-indexes/backfill.d.ts +6 -1
- package/scripts/generators/news-indexes/backfill.js +71 -4
- package/scripts/validate-article-seo.js +534 -0
- package/scripts/validate-mermaid-diagrams.js +306 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* @module Aggregator/Metadata/BriefBody
|
|
5
|
+
* @description Reads the English `executive-brief.md` (or compatible
|
|
6
|
+
* fallback artefacts) for a run directory and returns the SPDX-stripped
|
|
7
|
+
* body so language-agnostic structural extractors
|
|
8
|
+
* ({@link briefing-highlight.ts}) can probe it for `## Reader Briefing`
|
|
9
|
+
* / `## Strategic Intelligence Summary` content.
|
|
10
|
+
*
|
|
11
|
+
* Localized brief bodies (`executive-brief_<lang>.md`) are read by the
|
|
12
|
+
* upward-pointing {@link editorial-brief-resolver.ts}; that file
|
|
13
|
+
* cannot import this module without breaking the leaf-module rule, so
|
|
14
|
+
* the localized brief loader stays where it is and re-exports its own
|
|
15
|
+
* body via the dedicated helper.
|
|
16
|
+
*
|
|
17
|
+
* Pure leaf module — depends only on `fs`/`path` and the SPDX-aware
|
|
18
|
+
* reader from {@link artifact-walker}.
|
|
19
|
+
*/
|
|
20
|
+
import fs from 'fs';
|
|
21
|
+
import path from 'path';
|
|
22
|
+
import { readArtefactBody } from './artifact-walker.js';
|
|
23
|
+
/**
 * Ordered list of artefact paths (relative to the run directory) inspected by
 * {@link readEnglishBriefBody}. Order is significant: the first candidate
 * that exists and yields a non-blank body wins.
 *
 * Frozen so the shared lookup order cannot be mutated at runtime.
 */
const BRIEF_BODY_CANDIDATES = Object.freeze([
    'executive-brief.md',
    'extended/executive-brief.md',
    'intelligence/synthesis-summary.md',
    'intelligence/executive-summary.md',
    'intelligence/intelligence-briefing.md',
    'executive-summary.md',
    'intelligence-briefing.md',
    'synthesis-summary.md',
]);
|
|
34
|
+
/**
 * Read the first existing English brief artefact under `runDir` and
 * return its SPDX-stripped body. Returns the empty string when the run
 * directory is missing, when none of the candidate artefacts exists, or
 * when no candidate yields a non-blank body — callers should treat the
 * empty string as "no brief content available" and fall back to their
 * existing extraction ladder.
 *
 * Candidates are probed in the order given by `BRIEF_BODY_CANDIDATES`.
 *
 * @param runDir - Absolute run directory, or empty string when unavailable
 * @returns Brief body text with SPDX preamble removed
 */
export function readEnglishBriefBody(runDir) {
    // `fs.existsSync` reports failure by returning `false`, so no
    // try/catch is needed around the existence probes.
    if (!runDir || !fs.existsSync(runDir))
        return '';
    for (const candidate of BRIEF_BODY_CANDIDATES) {
        const abs = path.join(runDir, candidate);
        if (!fs.existsSync(abs))
            continue;
        let body;
        try {
            // Defensive: the candidate may be unreadable, may be a directory,
            // or may disappear between the existence probe and the read.
            // Keep this loader best-effort and try the next candidate.
            body = readArtefactBody(abs);
        }
        catch {
            continue;
        }
        // Skip anything that is not usable text instead of crashing on `.trim()`.
        if (typeof body === 'string' && body.trim().length > 0)
            return body;
    }
    return '';
}
|
|
69
|
+
//# sourceMappingURL=brief-body.js.map
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * One resolved brief highlight. Both `headline` and `summary` may be
 * empty when the underlying section is absent or too short to publish.
 * `extendedSummary` is only emitted when the BLUF/synthesis paragraph
 * exceeds the regular description budget — see
 * {@link truncateExtendedDescription} for the cutoff.
 */
export interface BriefingHighlight {
    /**
     * Title-budgeted headline (output of `truncateTitle`). Empty string
     * when the strategic extractor falls back to a bare paragraph.
     */
    readonly headline: string;
    /** Description-budgeted summary (output of `truncateDescription`). */
    readonly summary: string;
    /**
     * Longer rendering of the same source text as `summary`
     * (output of `truncateExtendedDescription`).
     */
    readonly extendedSummary: string;
}
|
|
13
|
+
/**
 * Extract the {@link BriefingHighlight} for a `## Strategic
 * Intelligence Summary` (or compatible) section. Prefers the first
 * `### Sub-section` heading as headline; falls back to the section's
 * first prose paragraph when no sub-heading exists. On that fallback
 * path `headline` is the empty string and only `summary` /
 * `extendedSummary` carry content.
 *
 * @param markdown - Brief body
 * @returns Resolved highlight, or `null` when the section is absent
 *   or contains no usable paragraph
 */
export declare function extractStrategicSynthesisHighlight(markdown: string): BriefingHighlight | null;
|
|
23
|
+
/**
 * Extract the {@link BriefingHighlight} for a `## Reader Briefing` (or
 * compatible) section. Prefers the first numbered-list item as
 * headline when the section is structured as a priority list; falls
 * back to the first prose paragraph when it is written as plain prose
 * (the term-outlook style). Only a list item numbered `1.` is
 * recognised as the opening item.
 *
 * @param markdown - Brief body
 * @returns Resolved highlight, or `null` when the section is absent
 *   or both headline and summary come out empty
 */
export declare function extractReaderBriefingHighlight(markdown: string): BriefingHighlight | null;
|
|
34
|
+
/**
 * Combined extractor that runs the Strategic Intelligence Summary path
 * first (highest editorial value) and falls back to Reader Briefing
 * when Strategic Intelligence Summary is absent. When **both**
 * sections resolve, the result is merged field by field: each of
 * `headline`, `summary` and `extendedSummary` takes the strategic
 * value when it is non-empty and the reader-briefing value otherwise.
 *
 * @param markdown - Brief body (SPDX preamble already stripped)
 * @returns Best `{headline, summary, extendedSummary}`, or `null`
 *   when neither section exists
 */
export declare function extractBriefingHighlight(markdown: string): BriefingHighlight | null;
|
|
47
|
+
//# sourceMappingURL=briefing-highlight.d.ts.map
|
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* @module Aggregator/Metadata/BriefingHighlight
|
|
5
|
+
* @description Highlight extractor specialised on the executive-brief
|
|
6
|
+
* style guide introduced for the May-2026 EP10 briefing series. Where
|
|
7
|
+
* {@link editorial-highlight.ts} treats every Stage-B artefact uniformly
|
|
8
|
+
* (first non-generic H1 → headline; first prose paragraph → summary),
|
|
9
|
+
* this module knows that an `executive-brief.md` carries two strong
|
|
10
|
+
* editorial sections that should outrank a generic `## Key Findings`
|
|
11
|
+
* walk:
|
|
12
|
+
*
|
|
13
|
+
* 1. `## Strategic Intelligence Summary` — the synthesis paragraph.
|
|
14
|
+
* The first `### Sub-section` heading underneath it (e.g.
|
|
15
|
+
* "The Three-Coalition Paradox") makes a clean, journalistic
|
|
16
|
+
* `<title>`. The first prose paragraph that follows that
|
|
17
|
+
* sub-heading makes a clean `<meta description>`.
|
|
18
|
+
*
|
|
19
|
+
* 2. `## Reader Briefing` (a.k.a. `Reader Briefing (Plain Language)`)
|
|
20
|
+
* — the actionable priority list. When the section is structured
|
|
21
|
+
* as a numbered list (`1. **Immediate priority**: …`), the bold
|
|
22
|
+
* label + tail of the first item makes a strong `<title>`. When
|
|
23
|
+
* it is structured as a single prose paragraph (the
|
|
24
|
+
* term-outlook style), the paragraph is used verbatim as
|
|
25
|
+
* description and the first sentence becomes the headline.
|
|
26
|
+
*
|
|
27
|
+
* Both extractors are tolerant of missing sections — they return
|
|
28
|
+
* `null` so the resolver can fall back to the existing
|
|
29
|
+
* `extractArtifactHighlight` ladder for the 200+ historical briefs
|
|
30
|
+
* that pre-date this style guide.
|
|
31
|
+
*
|
|
32
|
+
* Pure leaf module under `metadata/` — depends only on
|
|
33
|
+
* text helpers ({@link stripInlineMarkdown}, {@link shouldSkipDescriptionLine},
|
|
34
|
+
* the leading-label strippers) and the `truncate*` family plus its length budget
|
|
35
|
+
* from `text-utils`. No I/O, no upward imports.
|
|
36
|
+
*/
|
|
37
|
+
import { EXTENDED_DESCRIPTION_MAX_LENGTH, shouldSkipDescriptionLine, stripInlineMarkdown, stripLeadingBoldLabel, stripLeadingProseLabel, truncateDescription, truncateExtendedDescription, truncateTitle, } from './text-utils.js';
|
|
38
|
+
/**
 * Lower-case heading texts that open the Strategic Intelligence Summary
 * block. Frozen so the shared whitelist cannot be mutated at runtime.
 */
const STRATEGIC_SECTION_HEADINGS = Object.freeze([
    'strategic intelligence summary',
    'strategic assessment',
    'intelligence assessment',
]);
|
|
44
|
+
/**
 * Lower-case heading texts that open the Reader Briefing block. The two
 * longer variants are also reachable through the `reader briefing`
 * prefix rule in `headingMatches`; they are listed for explicitness.
 * Frozen so the shared whitelist cannot be mutated at runtime.
 */
const READER_BRIEFING_HEADINGS = Object.freeze([
    'reader briefing',
    'reader briefing (plain language)',
    'reader briefing — plain language',
]);
|
|
50
|
+
/**
 * Classify a trimmed Markdown line into one of the structural buckets
 * the section walkers care about. Extracted from the inline walker
 * loops to keep their cognitive complexity below the 15-point limit.
 *
 * Buckets: `'fence'`, `'h2'`, `'h3'`, `'blank'`, `'structural'`
 * (tables, block quotes, HTML, rules), `'numbered'`, `'bullet'`,
 * `'prose'`.
 *
 * A bullet is only reported when the marker (`-`, `*` or `+`) is
 * followed by whitespace or ends the line. Lines that merely start with
 * emphasis or a minus sign (`**Bottom line**: …`, `*Note* …`,
 * `-5% swing …`) are prose, not list items.
 *
 * @param line - Trimmed Markdown line
 * @returns Line kind sentinel
 */
function classifyLine(line) {
    if (line.startsWith('```') || line.startsWith('~~~'))
        return 'fence';
    if (line.startsWith('## '))
        return 'h2';
    if (line.startsWith('### '))
        return 'h3';
    if (line === '')
        return 'blank';
    if (line.startsWith('|') || line.startsWith('>') || line.startsWith('<'))
        return 'structural';
    if (line.startsWith('---') || line.startsWith('==='))
        return 'structural';
    // An asterisk-only rule (`***`) is a thematic break like `---`; without
    // this check the stricter bullet test below would let it through as prose.
    if (/^\*{3,}$/u.test(line))
        return 'structural';
    if (/^\d+\.\s+/u.test(line))
        return 'numbered';
    // Require a real list marker: `-`, `*` or `+` followed by whitespace
    // (or standing alone as an empty item).
    if (/^[-*+](?:\s|$)/u.test(line))
        return 'bullet';
    return 'prose';
}
|
|
77
|
+
/**
 * Compare a raw `## …` heading text against a whitelist of expected
 * section names. The comparison strips inline Markdown decorations and
 * leading non-alphanumeric characters (emoji, punctuation) so a brief
 * that writes the heading as `## 🧭 Strategic Intelligence Summary`
 * still matches.
 *
 * A heading matches a whitelist entry when it equals the entry, or
 * starts with the entry immediately followed by a separator: a space
 * (which also covers ` — `, ` – ` and ` - `), `:`, `(`, or an unspaced
 * em/en dash (`Reader Briefing—Plain Language`). An unspaced hyphen is
 * deliberately not a separator so compound words such as
 * `assessment-based` do not match.
 *
 * @param raw - Heading text without the leading `#`s
 * @param needles - Lower-case whitelist entries
 * @returns `true` when the heading text matches any whitelist entry
 */
function headingMatches(raw, needles) {
    const normalized = stripInlineMarkdown(raw)
        .replace(/[*_`#]+/g, '')
        .replace(/^[^A-Za-z0-9]+/, '')
        .trim()
        .toLowerCase();
    for (const needle of needles) {
        if (!normalized.startsWith(needle))
            continue;
        // Exact match.
        if (normalized.length === needle.length)
            return true;
        // Otherwise the character right after the needle must be a separator,
        // so `strategic assessments` does not match `strategic assessment`.
        const next = normalized.charAt(needle.length);
        if (' :(—–'.includes(next))
            return true;
    }
    return false;
}
|
|
113
|
+
/**
 * Create the mutable bookkeeping record shared by the section walkers.
 *
 * @returns Fresh walker state: outside any fence and section, no
 *   sub-heading captured, empty line buffer, zero accumulated length.
 */
function newState() {
    const state = {
        inFence: false,
        inSection: false,
        subHeading: '',
        lines: [],
        byteCount: 0,
    };
    return state;
}
|
|
121
|
+
/**
 * Append one cleaned line to the walker buffer and grow the running
 * length budget. The budget counts `line.length` (UTF-16 code units)
 * plus one for the space that will join this line to the next.
 *
 * @param state - Walker state (mutated)
 * @param line - Cleaned line to append
 */
function appendLine(state, line) {
    const cost = line.length + 1;
    state.lines.push(line);
    state.byteCount += cost;
}
|
|
131
|
+
/**
 * Reduce one raw section line to publishable plain text.
 *
 * Lines rejected by `shouldSkipDescriptionLine` yield the empty string.
 * Otherwise inline Markdown is stripped, leading bold/prose labels are
 * removed unless `preserveLeadingLabel` is set, and one leading
 * separator (`:`, `;`, em dash, en dash or hyphen, followed by
 * whitespace) is dropped before trimming.
 *
 * @param line - Raw (trimmed) Markdown line
 * @param preserveLeadingLabel - Keep a leading label instead of stripping it
 * @returns Cleaned text, or empty string when the line must be skipped
 */
function normalizeBriefingLine(line, preserveLeadingLabel = false) {
    if (shouldSkipDescriptionLine(line)) {
        return '';
    }
    let text = stripInlineMarkdown(line);
    if (!preserveLeadingLabel) {
        text = stripLeadingProseLabel(stripLeadingBoldLabel(text));
    }
    return text.replace(/^[:;—–-]\s+/u, '').trim();
}
|
|
140
|
+
/**
 * Work out which transition a `## …` heading triggers for a walker.
 *
 * @param state - Walker state (only `inSection` is read)
 * @param raw - Raw heading line (already trimmed)
 * @param needles - Lower-case section whitelist
 * @returns `'enter'` when the heading is whitelisted, `'leave'` when a
 *   non-whitelisted heading arrives while a target section is open,
 *   `'skip'` otherwise.
 */
function transitionForH2(state, raw, needles) {
    const title = raw.replace(/^##\s+/, '');
    if (headingMatches(title, needles)) {
        return 'enter';
    }
    return state.inSection ? 'leave' : 'skip';
}
|
|
158
|
+
/**
 * Handle a `## …` line for the sub-section walker. Returns `true`
 * when the caller should stop walking.
 *
 * The first complete sub-heading/paragraph pair wins: once one has been
 * captured inside a matched section, any following `## …` heading —
 * including another whitelisted one — ends the walk instead of
 * resetting the buffers. This mirrors `handleH2ForParagraph`.
 *
 * @param state - Walker state (mutated)
 * @param line - Trimmed `## …` line
 * @param needles - Section whitelist
 * @returns `true` to stop walking
 */
function handleH2ForSubsection(state, line, needles) {
    // A complete pair is already captured: keep it rather than letting a
    // later matching section wipe it.
    if (state.inSection && state.subHeading && state.lines.length > 0)
        return true;
    const t = transitionForH2(state, line, needles);
    if (t === 'enter') {
        // Start (or restart) collection with clean buffers.
        state.inSection = true;
        state.subHeading = '';
        state.lines.length = 0;
        state.byteCount = 0;
        return false;
    }
    if (t === 'leave') {
        // Section closed without a usable pair: keep scanning for a later match.
        state.inSection = false;
    }
    return false;
}
|
|
183
|
+
/**
 * Scan the brief body for the first `### …` heading that sits inside a
 * whitelisted `## …` section and is followed by a prose paragraph.
 * Fenced code blocks are skipped entirely.
 *
 * @param markdown - Brief body (SPDX preamble already stripped)
 * @param sectionNeedles - Lower-case `## …` whitelist
 * @returns `{subHeading, paragraph}` for the first such pair, or `null`
 *   when no sub-heading with a paragraph was captured
 */
function extractFirstSubsectionUnderSection(markdown, sectionNeedles) {
    const walker = newState();
    const rows = markdown.split('\n');
    for (let i = 0; i < rows.length; i += 1) {
        const text = rows[i].trim();
        const lineKind = classifyLine(text);
        if (lineKind === 'fence') {
            // Fence delimiters toggle the skip mode and are never collected.
            walker.inFence = !walker.inFence;
        }
        else if (walker.inFence) {
            // Inside a fenced block: ignore the line.
        }
        else if (lineKind === 'h2') {
            if (handleH2ForSubsection(walker, text, sectionNeedles)) {
                break;
            }
        }
        else if (walker.inSection && collectSubsectionLine(walker, text, lineKind)) {
            break;
        }
    }
    const captured = Boolean(walker.subHeading) && walker.lines.length > 0;
    if (!captured) {
        return null;
    }
    const subHeading = walker.subHeading.trim();
    const paragraph = walker.lines.join(' ').trim();
    return { subHeading, paragraph };
}
|
|
222
|
+
/**
 * Feed one non-`##` line from inside the matched section to the
 * sub-section extractor.
 *
 * A `### …` line either ends the walk (a heading plus paragraph is
 * already held) or becomes the current sub-heading and clears the
 * buffer. Lines before any sub-heading are ignored. Blank, structural
 * and skipped lines end the walk only once a paragraph has started;
 * other lines are cleaned, buffered, and end the walk when the
 * accumulated length reaches `EXTENDED_DESCRIPTION_MAX_LENGTH`.
 *
 * @param state - Walker state (mutated)
 * @param line - Trimmed line being processed
 * @param kind - Pre-classified line kind from {@link classifyLine}
 * @returns `true` to stop walking
 */
function collectSubsectionLine(state, line, kind) {
    const paragraphStarted = () => state.lines.length > 0;
    if (kind === 'h3') {
        if (state.subHeading && paragraphStarted()) {
            return true;
        }
        state.subHeading = stripInlineMarkdown(line.replace(/^###\s+/, ''));
        state.lines.length = 0;
        state.byteCount = 0;
        return false;
    }
    if (!state.subHeading) {
        return false;
    }
    const isBoundary = kind === 'blank' || kind === 'structural';
    const text = isBoundary ? '' : normalizeBriefingLine(line);
    if (!text) {
        return paragraphStarted();
    }
    appendLine(state, text);
    return state.byteCount >= EXTENDED_DESCRIPTION_MAX_LENGTH;
}
|
|
252
|
+
/**
 * Scan the brief body for the first prose paragraph inside a
 * whitelisted `## …` section, stepping over `### …` sub-headings and
 * fenced code blocks. Serves as the fallback when the section is a
 * single-paragraph block (the term-outlook Reader Briefing style).
 *
 * @param markdown - Brief body (SPDX preamble already stripped)
 * @param sectionNeedles - Lower-case `## …` whitelist
 * @returns The paragraph's lines joined by spaces, or empty string when
 *   nothing was captured
 */
function extractFirstParagraphUnderSection(markdown, sectionNeedles) {
    const walker = newState();
    const rows = markdown.split('\n');
    for (let i = 0; i < rows.length; i += 1) {
        const text = rows[i].trim();
        const lineKind = classifyLine(text);
        if (lineKind === 'fence') {
            walker.inFence = !walker.inFence;
        }
        else if (walker.inFence) {
            // Inside a fenced block: ignore the line.
        }
        else if (lineKind === 'h2') {
            if (handleH2ForParagraph(walker, text, sectionNeedles)) {
                break;
            }
        }
        else if (walker.inSection && lineKind !== 'h3') {
            if (collectParagraphLine(walker, text, lineKind)) {
                break;
            }
        }
    }
    if (walker.lines.length === 0) {
        return '';
    }
    return walker.lines.join(' ').trim();
}
|
|
285
|
+
/**
 * React to a `## …` line in the first-paragraph walker. If a paragraph
 * has already been buffered inside a matched section the walk is over;
 * otherwise the section flag is updated to reflect whether this
 * heading opens a whitelisted section.
 *
 * @param state - Walker state (mutated)
 * @param line - Trimmed `## …` line
 * @param needles - Section whitelist
 * @returns `true` to stop walking
 */
function handleH2ForParagraph(state, line, needles) {
    const paragraphCaptured = state.inSection && state.lines.length > 0;
    if (!paragraphCaptured) {
        state.inSection = transitionForH2(state, line, needles) === 'enter';
    }
    return paragraphCaptured;
}
|
|
302
|
+
/**
 * Feed one non-heading line from inside the matched section to the
 * first-paragraph extractor. Blank, structural and skipped lines end
 * the walk only once a paragraph has started; other lines are cleaned,
 * buffered, and end the walk when the accumulated length reaches
 * `EXTENDED_DESCRIPTION_MAX_LENGTH`.
 *
 * @param state - Walker state (mutated)
 * @param line - Trimmed line being processed
 * @param kind - Pre-classified line kind from {@link classifyLine}
 * @returns `true` to stop walking
 */
function collectParagraphLine(state, line, kind) {
    const isBoundary = kind === 'blank' || kind === 'structural';
    const text = isBoundary ? '' : normalizeBriefingLine(line);
    if (!text) {
        return state.lines.length > 0;
    }
    appendLine(state, text);
    return state.byteCount >= EXTENDED_DESCRIPTION_MAX_LENGTH;
}
|
|
322
|
+
/**
 * Scan the brief body for the first numbered-list item inside a
 * whitelisted `## …` section, following the
 * `1. **Immediate priority**: …` shape of the May-2026 Reader Briefing
 * style guide. The opening line and its continuation lines are joined
 * with spaces into one headline-shaped string.
 *
 * @param markdown - Brief body
 * @param sectionNeedles - `## …` whitelist
 * @returns Flattened first list item, or empty string when absent
 */
function extractFirstNumberedItemUnderSection(markdown, sectionNeedles) {
    const walker = { inFence: false, inSection: false, item: [] };
    const rows = markdown.split('\n');
    for (let i = 0; i < rows.length; i += 1) {
        const text = rows[i].trim();
        const lineKind = classifyLine(text);
        if (lineKind === 'fence') {
            walker.inFence = !walker.inFence;
        }
        else if (walker.inFence) {
            // Inside a fenced block: ignore the line.
        }
        else if (lineKind === 'h2') {
            // An item already captured in a matched section ends the walk.
            if (walker.inSection && walker.item.length > 0) {
                break;
            }
            walker.inSection = headingMatches(text.replace(/^##\s+/, ''), sectionNeedles);
        }
        else if (walker.inSection && handleNumberedLine(walker, text, lineKind)) {
            break;
        }
    }
    return walker.item.join(' ').trim();
}
|
|
358
|
+
/**
 * Feed one line from inside the matched section to the numbered-item
 * extractor.
 *
 * Until an item has started, only a line of the form `1. …` is
 * accepted; its text is cleaned with the leading label preserved.
 * After that, a blank line, another list entry or a heading closes the
 * item, as does a continuation line that cleans to nothing. Any other
 * line is appended as continuation text.
 *
 * @param state - Numbered-item walker state (mutated)
 * @param line - Trimmed line being processed
 * @param kind - Pre-classified line kind from {@link classifyLine}
 * @returns `true` to stop walking
 */
function handleNumberedLine(state, line, kind) {
    const itemStarted = state.item.length > 0;
    if (!itemStarted) {
        if (kind === 'numbered') {
            const match = /^1\.\s+(.*)$/u.exec(line);
            const opening = match?.[1] ? normalizeBriefingLine(match[1], true) : '';
            if (opening) {
                state.item.push(opening);
            }
        }
        return false;
    }
    switch (kind) {
        case 'blank':
        case 'numbered':
        case 'bullet':
        case 'h2':
        case 'h3':
            return true;
        default:
            break;
    }
    const continuation = normalizeBriefingLine(line);
    if (!continuation) {
        // The item is non-empty on this path, so an unusable line closes it.
        return true;
    }
    state.item.push(continuation);
    return false;
}
|
|
387
|
+
/**
 * Build the {@link BriefingHighlight} for the `## Strategic
 * Intelligence Summary` family of sections. When the section holds a
 * `### …` sub-heading with a paragraph, the sub-heading supplies the
 * headline and the paragraph both summaries. Otherwise the section's
 * first prose paragraph supplies the summaries and the headline is
 * left empty.
 *
 * @param markdown - Brief body
 * @returns Resolved highlight, or `null` when neither a sub-section
 *   pair nor a paragraph was found
 */
export function extractStrategicSynthesisHighlight(markdown) {
    const pair = extractFirstSubsectionUnderSection(markdown, STRATEGIC_SECTION_HEADINGS);
    if (pair) {
        const { subHeading, paragraph } = pair;
        return {
            headline: truncateTitle(subHeading),
            summary: truncateDescription(paragraph),
            extendedSummary: truncateExtendedDescription(paragraph),
        };
    }
    const fallbackParagraph = extractFirstParagraphUnderSection(markdown, STRATEGIC_SECTION_HEADINGS);
    if (fallbackParagraph) {
        return {
            headline: '',
            summary: truncateDescription(fallbackParagraph),
            extendedSummary: truncateExtendedDescription(fallbackParagraph),
        };
    }
    return null;
}
|
|
414
|
+
/**
 * Extract the {@link BriefingHighlight} for a `## Reader Briefing` (or
 * compatible) section. Prefers the first numbered-list item as
 * headline when the section is structured as a priority list; falls
 * back to the first prose paragraph when it is written as plain prose
 * (the term-outlook style). Both summaries come from the paragraph
 * when one exists and from the first list item otherwise.
 *
 * @param markdown - Brief body
 * @returns Resolved highlight, or `null` when the section is absent
 *   or both headline and summary come out empty
 */
export function extractReaderBriefingHighlight(markdown) {
    const firstItem = extractFirstNumberedItemUnderSection(markdown, READER_BRIEFING_HEADINGS);
    const paragraph = extractFirstParagraphUnderSection(markdown, READER_BRIEFING_HEADINGS);
    if (!firstItem && !paragraph)
        return null;
    // Past the guard at least one source is non-empty, so no further
    // emptiness checks are needed when picking the sources.
    const headline = truncateTitle(firstItem || paragraph);
    const bodySource = paragraph || firstItem;
    const summary = truncateDescription(bodySource);
    const extendedSummary = truncateExtendedDescription(bodySource);
    // Truncation may still reject the text; publish nothing in that case.
    if (!headline && !summary)
        return null;
    return { headline, summary, extendedSummary };
}
|
|
443
|
+
/**
 * Combined extractor: resolves both the Strategic Intelligence Summary
 * and the Reader Briefing highlight. When both resolve, the result is
 * merged field by field, with each of `headline`, `summary` and
 * `extendedSummary` taking the strategic value if it is non-empty and
 * the reader-briefing value otherwise. When only one resolves, it is
 * returned as is.
 *
 * @param markdown - Brief body (SPDX preamble already stripped)
 * @returns Best `{headline, summary, extendedSummary}`, or `null`
 *   when neither section exists
 */
export function extractBriefingHighlight(markdown) {
    const strategic = extractStrategicSynthesisHighlight(markdown);
    const reader = extractReaderBriefingHighlight(markdown);
    if (strategic && reader) {
        const pick = (field) => strategic[field] || reader[field];
        return {
            headline: pick('headline'),
            summary: pick('summary'),
            extendedSummary: pick('extendedSummary'),
        };
    }
    if (!strategic && !reader) {
        return null;
    }
    return strategic ?? reader;
}
|
|
469
|
+
//# sourceMappingURL=briefing-highlight.js.map
|
|
@@ -1,3 +1,21 @@
|
|
|
1
|
+
/**
 * Resolver output filenames that must NEVER be walked as a source by the
 * top-level fallback scan in {@link extractArtifactHighlight}. These are
 * either the resolver's own output (`article.md`, `article-meta.json`)
 * or per-language renderings that contain transcluded metadata-banner
 * lines (`**Threat Level:** …`, `**Key Assumptions Check**: …`) that
 * `priority-finding-highlight.ts` Pattern C would falsely accept as
 * editorial headlines. See the regression catalogue documented in
 * `scripts/validate-article-seo.js` for the smoking-gun live-site
 * defects (2026-05-22 week-ahead `<title>Threat Level</title>`,
 * 2026-05-22 committee-reports `<title>Key Assumptions Check</title>`).
 *
 * Returns `true` for resolver-output filenames. The implementation also
 * returns `true` for an empty filename, for `article-meta.jsonl`, for
 * any `*.html` file and for `render-log.json`.
 *
 * @param filename - Bare filename (no path), e.g. `article.md`
 * @returns `true` when the file is a resolver output and must be skipped
 */
export declare function isResolverOutputArtefact(filename: string): boolean;
|
|
1
19
|
/**
|
|
2
20
|
* Attempt to read the first H1 and first prose paragraph from the first
|
|
3
21
|
* existing artefact under {@link EDITORIAL_ARTEFACT_CANDIDATES}. Returns
|
|
@@ -14,6 +14,42 @@
|
|
|
14
14
|
import fs from 'fs';
|
|
15
15
|
import { EDITORIAL_ARTEFACT_CANDIDATES, safeReaddir, scanCandidatesForHighlight, } from './artifact-walker.js';
|
|
16
16
|
import { isTranslatedSiblingBrief } from './translated-sibling.js';
|
|
17
|
+
/**
 * Tell whether a bare filename is a resolver/build output that the
 * top-level fallback scan in {@link extractArtifactHighlight} must not
 * read as an editorial source. Such files carry transcluded
 * metadata-banner lines (`**Threat Level:** …`,
 * `**Key Assumptions Check**: …`) that would otherwise surface as
 * headlines.
 *
 * Treated as output: an empty filename, `article.md`,
 * `article-meta.json`, `article-meta.jsonl`, `render-log.json`,
 * per-language renderings (`article.<lang>.md`, `article_<lang>.md`
 * with a 2–3 letter code, case-insensitive) and any `*.html` file.
 *
 * @param filename - Bare filename (no path), e.g. `article.md`
 * @returns `true` when the file is a resolver output and must be skipped
 */
export function isResolverOutputArtefact(filename) {
    if (!filename) {
        return true;
    }
    const exactOutputs = ['article.md', 'article-meta.json', 'article-meta.jsonl', 'render-log.json'];
    if (exactOutputs.includes(filename)) {
        return true;
    }
    // Per-language article renderings: `article.<lang>.md`, `article_<lang>.md`.
    const isLocalisedArticle = /^article[._][a-z]{2,3}\.md$/iu.test(filename);
    // Build sidecar files emitted by the generator pipeline.
    return isLocalisedArticle || filename.endsWith('.html');
}
|
|
17
53
|
/**
|
|
18
54
|
* Attempt to read the first H1 and first prose paragraph from the first
|
|
19
55
|
* existing artefact under {@link EDITORIAL_ARTEFACT_CANDIDATES}. Returns
|
|
@@ -40,7 +76,10 @@ export function extractArtifactHighlight(runDir, articleType, date) {
|
|
|
40
76
|
// `<meta description>` for the 2026-05-15 batch with Arabic content
|
|
41
77
|
// from `executive-brief_ar.md`. See {@link isTranslatedSiblingBrief}
|
|
42
78
|
// and the regression test in `test/unit/article-metadata.test.js`.
|
|
43
|
-
const topLevel = safeReaddir(runDir).filter((f) => f.endsWith('.md') &&
|
|
79
|
+
const topLevel = safeReaddir(runDir).filter((f) => f.endsWith('.md') &&
|
|
80
|
+
f !== 'manifest.json' &&
|
|
81
|
+
!isTranslatedSiblingBrief(f) &&
|
|
82
|
+
!isResolverOutputArtefact(f));
|
|
44
83
|
const fallback = scanCandidatesForHighlight(runDir, topLevel, articleType, date);
|
|
45
84
|
if (fallback.headline)
|
|
46
85
|
return { headline: fallback.headline, summary: fallback.summary };
|