euparliamentmonitor 0.9.24 → 0.9.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +6 -6
- package/scripts/aggregator/article-metadata.d.ts +1 -1
- package/scripts/aggregator/article-metadata.js +18 -128
- package/scripts/aggregator/editorial-brief-resolver.js +6 -2
- package/scripts/aggregator/lead-extractor.js +9 -0
- package/scripts/aggregator/metadata/briefing-highlight-headline.d.ts +14 -0
- package/scripts/aggregator/metadata/briefing-highlight-headline.js +114 -0
- package/scripts/aggregator/metadata/briefing-highlight-i18n.d.ts +78 -0
- package/scripts/aggregator/metadata/briefing-highlight-i18n.js +475 -0
- package/scripts/aggregator/metadata/briefing-highlight-sections.d.ts +79 -0
- package/scripts/aggregator/metadata/briefing-highlight-sections.js +412 -0
- package/scripts/aggregator/metadata/briefing-highlight.d.ts +77 -10
- package/scripts/aggregator/metadata/briefing-highlight.js +189 -408
- package/scripts/aggregator/metadata/description-finalization.d.ts +93 -0
- package/scripts/aggregator/metadata/description-finalization.js +182 -0
- package/scripts/aggregator/metadata/per-language-resolver.d.ts +117 -0
- package/scripts/aggregator/metadata/per-language-resolver.js +413 -0
- package/scripts/aggregator/metadata/resolve-helpers.d.ts +78 -48
- package/scripts/aggregator/metadata/resolve-helpers.js +266 -201
- package/scripts/aggregator/metadata/resolve-utils.d.ts +67 -0
- package/scripts/aggregator/metadata/resolve-utils.js +180 -0
- package/scripts/aggregator/metadata/seo-budgets.js +30 -21
- package/scripts/aggregator/metadata/seo-keywords.d.ts +16 -0
- package/scripts/aggregator/metadata/seo-keywords.js +88 -0
- package/scripts/aggregator/metadata/text-truncate.js +1 -1
- package/scripts/aggregator/metadata/text-utils-constants.d.ts +6 -0
- package/scripts/aggregator/metadata/text-utils-constants.js +22 -1
- package/scripts/aggregator/metadata/title-rejection.d.ts +32 -4
- package/scripts/aggregator/metadata/title-rejection.js +62 -21
- package/scripts/constants/articles/localized-keywords-central.js +91 -0
- package/scripts/constants/articles/localized-keywords-global.js +239 -0
- package/scripts/constants/articles/localized-keywords-nordic.js +245 -0
- package/scripts/generators/news-indexes/backfill.js +28 -1
- package/scripts/mcp/ep/error-classifier.d.ts +2 -2
- package/scripts/mcp/ep/error-classifier.js +2 -2
- package/scripts/utils/fs/directory.js +8 -1
- package/scripts/validate-article-seo.js +25 -12
- package/scripts/validate-manifest-seo.js +3 -0
package/README.md
CHANGED
|
@@ -136,7 +136,7 @@ The published site is the audience-facing companion to this npm/TypeScript packa
|
|
|
136
136
|
|
|
137
137
|
**MCP Server Integration**: The project uses the
|
|
138
138
|
[European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server)
|
|
139
|
-
v1.3.
|
|
139
|
+
v1.3.12 for accessing real EU Parliament data via the Model Context Protocol.
|
|
140
140
|
|
|
141
141
|
- **MCP Server Status**: ✅ Fully operational — 60+ EP data tools available
|
|
142
142
|
(feeds, direct lookups, analytical tools, intelligence correlation)
|
|
@@ -432,7 +432,7 @@ import type { ArticleCategory, LanguageCode } from 'euparliamentmonitor/types';
|
|
|
432
432
|
|
|
433
433
|
## 🔌 Data Sources
|
|
434
434
|
|
|
435
|
-
**Primary — European Parliament MCP Server** ([Hack23/European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server) v1.3.
|
|
435
|
+
**Primary — European Parliament MCP Server** ([Hack23/European-Parliament-MCP-Server](https://github.com/Hack23/European-Parliament-MCP-Server) v1.3.12+, fully operational):
|
|
436
436
|
|
|
437
437
|
- 🗳️ Plenary sessions, voting records, roll-call votes
|
|
438
438
|
- 📜 Adopted texts, motions, resolutions, urgency files
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "euparliamentmonitor",
|
|
3
|
-
"version": "0.9.
|
|
3
|
+
"version": "0.9.26",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
|
|
6
6
|
"main": "scripts/index.js",
|
|
@@ -158,8 +158,8 @@
|
|
|
158
158
|
"@types/markdown-it": "^14.1.2",
|
|
159
159
|
"@types/node": "25.9.1",
|
|
160
160
|
"@types/papaparse": "5.5.2",
|
|
161
|
-
"@typescript-eslint/eslint-plugin": "8.
|
|
162
|
-
"@typescript-eslint/parser": "8.
|
|
161
|
+
"@typescript-eslint/eslint-plugin": "8.60.0",
|
|
162
|
+
"@typescript-eslint/parser": "8.60.0",
|
|
163
163
|
"@vitest/coverage-v8": "4.1.7",
|
|
164
164
|
"@vitest/ui": "4.1.7",
|
|
165
165
|
"chart.js": "4.5.1",
|
|
@@ -177,7 +177,7 @@
|
|
|
177
177
|
"html-minifier-terser": "^7.2.0",
|
|
178
178
|
"htmlhint": "1.9.2",
|
|
179
179
|
"husky": "9.1.7",
|
|
180
|
-
"jscpd": "4.2.
|
|
180
|
+
"jscpd": "4.2.4",
|
|
181
181
|
"knip": "^6.7.0",
|
|
182
182
|
"lint-staged": "17.0.5",
|
|
183
183
|
"mermaid": "11.15.0",
|
|
@@ -197,10 +197,10 @@
|
|
|
197
197
|
"node": ">=26"
|
|
198
198
|
},
|
|
199
199
|
"dependencies": {
|
|
200
|
-
"european-parliament-mcp-server": "1.3.
|
|
200
|
+
"european-parliament-mcp-server": "1.3.12",
|
|
201
201
|
"markdown-it": "^14.1.1",
|
|
202
202
|
"markdown-it-anchor": "^9.2.0",
|
|
203
|
-
"markdown-it-attrs": "^
|
|
203
|
+
"markdown-it-attrs": "^5.0.0",
|
|
204
204
|
"markdown-it-deflist": "^3.0.0",
|
|
205
205
|
"markdown-it-footnote": "^4.0.0"
|
|
206
206
|
},
|
|
@@ -8,7 +8,7 @@ export { extractStrongProseLine, extractLedeAfterHeading, extractExtendedLedeAft
|
|
|
8
8
|
export { extractArtifactHighlight, extractPriorityFindingHighlight, isTranslatedSiblingBrief, } from './metadata/artifact-highlight.js';
|
|
9
9
|
export { buildTemplateFallback } from './metadata/template-fallback.js';
|
|
10
10
|
export { deriveWeekRange, deriveReportingWindowForWeekInReview, deriveMonthLabel, deriveQuarterLabel, deriveYearLabel, deriveTermLabel, deriveElectionCycleLabel, } from './metadata/date-labels.js';
|
|
11
|
-
export { buildSeoKeywords } from './metadata/
|
|
11
|
+
export { buildSeoKeywords } from './metadata/seo-keywords.js';
|
|
12
12
|
/**
|
|
13
13
|
* Resolve per-language `{title, description}` for one article following
|
|
14
14
|
* the priority ladder documented at the top of this module.
|
|
@@ -63,9 +63,9 @@
|
|
|
63
63
|
*/
|
|
64
64
|
import { ALL_LANGUAGES } from '../constants/language-core.js';
|
|
65
65
|
import { resolveLocalizedBriefHighlight } from './editorial-brief-resolver.js';
|
|
66
|
+
import { resolveOneLanguage } from './metadata/per-language-resolver.js';
|
|
66
67
|
import { buildTemplateFallback } from './metadata/template-fallback.js';
|
|
67
|
-
import {
|
|
68
|
-
import { ENRICHMENT_TRIGGER_LENGTH, truncateDescription, truncateExtendedDescription, truncateTitle, } from './metadata/text-utils.js';
|
|
68
|
+
import { resolveEditorialContent } from './metadata/resolve-helpers.js';
|
|
69
69
|
export { shouldSkipDescriptionLine, stripLeadingProseLabel, stripInlineMarkdown, truncateDescription, truncateExtendedDescription, truncateTitle, extractFirstSentence, } from './metadata/text-utils.js';
|
|
70
70
|
export { isArtifactCategoryHeading, stripArtifactCategoryAffix, isGenericHeading, } from './metadata/heading-rules.js';
|
|
71
71
|
export { humanizeSlug } from './metadata/slug.js';
|
|
@@ -74,7 +74,7 @@ export { extractStrongProseLine, extractLedeAfterHeading, extractExtendedLedeAft
|
|
|
74
74
|
export { extractArtifactHighlight, extractPriorityFindingHighlight, isTranslatedSiblingBrief, } from './metadata/artifact-highlight.js';
|
|
75
75
|
export { buildTemplateFallback } from './metadata/template-fallback.js';
|
|
76
76
|
export { deriveWeekRange, deriveReportingWindowForWeekInReview, deriveMonthLabel, deriveQuarterLabel, deriveYearLabel, deriveTermLabel, deriveElectionCycleLabel, } from './metadata/date-labels.js';
|
|
77
|
-
export { buildSeoKeywords } from './metadata/
|
|
77
|
+
export { buildSeoKeywords } from './metadata/seo-keywords.js';
|
|
78
78
|
// --- Resolver orchestrator ---
|
|
79
79
|
/**
|
|
80
80
|
* Resolve per-language `{title, description}` for one article following
|
|
@@ -87,7 +87,20 @@ export function resolveArticleMetadata(opts) {
|
|
|
87
87
|
const manifest = opts.manifest ?? {};
|
|
88
88
|
const englishEditorial = resolveEditorialContent(opts);
|
|
89
89
|
const template = buildTemplateFallback(opts.articleType, opts.date, manifest.committee);
|
|
90
|
-
|
|
90
|
+
// Manifests may carry runId as a string (UUID) or a number (incrementing counter).
|
|
91
|
+
// Coerce to string before trimming to avoid `runId?.trim is not a function` on numeric IDs.
|
|
92
|
+
// When runId is a UUID (no embedded `runN` token), fall back to `articleTypeSlug`
|
|
93
|
+
// (e.g. "committee-reports-run50") which carries the run number we need for
|
|
94
|
+
// disambiguating same-date sub-runs.
|
|
95
|
+
const rawRunId = manifest.runId === undefined || manifest.runId === null ? '' : String(manifest.runId).trim();
|
|
96
|
+
const slugForRun = typeof manifest.articleTypeSlug === 'string'
|
|
97
|
+
? String(manifest.articleTypeSlug).trim()
|
|
98
|
+
: '';
|
|
99
|
+
const runId = /(?:^|-)run\d+/u.test(rawRunId) || /^\d+$/u.test(rawRunId)
|
|
100
|
+
? rawRunId
|
|
101
|
+
: slugForRun && /-run\d+$/u.test(slugForRun)
|
|
102
|
+
? slugForRun
|
|
103
|
+
: rawRunId;
|
|
91
104
|
const result = Object.create(null);
|
|
92
105
|
for (const lang of ALL_LANGUAGES) {
|
|
93
106
|
const entry = resolveOneLanguage({
|
|
@@ -99,6 +112,7 @@ export function resolveArticleMetadata(opts) {
|
|
|
99
112
|
articleType: opts.articleType,
|
|
100
113
|
date: opts.date,
|
|
101
114
|
runId,
|
|
115
|
+
resolveLocalizedBrief: resolveLocalizedBriefHighlight,
|
|
102
116
|
});
|
|
103
117
|
Object.defineProperty(result, lang, {
|
|
104
118
|
value: entry,
|
|
@@ -109,128 +123,4 @@ export function resolveArticleMetadata(opts) {
|
|
|
109
123
|
}
|
|
110
124
|
return result;
|
|
111
125
|
}
|
|
112
|
-
const LOCALIZED_BRIEF_SOURCE = 'localized-brief';
|
|
113
|
-
/**
|
|
114
|
-
* Resolve `{title, description, keywords, source}` for one language.
|
|
115
|
-
*
|
|
116
|
-
* @param input - Per-language inputs
|
|
117
|
-
* @returns One resolved metadata entry
|
|
118
|
-
*/
|
|
119
|
-
function resolveOneLanguage(input) {
|
|
120
|
-
const manifestTitle = manifestOverrideFor(input.manifest.title, input.lang);
|
|
121
|
-
const manifestDescription = manifestOverrideFor(input.manifest.description, input.lang);
|
|
122
|
-
const perLanguage = resolvePerLanguageEditorial(input);
|
|
123
|
-
const editorial = perLanguage.editorial;
|
|
124
|
-
const contextualTitle = composeContextualTitle(input.template.title, editorial.headline, input.runId);
|
|
125
|
-
const title = pickFirstNonEmpty([manifestTitle, contextualTitle, input.template.title]);
|
|
126
|
-
const rawDescription = sanitizeDescriptionCandidate(pickFirstNonEmpty([manifestDescription, editorial.summary, input.template.subtitle]));
|
|
127
|
-
const safeEditorial = {
|
|
128
|
-
headline: isUsableResolvedTitle(editorial.headline) ? editorial.headline.trim() : '',
|
|
129
|
-
summary: sanitizeDescriptionCandidate(editorial.summary),
|
|
130
|
-
extendedSummary: sanitizeDescriptionCandidate(editorial.extendedSummary),
|
|
131
|
-
};
|
|
132
|
-
const normalizedRawDescription = rawDescription || sanitizeDescriptionCandidate(input.template.subtitle);
|
|
133
|
-
const skipEnrichment = perLanguage.source === LOCALIZED_BRIEF_SOURCE && normalizedRawDescription.length > 0;
|
|
134
|
-
const description = skipEnrichment || normalizedRawDescription.length >= ENRICHMENT_TRIGGER_LENGTH
|
|
135
|
-
? normalizedRawDescription
|
|
136
|
-
: composeContextualDescription(input.lang, normalizedRawDescription, safeEditorial, input.date, input.runId);
|
|
137
|
-
const clippedTitle = truncateTitle(title).trim();
|
|
138
|
-
const explicitTitle = manifestTitle && !hasLeakySeoToken(manifestTitle) ? truncateTitle(manifestTitle).trim() : '';
|
|
139
|
-
const allowShortResolvedTitle = perLanguage.source === LOCALIZED_BRIEF_SOURCE;
|
|
140
|
-
const resolvedTitleCandidate = clippedTitle &&
|
|
141
|
-
!hasLeakySeoToken(clippedTitle) &&
|
|
142
|
-
(allowShortResolvedTitle || isUsableResolvedTitle(clippedTitle))
|
|
143
|
-
? clippedTitle
|
|
144
|
-
: '';
|
|
145
|
-
const summaryDerivedTitle = deriveHeadlineFromSummary(safeEditorial.summary || normalizedRawDescription);
|
|
146
|
-
// `truncateTitle` returns '' when an editorial title overruns the
|
|
147
|
-
// budget with no acceptable clause boundary — fall back to the
|
|
148
|
-
// localized template title in that case so we never emit an empty
|
|
149
|
-
// `<title>`. Live regression: 2026-05-22 breaking
|
|
150
|
-
// `AI Trade Strategy: A Legislative First with Structural…` clipped
|
|
151
|
-
// to '' after the no-ellipsis guard landed; template fallback
|
|
152
|
-
// (`Extended Executive Brief — Breaking News`) is preferable to a
|
|
153
|
-
// blank `<title>`.
|
|
154
|
-
//
|
|
155
|
-
// The fallback path passes the template title back through
|
|
156
|
-
// {@link composeContextualTitle} (with an empty editorial headline)
|
|
157
|
-
// so `withRunQualifier` re-appends the `— Run N` suffix. Without
|
|
158
|
-
// this, two same-date / same-articleType runs (republish, hot-fix
|
|
159
|
-
// re-run) would collapse to byte-identical `<title>` strings, and
|
|
160
|
-
// the duplicate-title gate in `scripts/validate-article-seo.js`
|
|
161
|
-
// would (correctly) fail CI.
|
|
162
|
-
const contextualFallback = composeContextualTitle(input.template.title, '', input.runId);
|
|
163
|
-
const truncatedTitle = pickFirstNonEmpty([
|
|
164
|
-
explicitTitle,
|
|
165
|
-
resolvedTitleCandidate,
|
|
166
|
-
isUsableResolvedTitle(summaryDerivedTitle, { allowFullSentence: true })
|
|
167
|
-
? summaryDerivedTitle
|
|
168
|
-
: '',
|
|
169
|
-
truncateTitle(contextualFallback),
|
|
170
|
-
contextualFallback,
|
|
171
|
-
]);
|
|
172
|
-
const truncatedDescription = truncateDescription(description);
|
|
173
|
-
const extendedSource = sanitizeDescriptionCandidate(manifestDescription || safeEditorial.extendedSummary || normalizedRawDescription);
|
|
174
|
-
// Two-tier extended-description resolution:
|
|
175
|
-
// 1. Direct truncation — preferred when the editorial source paragraph
|
|
176
|
-
// is already ≥181 chars (the truncator's gating threshold). This
|
|
177
|
-
// yields the highest-fidelity og:description text.
|
|
178
|
-
// 2. Contextual synthesis — when direct truncation returns '' (source
|
|
179
|
-
// was too short), synthesize a longer string by stitching together
|
|
180
|
-
// `<source> + Date: YYYY-MM-DD + Context: <editorial> + <reader>`.
|
|
181
|
-
// This is the **only** SEO path that surfaces the localized
|
|
182
|
-
// "for democratic-accountability readers …" framing (the short
|
|
183
|
-
// <meta description> no longer carries it — see comment in
|
|
184
|
-
// {@link composeContextualDescription}). The synthesized string is
|
|
185
|
-
// re-clamped to the 200–300 char og:description budget.
|
|
186
|
-
//
|
|
187
|
-
// Live regression (2026-05): 56 breaking briefs shipped with empty
|
|
188
|
-
// extendedDescription because their lead paragraph was only 80–150
|
|
189
|
-
// chars. AI-overview and Discover surfaces dropped them entirely.
|
|
190
|
-
let truncatedExtendedDescription = truncateExtendedDescription(extendedSource);
|
|
191
|
-
if (!truncatedExtendedDescription) {
|
|
192
|
-
truncatedExtendedDescription = composeContextualExtendedDescription(input.lang, extendedSource || normalizedRawDescription, safeEditorial, input.date);
|
|
193
|
-
}
|
|
194
|
-
const source = manifestTitle || manifestDescription ? 'manifest' : perLanguage.source;
|
|
195
|
-
return {
|
|
196
|
-
title: truncatedTitle,
|
|
197
|
-
description: truncatedDescription,
|
|
198
|
-
extendedDescription: truncatedExtendedDescription,
|
|
199
|
-
keywords: buildSeoKeywords(input.lang, input.articleType, input.date, input.runId, truncatedTitle, truncatedDescription),
|
|
200
|
-
source,
|
|
201
|
-
};
|
|
202
|
-
}
|
|
203
|
-
/**
|
|
204
|
-
* Select the editorial `{headline, summary}` pair for one language,
|
|
205
|
-
* preferring the translated `executive-brief_<lang>.md` over the English
|
|
206
|
-
* brief.
|
|
207
|
-
*
|
|
208
|
-
* @param input - Per-language inputs
|
|
209
|
-
* @returns Editorial pair plus the tier that produced it
|
|
210
|
-
*/
|
|
211
|
-
function resolvePerLanguageEditorial(input) {
|
|
212
|
-
if (input.lang !== 'en' && input.runDir) {
|
|
213
|
-
const localized = resolveLocalizedBriefHighlight(input.runDir, input.lang, input.articleType, input.date);
|
|
214
|
-
if (localized && (localized.headline || localized.summary)) {
|
|
215
|
-
return {
|
|
216
|
-
editorial: {
|
|
217
|
-
headline: localized.headline,
|
|
218
|
-
summary: localized.summary,
|
|
219
|
-
extendedSummary: localized.extendedSummary,
|
|
220
|
-
},
|
|
221
|
-
source: LOCALIZED_BRIEF_SOURCE,
|
|
222
|
-
};
|
|
223
|
-
}
|
|
224
|
-
}
|
|
225
|
-
if (input.englishEditorial.headline || input.englishEditorial.summary) {
|
|
226
|
-
return {
|
|
227
|
-
editorial: input.englishEditorial,
|
|
228
|
-
source: input.lang === 'en' ? 'english-editorial' : 'english-brief',
|
|
229
|
-
};
|
|
230
|
-
}
|
|
231
|
-
return {
|
|
232
|
-
editorial: { headline: '', summary: '', extendedSummary: '' },
|
|
233
|
-
source: 'template',
|
|
234
|
-
};
|
|
235
|
-
}
|
|
236
126
|
//# sourceMappingURL=article-metadata.js.map
|
|
@@ -186,11 +186,15 @@ export function resolveLocalizedBriefHighlight(runDir, lang, articleType, date)
|
|
|
186
186
|
// additionally localized the section heading the matcher falls
|
|
187
187
|
// back to the legacy lede/H1 path below, producing the
|
|
188
188
|
// localized H1 as headline.
|
|
189
|
-
const briefing = extractBriefingHighlight(body);
|
|
189
|
+
const briefing = extractBriefingHighlight(body, lang);
|
|
190
190
|
if (briefing && (briefing.headline || briefing.summary)) {
|
|
191
191
|
const fallbackHeadline = deriveHeadline(body, articleType, date);
|
|
192
192
|
return {
|
|
193
|
-
|
|
193
|
+
// Prefer the H1-derived headline for translated briefs because
|
|
194
|
+
// the translator explicitly crafts it as the article headline.
|
|
195
|
+
// Only fall back to the structural extraction headline when the
|
|
196
|
+
// H1 is generic/empty (deriveHeadline returns '').
|
|
197
|
+
headline: fallbackHeadline || briefing.headline,
|
|
194
198
|
summary: briefing.summary,
|
|
195
199
|
extendedSummary: briefing.extendedSummary || extractExtendedLedeAfterHeading(body),
|
|
196
200
|
sourceFile: rel,
|
|
@@ -39,8 +39,17 @@ const PREFERRED_HEADINGS = [
|
|
|
39
39
|
'bottom line up front',
|
|
40
40
|
'top findings',
|
|
41
41
|
'key judgments',
|
|
42
|
+
'key judgements',
|
|
42
43
|
'lead',
|
|
43
44
|
'headline',
|
|
45
|
+
'top line',
|
|
46
|
+
'critical findings',
|
|
47
|
+
'situation assessment',
|
|
48
|
+
'strategic synthesis',
|
|
49
|
+
'summary assessment',
|
|
50
|
+
'priority intelligence',
|
|
51
|
+
'most significant',
|
|
52
|
+
'principal intelligence',
|
|
44
53
|
];
|
|
45
54
|
/**
|
|
46
55
|
* Split a Markdown document into headed sections. Fenced code blocks are
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Derive a usable headline from a paragraph when no explicit `### …`
|
|
3
|
+
* sub-heading is available. Uses a journalist's editorial hierarchy:
|
|
4
|
+
*
|
|
5
|
+
* 1. Find the sentence with the strongest news hook (superlatives, novelty)
|
|
6
|
+
* 2. Fall back to the first sentence via `truncateTitle`
|
|
7
|
+
* 3. Extract a clause at a natural boundary (comma, semicolon, dash)
|
|
8
|
+
* 4. Hard-cut at word boundary as last resort
|
|
9
|
+
*
|
|
10
|
+
* @param paragraph - Source paragraph (already normalized)
|
|
11
|
+
* @returns Headline string, or `''` when no usable clause can be derived
|
|
12
|
+
*/
|
|
13
|
+
export declare function deriveHeadlineFromParagraph(paragraph: string): string;
|
|
14
|
+
//# sourceMappingURL=briefing-highlight-headline.d.ts.map
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* @module Aggregator/Metadata/BriefingHighlightHeadline
|
|
5
|
+
* @description Headline derivation primitives extracted from
|
|
6
|
+
* {@link briefing-highlight.ts} so that module stays below the 600-line
|
|
7
|
+
* drift-guard cap (see `test/unit/source-file-size.test.js`). Pure leaf
|
|
8
|
+
* module — depends only on {@link truncateTitle} from `text-utils.js`
|
|
9
|
+
* and the {@link stripTradecraftLabels} helper from
|
|
10
|
+
* {@link briefing-highlight-sections.ts}.
|
|
11
|
+
*
|
|
12
|
+
* Headline derivation kicks in when a brief's `## Strategic Intelligence
|
|
13
|
+
* Summary` block has *no* `### Sub-section` heading underneath it — the
|
|
14
|
+
* resolver falls back to the section's first prose paragraph and we
|
|
15
|
+
* mine it for a journalist-quality top-line.
|
|
16
|
+
*/
|
|
17
|
+
import { truncateTitle } from './text-utils.js';
|
|
18
|
+
import { stripTradecraftLabels } from './briefing-highlight-sections.js';
|
|
19
|
+
/**
|
|
20
|
+
* Patterns that indicate a "news hook" — the most compelling claim in a
|
|
21
|
+
* paragraph. Journalist editors call this the "nut graf" or "top line."
|
|
22
|
+
* {@link extractNewsHookSentence} returns the first sentence that
|
|
23
|
+
* matches any of these signals.
|
|
24
|
+
*/
|
|
25
|
+
const NEWS_HOOK_PATTERNS = [
|
|
26
|
+
/\blandmark\b/i,
|
|
27
|
+
/\bmost (?:significant|consequential|ambitious|contentious|comprehensive)\b/i,
|
|
28
|
+
/\bunprecedented\b/i,
|
|
29
|
+
/\bhistoric(?:ally)?\b/i,
|
|
30
|
+
/\bfirst[\s-](?:ever|time)\b/i,
|
|
31
|
+
/\boverhaul\b/i,
|
|
32
|
+
/\breshape[sd]?\b/i,
|
|
33
|
+
/\brecord[\s-]/i,
|
|
34
|
+
/\bsweeping\b/i,
|
|
35
|
+
/\bbreakthrough\b/i,
|
|
36
|
+
/\bparadox\b/i,
|
|
37
|
+
/\bgame[\s-]chang/i,
|
|
38
|
+
/\bturning[\s-]point\b/i,
|
|
39
|
+
/\bcrisis\b/i,
|
|
40
|
+
/\bshowdown\b/i,
|
|
41
|
+
/\bfracture[sd]?\b/i,
|
|
42
|
+
];
|
|
43
|
+
/**
|
|
44
|
+
* Extract the most newsworthy sentence from a paragraph. Looks for
|
|
45
|
+
* sentences containing strong editorial signals (superlatives, novelty
|
|
46
|
+
* claims, dramatic verbs) rather than always taking the first sentence
|
|
47
|
+
* which is typically bland context-setting.
|
|
48
|
+
*
|
|
49
|
+
* @param paragraph - Cleaned paragraph text
|
|
50
|
+
* @returns The most compelling sentence, or '' if none found
|
|
51
|
+
*/
|
|
52
|
+
function extractNewsHookSentence(paragraph) {
|
|
53
|
+
// Split into sentences (handles ". ", "! ", "? " boundaries — plus
|
|
54
|
+
// CJK 。!? and Arabic ؟ which have no trailing space).
|
|
55
|
+
const sentences = paragraph.split(/(?<=[.!?])\s+|(?<=[。!?؟])/).filter((s) => s.length > 20);
|
|
56
|
+
// Find the first sentence with a news hook signal
|
|
57
|
+
for (const sentence of sentences) {
|
|
58
|
+
if (NEWS_HOOK_PATTERNS.some((re) => re.test(sentence))) {
|
|
59
|
+
const result = truncateTitle(sentence);
|
|
60
|
+
if (result)
|
|
61
|
+
return result;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
return '';
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Derive a usable headline from a paragraph when no explicit `### …`
|
|
68
|
+
* sub-heading is available. Uses a journalist's editorial hierarchy:
|
|
69
|
+
*
|
|
70
|
+
* 1. Find the sentence with the strongest news hook (superlatives, novelty)
|
|
71
|
+
* 2. Fall back to the first sentence via `truncateTitle`
|
|
72
|
+
* 3. Extract a clause at a natural boundary (comma, semicolon, dash)
|
|
73
|
+
* 4. Hard-cut at word boundary as last resort
|
|
74
|
+
*
|
|
75
|
+
* @param paragraph - Source paragraph (already normalized)
|
|
76
|
+
* @returns Headline string, or `''` when no usable clause can be derived
|
|
77
|
+
*/
|
|
78
|
+
export function deriveHeadlineFromParagraph(paragraph) {
|
|
79
|
+
// Strip tradecraft labels before headline derivation.
|
|
80
|
+
const cleaned = stripTradecraftLabels(paragraph);
|
|
81
|
+
// Priority 1: Find the most newsworthy sentence (superlatives, drama).
|
|
82
|
+
const newsHook = extractNewsHookSentence(cleaned);
|
|
83
|
+
if (newsHook)
|
|
84
|
+
return newsHook;
|
|
85
|
+
// Priority 2: First sentence via truncateTitle.
|
|
86
|
+
const direct = truncateTitle(cleaned);
|
|
87
|
+
if (direct)
|
|
88
|
+
return direct;
|
|
89
|
+
// Priority 3: Extract the first sentence and try truncateTitle.
|
|
90
|
+
// Recognise CJK 。!? and Arabic ؟ in addition to Western . ! ?.
|
|
91
|
+
const sentenceMatch = /^(.*?(?:[.!?](?=\s|$)|[。!?؟]))/.exec(cleaned);
|
|
92
|
+
if (sentenceMatch?.[1]) {
|
|
93
|
+
const sentenceResult = truncateTitle(sentenceMatch[1]);
|
|
94
|
+
if (sentenceResult)
|
|
95
|
+
return sentenceResult;
|
|
96
|
+
}
|
|
97
|
+
// Priority 4: Take text up to first significant clause separator.
|
|
98
|
+
const CLAUSE_SEPARATORS = [', ', '; ', ' — ', ' – ', ' - '];
|
|
99
|
+
for (const sep of CLAUSE_SEPARATORS) {
|
|
100
|
+
const idx = cleaned.indexOf(sep, 30);
|
|
101
|
+
if (idx > 0 && idx <= 140) {
|
|
102
|
+
return cleaned.slice(0, idx).trim();
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
// Final fallback: hard-cut at 120 chars on a word boundary.
|
|
106
|
+
if (cleaned.length > 120) {
|
|
107
|
+
const slice = cleaned.slice(0, 120);
|
|
108
|
+
const lastSpace = slice.lastIndexOf(' ');
|
|
109
|
+
if (lastSpace > 60)
|
|
110
|
+
return slice.slice(0, lastSpace).trim();
|
|
111
|
+
}
|
|
112
|
+
return cleaned.length <= 140 ? cleaned : '';
|
|
113
|
+
}
|
|
114
|
+
//# sourceMappingURL=briefing-highlight-headline.js.map
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module Aggregator/Metadata/BriefingHighlightI18n
|
|
3
|
+
* @description Per-language section-heading needle dictionaries and
|
|
4
|
+
* boilerplate stem patterns used by {@link briefing-highlight.ts} to
|
|
5
|
+
* recognise translated `## Strategic Intelligence Summary` / `## Top
|
|
6
|
+
* Findings` / `## Reader Briefing` blocks across all 14 supported
|
|
7
|
+
* languages.
|
|
8
|
+
*
|
|
9
|
+
* The English needles (held in the parent module) are kept verbatim
|
|
10
|
+
* because Convention-A briefs (motions, committee-reports) still ship
|
|
11
|
+
* English H2 text inside translated bodies. The per-language entries
|
|
12
|
+
* here cover Convention-B briefs (propositions, breaking) where
|
|
13
|
+
* translators have additionally localised the H2 text per §3 of the
|
|
14
|
+
* executive-brief translation guide.
|
|
15
|
+
*
|
|
16
|
+
* Needles are stored lower-cased to match {@link headingMatches}'s
|
|
17
|
+
* post-normalization comparison. CJK / Arabic / Hebrew strings are
|
|
18
|
+
* left as-is because `.toLowerCase()` is a no-op for those scripts.
|
|
19
|
+
* Empty arrays mean "no language-specific override — base English
|
|
20
|
+
* matchers cover this language" (used for `en`).
|
|
21
|
+
*
|
|
22
|
+
* Pure leaf module — no I/O, no upward imports beyond
|
|
23
|
+
* {@link LanguageCode}.
|
|
24
|
+
*/
|
|
25
|
+
import type { LanguageCode } from '../../types/languages.js';
|
|
26
|
+
/**
|
|
27
|
+
* Localised needles for the `## Strategic Intelligence Summary` /
|
|
28
|
+
* `## Strategic Context` / `## Five Key Judgments` family of section
|
|
29
|
+
* openers, keyed by target language. Only the localised forms are
|
|
30
|
+
* stored here; the English base list lives in
|
|
31
|
+
* {@link briefing-highlight.ts}.
|
|
32
|
+
*/
|
|
33
|
+
export declare const STRATEGIC_SECTION_HEADINGS_BY_LANG: Readonly<Record<LanguageCode, readonly string[]>>;
|
|
34
|
+
/**
|
|
35
|
+
* Localised needles for the `## Top Findings` / `## Key Findings` /
|
|
36
|
+
* `## Key Events` family of section openers.
|
|
37
|
+
*/
|
|
38
|
+
export declare const TOP_FINDINGS_HEADINGS_BY_LANG: Readonly<Record<LanguageCode, readonly string[]>>;
|
|
39
|
+
/**
|
|
40
|
+
* Localised needles for the `## Reader Briefing` family of section
|
|
41
|
+
* openers, plus the `60-Second Read` shortcut headings used in
|
|
42
|
+
* Convention-B propositions/breaking briefs.
|
|
43
|
+
*/
|
|
44
|
+
export declare const READER_BRIEFING_HEADINGS_BY_LANG: Readonly<Record<LanguageCode, readonly string[]>>;
|
|
45
|
+
/**
|
|
46
|
+
* Localised stems of the "This executive brief synthesizes…"
|
|
47
|
+
* boilerplate sentence patterns. Used by both
|
|
48
|
+
* {@link briefing-highlight.ts}'s {@link normalizeBriefingLine} and
|
|
49
|
+
* {@link title-rejection.ts}'s {@link looksLikeBoilerplate} to filter
|
|
50
|
+
* self-referential meta-prose that describes the brief itself rather
|
|
51
|
+
* than the substantive content. Each pattern is anchored at
|
|
52
|
+
* start-of-line so false positives are unlikely.
|
|
53
|
+
*/
|
|
54
|
+
export declare const BOILERPLATE_STEM_PATTERNS_BY_LANG: Readonly<Record<LanguageCode, readonly RegExp[]>>;
|
|
55
|
+
/**
|
|
56
|
+
* Resolve the active needle list for a section family + target
|
|
57
|
+
* language. Returns the English base list unioned with the
|
|
58
|
+
* locale-specific entries from the supplied map. Order is preserved
|
|
59
|
+
* (base first, locale-specific second) so deterministic match order
|
|
60
|
+
* is maintained.
|
|
61
|
+
*
|
|
62
|
+
* @param map - Per-language needle dictionary
|
|
63
|
+
* @param baseEnglish - The English needle list from the parent module
|
|
64
|
+
* @param lang - Target language code (defaults to `'en'`)
|
|
65
|
+
* @returns Combined needle list to pass to `headingMatches`
|
|
66
|
+
*/
|
|
67
|
+
export declare function resolveHeadingNeedles(map: Readonly<Record<LanguageCode, readonly string[]>>, baseEnglish: readonly string[], lang: LanguageCode): readonly string[];
|
|
68
|
+
/**
|
|
69
|
+
* Resolve the active boilerplate-stem RegExp list for a target
|
|
70
|
+
* language. Unions the English base patterns with the locale-specific
|
|
71
|
+
* patterns. Order is preserved.
|
|
72
|
+
*
|
|
73
|
+
* @param baseEnglish - The English RegExp list from the parent module
|
|
74
|
+
* @param lang - Target language code (defaults to `'en'`)
|
|
75
|
+
* @returns Combined RegExp list for `.some(re => re.test(...))`
|
|
76
|
+
*/
|
|
77
|
+
export declare function resolveBoilerplatePatterns(baseEnglish: readonly RegExp[], lang: LanguageCode): readonly RegExp[];
|
|
78
|
+
//# sourceMappingURL=briefing-highlight-i18n.d.ts.map
|