euparliamentmonitor 0.9.13 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -4
- package/scripts/aggregator/article-generator.js +2 -1
- package/scripts/aggregator/article-html.d.ts +9 -0
- package/scripts/aggregator/article-html.js +134 -13
- package/scripts/aggregator/article-metadata.d.ts +25 -161
- package/scripts/aggregator/article-metadata.js +71 -649
- package/scripts/aggregator/editorial-brief-resolver.d.ts +9 -0
- package/scripts/aggregator/editorial-brief-resolver.js +3 -1
- package/scripts/aggregator/metadata/date-labels.d.ts +122 -0
- package/scripts/aggregator/metadata/date-labels.js +209 -0
- package/scripts/aggregator/metadata/text-utils.d.ts +188 -0
- package/scripts/aggregator/metadata/text-utils.js +542 -0
- package/scripts/constants/og-locales.d.ts +15 -0
- package/scripts/constants/og-locales.js +17 -0
- package/scripts/constants/seo/index.d.ts +21 -0
- package/scripts/constants/seo/index.js +23 -0
- package/scripts/constants/seo/og-locales.d.ts +59 -0
- package/scripts/constants/seo/og-locales.js +59 -0
- package/scripts/constants/seo/social-handles.d.ts +50 -0
- package/scripts/constants/seo/social-handles.js +65 -0
- package/scripts/constants/social-handles.d.ts +11 -0
- package/scripts/constants/social-handles.js +13 -0
- package/scripts/discover-untranslated-briefs.js +224 -19
- package/scripts/generators/news-indexes.d.ts +35 -0
- package/scripts/generators/news-indexes.js +67 -6
- package/scripts/generators/political-intelligence/html.js +14 -6
- package/scripts/generators/seo-copy.js +42 -0
- package/scripts/generators/sitemap/html.js +13 -5
- package/scripts/lint-src-todos.js +124 -0
- package/scripts/utils/copy-test-reports.js +1 -1
- package/scripts/utils/generate-docs-index.js +1 -1
- package/scripts/validate-brief-translations.js +158 -18
|
@@ -19,6 +19,15 @@ import type { LanguageCode } from '../types/index.js';
|
|
|
19
19
|
/**
 * Headline/summary pair lifted from a localized brief, together with the
 * provenance (file and language) of the brief it was read from.
 */
export interface LocalizedBriefHighlight {
    /** Headline the resolver derived from the brief body. */
    readonly headline: string;
    /**
     * Short summary: the lede found after the heading, or — when there is
     * none — the first strong prose line of the brief.
     */
    readonly summary: string;
    /**
     * Longer (up to ~300 chars) summary taken from the same brief BLUF
     * paragraph as {@link summary}; intended for `og:description` and
     * `twitter:description`. It is the empty string when the BLUF is short
     * enough that `summary` already covers it (the cutoff lives in
     * `truncateExtendedDescription`). Consumers should fall back to
     * {@link summary} whenever this field is empty.
     */
    readonly extendedSummary: string;
    /**
     * Path of the brief file the highlight was read from
     * (presumably relative to the run directory — confirm against the resolver).
     */
    readonly sourceFile: string;
    /** Language of the brief the highlight was read from. */
    readonly sourceLang: LanguageCode;
}
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
*/
|
|
30
30
|
import fs from 'fs';
|
|
31
31
|
import path from 'path';
|
|
32
|
-
import { extractFirstH1, extractLedeAfterHeading, extractStrongProseLine, isGenericHeading, stripArtifactCategoryAffix, truncateTitle, } from './article-metadata.js';
|
|
32
|
+
import { extractFirstH1, extractLedeAfterHeading, extractExtendedLedeAfterHeading, extractStrongProseLine, isGenericHeading, stripArtifactCategoryAffix, truncateTitle, } from './article-metadata.js';
|
|
33
33
|
/**
|
|
34
34
|
* Run-relative candidate paths for a translated brief, in precedence
|
|
35
35
|
* order. Mirrors the `executive-brief.md` → `extended/executive-brief.md`
|
|
@@ -177,10 +177,12 @@ export function resolveLocalizedBriefHighlight(runDir, lang, articleType, date)
|
|
|
177
177
|
const headline = deriveHeadline(body, articleType, date);
|
|
178
178
|
const lede = extractLedeAfterHeading(body);
|
|
179
179
|
const summary = lede || extractStrongProseLine(body);
|
|
180
|
+
const extendedSummary = extractExtendedLedeAfterHeading(body);
|
|
180
181
|
if (headline || summary) {
|
|
181
182
|
return {
|
|
182
183
|
headline,
|
|
183
184
|
summary,
|
|
185
|
+
extendedSummary,
|
|
184
186
|
sourceFile: rel,
|
|
185
187
|
sourceLang: lang,
|
|
186
188
|
};
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module Aggregator/Metadata/DateLabels
|
|
3
|
+
* @description Pure date-label derivation helpers extracted from
|
|
4
|
+
* `article-metadata.ts` as a leaf module in the `metadata/` bounded
|
|
5
|
+
* context. Every helper takes an ISO `YYYY-MM-DD` string and returns a
|
|
6
|
+
* human-friendly label (or `{start, end}` window) used by the per-article-type
|
|
7
|
+
* template-fallback title generators.
|
|
8
|
+
*
|
|
9
|
+
* Bounded-context rules for this file:
|
|
10
|
+
* - **No upward imports** — pure helpers, no dependencies on other
|
|
11
|
+
* `src/aggregator/` modules, no I/O, no globals.
|
|
12
|
+
* - **Deterministic** — same input always produces same output; safe to
|
|
13
|
+
* call from property-based tests.
|
|
14
|
+
* - **UTC-only** — all parsing/formatting goes through `Date` UTC accessors,
|
|
15
|
+
* never local-time `getMonth()`/`getDate()`.
|
|
16
|
+
*
|
|
17
|
+
* Cross-references:
|
|
18
|
+
* - EP-term boundary constants follow
|
|
19
|
+
* {@link analysis/methodologies/electoral-cycle-methodology.md}.
|
|
20
|
+
* - The D-36 → D-8 reporting window for `week-in-review` follows ADR-006
|
|
21
|
+
* (EP roll-call publication lag).
|
|
22
|
+
*/
|
|
23
|
+
/** Milliseconds in one UTC day — used by date-window derivation helpers. */
|
|
24
|
+
export declare const MS_PER_DAY = 86400000;
|
|
25
|
+
/**
|
|
26
|
+
* EP-term boundary constants — keep these in sync with
|
|
27
|
+
* {@link analysis/methodologies/electoral-cycle-methodology.md}.
|
|
28
|
+
* - EP10: 16 Jul 2024 → ~end of June 2029
|
|
29
|
+
* - EP11: ~Jul 2029 → ~Jun 2034
|
|
30
|
+
*/
|
|
31
|
+
export declare const EP10_START_YEAR = 2024;
|
|
32
|
+
export declare const EP10_END_YEAR = 2029;
|
|
33
|
+
export declare const EP11_END_YEAR = 2034;
|
|
34
|
+
/** June — EP elections are held the first week of June every 5 years. */
|
|
35
|
+
export declare const EP_ELECTION_MONTH = 6;
|
|
36
|
+
/**
|
|
37
|
+
* Parse an ISO date string as UTC midnight. Returns `null` for malformed
|
|
38
|
+
* input so callers can skip month/week derivation gracefully.
|
|
39
|
+
*
|
|
40
|
+
* @param iso - ISO date string (`YYYY-MM-DD`)
|
|
41
|
+
* @returns Parsed `Date` or `null`
|
|
42
|
+
*/
|
|
43
|
+
export declare function parseIsoDate(iso: string): Date | null;
|
|
44
|
+
/**
|
|
45
|
+
* Format a `Date` as `YYYY-MM-DD` in UTC.
|
|
46
|
+
*
|
|
47
|
+
* @param d - Date object
|
|
48
|
+
* @returns ISO date string
|
|
49
|
+
*/
|
|
50
|
+
export declare function formatIsoDate(d: Date): string;
|
|
51
|
+
/**
|
|
52
|
+
* Parse an ISO date and return the `{ start, end }` week range as ISO
|
|
53
|
+
* strings. Week starts on Monday and ends on the following Sunday.
|
|
54
|
+
*
|
|
55
|
+
* @param date - ISO date string (`YYYY-MM-DD`)
|
|
56
|
+
* @returns `{ start, end }` both in `YYYY-MM-DD` form
|
|
57
|
+
*/
|
|
58
|
+
export declare function deriveWeekRange(date: string): {
|
|
59
|
+
readonly start: string;
|
|
60
|
+
readonly end: string;
|
|
61
|
+
};
|
|
62
|
+
/**
|
|
63
|
+
* Return the D-36 → D-8 reporting window for the `week-in-review`
|
|
64
|
+
* article type. EP roll-call voting data is published with a 2–6 week
|
|
65
|
+
* lag, so using the most-recent 7 days structurally produces a
|
|
66
|
+
* vote-empty dataset. Shifting 8 days back and widening to 28 days
|
|
67
|
+
* (start = D-36, end = D-8) ensures the window always contains at
|
|
68
|
+
* least one full EP plenary week with published roll-call data
|
|
69
|
+
* (ADR-006). Direction is consistent with the workflow's
|
|
70
|
+
* `DATE_FROM` (start = D-36) → `DATE_TO` (end = D-8) variables.
|
|
71
|
+
*
|
|
72
|
+
* @param date - ISO article date string (`YYYY-MM-DD`) — typically TODAY
|
|
73
|
+
* @returns `{ start: D-36, end: D-8 }` both as `YYYY-MM-DD` ISO strings
|
|
74
|
+
*/
|
|
75
|
+
export declare function deriveReportingWindowForWeekInReview(date: string): {
|
|
76
|
+
readonly start: string;
|
|
77
|
+
readonly end: string;
|
|
78
|
+
};
|
|
79
|
+
/**
|
|
80
|
+
* Return a human-friendly month label for an ISO date — English month
|
|
81
|
+
* name + four-digit year (e.g. `April 2026`). The non-English template
|
|
82
|
+
* generators accept this same label verbatim because they interpolate it
|
|
83
|
+
* into a localized sentence rather than translating the month itself.
|
|
84
|
+
*
|
|
85
|
+
* @param date - ISO date string
|
|
86
|
+
* @returns Month label, or the input when parsing fails
|
|
87
|
+
*/
|
|
88
|
+
export declare function deriveMonthLabel(date: string): string;
|
|
89
|
+
/**
|
|
90
|
+
* Return a quarter label for an ISO date — `Q<n> <YYYY>` (e.g. `Q2 2026`).
|
|
91
|
+
* Used by `quarter-ahead` and `quarter-in-review` title generators.
|
|
92
|
+
*
|
|
93
|
+
* @param date - ISO date string
|
|
94
|
+
* @returns Quarter label, or the input when parsing fails
|
|
95
|
+
*/
|
|
96
|
+
export declare function deriveQuarterLabel(date: string): string;
|
|
97
|
+
/**
|
|
98
|
+
* Return a four-digit year label for an ISO date. Used by `year-ahead`
|
|
99
|
+
* and `year-in-review` title generators.
|
|
100
|
+
*
|
|
101
|
+
* @param date - ISO date string
|
|
102
|
+
* @returns Year label, or the input when parsing fails
|
|
103
|
+
*/
|
|
104
|
+
export declare function deriveYearLabel(date: string): string;
|
|
105
|
+
/**
|
|
106
|
+
* Return the EP-term label for an ISO date (e.g. `EP10 → 2029`, `EP11 → 2034`).
|
|
107
|
+
* Used by `term-outlook` title generator.
|
|
108
|
+
*
|
|
109
|
+
* @param date - ISO date string
|
|
110
|
+
* @returns Term label, or the input when parsing fails
|
|
111
|
+
*/
|
|
112
|
+
export declare function deriveTermLabel(date: string): string;
|
|
113
|
+
/**
|
|
114
|
+
* Return the election-cycle label for an ISO date — pairs the outgoing
|
|
115
|
+
* and incoming EP terms with the election year (e.g. `EP10 → EP11 (2029)`).
|
|
116
|
+
* Used by the `election-cycle` title generator.
|
|
117
|
+
*
|
|
118
|
+
* @param date - ISO date string
|
|
119
|
+
* @returns Cycle label, or the input when parsing fails
|
|
120
|
+
*/
|
|
121
|
+
export declare function deriveElectionCycleLabel(date: string): string;
|
|
122
|
+
//# sourceMappingURL=date-labels.d.ts.map
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* @module Aggregator/Metadata/DateLabels
|
|
5
|
+
* @description Pure date-label derivation helpers extracted from
|
|
6
|
+
* `article-metadata.ts` as a leaf module in the `metadata/` bounded
|
|
7
|
+
* context. Every helper takes an ISO `YYYY-MM-DD` string and returns a
|
|
8
|
+
* human-friendly label (or `{start, end}` window) used by the per-article-type
|
|
9
|
+
* template-fallback title generators.
|
|
10
|
+
*
|
|
11
|
+
* Bounded-context rules for this file:
|
|
12
|
+
* - **No upward imports** — pure helpers, no dependencies on other
|
|
13
|
+
* `src/aggregator/` modules, no I/O, no globals.
|
|
14
|
+
* - **Deterministic** — same input always produces same output; safe to
|
|
15
|
+
* call from property-based tests.
|
|
16
|
+
* - **UTC-only** — all parsing/formatting goes through `Date` UTC accessors,
|
|
17
|
+
* never local-time `getMonth()`/`getDate()`.
|
|
18
|
+
*
|
|
19
|
+
* Cross-references:
|
|
20
|
+
* - EP-term boundary constants follow
|
|
21
|
+
* {@link analysis/methodologies/electoral-cycle-methodology.md}.
|
|
22
|
+
* - The D-36 → D-8 reporting window for `week-in-review` follows ADR-006
|
|
23
|
+
* (EP roll-call publication lag).
|
|
24
|
+
*/
|
|
25
|
+
/** Length of one UTC day in milliseconds (24 h × 60 min × 60 s × 1000 ms). */
export const MS_PER_DAY = 24 * 60 * 60 * 1000;
/**
 * Year boundaries of the European Parliament terms used by the label
 * helpers. Keep in sync with
 * {@link analysis/methodologies/electoral-cycle-methodology.md}.
 * - EP10: 16 Jul 2024 → ~end of June 2029
 * - EP11: ~Jul 2029 → ~Jun 2034
 */
export const EP10_START_YEAR = 2024;
export const EP10_END_YEAR = 2029;
export const EP11_END_YEAR = 2034;
/**
 * 1-based month number of June. Dates up to and including this month of a
 * term's end year are still attributed to the outgoing term by
 * `deriveTermLabel`.
 */
export const EP_ELECTION_MONTH = 6;
|
|
38
|
+
/**
 * Parse an ISO calendar date (`YYYY-MM-DD`) as UTC midnight.
 *
 * Returns `null` for anything that is not a real calendar date so callers
 * can skip month/week derivation gracefully. That covers both malformed
 * strings and well-shaped but impossible dates such as `2026-02-31`.
 *
 * @param iso - ISO date string (`YYYY-MM-DD`)
 * @returns Parsed `Date` at `T00:00:00Z`, or `null` when the input is invalid
 */
export function parseIsoDate(iso) {
    if (!/^\d{4}-\d{2}-\d{2}$/.test(iso))
        return null;
    const parsed = new Date(`${iso}T00:00:00Z`);
    if (Number.isNaN(parsed.getTime()))
        return null;
    // Some engines roll an out-of-range day over into the next month instead
    // of rejecting it (e.g. 31 February becomes early March). Round-tripping
    // through the canonical ISO form rejects those inputs on every engine.
    return parsed.toISOString().slice(0, 10) === iso ? parsed : null;
}
|
|
51
|
+
/**
 * Format a `Date` as `YYYY-MM-DD` using its UTC calendar fields.
 *
 * The year is zero-padded to four digits for years 0–9999 so the result
 * always matches the `YYYY-MM-DD` shape that `parseIsoDate` accepts
 * (e.g. year 999 is rendered as `0999`, not `999`). Years outside that
 * range are emitted unpadded.
 *
 * @param d - Date object
 * @returns ISO date string
 */
export function formatIsoDate(d) {
    const year = d.getUTCFullYear();
    // Only pad inside the four-digit range; padding a negative year would
    // put the zeros in front of the minus sign.
    const yyyy = year >= 0 && year <= 9999 ? String(year).padStart(4, '0') : String(year);
    const mm = String(d.getUTCMonth() + 1).padStart(2, '0');
    const dd = String(d.getUTCDate()).padStart(2, '0');
    return `${yyyy}-${mm}-${dd}`;
}
|
|
63
|
+
/**
 * Compute the Monday-to-Sunday week that contains an ISO date.
 *
 * When the input cannot be parsed, the input string is echoed back as both
 * `start` and `end`.
 *
 * @param date - ISO date string (`YYYY-MM-DD`)
 * @returns `{ start, end }` both in `YYYY-MM-DD` form
 */
export function deriveWeekRange(date) {
    const anchor = parseIsoDate(date);
    if (!anchor) {
        return { start: date, end: date };
    }
    // getUTCDay() is 0 for Sunday; (day + 6) % 7 re-bases it so Monday is 0.
    const daysSinceMonday = (anchor.getUTCDay() + 6) % 7;
    const monday = new Date(anchor.getTime() - daysSinceMonday * MS_PER_DAY);
    const sunday = new Date(monday.getTime() + 6 * MS_PER_DAY);
    return { start: formatIsoDate(monday), end: formatIsoDate(sunday) };
}
|
|
80
|
+
/**
 * Compute the D-36 → D-8 reporting window used by the `week-in-review`
 * article type.
 *
 * Rationale carried over from the original documentation: EP roll-call
 * voting data is published with a 2–6 week lag, so the most recent 7 days
 * would yield a vote-empty dataset. The window is therefore shifted 8 days
 * back and widened to 28 days (ADR-006), matching the workflow's
 * `DATE_FROM` (start = D-36) → `DATE_TO` (end = D-8) variables.
 *
 * When the input cannot be parsed, the input string is echoed back as both
 * `start` and `end`.
 *
 * @param date - ISO article date string (`YYYY-MM-DD`) — typically TODAY
 * @returns `{ start: D-36, end: D-8 }` both as `YYYY-MM-DD` ISO strings
 */
export function deriveReportingWindowForWeekInReview(date) {
    const anchor = parseIsoDate(date);
    if (!anchor) {
        return { start: date, end: date };
    }
    const anchorMs = anchor.getTime();
    // ISO string for the day lying `days` whole UTC days before the anchor.
    const isoDaysBefore = (days) => formatIsoDate(new Date(anchorMs - days * MS_PER_DAY));
    return {
        start: isoDaysBefore(36),
        end: isoDaysBefore(8),
    };
}
|
|
102
|
+
/**
 * Build an English "<Month> <YYYY>" label (e.g. `April 2026`) for an ISO
 * date, using the date's UTC month and year.
 *
 * Per the original documentation, non-English template generators reuse
 * this label verbatim: they interpolate it into a localized sentence
 * instead of translating the month name.
 *
 * @param date - ISO date string
 * @returns Month label, or the input when parsing fails
 */
export function deriveMonthLabel(date) {
    const when = parseIsoDate(date);
    if (!when) {
        return date;
    }
    const englishMonths = [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    ];
    // An out-of-range index falls back to '', and trim() then removes the
    // orphaned separator so only the year remains.
    const monthName = englishMonths[when.getUTCMonth()] ?? '';
    return [monthName, when.getUTCFullYear()].join(' ').trim();
}
|
|
132
|
+
/**
 * Build a `Q<n> <YYYY>` label (e.g. `Q2 2026`) for an ISO date from its
 * UTC month and year. Used by the `quarter-ahead` and `quarter-in-review`
 * title generators.
 *
 * @param date - ISO date string
 * @returns Quarter label, or the input when parsing fails
 */
export function deriveQuarterLabel(date) {
    const when = parseIsoDate(date);
    if (!when) {
        return date;
    }
    // Months 1-3 → Q1, 4-6 → Q2, 7-9 → Q3, 10-12 → Q4.
    const monthNumber = when.getUTCMonth() + 1;
    const quarterNumber = Math.ceil(monthNumber / 3);
    return `Q${quarterNumber} ${when.getUTCFullYear()}`;
}
|
|
146
|
+
/**
 * Return the UTC year of an ISO date as a string. Used by the `year-ahead`
 * and `year-in-review` title generators.
 *
 * @param date - ISO date string
 * @returns Year label, or the input when parsing fails
 */
export function deriveYearLabel(date) {
    const when = parseIsoDate(date);
    return when ? `${when.getUTCFullYear()}` : date;
}
|
|
159
|
+
/**
 * Return the EP-term label (`EP<n> → <end year>`) for an ISO date, e.g.
 * `EP10 → 2029` or `EP11 → 2034`. Used by the `term-outlook` title
 * generator.
 *
 * Attribution rules, as implemented:
 * - any year before `EP10_START_YEAR` yields `EP9 → 2024`;
 * - a date belongs to a term up to and including `EP_ELECTION_MONTH` of
 *   that term's end year, and to the next term from the following month;
 * - beyond EP11 the terms are extrapolated in five-year steps.
 *
 * @param date - ISO date string
 * @returns Term label, or the input when parsing fails
 */
export function deriveTermLabel(date) {
    const when = parseIsoDate(date);
    if (!when) {
        return date;
    }
    const y = when.getUTCFullYear();
    const m = when.getUTCMonth() + 1;
    // True while (y, m) is no later than the election month of `endYear`.
    const endsBy = (endYear) => y < endYear || (y === endYear && m <= EP_ELECTION_MONTH);
    if (y < EP10_START_YEAR) {
        return `EP9 → ${EP10_START_YEAR}`;
    }
    if (endsBy(EP10_END_YEAR)) {
        return `EP10 → ${EP10_END_YEAR}`;
    }
    if (endsBy(EP11_END_YEAR)) {
        return `EP11 → ${EP11_END_YEAR}`;
    }
    // Past EP11: count how many further five-year terms have begun. A date
    // after the election month already counts towards the following term.
    const pastElectionMonth = m > EP_ELECTION_MONTH ? 1 : 0;
    const extraTerms = 1 + Math.floor((y - EP11_END_YEAR - 1 + pastElectionMonth) / 5);
    return `EP${11 + extraTerms} → ${EP11_END_YEAR + 5 * extraTerms}`;
}
|
|
187
|
+
/**
 * Return the election-cycle label for an ISO date: the outgoing and
 * incoming EP terms plus the election year, e.g. `EP10 → EP11 (2029)`.
 * Used by the `election-cycle` title generator.
 *
 * The decision is made on the UTC year alone — the month is not consulted,
 * so every date in an election year maps to that year's cycle. Years up to
 * and including `EP10_END_YEAR` (including years before EP10 began) map to
 * the 2029 cycle; years after `EP11_END_YEAR` are extrapolated in
 * five-year steps.
 *
 * @param date - ISO date string
 * @returns Cycle label, or the input when parsing fails
 */
export function deriveElectionCycleLabel(date) {
    const when = parseIsoDate(date);
    if (!when) {
        return date;
    }
    const y = when.getUTCFullYear();
    if (y > EP11_END_YEAR) {
        // Number of five-year cycles after the EP11 election needed to reach `y`.
        const extraCycles = Math.ceil((y - EP11_END_YEAR) / 5);
        const outgoing = 11 + extraCycles;
        const electionYear = EP11_END_YEAR + 5 * extraCycles;
        return `EP${outgoing} → EP${outgoing + 1} (${electionYear})`;
    }
    return y <= EP10_END_YEAR
        ? `EP10 → EP11 (${EP10_END_YEAR})`
        : `EP11 → EP12 (${EP11_END_YEAR})`;
}
|
|
209
|
+
//# sourceMappingURL=date-labels.js.map
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module Aggregator/Metadata/TextUtils
|
|
3
|
+
* @description Pure text / Markdown utility helpers extracted from
|
|
4
|
+
* `article-metadata.ts` as a leaf module in the `metadata/` bounded
|
|
5
|
+
* context. Every helper here is concerned with **how to massage a
|
|
6
|
+
* string** into a meta-tag-safe shape — strip Markdown decorations,
|
|
7
|
+
* recognise banner / metadata rows that must never reach the
|
|
8
|
+
* description, clamp text to character-length budgets without producing broken
|
|
9
|
+
* copy, and identify the first complete sentence in a prose paragraph.
|
|
10
|
+
*
|
|
11
|
+
* Bounded-context rules for this file:
|
|
12
|
+
* - **No upward imports** — pure helpers, no dependencies on other
|
|
13
|
+
* `src/aggregator/` modules, no I/O, no globals.
|
|
14
|
+
* - **Deterministic** — same input always produces same output; safe to
|
|
15
|
+
* property-test.
|
|
16
|
+
* - **Locale-agnostic** — every helper works on raw Markdown / prose
|
|
17
|
+
* in any of the 14 publishing languages. Banner-row detection is
|
|
18
|
+
* driven by structural shape (double-bold + pipe-separator), not by
|
|
19
|
+
* a hard-coded English vocabulary.
|
|
20
|
+
*
|
|
21
|
+
* The companion file `article-metadata.ts` re-exports the public surface
|
|
22
|
+
* for back-compat. New code should import directly from this module.
|
|
23
|
+
*/
|
|
24
|
+
/** Maximum `<meta description>` length we will emit. */
|
|
25
|
+
export declare const DESCRIPTION_MAX_LENGTH = 180;
|
|
26
|
+
/**
|
|
27
|
+
* Maximum `og:description` / `twitter:description` length we will
|
|
28
|
+
* emit. Facebook truncates at ~300 characters in the preview card;
|
|
29
|
+
* Twitter at ~200. We aim for the longer cap so LinkedIn / Slack
|
|
30
|
+
* (which use the full OG payload) get the full BLUF context, then
|
|
31
|
+
* let Twitter clip naturally. Below this length the extended
|
|
32
|
+
* description is emitted verbatim; above it we sentence-boundary
|
|
33
|
+
* truncate the same way as {@link truncateDescription}.
|
|
34
|
+
*/
|
|
35
|
+
export declare const EXTENDED_DESCRIPTION_MAX_LENGTH = 300;
|
|
36
|
+
/** Target minimum extended-description length before we even emit it. */
|
|
37
|
+
export declare const EXTENDED_DESCRIPTION_MIN_LENGTH = 200;
|
|
38
|
+
/** Target minimum `<meta description>` length before we append context. */
|
|
39
|
+
export declare const DESCRIPTION_MIN_LENGTH = 140;
|
|
40
|
+
/**
|
|
41
|
+
* Length below which a raw description is considered too short to stand
|
|
42
|
+
* on its own and gets enriched with date/context. Independent from
|
|
43
|
+
* {@link DESCRIPTION_MIN_LENGTH} (which controls sentence-boundary
|
|
44
|
+
* truncation behaviour). Set lower than DESCRIPTION_MIN_LENGTH so a
|
|
45
|
+
* clean 100-140 char prose lede is preserved verbatim instead of being
|
|
46
|
+
* padded with date/context boilerplate.
|
|
47
|
+
*/
|
|
48
|
+
export declare const ENRICHMENT_TRIGGER_LENGTH = 100;
|
|
49
|
+
/** Maximum `<title>` length — anything longer is truncated with an ellipsis. */
|
|
50
|
+
export declare const TITLE_MAX_LENGTH = 140;
|
|
51
|
+
/**
|
|
52
|
+
* Soft target for headline-style titles produced as a fallback from
|
|
53
|
+
* BLUF/lede prose. When the candidate exceeds `TITLE_MAX_LENGTH`, the
|
|
54
|
+
* truncator first looks for a natural clause boundary
|
|
55
|
+
* (`.`, `:`, `—`, `;`) inside the `[HEADLINE_SOFT_MIN, TITLE_MAX_LENGTH]`
|
|
56
|
+
* window and breaks there instead of mid-clause-with-ellipsis. This
|
|
57
|
+
* turns a 137-character truncated prose paragraph into a complete
|
|
58
|
+
* journalistic clause, which scans much better in news cards and SERP
|
|
59
|
+
* snippets without sacrificing the keyword-rich opening.
|
|
60
|
+
*/
|
|
61
|
+
export declare const HEADLINE_SOFT_MIN = 60;
|
|
62
|
+
/**
|
|
63
|
+
* Punctuation marks that signal a natural clause boundary inside a
|
|
64
|
+
* BLUF / lede paragraph. Listed in preferred-break order: a colon or
|
|
65
|
+
* em-dash that introduces a list of consequences is the best break,
|
|
66
|
+
* full stops are next, and semicolons last. Single ASCII space is
|
|
67
|
+
* always a fallback boundary handled separately.
|
|
68
|
+
*/
|
|
69
|
+
export declare const HEADLINE_CLAUSE_BOUNDARIES: readonly string[];
|
|
70
|
+
/**
|
|
71
|
+
* Emoji-banner prefixes that Stage-B agents use to decorate metadata rows
|
|
72
|
+
* (e.g. `📋 Analysis Owner:`). Any line starting with one of these is
|
|
73
|
+
* metadata, never prose.
|
|
74
|
+
*/
|
|
75
|
+
export declare const EMOJI_BANNER_CHARS: string[];
|
|
76
|
+
/**
|
|
77
|
+
* Label prefixes that a prose description must never start with. Every
|
|
78
|
+
* entry matches case-insensitively at the start of a trimmed line, followed
|
|
79
|
+
* by optional space and a colon.
|
|
80
|
+
*/
|
|
81
|
+
export declare const METADATA_LINE_PREFIXES: readonly string[];
|
|
82
|
+
/** Connector / determiner words that read as broken copy when they are
|
|
83
|
+
* the final token before a truncation ellipsis. */
|
|
84
|
+
export declare const TRAILING_STOP_WORDS: Set<string>;
|
|
85
|
+
/** Trailing characters we always strip before appending our own ellipsis,
|
|
86
|
+
* so we never emit double-ellipsis or stray punctuation. */
|
|
87
|
+
export declare const TRAILING_PUNCT: RegExp;
|
|
88
|
+
/**
|
|
89
|
+
* Abbreviation tokens (lowercase, including the trailing period) that
|
|
90
|
+
* should NOT count as sentence terminators when {@link extractFirstSentence}
|
|
91
|
+
* scans for a `.` boundary. Single-letter all-caps initials
|
|
92
|
+
* (`U.S.`, `E.U.`) are handled by the all-caps-initial check below.
|
|
93
|
+
*/
|
|
94
|
+
export declare const ABBREVIATION_PREFIXES: readonly string[];
|
|
95
|
+
/**
|
|
96
|
+
* Return `true` when a line cannot serve as a prose description. Rejects
|
|
97
|
+
* Markdown structural lines (headings, blockquotes, tables, HTML),
|
|
98
|
+
* mermaid/chart directives, emoji-banner metadata rows, and the known
|
|
99
|
+
* `Key: value` banners that Stage-B agents emit as artefact preamble.
|
|
100
|
+
*
|
|
101
|
+
* @param line - Trimmed line from the aggregated Markdown source
|
|
102
|
+
* @returns `true` when the line is not prose and should be skipped
|
|
103
|
+
*/
|
|
104
|
+
export declare function shouldSkipDescriptionLine(line: string): boolean;
|
|
105
|
+
/**
|
|
106
|
+
* Strip a leading all-caps prose label (e.g. `SITUATION:`, `KEY MOTION:`,
|
|
107
|
+
* `BLUF:`, `BOTTOM LINE:`, `TIER-1:`) from a prose line. These labels
|
|
108
|
+
* are common in BLUF-style editorial writing — they survive
|
|
109
|
+
* {@link stripInlineMarkdown} (which strips the `**bold**` wrapper but
|
|
110
|
+
* keeps the literal text) and would otherwise leak into the SEO
|
|
111
|
+
* description as a confusing all-caps shout.
|
|
112
|
+
*
|
|
113
|
+
* Matches up to 4 hyphenated all-caps tokens, optionally followed by a
|
|
114
|
+
* digit suffix (`TIER-1`), terminating at a colon. Returns the original
|
|
115
|
+
* line when no opener is present.
|
|
116
|
+
*
|
|
117
|
+
* @param line - Plain prose line (post-{@link stripInlineMarkdown})
|
|
118
|
+
* @returns Line with the all-caps opener removed
|
|
119
|
+
*/
|
|
120
|
+
export declare function stripLeadingProseLabel(line: string): string;
|
|
121
|
+
/**
|
|
122
|
+
* Strip inline Markdown decorations so we can use the remaining text as
|
|
123
|
+
* plain-text meta-tag content. Removes link syntax, emphasis, inline code
|
|
124
|
+
* backticks, and HTML-entity fragments that the Markdown source sometimes
|
|
125
|
+
* smuggles in. Keeps the visible text readable.
|
|
126
|
+
*
|
|
127
|
+
* @param raw - Trimmed Markdown line
|
|
128
|
+
* @returns Plain-text variant
|
|
129
|
+
*/
|
|
130
|
+
export declare function stripInlineMarkdown(raw: string): string;
|
|
131
|
+
/**
|
|
132
|
+
* Clamp a string to `DESCRIPTION_MAX_LENGTH` characters, appending
|
|
133
|
+
* an ellipsis when truncation actually happens. Does not break words if
|
|
134
|
+
* avoidable — a trailing partial word is trimmed back to the previous
|
|
135
|
+
* space first.
|
|
136
|
+
*
|
|
137
|
+
* @param text - Raw description text
|
|
138
|
+
* @returns Truncated description with trailing ellipsis when clipped
|
|
139
|
+
*/
|
|
140
|
+
export declare function truncateDescription(text: string): string;
|
|
141
|
+
/**
|
|
142
|
+
* Clamp an extended description to {@link EXTENDED_DESCRIPTION_MAX_LENGTH}
|
|
143
|
+
* characters using the same sentence-boundary-preserving logic as
|
|
144
|
+
* {@link truncateDescription}. Returns `''` when the input is empty
|
|
145
|
+
* or shorter than the meta-description maximum (no point in emitting
|
|
146
|
+
* an "extended" description that's actually shorter than the regular
|
|
147
|
+
* one).
|
|
148
|
+
*
|
|
149
|
+
* @param text - Raw extended-description text (e.g. full BLUF paragraph)
|
|
150
|
+
* @returns Truncated extended description, or `''` when not worth emitting
|
|
151
|
+
*/
|
|
152
|
+
export declare function truncateExtendedDescription(text: string): string;
|
|
153
|
+
/**
|
|
154
|
+
* Clamp a title to `TITLE_MAX_LENGTH` characters in the same
|
|
155
|
+
* word-boundary-preserving fashion as {@link truncateDescription}.
|
|
156
|
+
*
|
|
157
|
+
* @param text - Raw title text
|
|
158
|
+
* @returns Truncated title with trailing ellipsis when clipped
|
|
159
|
+
*/
|
|
160
|
+
export declare function truncateTitle(text: string): string;
|
|
161
|
+
/**
|
|
162
|
+
* Return the first complete sentence from a prose paragraph, suitable
|
|
163
|
+
* for use as a fallback editorial title when the artefact H1 is
|
|
164
|
+
* categorical (e.g. `# EU Parliament Committee Reports`) and the
|
|
165
|
+
* resolver must derive `<title>` from the BLUF / lede summary instead.
|
|
166
|
+
*
|
|
167
|
+
* A "sentence" is the prefix up to the first sentence-terminator
|
|
168
|
+
* (`. `, `! `, `? `, `; `) inside the `[HEADLINE_SOFT_MIN,
|
|
169
|
+
* TITLE_MAX_LENGTH]` window. Common abbreviations (`Q1.`, `Q2.`,
|
|
170
|
+
* `H1.`, `H2.`, `Mr.`, `Mrs.`, `e.g.`, `i.e.`, `vs.`) are skipped
|
|
171
|
+
* so they don't terminate the sentence prematurely. When no
|
|
172
|
+
* acceptable terminator exists in the window, returns the entire
|
|
173
|
+
* input unchanged so {@link truncateTitle} can handle clause-boundary
|
|
174
|
+
* truncation downstream.
|
|
175
|
+
*
|
|
176
|
+
* This produces journalistically clean titles even for the
|
|
177
|
+
* propositions / committee-reports cases where the BLUF paragraph
|
|
178
|
+
* opens with a single long sentence that exceeds 140 chars —
|
|
179
|
+
* `truncateTitle` then breaks on a clause boundary, and the result is
|
|
180
|
+
* still grammatical because the input was a sentence prefix rather
|
|
181
|
+
* than an arbitrary paragraph slice.
|
|
182
|
+
*
|
|
183
|
+
* @param paragraph - Prose paragraph (post-{@link stripInlineMarkdown})
|
|
184
|
+
* @returns First sentence, or the original paragraph when none can be
|
|
185
|
+
* identified within the soft-min window
|
|
186
|
+
*/
|
|
187
|
+
export declare function extractFirstSentence(paragraph: string): string;
|
|
188
|
+
//# sourceMappingURL=text-utils.d.ts.map
|