euparliamentmonitor 0.9.13 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/package.json +6 -4
  2. package/scripts/aggregator/article-generator.js +2 -1
  3. package/scripts/aggregator/article-html.d.ts +9 -0
  4. package/scripts/aggregator/article-html.js +134 -13
  5. package/scripts/aggregator/article-metadata.d.ts +25 -161
  6. package/scripts/aggregator/article-metadata.js +71 -649
  7. package/scripts/aggregator/editorial-brief-resolver.d.ts +9 -0
  8. package/scripts/aggregator/editorial-brief-resolver.js +3 -1
  9. package/scripts/aggregator/metadata/date-labels.d.ts +122 -0
  10. package/scripts/aggregator/metadata/date-labels.js +209 -0
  11. package/scripts/aggregator/metadata/text-utils.d.ts +188 -0
  12. package/scripts/aggregator/metadata/text-utils.js +542 -0
  13. package/scripts/constants/og-locales.d.ts +15 -0
  14. package/scripts/constants/og-locales.js +17 -0
  15. package/scripts/constants/seo/index.d.ts +21 -0
  16. package/scripts/constants/seo/index.js +23 -0
  17. package/scripts/constants/seo/og-locales.d.ts +59 -0
  18. package/scripts/constants/seo/og-locales.js +59 -0
  19. package/scripts/constants/seo/social-handles.d.ts +50 -0
  20. package/scripts/constants/seo/social-handles.js +65 -0
  21. package/scripts/constants/social-handles.d.ts +11 -0
  22. package/scripts/constants/social-handles.js +13 -0
  23. package/scripts/discover-untranslated-briefs.js +224 -19
  24. package/scripts/generators/news-indexes.d.ts +35 -0
  25. package/scripts/generators/news-indexes.js +67 -6
  26. package/scripts/generators/political-intelligence/html.js +14 -6
  27. package/scripts/generators/seo-copy.js +42 -0
  28. package/scripts/generators/sitemap/html.js +13 -5
  29. package/scripts/lint-src-todos.js +124 -0
  30. package/scripts/utils/copy-test-reports.js +1 -1
  31. package/scripts/utils/generate-docs-index.js +1 -1
  32. package/scripts/validate-brief-translations.js +158 -18
@@ -19,6 +19,15 @@ import type { LanguageCode } from '../types/index.js';
19
19
  export interface LocalizedBriefHighlight {
20
20
  readonly headline: string;
21
21
  readonly summary: string;
22
+ /**
23
+ * Longer (up to ~300 chars) summary lifted from the same brief BLUF
24
+ * paragraph as {@link summary}, used for `og:description` and
25
+ * `twitter:description`. Empty string when the BLUF is short enough
26
+ * that the regular `summary` already captures it — see
27
+ * `truncateExtendedDescription` for the cutoff. The caller should
28
+ * fall back to {@link summary} when this field is empty.
29
+ */
30
+ readonly extendedSummary: string;
22
31
  readonly sourceFile: string;
23
32
  readonly sourceLang: LanguageCode;
24
33
  }
@@ -29,7 +29,7 @@
29
29
  */
30
30
  import fs from 'fs';
31
31
  import path from 'path';
32
- import { extractFirstH1, extractLedeAfterHeading, extractStrongProseLine, isGenericHeading, stripArtifactCategoryAffix, truncateTitle, } from './article-metadata.js';
32
+ import { extractFirstH1, extractLedeAfterHeading, extractExtendedLedeAfterHeading, extractStrongProseLine, isGenericHeading, stripArtifactCategoryAffix, truncateTitle, } from './article-metadata.js';
33
33
  /**
34
34
  * Run-relative candidate paths for a translated brief, in precedence
35
35
  * order. Mirrors the `executive-brief.md` → `extended/executive-brief.md`
@@ -177,10 +177,12 @@ export function resolveLocalizedBriefHighlight(runDir, lang, articleType, date)
177
177
  const headline = deriveHeadline(body, articleType, date);
178
178
  const lede = extractLedeAfterHeading(body);
179
179
  const summary = lede || extractStrongProseLine(body);
180
+ const extendedSummary = extractExtendedLedeAfterHeading(body);
180
181
  if (headline || summary) {
181
182
  return {
182
183
  headline,
183
184
  summary,
185
+ extendedSummary,
184
186
  sourceFile: rel,
185
187
  sourceLang: lang,
186
188
  };
@@ -0,0 +1,122 @@
1
+ /**
2
+ * @module Aggregator/Metadata/DateLabels
3
+ * @description Pure date-label derivation helpers extracted from
4
+ * `article-metadata.ts` as a leaf module in the `metadata/` bounded
5
+ * context. Every helper takes an ISO `YYYY-MM-DD` string and returns a
6
+ * human-friendly label (or `{start, end}` window) used by the per-article-type
7
+ * template-fallback title generators.
8
+ *
9
+ * Bounded-context rules for this file:
10
+ * - **No upward imports** — pure helpers, no dependencies on other
11
+ * `src/aggregator/` modules, no I/O, no globals.
12
+ * - **Deterministic** — same input always produces same output; safe to
13
+ * call from property-based tests.
14
+ * - **UTC-only** — all parsing/formatting goes through `Date` UTC accessors,
15
+ * never local-time `getMonth()`/`getDate()`.
16
+ *
17
+ * Cross-references:
18
+ * - EP-term boundary constants follow
19
+ * {@link analysis/methodologies/electoral-cycle-methodology.md}.
20
+ * - The D-36 → D-8 reporting window for `week-in-review` follows ADR-006
21
+ * (EP roll-call publication lag).
22
+ */
23
+ /** Milliseconds in one UTC day — used by date-window derivation helpers. */
24
+ export declare const MS_PER_DAY = 86400000;
25
+ /**
26
+ * EP-term boundary constants — keep these in sync with
27
+ * {@link analysis/methodologies/electoral-cycle-methodology.md}.
28
+ * - EP10: 16 Jul 2024 → ~end of June 2029
29
+ * - EP11: ~Jul 2029 → ~Jun 2034
30
+ */
31
+ export declare const EP10_START_YEAR = 2024;
32
+ export declare const EP10_END_YEAR = 2029;
33
+ export declare const EP11_END_YEAR = 2034;
34
+ /** June (1-based month number) — EP elections are typically held in late May or early June, every 5 years. */
35
+ export declare const EP_ELECTION_MONTH = 6;
36
+ /**
37
+ * Parse an ISO date string as UTC midnight. Returns `null` for malformed
38
+ * input so callers can skip month/week derivation gracefully.
39
+ *
40
+ * @param iso - ISO date string (`YYYY-MM-DD`)
41
+ * @returns Parsed `Date` or `null`
42
+ */
43
+ export declare function parseIsoDate(iso: string): Date | null;
44
+ /**
45
+ * Format a `Date` as `YYYY-MM-DD` in UTC.
46
+ *
47
+ * @param d - Date object
48
+ * @returns ISO date string
49
+ */
50
+ export declare function formatIsoDate(d: Date): string;
51
+ /**
52
+ * Parse an ISO date and return the `{ start, end }` week range as ISO
53
+ * strings. Week starts on Monday and ends on the following Sunday.
54
+ *
55
+ * @param date - ISO date string (`YYYY-MM-DD`)
56
+ * @returns `{ start, end }` both in `YYYY-MM-DD` form
57
+ */
58
+ export declare function deriveWeekRange(date: string): {
59
+ readonly start: string;
60
+ readonly end: string;
61
+ };
62
+ /**
63
+ * Return the D-36 → D-8 reporting window for the `week-in-review`
64
+ * article type. EP roll-call voting data is published with a 2–6 week
65
+ * lag, so using the most-recent 7 days structurally produces a
66
+ * vote-empty dataset. Shifting 8 days back and widening to 28 days
67
+ * (start = D-36, end = D-8) ensures the window always contains at
68
+ * least one full EP plenary week with published roll-call data
69
+ * (ADR-006). Direction is consistent with the workflow's
70
+ * `DATE_FROM` (start = D-36) → `DATE_TO` (end = D-8) variables.
71
+ *
72
+ * @param date - ISO article date string (`YYYY-MM-DD`) — typically TODAY
73
+ * @returns `{ start: D-36, end: D-8 }` both as `YYYY-MM-DD` ISO strings
74
+ */
75
+ export declare function deriveReportingWindowForWeekInReview(date: string): {
76
+ readonly start: string;
77
+ readonly end: string;
78
+ };
79
+ /**
80
+ * Return a human-friendly month label for an ISO date — English month
81
+ * name + four-digit year (e.g. `April 2026`). The non-English template
82
+ * generators accept this same label verbatim because they interpolate it
83
+ * into a localized sentence rather than translating the month itself.
84
+ *
85
+ * @param date - ISO date string
86
+ * @returns Month label, or the input when parsing fails
87
+ */
88
+ export declare function deriveMonthLabel(date: string): string;
89
+ /**
90
+ * Return a quarter label for an ISO date — `Q<n> <YYYY>` (e.g. `Q2 2026`).
91
+ * Used by `quarter-ahead` and `quarter-in-review` title generators.
92
+ *
93
+ * @param date - ISO date string
94
+ * @returns Quarter label, or the input when parsing fails
95
+ */
96
+ export declare function deriveQuarterLabel(date: string): string;
97
+ /**
98
+ * Return a four-digit year label for an ISO date. Used by `year-ahead`
99
+ * and `year-in-review` title generators.
100
+ *
101
+ * @param date - ISO date string
102
+ * @returns Year label, or the input when parsing fails
103
+ */
104
+ export declare function deriveYearLabel(date: string): string;
105
+ /**
106
+ * Return the EP-term label for an ISO date — `EP10 → 2029` or `EP11 → 2034`.
107
+ * Used by `term-outlook` title generator.
108
+ *
109
+ * @param date - ISO date string
110
+ * @returns Term label, or the input when parsing fails
111
+ */
112
+ export declare function deriveTermLabel(date: string): string;
113
+ /**
114
+ * Return the election-cycle label for an ISO date — pairs the outgoing
115
+ * and incoming EP terms with the election year (e.g. `EP10 → EP11 (2029)`).
116
+ * Used by the `election-cycle` title generator.
117
+ *
118
+ * @param date - ISO date string
119
+ * @returns Cycle label, or the input when parsing fails
120
+ */
121
+ export declare function deriveElectionCycleLabel(date: string): string;
122
+ //# sourceMappingURL=date-labels.d.ts.map
@@ -0,0 +1,209 @@
1
+ // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
+ // SPDX-License-Identifier: Apache-2.0
3
+ /**
4
+ * @module Aggregator/Metadata/DateLabels
5
+ * @description Pure date-label derivation helpers extracted from
6
+ * `article-metadata.ts` as a leaf module in the `metadata/` bounded
7
+ * context. Every helper takes an ISO `YYYY-MM-DD` string and returns a
8
+ * human-friendly label (or `{start, end}` window) used by the per-article-type
9
+ * template-fallback title generators.
10
+ *
11
+ * Bounded-context rules for this file:
12
+ * - **No upward imports** — pure helpers, no dependencies on other
13
+ * `src/aggregator/` modules, no I/O, no globals.
14
+ * - **Deterministic** — same input always produces same output; safe to
15
+ * call from property-based tests.
16
+ * - **UTC-only** — all parsing/formatting goes through `Date` UTC accessors,
17
+ * never local-time `getMonth()`/`getDate()`.
18
+ *
19
+ * Cross-references:
20
+ * - EP-term boundary constants follow
21
+ * {@link analysis/methodologies/electoral-cycle-methodology.md}.
22
+ * - The D-36 → D-8 reporting window for `week-in-review` follows ADR-006
23
+ * (EP roll-call publication lag).
24
+ */
25
/** Length of a single UTC day expressed in milliseconds (24 h × 60 min × 60 s × 1000 ms). */
export const MS_PER_DAY = 24 * 60 * 60 * 1000;
/**
 * Year boundaries of the European Parliament terms used by the label
 * helpers in this module — keep them aligned with
 * {@link analysis/methodologies/electoral-cycle-methodology.md}.
 * - EP10: 16 Jul 2024 → ~end of June 2029
 * - EP11: ~Jul 2029 → ~Jun 2034
 */
export const EP10_START_YEAR = 2024;
export const EP10_END_YEAR = 2029;
export const EP11_END_YEAR = 2034;
/**
 * June as a 1-based month number. The term-label helper treats this month
 * as the last month that still belongs to an outgoing term.
 */
export const EP_ELECTION_MONTH = 6;
38
+ /**
39
+ * Parse an ISO date string as UTC midnight. Returns `null` for malformed
40
+ * input so callers can skip month/week derivation gracefully.
41
+ *
42
+ * @param iso - ISO date string (`YYYY-MM-DD`)
43
+ * @returns Parsed `Date` or `null`
44
+ */
45
+ export function parseIsoDate(iso) {
46
+ if (!/^\d{4}-\d{2}-\d{2}$/.test(iso))
47
+ return null;
48
+ const parsed = new Date(`${iso}T00:00:00Z`);
49
+ return Number.isNaN(parsed.getTime()) ? null : parsed;
50
+ }
51
+ /**
52
+ * Format a `Date` as `YYYY-MM-DD` in UTC.
53
+ *
54
+ * @param d - Date object
55
+ * @returns ISO date string
56
+ */
57
+ export function formatIsoDate(d) {
58
+ const y = d.getUTCFullYear();
59
+ const m = String(d.getUTCMonth() + 1).padStart(2, '0');
60
+ const day = String(d.getUTCDate()).padStart(2, '0');
61
+ return `${y}-${m}-${day}`;
62
+ }
63
+ /**
64
+ * Parse an ISO date and return the `[start, end]` week range as ISO
65
+ * strings. Week starts on Monday and ends on the following Sunday.
66
+ *
67
+ * @param date - ISO date string (`YYYY-MM-DD`)
68
+ * @returns `{ start, end }` both in `YYYY-MM-DD` form
69
+ */
70
+ export function deriveWeekRange(date) {
71
+ const parsed = parseIsoDate(date);
72
+ if (!parsed)
73
+ return { start: date, end: date };
74
+ const day = parsed.getUTCDay();
75
+ const shift = (day + 6) % 7;
76
+ const startMs = parsed.getTime() - shift * MS_PER_DAY;
77
+ const endMs = startMs + 6 * MS_PER_DAY;
78
+ return { start: formatIsoDate(new Date(startMs)), end: formatIsoDate(new Date(endMs)) };
79
+ }
80
+ /**
81
+ * Return the D-36 → D-8 reporting window for the `week-in-review`
82
+ * article type. EP roll-call voting data is published with a 2–6 week
83
+ * lag, so using the most-recent 7 days structurally produces a
84
+ * vote-empty dataset. Shifting 8 days back and widening to 28 days
85
+ * (start = D-36, end = D-8) ensures the window always contains at
86
+ * least one full EP plenary week with published roll-call data
87
+ * (ADR-006). Direction is consistent with the workflow's
88
+ * `DATE_FROM` (start = D-36) → `DATE_TO` (end = D-8) variables.
89
+ *
90
+ * @param date - ISO article date string (`YYYY-MM-DD`) — typically TODAY
91
+ * @returns `{ start: D-36, end: D-8 }` both as `YYYY-MM-DD` ISO strings
92
+ */
93
+ export function deriveReportingWindowForWeekInReview(date) {
94
+ const parsed = parseIsoDate(date);
95
+ if (!parsed)
96
+ return { start: date, end: date };
97
+ return {
98
+ start: formatIsoDate(new Date(parsed.getTime() - 36 * MS_PER_DAY)),
99
+ end: formatIsoDate(new Date(parsed.getTime() - 8 * MS_PER_DAY)),
100
+ };
101
+ }
102
+ /**
103
+ * Return a human-friendly month label for an ISO date — English month
104
+ * name + four-digit year (e.g. `April 2026`). The non-English template
105
+ * generators accept this same label verbatim because they interpolate it
106
+ * into a localized sentence rather than translating the month itself.
107
+ *
108
+ * @param date - ISO date string
109
+ * @returns Month label, or the input when parsing fails
110
+ */
111
+ export function deriveMonthLabel(date) {
112
+ const parsed = parseIsoDate(date);
113
+ if (!parsed)
114
+ return date;
115
+ const monthNames = [
116
+ 'January',
117
+ 'February',
118
+ 'March',
119
+ 'April',
120
+ 'May',
121
+ 'June',
122
+ 'July',
123
+ 'August',
124
+ 'September',
125
+ 'October',
126
+ 'November',
127
+ 'December',
128
+ ];
129
+ const name = monthNames[parsed.getUTCMonth()] ?? '';
130
+ return `${name} ${parsed.getUTCFullYear()}`.trim();
131
+ }
132
+ /**
133
+ * Return a quarter label for an ISO date — `Q<n> <YYYY>` (e.g. `Q2 2026`).
134
+ * Used by `quarter-ahead` and `quarter-in-review` title generators.
135
+ *
136
+ * @param date - ISO date string
137
+ * @returns Quarter label, or the input when parsing fails
138
+ */
139
+ export function deriveQuarterLabel(date) {
140
+ const parsed = parseIsoDate(date);
141
+ if (!parsed)
142
+ return date;
143
+ const quarter = Math.floor(parsed.getUTCMonth() / 3) + 1;
144
+ return `Q${quarter} ${parsed.getUTCFullYear()}`;
145
+ }
146
+ /**
147
+ * Return a four-digit year label for an ISO date. Used by `year-ahead`
148
+ * and `year-in-review` title generators.
149
+ *
150
+ * @param date - ISO date string
151
+ * @returns Year label, or the input when parsing fails
152
+ */
153
+ export function deriveYearLabel(date) {
154
+ const parsed = parseIsoDate(date);
155
+ if (!parsed)
156
+ return date;
157
+ return String(parsed.getUTCFullYear());
158
+ }
159
+ /**
160
+ * Return the EP-term label for an ISO date — `EP10 → 2029` or `EP11 → 2034`.
161
+ * Used by `term-outlook` title generator.
162
+ *
163
+ * @param date - ISO date string
164
+ * @returns Term label, or the input when parsing fails
165
+ */
166
+ export function deriveTermLabel(date) {
167
+ const parsed = parseIsoDate(date);
168
+ if (!parsed)
169
+ return date;
170
+ const year = parsed.getUTCFullYear();
171
+ const month = parsed.getUTCMonth() + 1;
172
+ if (year < EP10_START_YEAR)
173
+ return `EP9 → ${EP10_START_YEAR}`;
174
+ if (year < EP10_END_YEAR || (year === EP10_END_YEAR && month <= EP_ELECTION_MONTH)) {
175
+ return `EP10 → ${EP10_END_YEAR}`;
176
+ }
177
+ if (year < EP11_END_YEAR || (year === EP11_END_YEAR && month <= EP_ELECTION_MONTH)) {
178
+ return `EP11 → ${EP11_END_YEAR}`;
179
+ }
180
+ const yearsBeyond = year - EP11_END_YEAR;
181
+ const offset = month <= EP_ELECTION_MONTH ? 0 : 1;
182
+ const termsBeyond = Math.floor((yearsBeyond - 1 + offset) / 5) + 1;
183
+ const termIndex = 11 + termsBeyond;
184
+ const termEnd = EP11_END_YEAR + 5 * termsBeyond;
185
+ return `EP${termIndex} → ${termEnd}`;
186
+ }
187
+ /**
188
+ * Return the election-cycle label for an ISO date — pairs the outgoing
189
+ * and incoming EP terms with the election year (e.g. `EP10 → EP11 (2029)`).
190
+ * Used by the `election-cycle` title generator.
191
+ *
192
+ * @param date - ISO date string
193
+ * @returns Cycle label, or the input when parsing fails
194
+ */
195
+ export function deriveElectionCycleLabel(date) {
196
+ const parsed = parseIsoDate(date);
197
+ if (!parsed)
198
+ return date;
199
+ const year = parsed.getUTCFullYear();
200
+ if (year <= EP10_END_YEAR)
201
+ return `EP10 → EP11 (${EP10_END_YEAR})`;
202
+ if (year <= EP11_END_YEAR)
203
+ return `EP11 → EP12 (${EP11_END_YEAR})`;
204
+ const cyclesBeyond = Math.ceil((year - EP11_END_YEAR) / 5);
205
+ const electionYear = EP11_END_YEAR + 5 * cyclesBeyond;
206
+ const out = 11 + cyclesBeyond;
207
+ return `EP${out} → EP${out + 1} (${electionYear})`;
208
+ }
209
+ //# sourceMappingURL=date-labels.js.map
@@ -0,0 +1,188 @@
1
+ /**
2
+ * @module Aggregator/Metadata/TextUtils
3
+ * @description Pure text / Markdown utility helpers extracted from
4
+ * `article-metadata.ts` as a leaf module in the `metadata/` bounded
5
+ * context. Every helper here is concerned with **how to massage a
6
+ * string** into a meta-tag-safe shape — strip Markdown decorations,
7
+ * recognise banner / metadata rows that must never reach the
8
+ * description, clamp text to byte budgets without producing broken
9
+ * copy, and identify the first complete sentence in a prose paragraph.
10
+ *
11
+ * Bounded-context rules for this file:
12
+ * - **No upward imports** — pure helpers, no dependencies on other
13
+ * `src/aggregator/` modules, no I/O, no globals.
14
+ * - **Deterministic** — same input always produces same output; safe to
15
+ * property-test.
16
+ * - **Locale-agnostic** — every helper works on raw Markdown / prose
17
+ * in any of the 14 publishing languages. Banner-row detection is
18
+ * driven by structural shape (double-bold + pipe-separator), not by
19
+ * a hard-coded English vocabulary.
20
+ *
21
+ * The companion file `article-metadata.ts` re-exports the public surface
22
+ * for back-compat. New code should import directly from this module.
23
+ */
24
+ /** Maximum `<meta description>` length we will emit. */
25
+ export declare const DESCRIPTION_MAX_LENGTH = 180;
26
+ /**
27
+ * Maximum `og:description` / `twitter:description` length we will
28
+ * emit. Facebook truncates at ~300 characters in the preview card;
29
+ * Twitter at ~200. We aim for the longer cap so LinkedIn / Slack
30
+ * (which use the full OG payload) get the full BLUF context, then
31
+ * let Twitter clip naturally. Below this length the extended
32
+ * description is emitted verbatim; above it we sentence-boundary
33
+ * truncate the same way as {@link truncateDescription}.
34
+ */
35
+ export declare const EXTENDED_DESCRIPTION_MAX_LENGTH = 300;
36
+ /** Target minimum extended-description length before we even emit it. */
37
+ export declare const EXTENDED_DESCRIPTION_MIN_LENGTH = 200;
38
+ /** Target minimum `<meta description>` length before we append context. */
39
+ export declare const DESCRIPTION_MIN_LENGTH = 140;
40
+ /**
41
+ * Length below which a raw description is considered too short to stand
42
+ * on its own and gets enriched with date/context. Independent from
43
+ * {@link DESCRIPTION_MIN_LENGTH} (which controls sentence-boundary
44
+ * truncation behaviour). Set lower than DESCRIPTION_MIN_LENGTH so a
45
+ * clean 100-140 char prose lede is preserved verbatim instead of being
46
+ * padded with date/context boilerplate.
47
+ */
48
+ export declare const ENRICHMENT_TRIGGER_LENGTH = 100;
49
+ /** Maximum `<title>` length — anything longer is truncated with an ellipsis. */
50
+ export declare const TITLE_MAX_LENGTH = 140;
51
+ /**
52
+ * Soft target for headline-style titles produced as a fallback from
53
+ * BLUF/lede prose. When the candidate exceeds `TITLE_MAX_LENGTH`, the
54
+ * truncator first looks for a natural clause boundary
55
+ * (`.`, `:`, `—`, `;`) inside the `[HEADLINE_SOFT_MIN, TITLE_MAX_LENGTH]`
56
+ * window and breaks there instead of mid-clause-with-ellipsis. This
57
+ * turns a 137-character truncated prose paragraph into a complete
58
+ * journalistic clause, which scans much better in news cards and SERP
59
+ * snippets without sacrificing the keyword-rich opening.
60
+ */
61
+ export declare const HEADLINE_SOFT_MIN = 60;
62
+ /**
63
+ * Punctuation marks that signal a natural clause boundary inside a
64
+ * BLUF / lede paragraph. Listed in preferred-break order: a colon or
65
+ * em-dash that introduces a list of consequences is the best break,
66
+ * full stops are next, and semicolons last. Single ASCII space is
67
+ * always a fallback boundary handled separately.
68
+ */
69
+ export declare const HEADLINE_CLAUSE_BOUNDARIES: readonly string[];
70
+ /**
71
+ * Emoji-banner prefixes that Stage-B agents use to decorate metadata rows
72
+ * (e.g. `📋 Analysis Owner:`). Any line starting with one of these is
73
+ * metadata, never prose.
74
+ */
75
+ export declare const EMOJI_BANNER_CHARS: string[];
76
+ /**
77
+ * Label prefixes that a prose description must never start with. Every
78
+ * entry matches case-insensitively at the start of a trimmed line, followed
79
+ * by optional space and a colon.
80
+ */
81
+ export declare const METADATA_LINE_PREFIXES: readonly string[];
82
+ /** Connector / determiner words that read as broken copy when they are
83
+ * the final token before a truncation ellipsis. */
84
+ export declare const TRAILING_STOP_WORDS: Set<string>;
85
+ /** Trailing characters we always strip before appending our own ellipsis,
86
+ * so we never emit double-ellipsis or stray punctuation. */
87
+ export declare const TRAILING_PUNCT: RegExp;
88
+ /**
89
+ * Abbreviation tokens (lowercase, including the trailing period) that
90
+ * should NOT count as sentence terminators when {@link extractFirstSentence}
91
+ * scans for a `.` boundary. Single-letter all-caps initials
92
+ * (`U.S.`, `E.U.`) are handled by the all-caps-initial check below.
93
+ */
94
+ export declare const ABBREVIATION_PREFIXES: readonly string[];
95
+ /**
96
+ * Return `true` when a line cannot serve as a prose description. Rejects
97
+ * Markdown structural lines (headings, blockquotes, tables, HTML),
98
+ * mermaid/chart directives, emoji-banner metadata rows, and the known
99
+ * `Key: value` banners that Stage-B agents emit as artefact preamble.
100
+ *
101
+ * @param line - Trimmed line from the aggregated Markdown source
102
+ * @returns `true` when the line is not prose and should be skipped
103
+ */
104
+ export declare function shouldSkipDescriptionLine(line: string): boolean;
105
+ /**
106
+ * Strip a leading all-caps prose label (e.g. `SITUATION:`, `KEY MOTION:`,
107
+ * `BLUF:`, `BOTTOM LINE:`, `TIER-1:`) from a prose line. These labels
108
+ * are common in BLUF-style editorial writing — they survive
109
+ * {@link stripInlineMarkdown} (which strips the `**bold**` wrapper but
110
+ * keeps the literal text) and would otherwise leak into the SEO
111
+ * description as a confusing all-caps shout.
112
+ *
113
+ * Matches up to 4 hyphenated all-caps tokens, optionally followed by a
114
+ * digit suffix (`TIER-1`), terminating at a colon. Returns the original
115
+ * line when no opener is present.
116
+ *
117
+ * @param line - Plain prose line (post-{@link stripInlineMarkdown})
118
+ * @returns Line with the all-caps opener removed
119
+ */
120
+ export declare function stripLeadingProseLabel(line: string): string;
121
+ /**
122
+ * Strip inline Markdown decorations so we can use the remaining text as
123
+ * plain-text meta-tag content. Removes link syntax, emphasis, inline code
124
+ * backticks, and HTML-entity fragments that the Markdown source sometimes
125
+ * smuggles in. Keeps the visible text readable.
126
+ *
127
+ * @param raw - Trimmed Markdown line
128
+ * @returns Plain-text variant
129
+ */
130
+ export declare function stripInlineMarkdown(raw: string): string;
131
+ /**
132
+ * Clamp a string to `DESCRIPTION_MAX_LENGTH` characters, appending
133
+ * an ellipsis when truncation actually happens. Does not break words if
134
+ * avoidable — a trailing partial word is trimmed back to the previous
135
+ * space first.
136
+ *
137
+ * @param text - Raw description text
138
+ * @returns Truncated description with trailing ellipsis when clipped
139
+ */
140
+ export declare function truncateDescription(text: string): string;
141
+ /**
142
+ * Clamp an extended description to {@link EXTENDED_DESCRIPTION_MAX_LENGTH}
143
+ * characters using the same sentence-boundary-preserving logic as
144
+ * {@link truncateDescription}. Returns `''` when the input is empty
145
+ * or shorter than the meta-description maximum (no point in emitting
146
+ * an "extended" description that's actually shorter than the regular
147
+ * one).
148
+ *
149
+ * @param text - Raw extended-description text (e.g. full BLUF paragraph)
150
+ * @returns Truncated extended description, or `''` when not worth emitting
151
+ */
152
+ export declare function truncateExtendedDescription(text: string): string;
153
+ /**
154
+ * Clamp a title to `TITLE_MAX_LENGTH` characters in the same
155
+ * word-boundary-preserving fashion as {@link truncateDescription}.
156
+ *
157
+ * @param text - Raw title text
158
+ * @returns Truncated title with trailing ellipsis when clipped
159
+ */
160
+ export declare function truncateTitle(text: string): string;
161
+ /**
162
+ * Return the first complete sentence from a prose paragraph, suitable
163
+ * for use as a fallback editorial title when the artefact H1 is
164
+ * categorical (e.g. `# EU Parliament Committee Reports`) and the
165
+ * resolver must derive `<title>` from the BLUF / lede summary instead.
166
+ *
167
+ * A "sentence" is the prefix up to the first sentence-terminator
168
+ * (`. `, `! `, `? `, `; `) inside the `[HEADLINE_SOFT_MIN,
169
+ * TITLE_MAX_LENGTH]` window. Common abbreviations (`Q1.`, `Q2.`,
170
+ * `H1.`, `H2.`, `Mr.`, `Mrs.`, `e.g.`, `i.e.`, `vs.`) are skipped
171
+ * so they don't terminate the sentence prematurely. When no
172
+ * acceptable terminator exists in the window, returns the entire
173
+ * input unchanged so {@link truncateTitle} can handle clause-boundary
174
+ * truncation downstream.
175
+ *
176
+ * This produces journalistically clean titles even for the
177
+ * propositions / committee-reports cases where the BLUF paragraph
178
+ * opens with a single long sentence that exceeds 140 chars —
179
+ * `truncateTitle` then breaks on a clause boundary, and the result is
180
+ * still grammatical because the input was a sentence prefix rather
181
+ * than an arbitrary paragraph slice.
182
+ *
183
+ * @param paragraph - Prose paragraph (post-{@link stripInlineMarkdown})
184
+ * @returns First sentence, or the original paragraph when none can be
185
+ * identified within the soft-min window
186
+ */
187
+ export declare function extractFirstSentence(paragraph: string): string;
188
+ //# sourceMappingURL=text-utils.d.ts.map