euparliamentmonitor 0.8.19 → 0.8.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/package.json +7 -7
  2. package/scripts/constants/language-articles.d.ts +4 -0
  3. package/scripts/constants/language-articles.js +20 -0
  4. package/scripts/constants/language-ui.d.ts +8 -8
  5. package/scripts/constants/language-ui.js +64 -64
  6. package/scripts/constants/languages.d.ts +2 -2
  7. package/scripts/constants/languages.js +2 -2
  8. package/scripts/generators/news-enhanced.js +13 -3
  9. package/scripts/generators/pipeline/analysis-classification.d.ts +49 -0
  10. package/scripts/generators/pipeline/analysis-classification.js +333 -0
  11. package/scripts/generators/pipeline/analysis-existing.d.ts +67 -0
  12. package/scripts/generators/pipeline/analysis-existing.js +547 -0
  13. package/scripts/generators/pipeline/analysis-helpers.d.ts +140 -0
  14. package/scripts/generators/pipeline/analysis-helpers.js +266 -0
  15. package/scripts/generators/pipeline/analysis-risk.d.ts +49 -0
  16. package/scripts/generators/pipeline/analysis-risk.js +417 -0
  17. package/scripts/generators/pipeline/analysis-stage.d.ts +19 -39
  18. package/scripts/generators/pipeline/analysis-stage.js +219 -1704
  19. package/scripts/generators/pipeline/analysis-threats.d.ts +41 -0
  20. package/scripts/generators/pipeline/analysis-threats.js +142 -0
  21. package/scripts/generators/pipeline/fetch-stage.d.ts +25 -15
  22. package/scripts/generators/pipeline/fetch-stage.js +293 -117
  23. package/scripts/generators/strategies/article-strategy.d.ts +126 -7
  24. package/scripts/generators/strategies/article-strategy.js +491 -1
  25. package/scripts/generators/strategies/breaking-news-strategy.js +98 -8
  26. package/scripts/generators/strategies/committee-reports-strategy.js +23 -2
  27. package/scripts/generators/strategies/month-ahead-strategy.js +23 -2
  28. package/scripts/generators/strategies/monthly-review-strategy.js +13 -1
  29. package/scripts/generators/strategies/motions-strategy.js +15 -1
  30. package/scripts/generators/strategies/propositions-strategy.js +15 -1
  31. package/scripts/generators/strategies/week-ahead-strategy.js +19 -1
  32. package/scripts/generators/strategies/weekly-review-strategy.js +17 -1
  33. package/scripts/generators/synthesis-summary.d.ts +93 -0
  34. package/scripts/generators/synthesis-summary.js +364 -0
  35. package/scripts/index.d.ts +5 -2
  36. package/scripts/index.js +6 -1
  37. package/scripts/mcp/ep-mcp-client.d.ts +34 -1
  38. package/scripts/mcp/ep-mcp-client.js +110 -2
  39. package/scripts/mcp/mcp-connection.d.ts +3 -1
  40. package/scripts/mcp/mcp-connection.js +35 -4
  41. package/scripts/templates/article-template.js +24 -22
  42. package/scripts/templates/section-builders.js +2 -5
  43. package/scripts/types/index.d.ts +2 -1
  44. package/scripts/types/mcp.d.ts +7 -0
  45. package/scripts/types/political-classification.d.ts +1 -1
  46. package/scripts/types/quality.d.ts +9 -6
  47. package/scripts/types/significance.d.ts +130 -0
  48. package/scripts/types/significance.js +4 -0
  49. package/scripts/utils/article-quality-scorer.d.ts +13 -11
  50. package/scripts/utils/article-quality-scorer.js +36 -23
  51. package/scripts/utils/file-utils.d.ts +2 -2
  52. package/scripts/utils/file-utils.js +2 -2
  53. package/scripts/utils/html-sanitize.d.ts +10 -0
  54. package/scripts/utils/html-sanitize.js +32 -0
  55. package/scripts/utils/political-classification.d.ts +8 -7
  56. package/scripts/utils/political-classification.js +8 -7
  57. package/scripts/utils/political-risk-assessment.d.ts +1 -1
  58. package/scripts/utils/political-risk-assessment.js +1 -1
  59. package/scripts/utils/significance-scoring.d.ts +97 -0
  60. package/scripts/utils/significance-scoring.js +190 -0
@@ -1,14 +1,131 @@
1
- /**
2
- * @module Generators/Strategies/ArticleStrategy
3
- * @description Base interface and shared types for article generation strategies.
4
- * Each strategy encapsulates the fetch, build, and metadata logic for one
5
- * {@link ArticleCategory}, making it trivial to add new article types without
6
- * touching the orchestration layer.
7
- */
8
1
  import type { ArticleCategory } from '../../types/index.js';
9
2
  import type { LanguageCode } from '../../types/index.js';
10
3
  import type { ArticleSource } from '../../types/index.js';
11
4
  import type { EuropeanParliamentMCPClient } from '../../mcp/ep-mcp-client.js';
5
+ /** Content of a single analysis markdown file loaded by {@link loadAnalysisContext} */
6
+ export interface AnalysisFileContent {
7
+ /** Canonical method ID: the frontmatter `method:` value, or the filename without `.md` when the frontmatter has none */
8
+ readonly method: string;
9
+ /** Name of the analysis subdirectory the file was read from (e.g. 'classification', 'risk-scoring') */
10
+ readonly subdir: string;
11
+ /** Raw markdown content as read from disk (frontmatter included) */
12
+ readonly content: string;
13
+ /** Absolute file path on disk */
14
+ readonly filePath: string;
15
+ }
16
+ /**
17
+ * Analysis context loaded from the analysis pipeline output directory.
18
+ *
19
+ * Strategies call {@link loadAnalysisContext} during {@link ArticleStrategy.fetchData}
20
+ * and store the result in their data payload. The context is then consumed by
21
+ * {@link ArticleStrategy.buildContent} to enrich articles with analytical depth.
22
+ *
23
+ * When analysis output is not available (e.g. the analysis stage was skipped),
24
+ * the context is `null` and strategies degrade gracefully to their existing
25
+ * behaviour.
26
+ */
27
+ export interface LoadedAnalysisContext {
28
+ /** ISO date of the analysis (YYYY-MM-DD, as passed to {@link loadAnalysisContext}) */
29
+ readonly date: string;
30
+ /** Resolved analysis directory path */
31
+ readonly analysisDir: string;
32
+ /** Parsed manifest.json (null when the manifest is missing or cannot be read/parsed) */
33
+ readonly manifest: Record<string, unknown> | null;
34
+ /** `overallConfidence` value from the manifest (null when there is no manifest or the field is not a string) */
35
+ readonly overallConfidence: string | null;
36
+ /** Loaded analysis files keyed by method name; a file whose filename differs from its frontmatter method is also registered under the filename without `.md` */
37
+ readonly files: ReadonlyMap<string, AnalysisFileContent>;
38
+ }
39
+ /**
40
+ * Load analysis context from the analysis pipeline output directory.
41
+ *
42
+ * Scans `{baseDir}/{date}/{articleTypeSlug}/` for a `manifest.json` and
43
+ * analysis markdown files in known subdirectories. When the directory
44
+ * does not exist or yields neither a manifest nor any analysis file, returns `null` for graceful
45
+ * degradation — strategies then behave exactly as before.
46
+ *
47
+ * Handles suffixed directories (e.g. `breaking-2`, `breaking-3`) by
48
+ * picking the highest numeric suffix (the unsuffixed directory counts as 0).
49
+ *
50
+ * Resolution order for base directory:
51
+ * 1. Explicit `baseDir` parameter (when non-default)
52
+ * 2. `EP_ANALYSIS_DIR` environment variable (trimmed; blank values are ignored)
53
+ * 3. Default `'analysis/daily'`
54
+ *
55
+ * Resolution order for slug:
56
+ * 1. `EP_ANALYSIS_SLUG` environment variable (trimmed; blank values are ignored)
57
+ * 2. The `articleTypeSlug` parameter passed by each strategy
58
+ *
59
+ * @param date - ISO 8601 date (YYYY-MM-DD) of the analysis run; any other format yields `null`
60
+ * @param articleTypeSlug - Article type slug (e.g. 'breaking', 'week-ahead'); the resolved slug must consist of lowercase letters, digits and hyphens
61
+ * @param baseDir - Base analysis directory (defaults to 'analysis/daily')
62
+ * @returns Loaded analysis context, or null when validation fails or no analysis output is found
63
+ */
64
+ export declare function loadAnalysisContext(date: string, articleTypeSlug: string, baseDir?: string): LoadedAnalysisContext | null;
65
+ /**
66
+ * Extract the `method:` value from YAML frontmatter in a markdown string.
67
+ *
68
+ * Analysis files produced by the pipeline embed the canonical method ID in
69
+ * their frontmatter (e.g. `method: coalition-analysis`). When this differs
70
+ * from the filename (e.g. `coalition-dynamics.md`), the frontmatter value is
71
+ * the authoritative key for strategy lookups.
72
+ *
73
+ * @param content - Raw markdown content; frontmatter is only recognised when the string starts with `---`
74
+ * @returns The trimmed frontmatter `method` value (quotes are not removed), or `null` if absent/unparseable
75
+ */
76
+ export declare function extractFrontmatterMethod(content: string): string | null;
77
+ /**
78
+ * Check whether an analysis file still contains scaffold/template markers
79
+ * (i.e. the AI agent did not fill in every part of the analysis).
80
+ *
81
+ * @param content - Raw markdown file content
82
+ * @returns `true` when any scaffold marker occurs anywhere in the content
83
+ */
84
+ export declare function isScaffoldContent(content: string): boolean;
85
+ /**
86
+ * Extract the first meaningful paragraph from an analysis markdown file.
87
+ * Strips YAML frontmatter, headings, fenced code blocks, tables, blockquotes
88
+ * and `*`-based bold/italic markers. Scaffold content yields an empty string.
89
+ *
90
+ * @param content - Raw markdown content
91
+ * @param maxLength - Maximum character length to return (default 500); longer summaries are cut to `maxLength - 3` characters plus `...`
92
+ * @returns Extracted summary text or empty string
93
+ */
94
+ export declare function extractAnalysisSummary(content: string, maxLength?: number): string;
95
+ /**
96
+ * Extract multiple meaningful paragraphs from an analysis markdown file.
97
+ * Provides richer content than the single-paragraph extractAnalysisSummary.
98
+ *
99
+ * @param content - Raw markdown file content
100
+ * @param maxParagraphs - Maximum number of paragraphs to return (default 3)
101
+ * @param maxTotalLength - Maximum total character length (default 1500); only the first paragraph is truncated to fit, a later paragraph that would exceed the budget ends the extraction
102
+ * @returns Array of extracted prose paragraphs (empty for scaffold content or when no paragraph is long enough)
103
+ */
104
+ export declare function extractAnalysisParagraphs(content: string, maxParagraphs?: number, maxTotalLength?: number): readonly string[];
105
+ /**
106
+ * Check whether an analysis file contains substantive AI-produced content
107
+ * (as opposed to pipeline scaffolding or empty templates).
108
+ *
109
+ * @param content - Raw markdown file content
110
+ * @returns `true` when the file has no scaffold markers and its prose lines contain at least 5 words
111
+ */
112
+ export declare function hasSubstantiveAIContent(content: string): boolean;
113
+ /**
114
+ * Build an HTML section summarising analysis pipeline insights.
115
+ *
116
+ * Creates a structured `<section class="analysis-pipeline-insights">` element
117
+ * containing key findings from loaded analysis files. Each strategy passes
118
+ * the methods it considers relevant; only those with loaded content are rendered.
119
+ *
120
+ * Skips methods whose file is scaffold/template content or yields no paragraph.
121
+ * Renders up to 2 paragraphs (800 characters in total) per method.
122
+ *
123
+ * @param ctx - Loaded analysis context (null-safe: returns empty string)
124
+ * @param relevantMethods - Method names this strategy wants to display, rendered in the given order
125
+ * @param lang - Target language code (used for localized section heading)
126
+ * @returns HTML string (empty when there is no context or no method yields content)
127
+ */
128
+ export declare function buildAnalysisInsightsSection(ctx: LoadedAnalysisContext | null | undefined, relevantMethods: readonly string[], lang: LanguageCode): string;
12
129
  /**
13
130
  * Minimum payload every strategy must carry: the article's publication date.
14
131
  * Strategy-specific data interfaces extend this base.
@@ -16,6 +133,8 @@ import type { EuropeanParliamentMCPClient } from '../../mcp/ep-mcp-client.js';
16
133
  export interface ArticleData {
17
134
  /** ISO 8601 publication date (YYYY-MM-DD) */
18
135
  readonly date: string;
136
+ /** Loaded analysis context from the analysis pipeline (when available) */
137
+ readonly analysisContext?: LoadedAnalysisContext | null | undefined;
19
138
  }
20
139
  /**
21
140
  * Resolved title, subtitle, keywords, and optional sources for one
@@ -1,4 +1,494 @@
1
1
  // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
2
2
  // SPDX-License-Identifier: Apache-2.0
3
- export {};
3
+ /**
4
+ * @module Generators/Strategies/ArticleStrategy
5
+ * @description Base interface and shared types for article generation strategies.
6
+ * Each strategy encapsulates the fetch, build, and metadata logic for one
7
+ * {@link ArticleCategory}, making it trivial to add new article types without
8
+ * touching the orchestration layer.
9
+ *
10
+ * Includes utilities for loading analysis pipeline output so that strategies
11
+ * can consume classification, threat assessment, risk scoring, and other
12
+ * analysis artifacts produced by the analysis stage.
13
+ */
14
+ import fs from 'fs';
15
+ import path from 'path';
16
+ import { escapeHTML } from '../../utils/file-utils.js';
17
+ import { ANALYSIS_INSIGHTS_HEADING, getLocalizedString } from '../../constants/languages.js';
18
+ // ─── Analysis loading defaults ───────────────────────────────────────────────
19
+ /** Default base directory for analysis output (relative; resolved with `path.resolve` when used) */
20
+ const DEFAULT_ANALYSIS_BASE_DIR = 'analysis/daily';
21
+ /**
22
+ * Environment variable name for overriding the analysis base directory; read only when the caller leaves `baseDir` at its default.
23
+ * Set by the orchestration layer when `--analysis-dir` is provided.
24
+ */
25
+ const ENV_ANALYSIS_DIR = 'EP_ANALYSIS_DIR';
26
+ /**
27
+ * Environment variable name for overriding the analysis slug; a non-blank value takes precedence over the per-strategy slug.
28
+ * Set by the orchestration layer with the resolved slug from
29
+ * `deriveArticleTypeSlug()`, so multi-type runs and custom analysis
30
+ * directories are correctly resolved without hard-coding per-strategy slugs.
31
+ */
32
+ const ENV_ANALYSIS_SLUG = 'EP_ANALYSIS_SLUG';
33
+ /** Analysis subdirectories scanned, in this order, for `.md` files; a later file replaces an earlier one registered under the same key */
34
+ const ANALYSIS_SUBDIRS = [
35
+ 'classification',
36
+ 'threat-assessment',
37
+ 'risk-scoring',
38
+ 'existing',
39
+ ];
40
+ /**
41
+ * Load analysis context from the analysis pipeline output directory.
42
+ *
43
+ * Scans `{baseDir}/{date}/{articleTypeSlug}/` for a `manifest.json` and
44
+ * analysis markdown files in known subdirectories. When the directory
45
+ * does not exist or yields neither a manifest nor any analysis file, returns `null` for graceful
46
+ * degradation — strategies then behave exactly as before.
47
+ *
48
+ * Handles suffixed directories (e.g. `breaking-2`, `breaking-3`) by
49
+ * picking the highest numeric suffix (the unsuffixed directory counts as 0).
50
+ *
51
+ * Resolution order for base directory:
52
+ * 1. Explicit `baseDir` parameter (when non-default)
53
+ * 2. `EP_ANALYSIS_DIR` environment variable (trimmed; blank values are ignored)
54
+ * 3. Default `'analysis/daily'`
55
+ *
56
+ * Resolution order for slug:
57
+ * 1. `EP_ANALYSIS_SLUG` environment variable (trimmed; blank values are ignored)
58
+ * 2. The `articleTypeSlug` parameter passed by each strategy
59
+ *
60
+ * @param date - ISO 8601 date (YYYY-MM-DD) of the analysis run; any other format yields `null`
61
+ * @param articleTypeSlug - Article type slug (e.g. 'breaking', 'week-ahead'); the resolved slug must consist of lowercase letters, digits and hyphens
62
+ * @param baseDir - Base analysis directory (defaults to 'analysis/daily')
63
+ * @returns Loaded analysis context, or null when validation fails or no analysis output is found
64
+ */
65
+ export function loadAnalysisContext(date, articleTypeSlug, baseDir = DEFAULT_ANALYSIS_BASE_DIR) {
66
+ // Validate date shape (four, two and two digits separated by hyphens) — this also rejects path traversal
67
+ if (!/^\d{4}-\d{2}-\d{2}$/u.test(date))
68
+ return null;
69
+ // Resolve base dir: prefer explicit non-default param, then env var, then default
70
+ const resolvedBaseDir = baseDir !== DEFAULT_ANALYSIS_BASE_DIR
71
+ ? baseDir
72
+ : process.env[ENV_ANALYSIS_DIR]?.trim() || DEFAULT_ANALYSIS_BASE_DIR;
73
+ // Resolve slug: prefer env var override, then per-strategy slug
74
+ const resolvedSlug = process.env[ENV_ANALYSIS_SLUG]?.trim() || articleTypeSlug;
75
+ // Validate slug: lowercase letters, digits and hyphens, starting with a letter or digit — no path separators or dots
76
+ if (!/^[\da-z][\da-z-]*$/u.test(resolvedSlug))
77
+ return null;
78
+ const dateDir = path.resolve(resolvedBaseDir, date);
79
+ if (!fs.existsSync(dateDir))
80
+ return null;
81
+ // Find the best matching analysis directory (exact or latest suffixed)
82
+ const analysisDir = findAnalysisDirectory(dateDir, resolvedSlug);
83
+ if (!analysisDir)
84
+ return null;
85
+ // Load manifest.json (null when missing or unparseable)
86
+ const manifest = loadManifest(analysisDir);
87
+ // Load analysis markdown files from known subdirectories
88
+ const files = loadAnalysisFiles(analysisDir);
89
+ if (files.size === 0 && !manifest)
90
+ return null;
91
+ const overallConfidence = manifest && typeof manifest['overallConfidence'] === 'string'
92
+ ? manifest['overallConfidence']
93
+ : null;
94
+ return { date, analysisDir, manifest, overallConfidence, files };
95
+ }
96
+ /**
97
+ * Find the best matching analysis directory for an article type slug.
98
+ * Scans every entry of `dateDir` in one pass, treating the exact name as suffix 0
99
+ * and `slug-N` as suffix N, and picks the highest suffix number.
100
+ *
101
+ * @param dateDir - Date-scoped parent directory
102
+ * @param slug - Article type slug
103
+ * @returns Resolved directory path, or null when nothing matches or the directory cannot be read
104
+ */
105
+ function findAnalysisDirectory(dateDir, slug) {
106
+ // Always scan for all matching directories (exact + suffixed) to find the latest
107
+ try {
108
+ const entries = fs.readdirSync(dateDir, { withFileTypes: true });
109
+ const suffixPattern = new RegExp(`^${escapeRegExp(slug)}(?:-(\\d+))?$`);
110
+ let bestPath = null;
111
+ let bestSuffix = -1; // below 0 so that an unsuffixed (suffix 0) directory can be selected
112
+ for (const entry of entries) {
113
+ if (!entry.isDirectory())
114
+ continue;
115
+ const match = suffixPattern.exec(entry.name);
116
+ if (match) {
117
+ const suffix = match[1] ? parseInt(match[1], 10) : 0;
118
+ if (suffix > bestSuffix) { // strict comparison: on equal suffixes the first entry read is kept
119
+ bestSuffix = suffix;
120
+ bestPath = path.join(dateDir, entry.name);
121
+ }
122
+ }
123
+ }
124
+ return bestPath;
125
+ }
126
+ catch {
127
+ return null;
128
+ }
129
+ }
130
+ /**
131
+ * Escape regex metacharacters in a string by prefixing each one with a backslash.
132
+ *
133
+ * @param str - Input string to escape
134
+ * @returns Escaped string safe for use in RegExp constructor
135
+ */
136
+ function escapeRegExp(str) {
137
+ return str.replace(/[.*+?^${}()|[\]\\]/gu, '\\$&');
138
+ }
139
+ /**
140
+ * Load and parse `manifest.json` from an analysis directory. The parsed value is returned as-is, without shape validation.
141
+ *
142
+ * @param analysisDir - Analysis output directory
143
+ * @returns Parsed manifest, or null when the file is missing, unreadable, or not valid JSON
144
+ */
145
+ function loadManifest(analysisDir) {
146
+ const manifestPath = path.join(analysisDir, 'manifest.json');
147
+ try {
148
+ if (!fs.existsSync(manifestPath))
149
+ return null;
150
+ const raw = fs.readFileSync(manifestPath, 'utf-8');
151
+ return JSON.parse(raw);
152
+ }
153
+ catch {
154
+ return null;
155
+ }
156
+ }
157
+ /**
158
+ * Extract the `method:` value from YAML frontmatter in a markdown string.
159
+ *
160
+ * Analysis files produced by the pipeline embed the canonical method ID in
161
+ * their frontmatter (e.g. `method: coalition-analysis`). When this differs
162
+ * from the filename (e.g. `coalition-dynamics.md`), the frontmatter value is
163
+ * the authoritative key for strategy lookups. The frontmatter is taken to be the text between the leading `---` and the next `---`.
164
+ *
165
+ * @param content - Raw markdown content; frontmatter is only recognised when the string starts with `---`
166
+ * @returns The trimmed frontmatter `method` value (quotes are not removed), or `null` if absent/unparseable
167
+ */
168
+ export function extractFrontmatterMethod(content) {
169
+ if (!content.startsWith('---'))
170
+ return null;
171
+ const endIdx = content.indexOf('---', 3);
172
+ if (endIdx === -1)
173
+ return null;
174
+ const frontmatter = content.slice(3, endIdx);
175
+ const match = /^method:\s*(.+)$/mu.exec(frontmatter);
176
+ return match?.[1]?.trim() ?? null;
177
+ }
178
+ /**
179
+ * Load a single analysis markdown file, register it in the map by both its
180
+ * frontmatter-derived method key and filename-derived alias. Existing entries under the same key are replaced; read errors are swallowed.
181
+ *
182
+ * @param files - Map to register the file content into (mutated in place)
183
+ * @param filePath - Absolute path to the .md file
184
+ * @param entry - Filename (e.g. `coalition-dynamics.md`)
185
+ * @param subdir - Parent subdirectory name (e.g. `existing`)
186
+ */
187
+ function loadSingleAnalysisFile(files, filePath, entry, subdir) {
188
+ try {
189
+ const content = fs.readFileSync(filePath, 'utf-8');
190
+ const filenameKey = entry.replace(/\.md$/u, '');
191
+ const frontmatterMethod = extractFrontmatterMethod(content);
192
+ // Primary key: frontmatter method (canonical ID), fallback to filename
193
+ const method = frontmatterMethod ?? filenameKey;
194
+ const fileContent = { method, subdir, content, filePath };
195
+ files.set(method, fileContent);
196
+ // Register filename alias (same object) when it differs from the frontmatter method
197
+ if (frontmatterMethod && frontmatterMethod !== filenameKey) {
198
+ files.set(filenameKey, fileContent);
199
+ }
200
+ }
201
+ catch {
202
+ // Skip unreadable files
203
+ }
204
+ }
205
+ /**
206
+ * Load analysis markdown files (`.md` entries) from the subdirectories listed in `ANALYSIS_SUBDIRS`.
207
+ *
208
+ * Keys the returned map by the `method:` value extracted from the file's
209
+ * YAML frontmatter (canonical method ID). When the filename differs from
210
+ * the frontmatter method (e.g. `coalition-dynamics.md` with frontmatter
211
+ * `method: coalition-analysis`), both the frontmatter key and the
212
+ * filename-derived key are registered so that callers can look up files
213
+ * by either identifier. Missing or unreadable subdirectories are skipped.
214
+ *
215
+ * @param analysisDir - Analysis output directory
216
+ * @returns Map of method name → file content (empty when nothing could be loaded)
217
+ */
218
+ function loadAnalysisFiles(analysisDir) {
219
+ const files = new Map();
220
+ for (const subdir of ANALYSIS_SUBDIRS) {
221
+ const subdirPath = path.join(analysisDir, subdir);
222
+ try {
223
+ if (!fs.existsSync(subdirPath) || !fs.statSync(subdirPath).isDirectory())
224
+ continue;
225
+ const entries = fs.readdirSync(subdirPath);
226
+ for (const entry of entries) {
227
+ if (!entry.endsWith('.md'))
228
+ continue;
229
+ loadSingleAnalysisFile(files, path.join(subdirPath, entry), entry, subdir);
230
+ }
231
+ }
232
+ catch {
233
+ // Skip unreadable directories
234
+ }
235
+ }
236
+ return files;
237
+ }
238
+ /**
239
+ * Check whether a line is a fenced code block delimiter, a markdown table row, or an HTML-like line.
240
+ * Used to filter out non-prose content from analysis summaries.
241
+ *
242
+ * @param trimmed - Trimmed line of text to check
243
+ * @returns `true` when the line is non-prose content (code, table, HTML)
244
+ */
245
+ function isNonProseContent(trimmed) {
246
+ // Fenced code block delimiters
247
+ if (trimmed.startsWith('```'))
248
+ return true;
249
+ // Markdown table rows — lines starting with | that contain at least one further |
250
+ if (trimmed.startsWith('|') && trimmed.includes('|', 1))
251
+ return true;
252
+ // Table separator rows (e.g. |---|---|): only whitespace, |, : and -, with at least one |
253
+ if (/^[\s|:|-]+$/u.test(trimmed) && trimmed.includes('|'))
254
+ return true;
255
+ // HTML-like content: line starts with < and ends with >
256
+ if (trimmed.startsWith('<') && trimmed.endsWith('>'))
257
+ return true;
258
+ return false;
259
+ }
260
+ /** Patterns that indicate scaffold/placeholder content — a single match marks the whole file as scaffold */
261
+ const SCAFFOLD_PATTERNS = [
262
+ /\[TO BE FILLED BY AI AGENT/i,
263
+ /\[AI_ANALYSIS_REQUIRED\]/i,
264
+ /\[REQUIRED\]/i,
265
+ /\[\?\]/,
266
+ /Quality gate: minimum \d+ words/i,
267
+ /Instructions for AI Agent/i,
268
+ ];
269
+ /**
270
+ * Check whether an analysis file still contains scaffold/template markers
271
+ * (i.e. the AI agent did not fill in every part of the analysis).
272
+ *
273
+ * @param content - Raw markdown file content
274
+ * @returns `true` when any scaffold pattern matches anywhere in the content
275
+ */
276
+ export function isScaffoldContent(content) {
277
+ return SCAFFOLD_PATTERNS.some((pattern) => pattern.test(content));
278
+ }
279
+ /**
280
+ * Check whether a line should be included as prose content.
281
+ *
282
+ * @param trimmed - Trimmed line text
283
+ * @returns `true` when the line is valid prose (not empty, heading, `---` separator, blockquote, or non-prose)
284
+ */
285
+ function isProseContent(trimmed) {
286
+ if (trimmed === '')
287
+ return false;
288
+ if (trimmed.startsWith('#'))
289
+ return false;
290
+ if (trimmed.startsWith('---'))
291
+ return false;
292
+ if (trimmed.startsWith('>'))
293
+ return false;
294
+ if (isNonProseContent(trimmed))
295
+ return false;
296
+ return true;
297
+ }
298
+ /**
299
+ * Strip asterisk-based markdown emphasis (`**bold**`, `*italic*`) from a text string; other markup is left untouched.
300
+ *
301
+ * @param text - Raw markdown text
302
+ * @returns Plain text with bold/italic markers removed
303
+ */
304
+ function stripMarkdownFormatting(text) {
305
+ return text.replace(/\*\*([^*]+)\*\*/g, '$1').replace(/\*([^*]+)\*/g, '$1');
306
+ }
307
+ /**
308
+ * Prepare analysis content body by stripping leading frontmatter and fenced code blocks.
309
+ *
310
+ * @param content - Raw markdown content
311
+ * @returns Body text ready for paragraph extraction, or empty string for scaffold content
312
+ */
313
+ function prepareAnalysisBody(content) {
314
+ if (isScaffoldContent(content))
315
+ return '';
316
+ let body = content;
317
+ if (body.startsWith('---')) {
318
+ const endIdx = body.indexOf('---', 3);
319
+ if (endIdx !== -1)
320
+ body = body.slice(endIdx + 3);
321
+ }
322
+ return body.replace(/```[\s\S]*?```/g, '');
323
+ }
324
+ /**
325
+ * Collect prose paragraphs from prepared analysis body text. A blank line ends the current paragraph; non-prose lines are skipped without ending it.
326
+ *
327
+ * @param body - Analysis body with frontmatter/code blocks removed
328
+ * @returns Array of prose paragraphs, each with its lines joined by single spaces
329
+ */
330
+ function collectParagraphs(body) {
331
+ const lines = body.split('\n');
332
+ const paragraphs = [];
333
+ let current = '';
334
+ for (const line of lines) {
335
+ const trimmed = line.trim();
336
+ if (trimmed === '' && current) {
337
+ paragraphs.push(current.trim());
338
+ current = '';
339
+ }
340
+ else if (isProseContent(trimmed)) {
341
+ current += (current ? ' ' : '') + stripMarkdownFormatting(trimmed);
342
+ }
343
+ }
344
+ if (current)
345
+ paragraphs.push(current.trim());
346
+ return paragraphs;
347
+ }
348
+ /**
349
+ * Extract the first meaningful paragraph (longer than 20 characters) from an analysis markdown file.
350
+ * Strips YAML frontmatter, headings, fenced code blocks, tables, blockquotes
351
+ * and `*`-based bold/italic markers. Scaffold content yields an empty string.
352
+ *
353
+ * @param content - Raw markdown content
354
+ * @param maxLength - Maximum character length to return (default 500); longer summaries are cut to `maxLength - 3` characters plus `...`
355
+ * @returns Extracted summary text or empty string
356
+ */
357
+ export function extractAnalysisSummary(content, maxLength = 500) {
358
+ const body = prepareAnalysisBody(content);
359
+ if (!body)
360
+ return '';
361
+ const paragraphs = collectParagraphs(body);
362
+ const meaningful = filterMeaningfulParagraphs(paragraphs, 20);
363
+ const summary = meaningful[0] ?? '';
364
+ return summary.length > maxLength ? summary.slice(0, maxLength - 3) + '...' : summary;
365
+ }
366
+ /**
367
+ * Filter paragraphs to only include meaningful prose content.
368
+ * Removes short fragments and data-only paragraphs made up solely of digits, whitespace, pipes and dashes (e.g. "— | — | —").
369
+ *
370
+ * @param paragraphs - Array of paragraph strings to filter
371
+ * @param minLength - Length threshold; a paragraph must be strictly longer than this to be kept
372
+ * @returns Filtered array of meaningful paragraphs
373
+ */
374
+ function filterMeaningfulParagraphs(paragraphs, minLength) {
375
+ return paragraphs.filter((p) => p.length > minLength && !/^[\d\s|—–-]+$/u.test(p) && !/^\s*—\s*$/u.test(p));
376
+ }
377
+ /**
378
+ * Extract multiple meaningful paragraphs (each longer than 50 characters) from an analysis markdown file.
379
+ * Provides richer content than the single-paragraph extractAnalysisSummary.
380
+ *
381
+ * @param content - Raw markdown file content
382
+ * @param maxParagraphs - Maximum number of paragraphs to return (default 3)
383
+ * @param maxTotalLength - Maximum total character length (default 1500); only the first paragraph is truncated to fit, a later paragraph that would exceed the budget ends the extraction
384
+ * @returns Array of extracted prose paragraphs (empty for scaffold content or when no paragraph is long enough)
385
+ */
386
+ export function extractAnalysisParagraphs(content, maxParagraphs = 3, maxTotalLength = 1500) {
387
+ const body = prepareAnalysisBody(content);
388
+ if (!body)
389
+ return [];
390
+ const paragraphs = collectParagraphs(body);
391
+ const meaningful = filterMeaningfulParagraphs(paragraphs, 50);
392
+ const result = [];
393
+ let totalLength = 0;
394
+ for (const p of meaningful) {
395
+ if (result.length >= maxParagraphs)
396
+ break;
397
+ const remaining = maxTotalLength - totalLength;
398
+ if (remaining <= 0)
399
+ break;
400
+ if (p.length > remaining) {
401
+ // Truncate overlong paragraph when result is still empty so we
402
+ // never return [] for content that has substantive prose.
403
+ if (result.length === 0) {
404
+ result.push(p.slice(0, remaining).trimEnd());
405
+ }
406
+ break;
407
+ }
408
+ result.push(p);
409
+ totalLength += p.length;
410
+ }
411
+ return result;
412
+ }
413
+ /**
414
+ * Check whether an analysis file contains substantive AI-produced content
415
+ * (as opposed to pipeline scaffolding or empty templates).
416
+ *
417
+ * @param content - Raw markdown file content
418
+ * @returns `true` when the file has no scaffold markers and its prose lines contain at least 5 words
419
+ */
420
+ export function hasSubstantiveAIContent(content) {
421
+ const body = prepareAnalysisBody(content);
422
+ if (!body)
423
+ return false;
424
+ // Count words in prose lines (not tables, not headings, not blockquotes); isProseContent already rejects blockquotes, so the extra `>` check below is redundant but harmless
425
+ let wordCount = 0;
426
+ for (const line of body.split('\n')) {
427
+ const trimmed = line.trim();
428
+ if (isProseContent(trimmed) && !trimmed.startsWith('>')) {
429
+ wordCount += trimmed.split(/\s+/u).length;
430
+ }
431
+ }
432
+ // Minimum 5 words of prose — primarily relies on scaffold detection above
433
+ return wordCount >= 5;
434
+ }
435
+ /**
436
+ * Build an HTML section summarising analysis pipeline insights.
437
+ *
438
+ * Creates a structured `<section class="analysis-pipeline-insights">` element
439
+ * containing key findings from loaded analysis files. Each strategy passes
440
+ * the methods it considers relevant; only those with loaded content are rendered.
441
+ *
442
+ * Skips methods whose file is scaffold/template content or yields no paragraph.
443
+ * Renders up to 2 paragraphs (800 characters in total) per method; all inserted text is passed through `escapeHTML`.
444
+ *
445
+ * @param ctx - Loaded analysis context (null-safe: returns empty string)
446
+ * @param relevantMethods - Method names this strategy wants to display, rendered in the given order
447
+ * @param lang - Target language code (used for localized section heading)
448
+ * @returns HTML string (empty when there is no context or no method yields content)
449
+ */
450
+ export function buildAnalysisInsightsSection(ctx, relevantMethods, lang) {
451
+ if (!ctx)
452
+ return '';
453
+ const items = [];
454
+ for (const method of relevantMethods) {
455
+ const file = ctx.files.get(method);
456
+ if (!file)
457
+ continue;
458
+ // `extractAnalysisParagraphs()` returns [] for scaffold content and drops
459
+ // paragraphs of 50 characters or fewer, so use it as the single gate here.
460
+ const paragraphs = extractAnalysisParagraphs(file.content, 2, 800);
461
+ if (paragraphs.length === 0)
462
+ continue;
463
+ const label = formatMethodLabel(method);
464
+ const paragraphHtml = paragraphs.map((p) => `<p>${escapeHTML(p)}</p>`).join('\n');
465
+ items.push(`<div class="analysis-insight-item" data-method="${escapeHTML(method)}">\n` +
466
+ `<h4>${escapeHTML(label)}</h4>\n` +
467
+ paragraphHtml +
468
+ '\n' +
469
+ `</div>`);
470
+ }
471
+ if (items.length === 0)
472
+ return '';
473
+ const heading = getLocalizedString(ANALYSIS_INSIGHTS_HEADING, lang);
474
+ const confidence = ctx.overallConfidence
475
+ ? ` <span class="confidence-badge">${escapeHTML(ctx.overallConfidence)}</span>`
476
+ : '';
477
+ return (`<section class="analysis-pipeline-insights" role="region" aria-label="${escapeHTML(heading)}">\n` +
478
+ `<h2>${escapeHTML(heading)}${confidence}</h2>\n` +
479
+ items.join('\n') +
480
+ `\n</section>\n`);
481
+ }
482
+ /**
483
+ * Format an analysis method identifier into a human-readable label by splitting on hyphens and upper-casing the first character of each part.
484
+ *
485
+ * @param method - Method identifier (e.g. 'significance-classification')
486
+ * @returns Formatted label (e.g. 'Significance Classification')
487
+ */
488
+ function formatMethodLabel(method) {
489
+ return method
490
+ .split('-')
491
+ .map((w) => w.charAt(0).toUpperCase() + w.slice(1))
492
+ .join(' ');
493
+ }
4
494
  //# sourceMappingURL=article-strategy.js.map