euparliamentmonitor 0.8.19 → 0.8.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -7
- package/scripts/constants/language-articles.d.ts +4 -0
- package/scripts/constants/language-articles.js +20 -0
- package/scripts/constants/language-ui.d.ts +8 -8
- package/scripts/constants/language-ui.js +64 -64
- package/scripts/constants/languages.d.ts +2 -2
- package/scripts/constants/languages.js +2 -2
- package/scripts/generators/news-enhanced.js +13 -3
- package/scripts/generators/pipeline/analysis-classification.d.ts +49 -0
- package/scripts/generators/pipeline/analysis-classification.js +333 -0
- package/scripts/generators/pipeline/analysis-existing.d.ts +67 -0
- package/scripts/generators/pipeline/analysis-existing.js +547 -0
- package/scripts/generators/pipeline/analysis-helpers.d.ts +140 -0
- package/scripts/generators/pipeline/analysis-helpers.js +266 -0
- package/scripts/generators/pipeline/analysis-risk.d.ts +49 -0
- package/scripts/generators/pipeline/analysis-risk.js +417 -0
- package/scripts/generators/pipeline/analysis-stage.d.ts +19 -39
- package/scripts/generators/pipeline/analysis-stage.js +219 -1704
- package/scripts/generators/pipeline/analysis-threats.d.ts +41 -0
- package/scripts/generators/pipeline/analysis-threats.js +142 -0
- package/scripts/generators/pipeline/fetch-stage.d.ts +25 -15
- package/scripts/generators/pipeline/fetch-stage.js +293 -117
- package/scripts/generators/strategies/article-strategy.d.ts +126 -7
- package/scripts/generators/strategies/article-strategy.js +491 -1
- package/scripts/generators/strategies/breaking-news-strategy.js +98 -8
- package/scripts/generators/strategies/committee-reports-strategy.js +23 -2
- package/scripts/generators/strategies/month-ahead-strategy.js +23 -2
- package/scripts/generators/strategies/monthly-review-strategy.js +13 -1
- package/scripts/generators/strategies/motions-strategy.js +15 -1
- package/scripts/generators/strategies/propositions-strategy.js +15 -1
- package/scripts/generators/strategies/week-ahead-strategy.js +19 -1
- package/scripts/generators/strategies/weekly-review-strategy.js +17 -1
- package/scripts/generators/synthesis-summary.d.ts +93 -0
- package/scripts/generators/synthesis-summary.js +364 -0
- package/scripts/index.d.ts +5 -2
- package/scripts/index.js +6 -1
- package/scripts/mcp/ep-mcp-client.d.ts +34 -1
- package/scripts/mcp/ep-mcp-client.js +110 -2
- package/scripts/mcp/mcp-connection.d.ts +3 -1
- package/scripts/mcp/mcp-connection.js +35 -4
- package/scripts/templates/article-template.js +24 -22
- package/scripts/templates/section-builders.js +2 -5
- package/scripts/types/index.d.ts +2 -1
- package/scripts/types/mcp.d.ts +7 -0
- package/scripts/types/political-classification.d.ts +1 -1
- package/scripts/types/quality.d.ts +9 -6
- package/scripts/types/significance.d.ts +130 -0
- package/scripts/types/significance.js +4 -0
- package/scripts/utils/article-quality-scorer.d.ts +13 -11
- package/scripts/utils/article-quality-scorer.js +36 -23
- package/scripts/utils/file-utils.d.ts +2 -2
- package/scripts/utils/file-utils.js +2 -2
- package/scripts/utils/html-sanitize.d.ts +10 -0
- package/scripts/utils/html-sanitize.js +32 -0
- package/scripts/utils/political-classification.d.ts +8 -7
- package/scripts/utils/political-classification.js +8 -7
- package/scripts/utils/political-risk-assessment.d.ts +1 -1
- package/scripts/utils/political-risk-assessment.js +1 -1
- package/scripts/utils/significance-scoring.d.ts +97 -0
- package/scripts/utils/significance-scoring.js +190 -0
|
@@ -1,14 +1,131 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @module Generators/Strategies/ArticleStrategy
|
|
3
|
-
* @description Base interface and shared types for article generation strategies.
|
|
4
|
-
* Each strategy encapsulates the fetch, build, and metadata logic for one
|
|
5
|
-
* {@link ArticleCategory}, making it trivial to add new article types without
|
|
6
|
-
* touching the orchestration layer.
|
|
7
|
-
*/
|
|
8
1
|
import type { ArticleCategory } from '../../types/index.js';
|
|
9
2
|
import type { LanguageCode } from '../../types/index.js';
|
|
10
3
|
import type { ArticleSource } from '../../types/index.js';
|
|
11
4
|
import type { EuropeanParliamentMCPClient } from '../../mcp/ep-mcp-client.js';
|
|
5
|
+
/**
 * One analysis markdown file as loaded from disk.
 *
 * The same object may be registered in {@link LoadedAnalysisContext.files}
 * under more than one key (canonical method ID and filename-derived alias).
 */
export interface AnalysisFileContent {
    /** Key the file is primarily registered under: frontmatter `method:` value, else the filename without `.md` */
    readonly method: string;
    /** Name of the subdirectory the file was read from (e.g. 'classification', 'risk-scoring') */
    readonly subdir: string;
    /** Unmodified file text, YAML frontmatter included */
    readonly content: string;
    /** Path the file was read from */
    readonly filePath: string;
}
|
|
16
|
+
/**
 * Snapshot of what the analysis pipeline left on disk for one article run.
 *
 * Produced by {@link loadAnalysisContext} (typically from within
 * {@link ArticleStrategy.fetchData}) and carried in the strategy's data
 * payload so that {@link ArticleStrategy.buildContent} can enrich the
 * article with analytical material.
 *
 * A strategy receives `null` instead of a context when nothing usable was
 * found (for instance because the analysis stage was skipped); strategies
 * are expected to fall back to their pre-analysis behaviour in that case.
 */
export interface LoadedAnalysisContext {
    /** Date of the analysis run (YYYY-MM-DD) */
    readonly date: string;
    /** Analysis directory that was actually selected and read */
    readonly analysisDir: string;
    /** Contents of `manifest.json`, or null when it is missing or unreadable */
    readonly manifest: Record<string, unknown> | null;
    /** `overallConfidence` string taken from the manifest, or null when absent */
    readonly overallConfidence: string | null;
    /** Loaded analysis files, looked up by method name */
    readonly files: ReadonlyMap<string, AnalysisFileContent>;
}
|
|
39
|
+
/**
 * Read the analysis pipeline output for a given date and article type.
 *
 * Looks under `{baseDir}/{date}/` for a directory named after the slug
 * (optionally suffixed, e.g. `breaking-2`, `breaking-3`; the highest numeric
 * suffix wins), then loads its `manifest.json` and the markdown files found
 * in the known analysis subdirectories.
 *
 * Returns `null` — so callers can degrade gracefully — when the date or slug
 * is malformed, when no matching directory exists, or when the directory
 * yields neither a manifest nor any analysis file.
 *
 * Base directory resolution:
 *   1. `baseDir` argument, when it differs from the default
 *   2. `EP_ANALYSIS_DIR` environment variable
 *   3. `'analysis/daily'`
 *
 * Slug resolution:
 *   1. `EP_ANALYSIS_SLUG` environment variable
 *   2. `articleTypeSlug` argument
 *
 * @param date - Analysis run date, must match YYYY-MM-DD
 * @param articleTypeSlug - Article type slug (e.g. 'breaking', 'week-ahead')
 * @param baseDir - Base analysis directory (defaults to 'analysis/daily')
 * @returns The loaded context, or null when nothing usable was found
 */
export declare function loadAnalysisContext(date: string, articleTypeSlug: string, baseDir?: string): LoadedAnalysisContext | null;
|
|
65
|
+
/**
 * Read the `method:` entry from the YAML frontmatter of a markdown document.
 *
 * Pipeline-produced analysis files carry their canonical method ID in the
 * frontmatter (e.g. `method: coalition-analysis`). That ID can differ from
 * the filename (e.g. `coalition-dynamics.md`); when it does, the frontmatter
 * value is the key strategies should look files up by.
 *
 * @param content - Raw markdown text
 * @returns The `method` value, or `null` when there is no frontmatter or no such entry
 */
export declare function extractFrontmatterMethod(content: string): string | null;
|
|
77
|
+
/**
 * Detect analysis files that still carry scaffold/template markers, i.e.
 * files the AI agent has not (fully) filled in.
 *
 * @param content - Raw markdown file content
 * @returns `true` when a scaffold marker is present in the content
 */
export declare function isScaffoldContent(content: string): boolean;
|
|
85
|
+
/**
 * Return the first meaningful prose paragraph of an analysis markdown file.
 *
 * YAML frontmatter, headings, fenced code blocks, tables, scaffold files and
 * bold/italic markers are removed before the paragraph is chosen. Text longer
 * than `maxLength` is cut and terminated with `...`.
 *
 * @param content - Raw markdown content
 * @param maxLength - Upper bound on the returned length (default 500)
 * @returns The summary text, or an empty string when no prose is available
 */
export declare function extractAnalysisSummary(content: string, maxLength?: number): string;
|
|
95
|
+
/**
 * Return up to `maxParagraphs` meaningful prose paragraphs from an analysis
 * markdown file — a richer alternative to `extractAnalysisSummary`, which
 * yields a single paragraph only.
 *
 * @param content - Raw markdown file content
 * @param maxParagraphs - Upper bound on the number of paragraphs (default 3)
 * @param maxTotalLength - Upper bound on the combined character length (default 1500)
 * @returns The selected paragraphs, in document order (possibly empty)
 */
export declare function extractAnalysisParagraphs(content: string, maxParagraphs?: number, maxTotalLength?: number): readonly string[];
|
|
105
|
+
/**
 * Tell real analytical prose apart from pipeline scaffolding and empty
 * templates.
 *
 * @param content - Raw markdown file content
 * @returns `true` when the file holds actual prose written by the AI agent
 */
export declare function hasSubstantiveAIContent(content: string): boolean;
|
|
113
|
+
/**
 * Render the "analysis pipeline insights" block of an article as HTML.
 *
 * The result is a `<section class="analysis-pipeline-insights">` element with
 * one entry per requested method that has loaded, usable content. Scaffold
 * files and files without extractable prose are left out; each entry shows
 * paragraphs obtained through the multi-paragraph extractor.
 *
 * @param ctx - Loaded analysis context; `null`/`undefined` yields an empty string
 * @param relevantMethods - Method names the calling strategy wants displayed
 * @param lang - Language code used to pick the localized section heading
 * @returns HTML markup, or an empty string when there is nothing to show
 */
export declare function buildAnalysisInsightsSection(ctx: LoadedAnalysisContext | null | undefined, relevantMethods: readonly string[], lang: LanguageCode): string;
|
|
12
129
|
/**
|
|
13
130
|
* Minimum payload every strategy must carry: the article's publication date.
|
|
14
131
|
* Strategy-specific data interfaces extend this base.
|
|
@@ -16,6 +133,8 @@ import type { EuropeanParliamentMCPClient } from '../../mcp/ep-mcp-client.js';
|
|
|
16
133
|
export interface ArticleData {
    /** Publication date of the article, ISO 8601 (YYYY-MM-DD) */
    readonly date: string;
    /** Analysis pipeline output for this article; null/undefined/absent when none was loaded */
    readonly analysisContext?: LoadedAnalysisContext | null | undefined;
}
|
|
20
139
|
/**
|
|
21
140
|
* Resolved title, subtitle, keywords, and optional sources for one
|
|
@@ -1,4 +1,494 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024-2026 Hack23 AB
|
|
2
2
|
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
3
|
+
/**
|
|
4
|
+
* @module Generators/Strategies/ArticleStrategy
|
|
5
|
+
* @description Base interface and shared types for article generation strategies.
|
|
6
|
+
* Each strategy encapsulates the fetch, build, and metadata logic for one
|
|
7
|
+
* {@link ArticleCategory}, making it trivial to add new article types without
|
|
8
|
+
* touching the orchestration layer.
|
|
9
|
+
*
|
|
10
|
+
* Includes utilities for loading analysis pipeline output so that strategies
|
|
11
|
+
* can consume classification, threat assessment, risk scoring, and other
|
|
12
|
+
* analysis artifacts produced by the analysis stage.
|
|
13
|
+
*/
|
|
14
|
+
import fs from 'fs';
|
|
15
|
+
import path from 'path';
|
|
16
|
+
import { escapeHTML } from '../../utils/file-utils.js';
|
|
17
|
+
import { ANALYSIS_INSIGHTS_HEADING, getLocalizedString } from '../../constants/languages.js';
|
|
18
|
+
// ─── Analysis loading defaults ───────────────────────────────────────────────
|
|
19
|
+
/** Base directory used when neither the caller nor the environment names one */
const DEFAULT_ANALYSIS_BASE_DIR = 'analysis/daily';
/**
 * Name of the environment variable that overrides the analysis base
 * directory. The orchestration layer sets it when `--analysis-dir` is given.
 */
const ENV_ANALYSIS_DIR = 'EP_ANALYSIS_DIR';
/**
 * Name of the environment variable that overrides the analysis slug.
 * The orchestration layer sets it to the slug resolved by
 * `deriveArticleTypeSlug()`, so strategies need not hard-code a slug for
 * multi-type runs or custom analysis directories.
 */
const ENV_ANALYSIS_SLUG = 'EP_ANALYSIS_SLUG';
/** Subdirectories of an analysis directory that are scanned for `.md` files */
const ANALYSIS_SUBDIRS = ['classification', 'threat-assessment', 'risk-scoring', 'existing'];
|
|
40
|
+
/**
 * Read the analysis pipeline output for a given date and article type.
 *
 * Looks under `{baseDir}/{date}/` for a directory named after the slug
 * (exact name or numerically suffixed, e.g. `breaking-2`), then loads its
 * `manifest.json` and the markdown files in the known subdirectories.
 *
 * Returns `null` when the date or slug fails validation, when the date
 * directory or a matching analysis directory is missing, or when neither a
 * manifest nor any analysis file could be loaded.
 *
 * Base directory resolution:
 *   1. `baseDir` argument, when it differs from the default
 *   2. `EP_ANALYSIS_DIR` environment variable (trimmed, ignored when empty)
 *   3. `'analysis/daily'`
 *
 * Slug resolution:
 *   1. `EP_ANALYSIS_SLUG` environment variable (trimmed, ignored when empty)
 *   2. `articleTypeSlug` argument
 *
 * @param date - Analysis run date, must match YYYY-MM-DD
 * @param articleTypeSlug - Article type slug (e.g. 'breaking', 'week-ahead')
 * @param baseDir - Base analysis directory (defaults to 'analysis/daily')
 * @returns The loaded context, or null when nothing usable was found
 */
export function loadAnalysisContext(date, articleTypeSlug, baseDir = DEFAULT_ANALYSIS_BASE_DIR) {
    // Strict YYYY-MM-DD shape; this also keeps path separators and `..` out of the path.
    const isIsoDate = /^\d{4}-\d{2}-\d{2}$/u.test(date);
    if (!isIsoDate) {
        return null;
    }
    // An explicit, non-default argument beats the environment override.
    let rootDir = baseDir;
    if (baseDir === DEFAULT_ANALYSIS_BASE_DIR) {
        rootDir = process.env[ENV_ANALYSIS_DIR]?.trim() || DEFAULT_ANALYSIS_BASE_DIR;
    }
    // For the slug the environment override comes first.
    const slug = process.env[ENV_ANALYSIS_SLUG]?.trim() || articleTypeSlug;
    // Lower-case alphanumerics and hyphens only, so the slug cannot contain a path separator.
    const isSafeSlug = /^[\da-z][\da-z-]*$/u.test(slug);
    if (!isSafeSlug) {
        return null;
    }
    const dateDir = path.resolve(rootDir, date);
    if (!fs.existsSync(dateDir)) {
        return null;
    }
    // Exact directory name or the highest-numbered suffixed variant.
    const analysisDir = findAnalysisDirectory(dateDir, slug);
    if (!analysisDir) {
        return null;
    }
    const manifest = loadManifest(analysisDir);
    const files = loadAnalysisFiles(analysisDir);
    // A manifest alone is enough to produce a context; only "nothing at all" yields null.
    if (!manifest && files.size === 0) {
        return null;
    }
    let overallConfidence = null;
    if (manifest && typeof manifest['overallConfidence'] === 'string') {
        overallConfidence = manifest['overallConfidence'];
    }
    return { date, analysisDir, manifest, overallConfidence, files };
}
|
|
96
|
+
/**
 * Pick the analysis directory for a slug inside a date directory.
 *
 * Every subdirectory named `slug` or `slug-<digits>` is a candidate. The
 * bare name counts as suffix 0 and the candidate with the strictly highest
 * suffix is returned; on a tie the entry listed first is kept.
 *
 * @param dateDir - Date-scoped parent directory
 * @param slug - Article type slug
 * @returns Path of the selected directory, or null when none matches or the
 *   directory cannot be read
 */
function findAnalysisDirectory(dateDir, slug) {
    try {
        const dirents = fs.readdirSync(dateDir, { withFileTypes: true });
        const matcher = new RegExp(`^${escapeRegExp(slug)}(?:-(\\d+))?$`);
        let winner = { rank: -1, dir: null };
        for (const dirent of dirents) {
            const hit = dirent.isDirectory() ? matcher.exec(dirent.name) : null;
            if (!hit) {
                continue;
            }
            // No numeric suffix means the bare slug directory, ranked as 0.
            const rank = hit[1] ? parseInt(hit[1], 10) : 0;
            if (rank > winner.rank) {
                winner = { rank, dir: path.join(dateDir, dirent.name) };
            }
        }
        return winner.dir;
    }
    catch {
        // Unreadable date directory (or any other failure): treat as "not found".
        return null;
    }
}
|
|
130
|
+
/**
 * Backslash-escape every regular-expression metacharacter in a string so it
 * can be embedded literally in a `RegExp` source.
 *
 * @param str - Text to escape
 * @returns The text with `. * + ? ^ $ { } ( ) | [ ] \` each prefixed by a backslash
 */
function escapeRegExp(str) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/gu;
    return str.replace(metaCharacters, (ch) => `\\${ch}`);
}
|
|
139
|
+
/**
 * Load and parse `manifest.json` from an analysis directory.
 *
 * Only a JSON object is accepted as a manifest. Valid JSON of any other
 * shape (array, string, number, boolean, `null`) is treated the same as a
 * missing or corrupt file, so callers always get either a keyed record or
 * `null`.
 *
 * @param analysisDir - Analysis output directory
 * @returns The parsed manifest object, or null when the file is missing,
 *   unreadable, not valid JSON, or not a JSON object
 */
function loadManifest(analysisDir) {
    const manifestPath = path.join(analysisDir, 'manifest.json');
    try {
        if (!fs.existsSync(manifestPath)) {
            return null;
        }
        const parsed = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
        // JSON.parse happily returns arrays and primitives; those are not manifests.
        const isPlainObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
        return isPlainObject ? parsed : null;
    }
    catch {
        // Best-effort load: read and parse failures degrade to "no manifest".
        return null;
    }
}
|
|
157
|
+
/**
 * Extract the `method:` value from YAML frontmatter in a markdown string.
 *
 * Analysis files produced by the pipeline embed the canonical method ID in
 * their frontmatter (e.g. `method: coalition-analysis`). When this differs
 * from the filename (e.g. `coalition-dynamics.md`), the frontmatter value is
 * the authoritative key for strategy lookups.
 *
 * The value must sit on the same line as the `method:` key. An empty or
 * whitespace-only value yields `null`; it never falls through to the next
 * frontmatter line.
 *
 * @param content - Raw markdown content
 * @returns The trimmed frontmatter `method` value, or `null` when the content
 *   has no frontmatter, no closing `---`, or no non-empty `method:` entry
 */
export function extractFrontmatterMethod(content) {
    if (!content.startsWith('---')) {
        return null;
    }
    const endIdx = content.indexOf('---', 3);
    if (endIdx === -1) {
        return null;
    }
    const frontmatter = content.slice(3, endIdx);
    // `[ \t]*` (not `\s*`) keeps the match on the key's own line: `\s` also
    // matches newlines and would capture the following entry when the value
    // is empty. `\S` requires at least one non-blank character in the value.
    const match = /^method:[ \t]*(\S.*)$/mu.exec(frontmatter);
    const value = match?.[1]?.trim();
    return value ? value : null;
}
|
|
178
|
+
/**
 * Read one analysis markdown file and register it in `files`.
 *
 * The file is stored under its frontmatter `method:` value when present,
 * otherwise under its filename without the `.md` extension. When both exist
 * and differ, the same record is additionally stored under the filename key,
 * so either identifier resolves to it. Any failure is swallowed and the file
 * is simply not registered.
 *
 * @param files - Map to register the file content into (mutated)
 * @param filePath - Path to the .md file
 * @param entry - Filename (e.g. `coalition-dynamics.md`)
 * @param subdir - Parent subdirectory name (e.g. `existing`)
 */
function loadSingleAnalysisFile(files, filePath, entry, subdir) {
    try {
        const content = fs.readFileSync(filePath, 'utf-8');
        const stem = entry.replace(/\.md$/u, '');
        const declared = extractFrontmatterMethod(content);
        // Canonical key: frontmatter method when available, filename stem otherwise.
        const record = { method: declared ?? stem, subdir, content, filePath };
        files.set(record.method, record);
        // Alias by filename only when it adds a second, different key.
        const needsAlias = Boolean(declared) && declared !== stem;
        if (needsAlias) {
            files.set(stem, record);
        }
    }
    catch {
        // Unreadable file: skip it, the remaining files are still loaded.
    }
}
|
|
205
|
+
/**
 * Collect the analysis markdown files of an analysis directory.
 *
 * Each known subdirectory is scanned (non-recursively) for `*.md` entries,
 * which are registered via `loadSingleAnalysisFile` — i.e. keyed by the
 * frontmatter `method:` value, with the filename stem as an extra alias when
 * the two differ. Missing or unreadable subdirectories are skipped.
 *
 * @param analysisDir - Analysis output directory
 * @returns Map of method name → file content (empty when nothing was found)
 */
function loadAnalysisFiles(analysisDir) {
    const loaded = new Map();
    ANALYSIS_SUBDIRS.forEach((subdir) => {
        const dir = path.join(analysisDir, subdir);
        try {
            const usable = fs.existsSync(dir) && fs.statSync(dir).isDirectory();
            if (!usable) {
                return;
            }
            fs.readdirSync(dir)
                .filter((name) => name.endsWith('.md'))
                .forEach((name) => loadSingleAnalysisFile(loaded, path.join(dir, name), name, subdir));
        }
        catch {
            // Unreadable subdirectory: move on to the next one.
        }
    });
    return loaded;
}
|
|
238
|
+
/**
 * Recognise lines that are markup rather than prose: fenced-code delimiters,
 * markdown table rows and separator rows, and HTML-like lines.
 *
 * @param trimmed - A single line, already trimmed
 * @returns `true` when the line is non-prose content (code fence, table, HTML)
 */
function isNonProseContent(trimmed) {
    // ``` opens or closes a fenced code block.
    const isFence = trimmed.startsWith('```');
    // A table row starts with a pipe and has at least one more pipe after it.
    const isTableRow = trimmed.startsWith('|') && trimmed.includes('|', 1);
    // A separator row (e.g. |---|---|) consists solely of pipes, colons, hyphens and whitespace.
    const isTableSeparator = trimmed.includes('|') && /^[\s|:|-]+$/u.test(trimmed);
    // Anything wrapped in angle brackets is treated as HTML.
    const isHtmlLike = trimmed.startsWith('<') && trimmed.endsWith('>');
    return isFence || isTableRow || isTableSeparator || isHtmlLike;
}
|
|
260
|
+
/** Markers left behind by scaffold/placeholder templates — their presence means "not real analysis" */
const SCAFFOLD_PATTERNS = [
    /\[TO BE FILLED BY AI AGENT/i,
    /\[AI_ANALYSIS_REQUIRED\]/i,
    /\[REQUIRED\]/i,
    /\[\?\]/,
    /Quality gate: minimum \d+ words/i,
    /Instructions for AI Agent/i,
];
/**
 * Detect analysis files that still carry scaffold/template markers, i.e.
 * files the AI agent did not (fully) fill in.
 *
 * A single marker anywhere in the content is enough to classify the whole
 * file as scaffold.
 *
 * @param content - Raw markdown file content
 * @returns `true` when at least one scaffold marker occurs in the content
 */
export function isScaffoldContent(content) {
    for (const marker of SCAFFOLD_PATTERNS) {
        if (marker.test(content)) {
            return true;
        }
    }
    return false;
}
|
|
279
|
+
/**
 * Decide whether a trimmed line counts as prose.
 *
 * Empty lines, headings (`#`), rules/frontmatter delimiters (`---`),
 * blockquotes (`>`) and anything `isNonProseContent` flags are rejected.
 *
 * @param trimmed - A single line, already trimmed
 * @returns `true` when the line should be included as prose
 */
function isProseContent(trimmed) {
    if (trimmed === '') {
        return false;
    }
    const startsWithMarker = ['#', '---', '>'].some((marker) => trimmed.startsWith(marker));
    return !startsWithMarker && !isNonProseContent(trimmed);
}
|
|
298
|
+
/**
 * Remove markdown emphasis markers from a string, keeping the emphasised text.
 *
 * Bold (`**text**`) is unwrapped first, then italic (`*text*`).
 *
 * @param text - Raw markdown text
 * @returns The text without bold/italic asterisk pairs
 */
function stripMarkdownFormatting(text) {
    const withoutBold = text.replace(/\*\*([^*]+)\*\*/g, '$1');
    const withoutItalic = withoutBold.replace(/\*([^*]+)\*/g, '$1');
    return withoutItalic;
}
|
|
307
|
+
/**
 * Reduce raw analysis markdown to the text paragraphs are extracted from.
 *
 * Scaffold files yield an empty string. Otherwise a leading `---` …`---`
 * frontmatter block is cut off (when a closing `---` exists) and every
 * fenced code block is removed.
 *
 * @param content - Raw markdown content
 * @returns Body text ready for paragraph extraction, or '' for scaffold content
 */
function prepareAnalysisBody(content) {
    if (isScaffoldContent(content)) {
        return '';
    }
    // Position of the closing frontmatter delimiter, -1 when there is no frontmatter to strip.
    const closing = content.startsWith('---') ? content.indexOf('---', 3) : -1;
    const withoutFrontmatter = closing === -1 ? content : content.slice(closing + 3);
    return withoutFrontmatter.replace(/```[\s\S]*?```/g, '');
}
|
|
324
|
+
/**
 * Group the prose lines of a prepared analysis body into paragraphs.
 *
 * Consecutive prose lines are joined with single spaces (emphasis markers
 * stripped); a blank line ends the current paragraph. Non-prose lines are
 * dropped without ending the paragraph.
 *
 * @param body - Analysis body with frontmatter/code blocks removed
 * @returns Paragraphs in document order
 */
function collectParagraphs(body) {
    const paragraphs = [];
    let buffer = '';
    const flush = () => {
        if (buffer) {
            paragraphs.push(buffer.trim());
            buffer = '';
        }
    };
    for (const rawLine of body.split('\n')) {
        const text = rawLine.trim();
        if (text === '') {
            // Blank line: close the paragraph collected so far, if any.
            flush();
            continue;
        }
        if (!isProseContent(text)) {
            continue;
        }
        const piece = stripMarkdownFormatting(text);
        buffer = buffer ? `${buffer} ${piece}` : piece;
    }
    // The body may end without a trailing blank line.
    flush();
    return paragraphs;
}
|
|
348
|
+
/**
 * Return the first meaningful prose paragraph of an analysis markdown file.
 *
 * Frontmatter, headings, fenced code blocks, tables and emphasis markers are
 * removed first; scaffold files produce an empty result. Paragraphs of 20
 * characters or fewer are not considered. A paragraph longer than
 * `maxLength` is cut to `maxLength - 3` characters and terminated with
 * `...` (values of `maxLength` below 3 are not specially handled).
 *
 * @param content - Raw markdown content
 * @param maxLength - Maximum character length to return (default 500)
 * @returns The summary text, or an empty string when no prose is available
 */
export function extractAnalysisSummary(content, maxLength = 500) {
    const body = prepareAnalysisBody(content);
    if (!body) {
        return '';
    }
    const [first = ''] = filterMeaningfulParagraphs(collectParagraphs(body), 20);
    if (first.length <= maxLength) {
        return first;
    }
    return `${first.slice(0, maxLength - 3)}...`;
}
|
|
366
|
+
/**
 * Keep only paragraphs that look like real prose.
 *
 * A paragraph is dropped when it is not longer than `minLength`, when it
 * consists solely of digits, whitespace, pipes and dashes (e.g. "— | — | —"),
 * or when it is a lone em dash.
 *
 * @param paragraphs - Candidate paragraphs
 * @param minLength - Length a paragraph must exceed to be kept
 * @returns The paragraphs that passed all checks, in their original order
 */
function filterMeaningfulParagraphs(paragraphs, minLength) {
    const isDataOnly = (text) => /^[\d\s|—–-]+$/u.test(text);
    const isLoneDash = (text) => /^\s*—\s*$/u.test(text);
    return paragraphs.filter((text) => {
        if (text.length <= minLength) {
            return false;
        }
        return !isDataOnly(text) && !isLoneDash(text);
    });
}
|
|
377
|
+
/**
 * Return several meaningful prose paragraphs from an analysis markdown file.
 *
 * Paragraphs longer than 50 characters are taken in document order until
 * either `maxParagraphs` is reached or the next paragraph would exceed the
 * remaining character budget. If even the first paragraph is too long, a
 * truncated copy of it is returned so that substantive content never yields
 * an empty result.
 *
 * @param content - Raw markdown file content
 * @param maxParagraphs - Maximum number of paragraphs to return (default 3)
 * @param maxTotalLength - Maximum total character length (default 1500)
 * @returns The selected paragraphs (empty for scaffold or prose-free content)
 */
export function extractAnalysisParagraphs(content, maxParagraphs = 3, maxTotalLength = 1500) {
    const body = prepareAnalysisBody(content);
    if (!body) {
        return [];
    }
    const candidates = filterMeaningfulParagraphs(collectParagraphs(body), 50);
    const picked = [];
    // Characters still available before maxTotalLength is reached.
    let budget = maxTotalLength;
    for (const paragraph of candidates) {
        if (picked.length >= maxParagraphs || budget <= 0) {
            break;
        }
        if (paragraph.length > budget) {
            // Only the very first paragraph is ever truncated; later ones are dropped whole.
            if (picked.length === 0) {
                picked.push(paragraph.slice(0, budget).trimEnd());
            }
            break;
        }
        picked.push(paragraph);
        budget -= paragraph.length;
    }
    return picked;
}
|
|
413
|
+
/**
 * Tell real analytical prose apart from pipeline scaffolding and empty
 * templates.
 *
 * Scaffold files are rejected outright. For everything else the words on
 * prose lines (no tables, headings or blockquotes) are counted and at least
 * five are required.
 *
 * @param content - Raw markdown file content
 * @returns `true` when the file contains real analytical prose
 */
export function hasSubstantiveAIContent(content) {
    const body = prepareAnalysisBody(content);
    if (!body) {
        return false;
    }
    const proseWords = body
        .split('\n')
        .map((line) => line.trim())
        // Blockquotes are excluded explicitly, in addition to the prose check.
        .filter((line) => isProseContent(line) && !line.startsWith('>'))
        .reduce((total, line) => total + line.split(/\s+/u).length, 0);
    // The threshold is deliberately low; scaffold detection does most of the work.
    return proseWords >= 5;
}
|
|
435
|
+
/**
 * Build an HTML section summarising analysis pipeline insights.
 *
 * Creates a structured `<section class="analysis-pipeline-insights">` element
 * containing key findings from loaded analysis files. Each strategy passes
 * the methods it considers relevant; only those with loaded content are rendered.
 *
 * Scaffold/template files and files without extractable prose are skipped.
 * A file is rendered at most once: the loader may register the same file
 * under both its canonical method ID and its filename alias, so when
 * `relevantMethods` names both, only the first occurrence produces an item
 * (labelled with the key it was requested by).
 *
 * @param ctx - Loaded analysis context (null-safe: returns empty string)
 * @param relevantMethods - Method names this strategy wants to display
 * @param lang - Target language code (used for localized section heading)
 * @returns HTML string (empty when no context or no relevant files)
 */
export function buildAnalysisInsightsSection(ctx, relevantMethods, lang) {
    if (!ctx)
        return '';
    const items = [];
    // File records already emitted, so alias keys do not duplicate an item.
    const rendered = new Set();
    for (const method of relevantMethods) {
        const file = ctx.files.get(method);
        if (!file || rendered.has(file))
            continue;
        // `extractAnalysisParagraphs()` already filters scaffold, empty, and
        // non-substantive analysis bodies, so use it as the single gate here.
        const paragraphs = extractAnalysisParagraphs(file.content, 2, 800);
        if (paragraphs.length === 0)
            continue;
        rendered.add(file);
        const label = formatMethodLabel(method);
        // All file-derived text is HTML-escaped before being embedded.
        const paragraphHtml = paragraphs.map((p) => `<p>${escapeHTML(p)}</p>`).join('\n');
        items.push(`<div class="analysis-insight-item" data-method="${escapeHTML(method)}">\n` +
            `<h4>${escapeHTML(label)}</h4>\n` +
            paragraphHtml +
            '\n' +
            `</div>`);
    }
    if (items.length === 0)
        return '';
    const heading = getLocalizedString(ANALYSIS_INSIGHTS_HEADING, lang);
    // The confidence badge is shown only when the manifest supplied a value.
    const confidence = ctx.overallConfidence
        ? ` <span class="confidence-badge">${escapeHTML(ctx.overallConfidence)}</span>`
        : '';
    return (`<section class="analysis-pipeline-insights" role="region" aria-label="${escapeHTML(heading)}">\n` +
        `<h2>${escapeHTML(heading)}${confidence}</h2>\n` +
        items.join('\n') +
        `\n</section>\n`);
}
|
|
482
|
+
/**
 * Turn a hyphen-separated method identifier into a display label by
 * capitalising the first character of each segment and joining with spaces.
 *
 * @param method - Method identifier (e.g. 'significance-classification')
 * @returns Formatted label (e.g. 'Significance Classification')
 */
function formatMethodLabel(method) {
    const words = [];
    for (const segment of method.split('-')) {
        words.push(segment.charAt(0).toUpperCase() + segment.slice(1));
    }
    return words.join(' ');
}
|
|
4
494
|
//# sourceMappingURL=article-strategy.js.map
|