npm - euparliamentmonitor - Versions diffs - 0.8.32 → 0.8.34 - Mend

euparliamentmonitor 0.8.32 → 0.8.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/README.md +1 -1
package/package.json +4 -4
package/scripts/constants/analysis-constants.d.ts +1 -1
package/scripts/constants/analysis-constants.js +1 -1
package/scripts/constants/language-articles.js +1 -1
package/scripts/generators/news-enhanced.d.ts +2 -2
package/scripts/generators/news-enhanced.js +3 -3
package/scripts/generators/pipeline/generate-stage.js +2 -2
package/scripts/mcp/wb-mcp-client.d.ts +10 -0
package/scripts/mcp/wb-mcp-client.js +18 -0
package/scripts/templates/article-template.d.ts +4 -0
package/scripts/templates/article-template.js +10 -44
package/scripts/utils/content-validator.d.ts +67 -0
package/scripts/utils/content-validator.js +394 -0
package/scripts/utils/validate-articles.js +111 -2

package/README.md CHANGED Viewed

@@ -984,7 +984,7 @@ Projected workflow counts below include all CI/CD workflow definitions, agentic
 | Year | Projected Workflow Definitions | AI Model | Key Capability |
 |------|-------------------------------|----------|----------------|
-| **2026** | 44–50 | Opus 4.6–4.9 | 🟢 Agentic news generation |
+| **2026** | 44–50 | Opus 4.7–4.9 | 🟢 Agentic news generation |
 | **2027** | 50–55 | Opus 5.x | 🔵 Predictive analytics |
 | **2028** | 55–65 | Opus 6.x | 🟣 Multi-modal content |
 | **2029** | 65–75 | Opus 7.x | 🟠 Autonomous pipeline |

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "euparliamentmonitor",
-  "version": "0.8.32",
+  "version": "0.8.34",
   "type": "module",
   "description": "European Parliament Intelligence Platform - Monitor political activity with systematic transparency",
   "main": "scripts/index.js",
@@ -135,7 +135,7 @@
   },
   "homepage": "https://euparliamentmonitor.com",
   "devDependencies": {
-    "@axe-core/playwright": "4.11.1",
+    "@axe-core/playwright": "4.11.2",
     "@eslint/js": "10.0.1",
     "@playwright/test": "1.59.1",
     "@types/d3": "7.4.3",
@@ -148,7 +148,7 @@
     "chart.js": "4.5.1",
     "chartjs-plugin-annotation": "3.1.0",
     "d3": "7.9.0",
-    "eslint": "10.2.0",
+    "eslint": "10.2.1",
     "eslint-config-prettier": "10.1.8",
     "eslint-plugin-jsdoc": "62.9.0",
     "eslint-plugin-security": "4.0.0",
@@ -163,7 +163,7 @@
     "ts-api-utils": "2.5.0",
     "tsx": "4.21.0",
     "typedoc": "0.28.19",
-    "typescript": "6.0.2",
+    "typescript": "6.0.3",
     "vitest": "4.1.4"
   },
   "engines": {

package/scripts/constants/analysis-constants.d.ts CHANGED Viewed

@@ -15,7 +15,7 @@
  * Any narrative or interpretive analysis text (for example: why, outlook,
  * impact assessments, stakeholder reasoning, or mistake/consequence
  * explanations) that is LEFT AS THIS MARKER is expected to be generated by
- * the AI agent (Opus 4.6) in the agentic workflow, not by code.
+ * the AI agent in the agentic workflow, not by code.
  */
 export declare const AI_MARKER = "[AI_ANALYSIS_REQUIRED]";
 /**

package/scripts/constants/analysis-constants.js CHANGED Viewed

@@ -17,7 +17,7 @@
  * Any narrative or interpretive analysis text (for example: why, outlook,
  * impact assessments, stakeholder reasoning, or mistake/consequence
  * explanations) that is LEFT AS THIS MARKER is expected to be generated by
- * the AI agent (Opus 4.6) in the agentic workflow, not by code.
+ * the AI agent in the agentic workflow, not by code.
  */
 export const AI_MARKER = '[AI_ANALYSIS_REQUIRED]';
 /**

package/scripts/constants/language-articles.js CHANGED Viewed

@@ -3140,7 +3140,7 @@ export const WEEK_AHEAD_STAKEHOLDER_STRINGS = {
         reasonInstitutionsCoordination: '需要机构间协调',
     },
 };
-// ─── AI analysis marker — all analysis text is produced by the AI agent (Opus 4.6) ───
+// ─── AI analysis marker — all analysis text is produced by the AI agent ───
 const AI_ANALYSIS_MARKER = '[AI_ANALYSIS_REQUIRED]';
 const BRK_WHY_ANOMALIES = AI_ANALYSIS_MARKER;
 const BRK_WHY_NORMAL = AI_ANALYSIS_MARKER;

package/scripts/generators/news-enhanced.d.ts CHANGED Viewed

@@ -26,13 +26,13 @@ export declare const runId: string;
 /**
  * AI-generated article title passed by the agentic workflow.
  * When provided, this OVERRIDES any script-generated title.
- * The AI agent (Opus 4.6) must analyse the content and produce this.
+ * The AI agent must analyse the content and produce this.
  */
 export declare const aiTitle: string;
 /**
  * AI-generated article description/subtitle passed by the agentic workflow.
  * When provided, this OVERRIDES any script-generated description.
- * The AI agent (Opus 4.6) must analyse the content and produce this.
+ * The AI agent must analyse the content and produce this.
  */
 export declare const aiDescription: string;
 /**

package/scripts/generators/news-enhanced.js CHANGED Viewed

@@ -96,13 +96,13 @@ export const runId = (runIdArg?.slice('--run-id='.length).trim() ||
 /**
  * AI-generated article title passed by the agentic workflow.
  * When provided, this OVERRIDES any script-generated title.
- * The AI agent (Opus 4.6) must analyse the content and produce this.
+ * The AI agent must analyse the content and produce this.
  */
 export const aiTitle = titleArg ? titleArg.slice('--title='.length).trim() : '';
 /**
  * AI-generated article description/subtitle passed by the agentic workflow.
  * When provided, this OVERRIDES any script-generated description.
- * The AI agent (Opus 4.6) must analyse the content and produce this.
+ * The AI agent must analyse the content and produce this.
  */
 export const aiDescription = descriptionArg
     ? descriptionArg.slice('--description='.length).trim()
@@ -378,7 +378,7 @@ async function runAnalysisWithGuard(date, client) {
 }
 /**
  * Wire AI-provided title/description from CLI `--title` and `--description` flags.
- * The AI agent (Opus 4.6) passes these after analysing the content.
+ * The AI agent passes these after analysing the content.
  * They override ALL script-generated metadata for the English version.
  */
 function wireAIMetadata() {

package/scripts/generators/pipeline/generate-stage.js CHANGED Viewed

@@ -19,7 +19,7 @@ import { writeSingleArticle } from './output-stage.js';
 /**
  * AI-generated article title provided by the agentic workflow.
  * When non-empty, this OVERRIDES any script-generated title for the
- * English version.  The AI agent (Opus 4.6) must analyse the article
+ * English version.  The AI agent must analyse the article
  * content and produce this — titles must NEVER be generated by code.
  */
 let _aiTitle = '';
@@ -130,7 +130,7 @@ function generateSingleLanguageArticle(strategy, data, lang, dateStr, slug, outp
     // preserved, but title and description enrichment is now subordinate
     // to AI-provided values from --title and --description CLI flags.
     //
-    // Architecture: The AI agent (Opus 4.6) analyses the content and
+    // Architecture: The AI agent analyses the content and
     // provides titles/descriptions via CLI flags. Script code NEVER
     // generates final titles or descriptions — it only provides fallbacks.
     const enrichedMetadata = enrichMetadataFromContent(content, baseMetadata);

package/scripts/mcp/wb-mcp-client.d.ts CHANGED Viewed

@@ -1,5 +1,15 @@
 import { MCPConnection } from './mcp-connection.js';
 import type { MCPToolResult, MCPClientOptions } from '../types/index.js';
+/**
+ * Canonical list of tools exposed by the World Bank MCP gateway. The news
+ * workflows, probe script, and the integration test suite all reference this
+ * list so a regression that adds/removes a tool fails a single drift guard
+ * (`test/integration/mcp/worldbank-mcp.test.js`) instead of silently breaking
+ * prompt/validator/probe coverage.
+ *
+ * Kept in sync with `analysis/methodologies/worldbank-indicator-mapping.md`.
+ */
+export declare const WORLD_BANK_MCP_TOOLS: readonly string[];
 /**
  * MCP Client for World Bank economic data access.
  * Extends {@link MCPConnection} with World Bank-specific tool wrapper methods.

package/scripts/mcp/wb-mcp-client.js CHANGED Viewed

@@ -25,6 +25,24 @@ const WB_BINARY_FILE = process.platform === 'win32' ? `${WB_BINARY_NAME}.cmd` :
 const WB_DEFAULT_SERVER = resolve(dirname(fileURLToPath(import.meta.url)), `../../node_modules/.bin/${WB_BINARY_FILE}`);
 /** Fallback payload when indicator data is unavailable (empty CSV) */
 const INDICATOR_FALLBACK = '';
+/**
+ * Canonical list of tools exposed by the World Bank MCP gateway. The news
+ * workflows, probe script, and the integration test suite all reference this
+ * list so a regression that adds/removes a tool fails a single drift guard
+ * (`test/integration/mcp/worldbank-mcp.test.js`) instead of silently breaking
+ * prompt/validator/probe coverage.
+ *
+ * Kept in sync with `analysis/methodologies/worldbank-indicator-mapping.md`.
+ */
+export const WORLD_BANK_MCP_TOOLS = [
+    'search-indicators',
+    'get-countries',
+    'get-country-info',
+    'get-economic-data',
+    'get-social-data',
+    'get-education-data',
+    'get-health-data',
+];
 /**
  * MCP Client for World Bank economic data access.
  * Extends {@link MCPConnection} with World Bank-specific tool wrapper methods.

package/scripts/templates/article-template.d.ts CHANGED Viewed

@@ -1,3 +1,7 @@
+/**
+ * @module Templates/ArticleTemplate
+ * @description Generates HTML templates for news articles with proper structure and metadata
+ */
 import type { ArticleOptions, LanguageCode, AnalysisFileEntry } from '../types/index.js';
 /**
  * Generate complete HTML for a news article

package/scripts/templates/article-template.js CHANGED Viewed

@@ -1,14 +1,9 @@
 // SPDX-FileCopyrightText: 2024-2026 Hack23 AB
 // SPDX-License-Identifier: Apache-2.0
-/**
- * @module Templates/ArticleTemplate
- * @description Generates HTML templates for news articles with proper structure and metadata
- */
-import { createHash } from 'crypto';
 import { ALL_LANGUAGES, LANGUAGE_FLAGS, LANGUAGE_NAMES, ARTICLE_TYPE_LABELS, READ_TIME_LABELS, BACK_TO_NEWS_LABELS, ARTICLE_NAV_LABELS, RELATED_ARTICLES_NAV_LABELS, BREADCRUMB_HOME_LABELS, BREADCRUMB_NEWS_LABELS, SKIP_LINK_TEXTS, SOURCES_HEADING_LABELS, HEADER_SUBTITLE_LABELS, THEME_TOGGLE_LABELS, FOOTER_ABOUT_HEADING_LABELS, FOOTER_ABOUT_TEXT_LABELS, FOOTER_QUICK_LINKS_LABELS, FOOTER_BUILT_BY_LABELS, FOOTER_LANGUAGES_LABELS, ANALYSIS_TRANSPARENCY_LABELS, ANALYSIS_SUMMARY_LABELS, METHODOLOGY_LABELS, TRANSPARENCY_DISCLOSURE_LABELS, CLASSIFICATION_ANALYSIS_LABELS, THREAT_ASSESSMENT_LABELS, RISK_SCORING_LABELS, DEEP_ANALYSIS_LABELS, VIEW_SOURCE_LABELS, OPEN_SOURCE_NOTE_LABELS, AI_ANALYSIS_GUIDE_LABELS, SWOT_FRAMEWORK_LABELS, RISK_METHODOLOGY_LABELS, THREAT_FRAMEWORK_LABELS, CLASSIFICATION_GUIDE_LABELS, STYLE_GUIDE_LABELS, SIGNIFICANCE_CLASSIFICATION_LABELS, ACTOR_MAPPING_LABELS, FORCES_ANALYSIS_LABELS, IMPACT_MATRIX_LABELS, POLITICAL_THREAT_LANDSCAPE_LABELS, ACTOR_THREAT_PROFILING_LABELS, CONSEQUENCE_TREES_LABELS, LEGISLATIVE_DISRUPTION_LABELS, RISK_MATRIX_LABELS, QUANTITATIVE_SWOT_LABELS, POLITICAL_CAPITAL_RISK_LABELS, LEGISLATIVE_VELOCITY_RISK_LABELS, AGENT_RISK_WORKFLOW_LABELS, STAKEHOLDER_IMPACT_LABELS, COALITION_DYNAMICS_LABELS, VOTING_PATTERNS_LABELS, CROSS_SESSION_INTELLIGENCE_LABELS, SYNTHESIS_SUMMARY_LABELS, DOCUMENT_ANALYSIS_LABELS, SIGNIFICANCE_SCORING_LABELS, getLocalizedString, getTextDirection, } from '../constants/languages.js';
 import { escapeHTML, isSafeURL } from '../utils/file-utils.js';
 import { stripHtmlTags } from '../utils/html-sanitize.js';
-import { APP_VERSION, createThemeToggleButton, THEME_TOGGLE_SCRIPT, THEME_TOGGLE_SCRIPT_CONTENT, } from '../constants/config.js';
+import { APP_VERSION, createThemeToggleButton } from '../constants/config.js';
 /** Pattern for valid article dates (YYYY-MM-DD) */
 const DATE_PATTERN = /^\d{4}-\d{2}-\d{2}$/u;
 /** Pattern for valid article slugs (lowercase letters, digits, hyphens) */
@@ -288,22 +283,12 @@ export function generateArticleHTML(options) {
     const safeSriAttrs = stylesHash && SRI_HASH_PATTERN.test(stylesHash)
         ? ` integrity="${escapeHTML(stylesHash)}" crossorigin="anonymous"`
         : '';
-    // Compute SHA-256 hash of the inline JSON-LD script content for CSP.
-    // IMPORTANT: The whitespace here ("\n  " prefix and "\n  " suffix) must exactly
-    // match the script tag content in the HTML template below:
-    //   <script type="application/ld+json">
-    //   ${jsonLd}
-    //   </script>
-    const jsonLdScriptContent = `\n  ${jsonLd}\n  `;
-    const jsonLdHash = `sha256-${createHash('sha256').update(jsonLdScriptContent).digest('base64')}`;
-    // Compute CSP hash for BreadcrumbList JSON-LD script
-    const breadcrumbLdScriptContent = `\n  ${breadcrumbLd}\n  `;
-    const breadcrumbLdHash = `sha256-${createHash('sha256').update(breadcrumbLdScriptContent).digest('base64')}`;
-    // Reading-progress script hash — content must exactly match the <script> block.
-    const readingProgressScript = `\n  (function(){\n    var bar=document.querySelector('.reading-progress');\n    if(!bar)return;\n    bar.style.display='block';\n    var ticking=false;\n    window.addEventListener('scroll',function(){\n      if(!ticking){\n        window.requestAnimationFrame(function(){\n          var h=document.documentElement;\n          var scrollTop=h.scrollTop||document.body.scrollTop;\n          var scrollHeight=h.scrollHeight-h.clientHeight;\n          bar.style.width=scrollHeight>0?((scrollTop/scrollHeight)*100)+'%':'0%';\n          ticking=false;\n        });\n        ticking=true;\n      }\n    },{passive:true});\n  })();\n  `;
-    const readingProgressHash = `sha256-${createHash('sha256').update(readingProgressScript).digest('base64')}`;
-    // Theme toggle CSP hash — derived from the shared THEME_TOGGLE_SCRIPT_CONTENT constant
-    const themeToggleHash = `sha256-${createHash('sha256').update(THEME_TOGGLE_SCRIPT_CONTENT).digest('base64')}`;
+    // SHA-256 hashes were previously required for inline <script>
+    // blocks (JSON-LD, reading progress, theme toggle). All executable inline
+    // scripts have been externalised to `js/article-runtime.js`, so the CSP
+    // reduces to `script-src 'self'`. JSON-LD blocks use
+    // `type="application/ld+json"` which is non-executable and not governed
+    // by `script-src`.
     // Localized theme toggle button
     const themeToggleLabel = escapeHTML(getLocalizedString(THEME_TOGGLE_LABELS, lang));
     // Related articles navigation HTML (optional)
@@ -315,7 +300,7 @@ export function generateArticleHTML(options) {
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   <meta http-equiv="X-Content-Type-Options" content="nosniff">
   <meta name="referrer" content="no-referrer">
-  <meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self' '${jsonLdHash}' '${breadcrumbLdHash}' '${readingProgressHash}' '${themeToggleHash}'; style-src 'self' 'unsafe-inline'; img-src 'self' https: data:; font-src 'self'; connect-src 'self'; frame-src 'none'; base-uri 'self'; form-action 'none'">
+  <meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' https: data:; font-src 'self'; connect-src 'self'; frame-src 'none'; base-uri 'self'; form-action 'none'">
   <title>${safeTitle} | EU Parliament Monitor</title>
   <meta name="description" content="${safeSubtitle}">
   <meta name="keywords" content="${safeKeywords}">
@@ -453,26 +438,7 @@ export function generateArticleHTML(options) {
     </div>
   </footer>
-  <script>
-  (function(){
-    var bar=document.querySelector('.reading-progress');
-    if(!bar)return;
-    bar.style.display='block';
-    var ticking=false;
-    window.addEventListener('scroll',function(){
-      if(!ticking){
-        window.requestAnimationFrame(function(){
-          var h=document.documentElement;
-          var scrollTop=h.scrollTop||document.body.scrollTop;
-          var scrollHeight=h.scrollHeight-h.clientHeight;
-          bar.style.width=scrollHeight>0?((scrollTop/scrollHeight)*100)+'%':'0%';
-          ticking=false;
-        });
-        ticking=true;
-      }
-    },{passive:true});
-  })();
-  </script>${content.includes('data-chart-config')
+  <script src="../js/article-runtime.js" defer></script>${content.includes('data-chart-config')
         ? `
   <script src="../js/vendor/chart.umd.min.js" defer></script>
   <script src="../js/vendor/chartjs-plugin-annotation.min.js" defer></script>
@@ -481,7 +447,7 @@ export function generateArticleHTML(options) {
         ? `
   <script src="../js/vendor/d3.min.js" defer></script>
   <script src="../js/d3-init.js" defer></script>`
-        : ''}${THEME_TOGGLE_SCRIPT}
+        : ''}
 </body>
 </html>`;
 }

package/scripts/utils/content-validator.d.ts CHANGED Viewed

@@ -56,6 +56,73 @@ export interface TranslationValidationResult {
     /** Collected translation quality metrics */
     metrics: TranslationValidationMetrics;
 }
+/**
+ * Detect whether the article contains at least one Chart.js canvas with a
+ * well-formed `data-chart-config` JSON payload.
+ *
+ * A valid chart must:
+ *  - be rendered via `<canvas data-chart-config="…">` (the declarative
+ *    CSP-safe pattern hydrated by `js/chart-init.js`)
+ *  - declare a supported Chart.js `type`
+ *  - carry at least 3 data points in the first dataset (single-point charts
+ *    are rejected by `SHARED_PROMPT_PATTERNS.md` anti-patterns)
+ *
+ * @param html - Raw article HTML
+ * @returns `true` when ≥1 chart meeting the rules is present
+ */
+export declare function articleHasChart(html: string): boolean;
+/**
+ * Strong World Bank evidence tokens — plain substring match is enough to
+ * satisfy the gate because each is specific (the literal attribution phrase
+ * or an MCP tool name). Kept aligned with
+ * `analysis/methodologies/worldbank-indicator-mapping.md`.
+ */
+export declare const WORLD_BANK_STRONG_FINGERPRINTS: readonly string[];
+/**
+ * Short indicator codes published by the World Bank MCP server. These are
+ * matched with a word boundary (`[^A-Z0-9_]` look-arounds) so that prose like
+ * "GDP growth slowed" does NOT count as World Bank evidence, but an analysis
+ * file line like `INDICATOR: GDP` does. All codes are uppercase, so the match
+ * is case-sensitive — case-insensitive mentions in English prose are intentionally
+ * rejected.
+ */
+export declare const WORLD_BANK_INDICATOR_CODES: readonly string[];
+/**
+ * Backwards-compatible union of strong + short fingerprints. Kept exported so
+ * callers that only need a flat list (e.g. existing consumers that shipped
+ * before the strong/short split) continue to compile. New code SHOULD prefer
+ * {@link hasWorldBankEvidence}, which enforces the stricter word-boundary rule
+ * for short codes.
+ */
+export declare const WORLD_BANK_FINGERPRINTS: readonly string[];
+/**
+ * Detect World Bank sourcing in any piece of text (article body OR analysis
+ * markdown). Returns `true` when the text contains either a strong fingerprint
+ * (the phrase "World Bank", an MCP tool name, etc.) or an indicator code with
+ * clean word boundaries.
+ *
+ * This is the single source of truth for the policy quality gate — both the
+ * content validator and the CLI validator's filesystem fallback use it so a
+ * legitimate evidence trail on either side satisfies the rule, and generic
+ * prose mentions of economic terms do not.
+ *
+ * @param text - Text to scan
+ * @returns `true` when at least one strong or word-bounded fingerprint matches
+ */
+export declare function hasWorldBankEvidence(text: string): boolean;
+/**
+ * Verify that a policy article (or the linked analysis artifacts) contains at
+ * least one World Bank fingerprint — indicator code (word-bounded), MCP
+ * tool-trace token, or the phrase "World Bank" itself. Returns `true` if the
+ * gate is satisfied OR the article type is not on the mandatory list.
+ *
+ * @param html - Article HTML
+ * @param articleType - Slug of the article category (e.g. `"committee-reports"`)
+ * @param _analysisDir - Reserved for API symmetry; filesystem recursion is
+ *   performed by the caller in `validate-articles.ts` to keep this module pure.
+ * @returns `true` when the World Bank evidence requirement is met or not applicable
+ */
+export declare function articlePolicyHasWorldBank(html: string, articleType: string, _analysisDir?: string): boolean;
 /**
  * Validate the quality of a generated article.
  *

package/scripts/utils/content-validator.js CHANGED Viewed

@@ -697,6 +697,400 @@ function collectQualityGateWarnings(html, warnings) {
     if (emptySectionCount > 0) {
         warnings.push(`Article contains ${emptySectionCount} empty or near-empty <section> element(s) that should be removed`);
     }
+    // Chart presence gate
+    if (!articleHasChart(html)) {
+        warnings.push('Missing required Chart.js visualization: no <canvas data-chart-config="…"> element with a valid type found (≥1 required, see ai-first-quality.md quality gates)');
+    }
+    // Structural integrity gates — catch hand-written HTML bypassing the template
+    const langSwitcherCount = countLanguageSwitcherLinks(html);
+    if (langSwitcherCount < MIN_LANG_SWITCHER_LINKS) {
+        warnings.push(`Language switcher has only ${langSwitcherCount} link(s); the template always emits ${MIN_LANG_SWITCHER_LINKS} — this article may have been hand-written and skipped the template`);
+    }
+    if (!hasStandardFooterContent(html)) {
+        warnings.push('Footer is missing the standard `.footer-content` + `.footer-bottom` blocks — the template always emits these; article may have been hand-written');
+    }
+}
+/** Minimum number of language switcher links the template always emits (14 languages). */
+const MIN_LANG_SWITCHER_LINKS = 14;
+/** Chart.js types accepted by the `data-chart-config` declarative pattern. */
+const CHART_JS_TYPES = /"type"\s*:\s*"(bar|line|pie|doughnut|radar|polarArea|scatter|bubble)"/u;
+/**
+ * Check whether a character is HTML whitespace per the WHATWG spec
+ * (space, tab, LF, CR, FF).
+ *
+ * @param ch - Single character to test (may be empty string)
+ * @returns `true` when `ch` is one of the recognised whitespace chars
+ */
+function isHtmlWhitespace(ch) {
+    return ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === '\f';
+}
+/**
+ * Decode the five entity escapes that `escapeHTML` emits into literal chars.
+ *
+ * @param raw - Entity-encoded substring extracted from an attribute value
+ * @returns Decoded literal string
+ */
+function decodeHtmlEntities(raw) {
+    return raw
+        .replace(/&quot;/gu, '"')
+        .replace(/&#39;/gu, "'")
+        .replace(/&gt;/gu, '>')
+        .replace(/&lt;/gu, '<')
+        .replace(/&amp;/gu, '&');
+}
+/**
+ * Check that the positions immediately before and after an attribute name
+ * form valid HTML word-boundary characters. Prevents `xdata-chart-config`
+ * from being treated as the `data-chart-config` attribute.
+ *
+ * @param tag - Full opening-tag text (without trailing `>`)
+ * @param attrIdx - Index where the attribute name was found
+ * @param attrLen - Length of the attribute name
+ * @returns `true` when both boundaries are whitespace / `<` / `=` / start-of-tag
+ */
+function hasAttributeBoundaries(tag, attrIdx, attrLen) {
+    const before = attrIdx === 0 ? '' : (tag[attrIdx - 1] ?? '');
+    const afterIdx = attrIdx + attrLen;
+    const after = afterIdx < tag.length ? (tag[afterIdx] ?? '') : '';
+    const leadOk = before === '' || isHtmlWhitespace(before) || before === '<';
+    const trailOk = after === '' || isHtmlWhitespace(after) || after === '=';
+    return leadOk && trailOk;
+}
+/**
+ * Starting just after an attribute name, locate the opening quote character
+ * (either `"` or `'`) that begins the attribute value, tolerating optional
+ * HTML whitespace on either side of the `=`.
+ *
+ * @param tag - Full opening-tag text
+ * @param from - Index immediately after the attribute name
+ * @returns `{quote, valueStart}` when a proper `=<whitespace?><quote>` run is
+ *   present; `null` when the attribute is malformed or unquoted
+ */
+function findAttributeValueStart(tag, from) {
+    let i = from;
+    while (i < tag.length && isHtmlWhitespace(tag[i] ?? ''))
+        i++;
+    if (i >= tag.length || tag[i] !== '=')
+        return null;
+    i++;
+    while (i < tag.length && isHtmlWhitespace(tag[i] ?? ''))
+        i++;
+    if (i >= tag.length)
+        return null;
+    const quote = tag[i] ?? '';
+    if (quote !== '"' && quote !== "'")
+        return null;
+    return { quote, valueStart: i + 1 };
+}
+/**
+ * Scan an HTML attribute value in a single `<canvas>` tag starting at
+ * `tagStart`. Returns the decoded value of `attr` or `null` if not present.
+ * Uses only `indexOf` + single-character look-arounds so runtime is strictly
+ * linear in input length — this avoids the polynomial-ReDoS class of regex
+ * that CodeQL flags when nested character classes match the same tag prefix.
+ *
+ * Tolerates all HTML-compliant attribute forms:
+ *  - double-quoted: `data-chart-config="..."`
+ *  - single-quoted: `data-chart-config='...'`
+ *  - optional whitespace around `=`: `data-chart-config = "..."`
+ *
+ * @param html - Full article HTML
+ * @param tagStart - String index in `html` (as returned by `indexOf`) of the `<` that opens the canvas tag
+ * @param attr - Attribute name (e.g. `data-chart-config`)
+ * @returns Decoded attribute value, or `null` when the attribute is missing
+ */
+function extractCanvasAttribute(html, tagStart, attr) {
+    const tagEnd = html.indexOf('>', tagStart);
+    if (tagEnd === -1)
+        return null;
+    const tag = html.slice(tagStart, tagEnd);
+    let searchFrom = 0;
+    while (searchFrom < tag.length) {
+        const attrIdx = tag.indexOf(attr, searchFrom);
+        if (attrIdx === -1)
+            return null;
+        // Keep scanning past false matches with bad boundaries or without a
+        // proper `=<quote>` run; this keeps the function linear in tag length.
+        if (!hasAttributeBoundaries(tag, attrIdx, attr.length)) {
+            searchFrom = attrIdx + attr.length;
+            continue;
+        }
+        const valueHead = findAttributeValueStart(tag, attrIdx + attr.length);
+        if (!valueHead) {
+            searchFrom = attrIdx + attr.length;
+            continue;
+        }
+        const valueEnd = tag.indexOf(valueHead.quote, valueHead.valueStart);
+        if (valueEnd === -1)
+            return null;
+        return decodeHtmlEntities(tag.slice(valueHead.valueStart, valueEnd));
+    }
+    return null;
+}
+/**
+ * Detect whether the article contains at least one Chart.js canvas with a
+ * well-formed `data-chart-config` JSON payload.
+ *
+ * A valid chart must:
+ *  - be rendered via `<canvas data-chart-config="…">` (the declarative
+ *    CSP-safe pattern hydrated by `js/chart-init.js`)
+ *  - declare a supported Chart.js `type`
+ *  - carry at least 3 data points in the first dataset (single-point charts
+ *    are rejected by `SHARED_PROMPT_PATTERNS.md` anti-patterns)
+ *
+ * @param html - Raw article HTML
+ * @returns `true` when ≥1 chart meeting the rules is present
+ */
+export function articleHasChart(html) {
+    let cursor = 0;
+    while (cursor < html.length) {
+        const tagStart = html.indexOf('<canvas', cursor);
+        if (tagStart === -1)
+            return false;
+        const decoded = extractCanvasAttribute(html, tagStart, 'data-chart-config');
+        if (decoded !== null && CHART_JS_TYPES.test(decoded) && countFirstDatasetPoints(decoded) >= 3) {
+            return true;
+        }
+        // Advance past `<canvas` so overlapping matches cannot occur.
+        cursor = tagStart + '<canvas'.length;
+    }
+    return false;
+}
+/**
+ * Count data points in the first dataset of a Chart.js config JSON payload.
+ *
+ * Parses the decoded `data-chart-config` as JSON and returns the length of
+ * `config.data.datasets[0].data`. Handles both numeric-array datasets
+ * (`[1, 2, 3]`) and object-point datasets (`[{x:0,y:1}, …]`) correctly —
+ * the previous indexOf-based implementation miscounted scatter/bubble
+ * configs and accidentally looked at `data.labels` for typical layouts.
+ *
+ * @param json - Decoded Chart.js config JSON string
+ * @returns Number of data points in `data.datasets[0].data`, or 0 when absent/invalid
+ */
+function countFirstDatasetPoints(json) {
+    try {
+        const config = JSON.parse(json);
+        const firstDataset = config.data?.datasets?.[0];
+        return Array.isArray(firstDataset?.data) ? firstDataset.data.length : 0;
+    }
+    catch {
+        return 0;
+    }
+}
+/**
+ * Count `lang-link` class tokens emitted in the article header's language switcher.
+ *
+ * @param html - Complete article HTML
+ * @returns Number of `lang-link` token occurrences inside the `site-header__langs` nav, or in the whole document when that nav cannot be delimited
+ */
+function countLanguageSwitcherLinks(html) {
+    // Linear scan: locate the nav element by its unique class, then count
+    // `.lang-link` classes inside. Avoids the nested `[^">]*` regex pattern
+    // that CodeQL flags as polynomial-ReDoS-prone.
+    const marker = 'site-header__langs';
+    const markerIdx = html.indexOf(marker);
+    const NAV_CLOSE = '</nav>';
+    let scope = html;
+    if (markerIdx !== -1) {
+        // Find the closing `</nav>` of the enclosing nav (simple assumption:
+        // the next `</nav>` after the marker is the one we want). Falls back to
+        // the whole HTML if not found.
+        const endIdx = html.indexOf(NAV_CLOSE, markerIdx);
+        if (endIdx !== -1) {
+            // Walk backwards to find the opening `<nav`.
+            const startIdx = html.lastIndexOf('<nav', markerIdx);
+            if (startIdx !== -1) {
+                scope = html.slice(startIdx, endIdx);
+            }
+        }
+    }
+    // Count `lang-link` class tokens — bounded linear count.
+    const matches = scope.match(/\blang-link\b/gu);
+    return matches ? matches.length : 0;
+}
+/**
+ * Detect the two standard footer blocks always produced by `article-template.ts`.
+ *
+ * @param html - Complete article HTML
+ * @returns `true` when both `.footer-content` and `.footer-bottom` classes are present
+ */
+function hasStandardFooterContent(html) {
+    return /class="footer-content"/u.test(html) && /class="footer-bottom"/u.test(html);
+}
+/** Slugs for article types that MUST include World Bank economic context. */
+const POLICY_SLUGS_REQUIRING_WORLD_BANK = new Set([
+    'committee-reports',
+    'propositions',
+    'motions',
+    'weekly-review',
+    'monthly-review',
+    'week-in-review',
+    'month-in-review',
+    'month-ahead',
+]);
+/**
+ * Strong World Bank evidence tokens — plain substring match is enough to
+ * satisfy the gate because each is specific (the literal attribution phrase
+ * or an MCP tool name). Kept aligned with
+ * `analysis/methodologies/worldbank-indicator-mapping.md`.
+ */
+export const WORLD_BANK_STRONG_FINGERPRINTS = [
+    'World Bank',
+    'world bank',
+    'worldbank',
+    'get-economic-data',
+    'get-social-data',
+    'get-education-data',
+    'get-health-data',
+    'get-country-info',
+    'get-countries',
+    'search-indicators',
+];
+/**
+ * Short indicator codes published by the World Bank MCP server. These are
+ * matched case-sensitively and only when the characters on either side are
+ * not `[A-Z0-9_]`, so a code embedded in a longer uppercase identifier (for
+ * example `GDP` inside `GDP_GROWTH`) does not count on its own, and lowercase
+ * prose such as "gdp growth slowed" is rejected. Note that a standalone
+ * uppercase token matches wherever it appears: both an analysis file line
+ * like `INDICATOR: GDP` and the prose "GDP growth slowed" satisfy the check.
+ */
+export const WORLD_BANK_INDICATOR_CODES = [
+    'GDP',
+    'GDP_GROWTH',
+    'GDP_PER_CAPITA',
+    'GNI',
+    'GNI_PER_CAPITA',
+    'UNEMPLOYMENT',
+    'INFLATION',
+    'EXPORTS',
+    'EXPORTS_GDP',
+    'FDI',
+    'FDI_NET',
+    'POPULATION',
+    'LIFE_EXPECTANCY',
+    'BIRTH_RATE',
+    'DEATH_RATE',
+    'INTERNET_USERS',
+    'LITERACY_RATE',
+    'SCHOOL_ENROLLMENT',
+    'SCHOOL_COMPLETION',
+    'TEACHERS_PRIMARY',
+    'EDUCATION_EXPENDITURE',
+    'HEALTH_EXPENDITURE',
+    'PHYSICIANS',
+    'HOSPITAL_BEDS',
+    'IMMUNIZATION',
+    'HIV_PREVALENCE',
+    'MALNUTRITION',
+    'TUBERCULOSIS',
+];
+/**
+ * Backwards-compatible union of strong + short fingerprints. Kept exported so
+ * callers that only need a flat list (e.g. existing consumers that shipped
+ * before the strong/short split) continue to compile. New code SHOULD prefer
+ * {@link hasWorldBankEvidence}, which enforces the stricter word-boundary rule
+ * for short codes.
+ */
+export const WORLD_BANK_FINGERPRINTS = [
+    ...WORLD_BANK_STRONG_FINGERPRINTS,
+    ...WORLD_BANK_INDICATOR_CODES,
+];
+/**
+ * Characters that count as part of an identifier-style token for the
+ * word-boundary check applied to `WORLD_BANK_INDICATOR_CODES` entries. We
+ * treat `[A-Z0-9_]` as "identifier" characters — that keeps `GDP_GROWTH` from
+ * accidentally matching inside the shorter `GDP` scan. The lowercase word
+ * "gdp" stays out of the match set because the scan itself is case-sensitive.
+ */
+const WORD_BOUNDARY_PATTERN = /[A-Z0-9_]/u;
+/**
+ * Check whether `ch` is NOT an identifier-style character (so it qualifies
+ * as a word boundary on either side of a World Bank indicator code).
+ *
+ * @param ch - Single character (may be empty string for start/end-of-string)
+ * @returns `true` when `ch` is empty or a non-identifier character
+ */
+function isIdentifierBoundary(ch) {
+    return ch === '' || !WORD_BOUNDARY_PATTERN.test(ch);
+}
+/**
+ * Return `true` when `code` appears in `text` surrounded by identifier
+ * boundaries on both sides. Linear scan over `text`.
+ *
+ * @param text - Text to scan
+ * @param code - Indicator code to look for (all uppercase)
+ * @returns `true` when a word-bounded occurrence is present
+ */
+function textContainsIndicatorCode(text, code) {
+    let from = 0;
+    while (from < text.length) {
+        const idx = text.indexOf(code, from);
+        if (idx === -1)
+            return false;
+        const before = idx === 0 ? '' : (text[idx - 1] ?? '');
+        const afterIdx = idx + code.length;
+        const after = afterIdx < text.length ? (text[afterIdx] ?? '') : '';
+        if (isIdentifierBoundary(before) && isIdentifierBoundary(after))
+            return true;
+        from = idx + 1;
+    }
+    return false;
+}
+/**
+ * Return true when any `WORLD_BANK_INDICATOR_CODES` entry appears in `text`
+ * with word-boundary isolation on both sides. We treat `[A-Z0-9_]` as
+ * "identifier" characters — that keeps `GDP_GROWTH` from accidentally matching
+ * inside the shorter `GDP` scan, and keeps the English word "gdp" out of the
+ * match set.
+ *
+ * @param text - Article body or analysis markdown to scan
+ * @returns `true` when at least one canonical indicator code is present
+ */
+function hasIndicatorCodeWithBoundary(text) {
+    for (const code of WORLD_BANK_INDICATOR_CODES) {
+        if (textContainsIndicatorCode(text, code))
+            return true;
+    }
+    return false;
+}
+/**
+ * Detect World Bank sourcing in any piece of text (article body OR analysis
+ * markdown). Returns `true` when the text contains either a strong fingerprint
+ * (the phrase "World Bank", an MCP tool name, etc.) or an indicator code with
+ * clean word boundaries.
+ *
+ * This is the single source of truth for the policy quality gate — both the
+ * content validator and the CLI validator's filesystem fallback use it so a
+ * legitimate evidence trail on either side satisfies the rule, and generic
+ * prose mentions of economic terms do not.
+ *
+ * @param text - Text to scan
+ * @returns `true` when at least one strong or word-bounded fingerprint matches
+ */
+export function hasWorldBankEvidence(text) {
+    for (const fp of WORLD_BANK_STRONG_FINGERPRINTS) {
+        if (text.includes(fp))
+            return true;
+    }
+    return hasIndicatorCodeWithBoundary(text);
+}
+/**
+ * Verify that a policy article (or the linked analysis artifacts) contains at
+ * least one World Bank fingerprint — indicator code (word-bounded), MCP
+ * tool-trace token, or the phrase "World Bank" itself. Returns `true` if the
+ * gate is satisfied OR the article type is not on the mandatory list.
+ *
+ * @param html - Article HTML
+ * @param articleType - Slug of the article category (e.g. `"committee-reports"`)
+ * @param _analysisDir - Reserved for API symmetry; filesystem recursion is
+ *   performed by the caller in `validate-articles.ts` to keep this module pure.
+ * @returns `true` when the World Bank evidence requirement is met or not applicable
+ */
+export function articlePolicyHasWorldBank(html, articleType, _analysisDir) {
+    if (!POLICY_SLUGS_REQUIRING_WORLD_BANK.has(articleType))
+        return true;
+    return hasWorldBankEvidence(html);
 }
 /**
  * Validate the quality of a generated article.

package/scripts/utils/validate-articles.js CHANGED Viewed

@@ -18,8 +18,8 @@
  */
 import fs from 'node:fs';
 import path from 'node:path';
-import { NEWS_DIR, ARTICLE_FILENAME_PATTERN } from '../constants/config.js';
-import { validateArticleContent } from './content-validator.js';
+import { NEWS_DIR, ARTICLE_FILENAME_PATTERN, PROJECT_ROOT } from '../constants/config.js';
+import { validateArticleContent, articlePolicyHasWorldBank, hasWorldBankEvidence, } from './content-validator.js';
 import { scoreArticleQuality } from './article-quality-scorer.js';
 // ─── CLI argument parsing ─────────────────────────────────────────────────────
 const args = process.argv.slice(2);
@@ -63,6 +63,110 @@ function slugToArticleType(slug) {
     return mapping[slug] ?? slug;
 }
 // ─── Main validation logic ────────────────────────────────────────────────────
+/**
+ * For policy article types, verify World Bank evidence in either the article
+ * body OR any `.md` file under the article's `analysis/daily/{date}/{slug}*`
+ * directory. Non-policy article types are always considered satisfied.
+ *
+ * @param html - Full HTML of the article being validated
+ * @param articleType - Article category slug (e.g. `"committee-reports"`)
+ * @param date - Article publication date (`YYYY-MM-DD`)
+ * @param slug - Article slug used to locate the matching analysis directory
+ * @returns Warning string when the gate fails, or `null` when satisfied.
+ */
+function checkWorldBankEvidence(html, articleType, date, slug) {
+    // Satisfied when the article type is non-policy OR the article body itself carries World Bank evidence.
+    if (articlePolicyHasWorldBank(html, articleType))
+        return null;
+    // Sweep sibling analysis directories: analysis/daily/{date}/{slug}*
+    const analysisRoot = path.join(PROJECT_ROOT, 'analysis', 'daily', date);
+    if (!fs.existsSync(analysisRoot)) {
+        return `Missing required World Bank economic context for "${articleType}" article; analysis directory ${analysisRoot} does not exist`;
+    }
+    const candidates = safeReaddir(analysisRoot).filter((entry) => entry === slug || entry.startsWith(`${slug}-`) || entry.startsWith(`${slug}_`));
+    for (const dirName of candidates) {
+        if (directoryContainsWorldBankFingerprint(path.join(analysisRoot, dirName))) {
+            return null;
+        }
+    }
+    return `Missing required World Bank economic context for "${articleType}" article; neither article body nor analysis files under ${analysisRoot} reference any World Bank indicator`;
+}
+/**
+ * List directory entries, returning `[]` on any error (tolerate missing paths).
+ *
+ * @param dir - Directory to list
+ * @returns Array of entry names or `[]` when the directory cannot be read
+ */
+function safeReaddir(dir) {
+    try {
+        return fs.readdirSync(dir);
+    }
+    catch {
+        return [];
+    }
+}
+/**
+ * Maximum recursion depth when searching an analysis directory for World Bank
+ * fingerprints. The starting directory is depth 0; the guard
+ * `depth > ANALYSIS_SEARCH_MAX_DEPTH` stops recursion once it would exceed
+ * this depth. With `ANALYSIS_SEARCH_MAX_DEPTH = 3` the scanner lists
+ * directories at depths 0, 1, 2 and 3 — enough to cover the expected layout
+ * `analysis/daily/{date}/{slug}/<subdir>/<file>.md` (where `<subdir>` is
+ * depth 1) with extra tolerance for deeper run artefacts. Trees deeper than
+ * this are truncated to guarantee bounded I/O during validator runs.
+ */
+const ANALYSIS_SEARCH_MAX_DEPTH = 3;
+/**
+ * Depth-limited recursive search for any World Bank fingerprint in `.md` files.
+ * Uses {@link hasWorldBankEvidence} so the gate enforces the same
+ * strong-phrase / word-bounded-indicator rule used on article bodies.
+ *
+ * @param dir - Directory to scan
+ * @param depth - Current recursion depth (callers should omit; max is
+ *   {@link ANALYSIS_SEARCH_MAX_DEPTH}, inclusive)
+ * @returns `true` when at least one `.md` file contains a World Bank fingerprint
+ */
+function directoryContainsWorldBankFingerprint(dir, depth = 0) {
+    if (depth > ANALYSIS_SEARCH_MAX_DEPTH)
+        return false;
+    let entries;
+    try {
+        entries = fs.readdirSync(dir, { withFileTypes: true });
+    }
+    catch {
+        return false;
+    }
+    for (const entry of entries) {
+        if (entryContainsWorldBankFingerprint(dir, entry, depth))
+            return true;
+    }
+    return false;
+}
+/**
+ * Test a single directory entry for World Bank fingerprints, recursing into
+ * subdirectories up to the shared depth cap.
+ *
+ * @param dir - Parent directory of `entry`
+ * @param entry - Directory entry to test
+ * @param depth - Current recursion depth of the caller
+ * @returns `true` when this entry (or any descendant) matches a fingerprint
+ */
+function entryContainsWorldBankFingerprint(dir, entry, depth) {
+    const full = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+        return directoryContainsWorldBankFingerprint(full, depth + 1);
+    }
+    if (!entry.isFile() || !entry.name.endsWith('.md'))
+        return false;
+    let content;
+    try {
+        content = fs.readFileSync(full, 'utf-8');
+    }
+    catch {
+        return false;
+    }
+    return hasWorldBankEvidence(content);
+}
 /**
  * Validate a single article file and return a summary.
  *
@@ -80,6 +184,11 @@ function validateSingleFile(filename) {
     const html = fs.readFileSync(filePath, 'utf-8');
     const articleType = slugToArticleType(slug);
     const result = validateArticleContent(html, lang, articleType);
+    // World Bank gate — extend search to linked analysis markdown files
+    const wbWarning = checkWorldBankEvidence(html, articleType, date, slug);
+    if (wbWarning) {
+        result.warnings.push(wbWarning);
+    }
     const summary = {
         filename,
         lang,