npm - promptup-plugin - Versions diffs - 0.1.1 - Mend

promptup-plugin 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/LICENSE +21 -0
package/README.md +78 -0
package/bin/install.cjs +306 -0
package/bin/promptup-plugin +8 -0
package/dist/config.d.ts +40 -0
package/dist/config.js +123 -0
package/dist/db.d.ts +35 -0
package/dist/db.js +327 -0
package/dist/decision-detector.d.ts +11 -0
package/dist/decision-detector.js +47 -0
package/dist/evaluator.d.ts +10 -0
package/dist/evaluator.js +844 -0
package/dist/git-activity-extractor.d.ts +35 -0
package/dist/git-activity-extractor.js +167 -0
package/dist/index.d.ts +12 -0
package/dist/index.js +54 -0
package/dist/pr-report-generator.d.ts +20 -0
package/dist/pr-report-generator.js +421 -0
package/dist/shared/decision-classifier.d.ts +60 -0
package/dist/shared/decision-classifier.js +385 -0
package/dist/shared/decision-score.d.ts +7 -0
package/dist/shared/decision-score.js +31 -0
package/dist/shared/dimensions.d.ts +43 -0
package/dist/shared/dimensions.js +361 -0
package/dist/shared/scoring.d.ts +89 -0
package/dist/shared/scoring.js +161 -0
package/dist/shared/types.d.ts +108 -0
package/dist/shared/types.js +9 -0
package/dist/tools.d.ts +30 -0
package/dist/tools.js +456 -0
package/dist/transcript-parser.d.ts +36 -0
package/dist/transcript-parser.js +201 -0
package/hooks/auto-eval.sh +44 -0
package/hooks/check-update.sh +26 -0
package/hooks/debug-hook.sh +3 -0
package/hooks/hooks.json +36 -0
package/hooks/render-eval.sh +137 -0
package/package.json +60 -0
package/skills/eval/SKILL.md +12 -0
package/skills/pr-report/SKILL.md +37 -0
package/skills/status/SKILL.md +28 -0
package/statusline.sh +46 -0

package/dist/shared/dimensions.js ADDED Viewed

@@ -0,0 +1,361 @@
+/**
+ * Rubric definitions for PromptUp evaluation: 6 base dimensions, 5 domain dimensions, and preset weight profiles.
+ * Sourced from the PromptUp Evaluator rubric v0.1.
+ *
+ * STANDALONE copy — no imports from @promptup/shared.
+ */
+/**
+ * Keys of the six base rubric dimensions.
+ * Every key listed here has a matching entry in BASE_DIMENSIONS and in
+ * DEFAULT_BASE_DIMENSIONS.
+ */
+export const BASE_DIMENSION_KEYS = [
+    'task_decomposition',
+    'prompt_specificity',
+    'output_validation',
+    'iteration_quality',
+    'strategic_tool_usage',
+    'context_management',
+];
+/**
+ * Rubric definition for each of the six base dimensions, keyed by dimension key.
+ *
+ * Every entry has the same shape:
+ *   key              — repeats the property name the entry is stored under
+ *   label            — human-readable title
+ *   description      — one-sentence summary of what the dimension measures
+ *   scoring_guidance — instruction text describing how to score 0-100
+ *   signals          — questions to look for when scoring
+ *   ranges           — five contiguous bands (0-20, 21-40, 41-60, 61-80, 81-100),
+ *                      each with a description of that level
+ */
+export const BASE_DIMENSIONS = {
+    task_decomposition: {
+        key: 'task_decomposition',
+        label: 'Task Decomposition',
+        description: 'How well the developer breaks down complex problems before prompting.',
+        scoring_guidance: 'Score 0-100 based on how effectively the developer decomposes complex tasks into manageable sub-tasks with clear sequencing and dependency management.',
+        signals: [
+            'One thing at a time vs everything at once?',
+            'References prior step outputs?',
+            'Visible plan or improvising?',
+        ],
+        ranges: [
+            { min: 0, max: 20, description: 'Dumps entire complex task in one prompt, no structure' },
+            { min: 21, max: 40, description: 'Some structure but mixes multiple concerns per prompt' },
+            { min: 41, max: 60, description: 'Logical steps but suboptimal sequencing' },
+            { min: 61, max: 80, description: 'Clear decomposition with logical sequencing and dependencies' },
+            { min: 81, max: 100, description: 'Expert — optimal subtask order, explicit dependency management' },
+        ],
+    },
+    prompt_specificity: {
+        key: 'prompt_specificity',
+        label: 'Prompt Specificity',
+        description: 'How precise and well-structured the prompts are.',
+        scoring_guidance: 'Score 0-100 based on information density, constraints, examples, output format requirements, and edge case coverage.',
+        signals: [
+            'Information density (long != good, dense = good)',
+            'Output format requirements?',
+            'Few-shot examples?',
+            'Constraints (what NOT to do)?',
+        ],
+        ranges: [
+            { min: 0, max: 20, description: 'Vague/ambiguous ("make it better", "fix this")' },
+            { min: 21, max: 40, description: 'Some specificity, missing key constraints or context' },
+            { min: 41, max: 60, description: 'Adequate, gets reasonable results but room for misinterpretation' },
+            { min: 61, max: 80, description: 'Well-structured with clear constraints, examples, or format requirements' },
+            {
+                min: 81,
+                max: 100,
+                description: 'Expert — role setting, constraints, examples, output format, edge cases, success criteria',
+            },
+        ],
+    },
+    output_validation: {
+        key: 'output_validation',
+        label: 'Output Validation',
+        description: 'Does the developer critically evaluate AI responses or accept blindly?',
+        scoring_guidance: 'Score 0-100 based on how thoroughly the developer validates, questions, and tests AI-generated outputs.',
+        signals: [
+            'Says "that\'s wrong" or "check that"?',
+            'Tests code before accepting?',
+            'Catches hallucinations?',
+            'Asks for sources?',
+            'Notices logical inconsistencies?',
+        ],
+        ranges: [
+            { min: 0, max: 20, description: 'Accepts every response without question' },
+            { min: 21, max: 40, description: 'Occasionally pushes back on obvious errors only' },
+            { min: 41, max: 60, description: 'Checks key claims/results, catches some errors' },
+            {
+                min: 61,
+                max: 80,
+                description: 'Systematic — tests outputs, cross-references, identifies hallucinations',
+            },
+            {
+                min: 81,
+                max: 100,
+                description: 'Expert — challenges assumptions, tests edge cases, verifies against external sources',
+            },
+        ],
+    },
+    iteration_quality: {
+        key: 'iteration_quality',
+        label: 'Iteration Quality',
+        description: 'When iterating, does each iteration meaningfully improve on the previous?',
+        scoring_guidance: 'Score 0-100 based on whether each follow-up prompt builds purposefully on previous responses and converges efficiently.',
+        signals: [
+            'Clear purpose per follow-up?',
+            'Converging or going in circles?',
+            'References specific parts of previous responses?',
+            'Pivots when approach fails?',
+        ],
+        ranges: [
+            { min: 0, max: 20, description: 'Repeats similar prompts hoping for different results' },
+            { min: 21, max: 40, description: 'Changes are random/unfocused' },
+            { min: 41, max: 60, description: 'Shows direction but includes unnecessary repetition' },
+            { min: 61, max: 80, description: 'Focused iterations addressing specific weaknesses' },
+            {
+                min: 81,
+                max: 100,
+                description: 'Expert — each prompt builds precisely on previous, converges efficiently',
+            },
+        ],
+    },
+    strategic_tool_usage: {
+        key: 'strategic_tool_usage',
+        label: 'Strategic Tool Usage',
+        description: 'Intelligent choices about which AI capabilities to use.',
+        scoring_guidance: 'Score 0-100 based on deliberate selection of models, tools, and capabilities matched to task requirements.',
+        signals: [
+            'Switches models/tools when appropriate?',
+            'Uses code execution for verification?',
+            'Leverages search for factual accuracy?',
+            'Understands capability differences?',
+        ],
+        ranges: [
+            { min: 0, max: 20, description: 'One model/approach for everything' },
+            { min: 21, max: 40, description: 'Aware of different capabilities but doesn\'t leverage them' },
+            { min: 41, max: 60, description: 'Some tool/model choices but not always optimal' },
+            { min: 61, max: 80, description: 'Deliberately selects models/tools based on task requirements' },
+            {
+                min: 81,
+                max: 100,
+                description: 'Expert — switches models mid-workflow, uses appropriate tools at right moments',
+            },
+        ],
+    },
+    context_management: {
+        key: 'context_management',
+        label: 'Context Management',
+        description: 'How well the developer manages conversation context and information flow.',
+        scoring_guidance: 'Score 0-100 based on deliberate context structuring, summarization, and information flow management.',
+        signals: [
+            'Provides context at start?',
+            'References previous parts?',
+            'Summarises/checkpoints progress?',
+            'Re-provides context when conversation gets long?',
+        ],
+        ranges: [
+            {
+                min: 0,
+                max: 20,
+                description: 'No context management, each prompt independent, repeats information',
+            },
+            { min: 21, max: 40, description: 'Some reference to previous context but inconsistent' },
+            {
+                min: 41,
+                max: 60,
+                description: "Maintains thread but doesn't explicitly manage context window",
+            },
+            {
+                min: 61,
+                max: 80,
+                description: 'Deliberately structures info flow — summarises, references, builds on prior exchanges',
+            },
+            {
+                min: 81,
+                max: 100,
+                description: 'Expert — relevant background upfront, summarises intermediate results, manages long conversations',
+            },
+        ],
+    },
+};
+// ─── Domain Dimensions (5 depth-of-understanding dimensions) ──────────
+/**
+ * Keys of the five domain dimensions.
+ * Every key listed here has a matching entry in DOMAIN_DIMENSIONS.
+ */
+export const DOMAIN_DIMENSION_KEYS = [
+    'architectural_awareness',
+    'error_anticipation',
+    'technical_vocabulary',
+    'dependency_reasoning',
+    'tradeoff_articulation',
+];
+/**
+ * Rubric definition for each of the five domain dimensions, keyed by dimension key.
+ *
+ * Every entry carries key, label, description, scoring_guidance, four signal
+ * questions, and five contiguous score bands (0-20, 21-40, 41-60, 61-80, 81-100)
+ * with a description of each level.
+ */
+export const DOMAIN_DIMENSIONS = {
+    architectural_awareness: {
+        key: 'architectural_awareness',
+        label: 'Architectural Awareness',
+        description: 'Understanding of system architecture, patterns, and design trade-offs.',
+        scoring_guidance: 'Score 0-100 based on how well the developer demonstrates awareness of system-level architecture, design patterns, and structural implications of their changes.',
+        signals: [
+            'References architecture when making decisions?',
+            'Considers cross-cutting concerns?',
+            'Understands component boundaries?',
+            'Anticipates scaling implications?',
+        ],
+        ranges: [
+            { min: 0, max: 20, description: 'No awareness of system structure, treats code in isolation' },
+            { min: 21, max: 40, description: 'Aware of immediate file/module scope only' },
+            { min: 41, max: 60, description: 'Understands local architecture but misses wider implications' },
+            { min: 61, max: 80, description: 'Considers system-level design patterns and trade-offs' },
+            { min: 81, max: 100, description: 'Expert — reasons about architecture holistically, anticipates cascading effects' },
+        ],
+    },
+    error_anticipation: {
+        key: 'error_anticipation',
+        label: 'Error Anticipation',
+        description: 'Proactively considering failure modes, edge cases, and error handling.',
+        scoring_guidance: 'Score 0-100 based on how proactively the developer considers failure modes, edge cases, and error recovery strategies.',
+        signals: [
+            'Asks about edge cases proactively?',
+            'Considers failure scenarios?',
+            'Plans error recovery?',
+            'Tests unhappy paths?',
+        ],
+        ranges: [
+            { min: 0, max: 20, description: 'No consideration of failure modes or edge cases' },
+            { min: 21, max: 40, description: 'Handles obvious errors only when prompted' },
+            { min: 41, max: 60, description: 'Some proactive error thinking but gaps in coverage' },
+            { min: 61, max: 80, description: 'Systematically considers failure modes and edge cases' },
+            { min: 81, max: 100, description: 'Expert — anticipates subtle failures, designs for resilience' },
+        ],
+    },
+    technical_vocabulary: {
+        key: 'technical_vocabulary',
+        label: 'Technical Vocabulary',
+        description: 'Precision of technical language and domain-specific terminology.',
+        scoring_guidance: 'Score 0-100 based on the precision and appropriateness of technical language, correct use of domain terminology, and communication clarity.',
+        signals: [
+            'Uses correct technical terms?',
+            'Distinguishes similar concepts precisely?',
+            'Communicates intent clearly to AI?',
+            'Names things well in code?',
+        ],
+        ranges: [
+            { min: 0, max: 20, description: 'Vague or incorrect terminology, imprecise communication' },
+            { min: 21, max: 40, description: 'Basic terms used correctly but lacks precision' },
+            { min: 41, max: 60, description: 'Generally correct terminology with occasional imprecision' },
+            { min: 61, max: 80, description: 'Precise technical language, clear domain-specific communication' },
+            { min: 81, max: 100, description: 'Expert — nuanced vocabulary, distinguishes subtle concept differences' },
+        ],
+    },
+    dependency_reasoning: {
+        key: 'dependency_reasoning',
+        label: 'Dependency Reasoning',
+        description: 'Understanding how components interact, import chains, and side effects.',
+        scoring_guidance: 'Score 0-100 based on awareness of dependency graphs, understanding of side effects, and ability to trace interaction chains.',
+        signals: [
+            'Understands import/dependency chains?',
+            'Considers side effects of changes?',
+            'Traces data flow across boundaries?',
+            'Identifies coupling risks?',
+        ],
+        ranges: [
+            { min: 0, max: 20, description: 'No awareness of dependencies or side effects' },
+            { min: 21, max: 40, description: 'Understands direct dependencies only' },
+            { min: 41, max: 60, description: 'Traces some dependency chains but misses indirect effects' },
+            { min: 61, max: 80, description: 'Good understanding of interaction patterns and side effects' },
+            { min: 81, max: 100, description: 'Expert — traces full dependency graphs, predicts cascading side effects' },
+        ],
+    },
+    tradeoff_articulation: {
+        key: 'tradeoff_articulation',
+        label: 'Tradeoff Articulation',
+        description: 'Ability to weigh and explain trade-offs between different approaches.',
+        scoring_guidance: 'Score 0-100 based on ability to identify, compare, and clearly articulate trade-offs when choosing between approaches.',
+        signals: [
+            'Compares alternatives explicitly?',
+            'Weighs pros/cons of approaches?',
+            'Explains reasoning for choices?',
+            'Considers non-functional requirements?',
+        ],
+        ranges: [
+            { min: 0, max: 20, description: 'Accepts first solution without considering alternatives' },
+            { min: 21, max: 40, description: 'Occasionally mentions alternatives but no structured comparison' },
+            { min: 41, max: 60, description: 'Identifies trade-offs but analysis lacks depth' },
+            { min: 61, max: 80, description: 'Structured comparison of approaches with clear reasoning' },
+            { min: 81, max: 100, description: 'Expert — multi-dimensional trade-off analysis including performance, maintainability, and business impact' },
+        ],
+    },
+};
+/** All 11 dimension keys (6 base + 5 domain) */
+export const ALL_DIMENSION_KEYS = [...BASE_DIMENSION_KEYS, ...DOMAIN_DIMENSION_KEYS];
+/**
+ * Default base dimension configuration for a new rubric.
+ * All six base dimensions start enabled with an equal weight of 1.0.
+ */
+export const DEFAULT_BASE_DIMENSIONS = {
+    task_decomposition: { weight: 1.0, enabled: true },
+    prompt_specificity: { weight: 1.0, enabled: true },
+    output_validation: { weight: 1.0, enabled: true },
+    iteration_quality: { weight: 1.0, enabled: true },
+    strategic_tool_usage: { weight: 1.0, enabled: true },
+    context_management: { weight: 1.0, enabled: true },
+};
+// ─── Preset Weight Profiles ──────────────────────────────────────────────
+/**
+ * Keys of the preset weight profiles.
+ * Every key listed here has a matching entry in WEIGHT_PROFILES.
+ */
+export const WEIGHT_PROFILE_KEYS = [
+    'balanced',
+    'greenfield',
+    'bugfix',
+    'refactor',
+    'security_review',
+];
+/**
+ * Preset weight profiles, keyed by profile key.
+ *
+ * Each profile has a key (repeating its property name), a label, a description,
+ * and a `weights` map covering the six base dimensions only. Within every
+ * profile the six weights sum to 1.
+ */
+export const WEIGHT_PROFILES = {
+    balanced: {
+        key: 'balanced',
+        label: 'Balanced',
+        description: 'Equal weight across all dimensions. Good default for general development tasks.',
+        weights: {
+            task_decomposition: 0.167,
+            prompt_specificity: 0.167,
+            output_validation: 0.167,
+            iteration_quality: 0.167,
+            strategic_tool_usage: 0.167,
+            // 0.165 rather than 0.167 so the six weights total exactly 1.000
+            context_management: 0.165,
+        },
+    },
+    greenfield: {
+        key: 'greenfield',
+        label: 'Greenfield',
+        description: 'Emphasizes task decomposition and prompt clarity for new feature development.',
+        weights: {
+            task_decomposition: 0.25,
+            prompt_specificity: 0.20,
+            output_validation: 0.15,
+            iteration_quality: 0.15,
+            strategic_tool_usage: 0.15,
+            context_management: 0.10,
+        },
+    },
+    bugfix: {
+        key: 'bugfix',
+        label: 'Bugfix',
+        description: 'Emphasizes output validation and iteration for debugging and fixing issues.',
+        weights: {
+            task_decomposition: 0.10,
+            prompt_specificity: 0.15,
+            output_validation: 0.30,
+            iteration_quality: 0.20,
+            strategic_tool_usage: 0.15,
+            context_management: 0.10,
+        },
+    },
+    refactor: {
+        key: 'refactor',
+        label: 'Refactor',
+        description: 'Emphasizes decomposition, validation, and context management for code restructuring.',
+        weights: {
+            task_decomposition: 0.20,
+            prompt_specificity: 0.15,
+            output_validation: 0.20,
+            iteration_quality: 0.15,
+            strategic_tool_usage: 0.10,
+            context_management: 0.20,
+        },
+    },
+    security_review: {
+        key: 'security_review',
+        label: 'Security Review',
+        description: 'Heavily weights output validation for security-focused code review tasks.',
+        weights: {
+            task_decomposition: 0.10,
+            prompt_specificity: 0.15,
+            output_validation: 0.35,
+            iteration_quality: 0.15,
+            strategic_tool_usage: 0.10,
+            context_management: 0.15,
+        },
+    },
+};
+/** Look up a weight profile by key. Returns undefined if not found. */
+export function getWeightProfile(key) {
+    return WEIGHT_PROFILES[key];
+}

package/dist/shared/scoring.d.ts ADDED Viewed

@@ -0,0 +1,89 @@
+/**
+ * Multi-layer composite scoring for PromptUp evaluations.
+ *
+ * Score hierarchy:
+ *   base composite       — weighted avg of 6 base dimensions
+ *   domain composite     — weighted avg of 5 domain dimensions
+ *   tech composite       — avg across roadmap-level tech expertise scores
+ *   overall composite    — blend of base + domain (60/40)
+ *   grand composite      — blend of overall + tech (70/30)
+ *
+ * STANDALONE copy — no imports from @promptup/shared.
+ */
+/**
+ * The set of composite scores for one evaluation.
+ * Every field except `composite_score` may be null.
+ * NOTE(review): the mapping of fields to the compute* functions is inferred
+ * from their names — confirm against the code that fills this object.
+ */
+export interface CompositeScores {
+    /** Presumably the base composite (weighted avg of base dimensions) — TODO confirm. */
+    composite_score: number;
+    /** Presumably the domain composite; null when not available — TODO confirm. */
+    domain_composite_score: number | null;
+    /** Presumably the tech composite; null when not available — TODO confirm. */
+    tech_composite_score: number | null;
+    /** Presumably the base + domain blend; null when not available — TODO confirm. */
+    overall_composite_score: number | null;
+    /** Presumably the overall + tech blend; null when not available — TODO confirm. */
+    grand_composite_score: number | null;
+}
+/** One dimension's score with an optional weight, as accepted by computeDomainComposite. */
+export interface CompositeDimensionInput {
+    /** The dimension's score. */
+    score: number;
+    /** Relative weight of this dimension; optional. */
+    weight?: number;
+}
+/** One tech expertise score, as accepted by computeTechComposite. */
+export interface TechExpertiseScoreInput {
+    /** The expertise score. */
+    score: number;
+}
+/** A risk flag raised for a single dimension by the computeRiskFlags* functions. */
+export interface RiskFlag {
+    /** Flag name; the declared flags are "Hidden Weakness", "Extreme Imbalance" and "Volatile". */
+    type: string;
+    /** Name of the dimension the flag is about. */
+    dimension: string;
+    /** Score of that dimension. */
+    score: number;
+    /** How serious the flag is. */
+    severity: 'warning' | 'critical';
+    /** Human-readable explanation of the flag. */
+    message: string;
+}
+/** A dimension name paired with its score; the input unit for risk-flag computation. */
+export interface RiskFlagDimensionScore {
+    /** Name of the dimension. */
+    dimension: string;
+    /** Score of the dimension. */
+    score: number;
+}
+/** Weight of base composite in the overall composite blend (pairs with OVERALL_DOMAIN_WEIGHT; 0.6 + 0.4 = 1) */
+export declare const OVERALL_BASE_WEIGHT = 0.6;
+/** Weight of domain composite in the overall composite blend */
+export declare const OVERALL_DOMAIN_WEIGHT = 0.4;
+/** Weight of overall composite in the grand composite blend (pairs with GRAND_TECH_WEIGHT; 0.7 + 0.3 = 1) */
+export declare const GRAND_OVERALL_WEIGHT = 0.7;
+/** Weight of tech composite in the grand composite blend */
+export declare const GRAND_TECH_WEIGHT = 0.3;
+/**
+ * Restrict `value` to the range between `min` and `max`.
+ * Note the argument order: the two bounds come first, the value last.
+ */
+export declare function clamp(min: number, max: number, value: number): number;
+/**
+ * Compute weighted composite score from dimension scores + weights.
+ * Dimensions with weight 0 are excluded from calculation.
+ *
+ * @param dimensions - Score/weight pairs to average.
+ * @returns The weighted average of the supplied scores.
+ */
+export declare function computeCompositeScore(dimensions: {
+    score: number;
+    weight: number;
+}[]): number;
+/**
+ * Compute domain composite from 5 domain dimension scores.
+ * Returns null if no scores provided.
+ *
+ * @param domainScores - Map of dimension key to its score and optional weight.
+ * @returns The domain composite, or null when it cannot be computed.
+ */
+export declare function computeDomainComposite(domainScores: Record<string, CompositeDimensionInput>): number | null;
+/**
+ * Compute tech composite from roadmap-level expertise scores.
+ * Returns null if no tech expertise entries.
+ *
+ * @param techExpertise - Expertise score entries to average.
+ * @returns The tech composite, or null for an empty list.
+ */
+export declare function computeTechComposite(techExpertise: TechExpertiseScoreInput[]): number | null;
+/**
+ * Compute overall composite = blend of base + domain composites.
+ * Returns null if domain composite is null.
+ *
+ * @param baseComposite - Base composite score, weighted by OVERALL_BASE_WEIGHT.
+ * @param domainComposite - Domain composite score, weighted by OVERALL_DOMAIN_WEIGHT, or null.
+ * @returns The blended score, or null when there is no domain composite.
+ */
+export declare function computeOverallComposite(baseComposite: number, domainComposite: number | null): number | null;
+/**
+ * Compute grand composite = blend of overall + tech composites.
+ * Returns null if either overall or tech composite is null.
+ *
+ * @param overallComposite - Overall composite score, weighted by GRAND_OVERALL_WEIGHT, or null.
+ * @param techComposite - Tech composite score, weighted by GRAND_TECH_WEIGHT, or null.
+ * @returns The blended score, or null when either input is null.
+ */
+export declare function computeGrandComposite(overallComposite: number | null, techComposite: number | null): number | null;
+/**
+ * Compute risk flags from dimension scores and composite score.
+ *
+ * Flags:
+ * - "Hidden Weakness": any dimension < 30 while composite > 60 (critical)
+ * - "Extreme Imbalance": any dimension > 85 while another < 35 (warning)
+ *
+ * @param dimensionScores - Per-dimension scores to inspect.
+ * @param compositeScore - Composite score the dimensions are compared against.
+ * @returns The flags raised; an empty array when none apply.
+ */
+export declare function computeRiskFlags(dimensionScores: RiskFlagDimensionScore[], compositeScore: number): RiskFlag[];
+/**
+ * Compute risk flags including historical comparison.
+ *
+ * In addition to the base flags from computeRiskFlags, adds:
+ * - "Volatile": delta > 40 between consecutive checkpoints on any dimension (warning)
+ *
+ * @param current - Dimension scores at the current checkpoint.
+ * @param previous - Dimension scores at the previous checkpoint, or null when there is none.
+ * @param compositeScore - Optional composite score for the current checkpoint.
+ * @returns The base flags followed by any "Volatile" flags.
+ */
+export declare function computeRiskFlagsWithHistory(current: RiskFlagDimensionScore[], previous: RiskFlagDimensionScore[] | null, compositeScore?: number): RiskFlag[];

package/dist/shared/scoring.js ADDED Viewed

@@ -0,0 +1,161 @@
+/**
+ * Multi-layer composite scoring for PromptUp evaluations.
+ *
+ * Score hierarchy:
+ *   base composite       — weighted avg of 6 base dimensions
+ *   domain composite     — weighted avg of 5 domain dimensions
+ *   tech composite       — avg across roadmap-level tech expertise scores
+ *   overall composite    — blend of base + domain (60/40)
+ *   grand composite      — blend of overall + tech (70/30)
+ *
+ * STANDALONE copy — no imports from @promptup/shared.
+ */
+// ─── Constants ──────────────────────────────────────────────────────────
+/** Weight of base composite in the overall composite blend (with OVERALL_DOMAIN_WEIGHT: 0.6 + 0.4 = 1) */
+export const OVERALL_BASE_WEIGHT = 0.6;
+/** Weight of domain composite in the overall composite blend */
+export const OVERALL_DOMAIN_WEIGHT = 0.4;
+/** Weight of overall composite in the grand composite blend (with GRAND_TECH_WEIGHT: 0.7 + 0.3 = 1) */
+export const GRAND_OVERALL_WEIGHT = 0.7;
+/** Weight of tech composite in the grand composite blend */
+export const GRAND_TECH_WEIGHT = 0.3;
+// ─── Helpers ─────────────────────────────────────────────────────────────
+export function clamp(min, max, value) {
+    return Math.max(min, Math.min(max, value));
+}
+// ─── Functions ──────────────────────────────────────────────────────────
+/**
+ * Compute weighted composite score from dimension scores + weights.
+ * Dimensions with weight 0 are excluded from calculation.
+ */
+export function computeCompositeScore(dimensions) {
+    const totalWeight = dimensions.reduce((sum, d) => (d.weight > 0 ? sum + d.weight : sum), 0);
+    if (totalWeight === 0)
+        return 0;
+    const weightedSum = dimensions.reduce((sum, d) => (d.weight > 0 ? sum + d.score * d.weight : sum), 0);
+    return Math.round((weightedSum / totalWeight) * 100) / 100;
+}
+/**
+ * Compute domain composite from 5 domain dimension scores.
+ * Returns null if no scores provided.
+ */
+export function computeDomainComposite(domainScores) {
+    const entries = Object.values(domainScores);
+    if (entries.length === 0)
+        return null;
+    const totalWeight = entries.reduce((sum, d) => sum + (d.weight ?? 1), 0);
+    if (totalWeight === 0)
+        return null;
+    const weighted = entries.reduce((sum, d) => sum + d.score * (d.weight ?? 1), 0);
+    return Math.round((weighted / totalWeight) * 10) / 10;
+}
+/**
+ * Compute tech composite from roadmap-level expertise scores.
+ * Returns null if no tech expertise entries.
+ */
+export function computeTechComposite(techExpertise) {
+    if (techExpertise.length === 0)
+        return null;
+    const sum = techExpertise.reduce((s, te) => s + te.score, 0);
+    return Math.round((sum / techExpertise.length) * 10) / 10;
+}
+/**
+ * Compute overall composite = blend of base + domain composites.
+ * Returns null if domain composite is null.
+ */
+export function computeOverallComposite(baseComposite, domainComposite) {
+    if (domainComposite === null)
+        return null;
+    const score = baseComposite * OVERALL_BASE_WEIGHT +
+        domainComposite * OVERALL_DOMAIN_WEIGHT;
+    return Math.round(score * 10) / 10;
+}
+/**
+ * Compute grand composite = blend of overall + tech composites.
+ * Returns null if either overall or tech composite is null.
+ */
+export function computeGrandComposite(overallComposite, techComposite) {
+    if (overallComposite === null || techComposite === null)
+        return null;
+    const score = overallComposite * GRAND_OVERALL_WEIGHT +
+        techComposite * GRAND_TECH_WEIGHT;
+    return Math.round(score * 10) / 10;
+}
+// ─── Risk Flags ──────────────────────────────────────────────────────────
+/**
+ * Compute risk flags from dimension scores and composite score.
+ *
+ * Flags:
+ * - "Hidden Weakness": any dimension < 30 while composite > 60 (critical)
+ * - "Extreme Imbalance": any dimension > 85 while another < 35 (warning)
+ */
+export function computeRiskFlags(dimensionScores, compositeScore) {
+    const flags = [];
+    // Hidden Weakness: any dim < 30 AND composite > 60
+    if (compositeScore > 60) {
+        for (const ds of dimensionScores) {
+            if (ds.score < 30) {
+                flags.push({
+                    type: 'Hidden Weakness',
+                    dimension: ds.dimension,
+                    score: ds.score,
+                    severity: 'critical',
+                    message: `${ds.dimension} scored ${ds.score} despite composite score of ${compositeScore}. This weakness may be masked by strong performance in other areas.`,
+                });
+            }
+        }
+    }
+    // Extreme Imbalance: any dim > 85 AND any other dim < 35
+    const highDims = dimensionScores.filter((d) => d.score > 85);
+    const lowDims = dimensionScores.filter((d) => d.score < 35);
+    if (highDims.length > 0 && lowDims.length > 0) {
+        for (const low of lowDims) {
+            for (const high of highDims) {
+                if (low.dimension !== high.dimension) {
+                    flags.push({
+                        type: 'Extreme Imbalance',
+                        dimension: low.dimension,
+                        score: low.score,
+                        severity: 'warning',
+                        message: `${low.dimension} (${low.score}) is critically low while ${high.dimension} (${high.score}) is very high. Consider balancing effort across dimensions.`,
+                    });
+                    break; // One flag per low dimension is sufficient
+                }
+            }
+        }
+    }
+    return flags;
+}
+/**
+ * Compute risk flags including historical comparison.
+ *
+ * In addition to the base flags from computeRiskFlags, adds:
+ * - "Volatile": delta > 40 between consecutive checkpoints on any dimension (warning)
+ */
+export function computeRiskFlagsWithHistory(current, previous, compositeScore = 0) {
+    // Compute composite from current if not provided
+    const composite = compositeScore > 0
+        ? compositeScore
+        : current.reduce((sum, d) => sum + d.score, 0) / (current.length || 1);
+    const flags = computeRiskFlags(current, composite);
+    // Volatile: delta > 40 between consecutive checkpoints
+    if (previous) {
+        const prevMap = new Map(previous.map((d) => [d.dimension, d.score]));
+        for (const curr of current) {
+            const prevScore = prevMap.get(curr.dimension);
+            if (prevScore !== undefined) {
+                const delta = Math.abs(curr.score - prevScore);
+                if (delta > 40) {
+                    flags.push({
+                        type: 'Volatile',
+                        dimension: curr.dimension,
+                        score: curr.score,
+                        severity: 'warning',
+                        message: `${curr.dimension} changed by ${delta} points (${prevScore} -> ${curr.score}) between consecutive checkpoints.`,
+                    });
+                }
+            }
+        }
+    }
+    return flags;
+}