npm - chekk - Versions diffs - 0.3.0 → 0.4.0 - Mend

chekk 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/src/index.js CHANGED Viewed

@@ -11,6 +11,14 @@ import { computeDebugCycles } from './metrics/debug-cycles.js';
 import { computeAILeverage } from './metrics/ai-leverage.js';
 import { computeSessionStructure } from './metrics/session-structure.js';
 import { computeOverallScore } from './scorer.js';
+import {
+  computeSignatures,
+  computeWatchPoints,
+  computeTrajectory,
+  computeProjectComplexity,
+  generateAssessment,
+  computeConfidence,
+} from './insights.js';
 import {
   displayHeader,
   displayScan,
@@ -147,6 +155,7 @@ export async function run(options = {}) {
     overall: result.overall,
     tier: result.tier,
     archetype: result.archetype.name,
+    scores: result.scores,
     date: new Date().toISOString(),
   });
   // Keep last 20 scans
@@ -158,12 +167,23 @@ export async function run(options = {}) {
     totalExchanges,
     projectCount: projects.length,
     dateRange: dateRangeFull,
+    dateRangeShort,
     tools: tools.map(t => t.tool),
   };
+  // ── Step 3b: Compute insights ──
+  const signatures = computeSignatures(allSessions, metrics);
+  const watchPoints = computeWatchPoints(allSessions, metrics);
+  const trajectory = computeTrajectory(allSessions);
+  const projectComplexity = computeProjectComplexity(allSessions);
+  const assessment = generateAssessment(result, metrics, signatures, watchPoints);
+  const confidence = computeConfidence(sessionStats);
+  const insights = { signatures, watchPoints, trajectory, projectComplexity, assessment, confidence };
   // ── JSON output ──
   if (options.json) {
-    console.log(JSON.stringify({ metrics, result, sessionStats, perToolScores, scoreDelta }, null, 2));
+    console.log(JSON.stringify({ metrics, result, sessionStats, perToolScores, scoreDelta, insights }, null, 2));
     return;
   }
@@ -180,10 +200,11 @@ export async function run(options = {}) {
   }
   // ── Step 5: Display results ──
+  const extra = { scoreDelta, perToolScores, insights, sessionStats };
   if (options.offline) {
-    displayOffline(result, metrics, { scoreDelta, perToolScores });
+    displayOffline(result, metrics, extra);
   } else {
-    displayFull(result, metrics, prose, { scoreDelta, perToolScores });
+    displayFull(result, metrics, prose, extra);
   }
   // ── Step 6: Verbose prompt (interactive) ──

package/src/insights.js ADDED Viewed

@@ -0,0 +1,503 @@
+/**
+ * Insights Engine
+ *
+ * Computes higher-order analysis from raw metrics and sessions:
+ * - Signatures: distinctive patterns that make an engineer unique
+ * - Watch Points: anti-patterns and areas for improvement
+ * - Trajectory: weekly score evolution over time
+ * - Project Complexity: classification of project sophistication
+ * - Assessment: narrative paragraph for the engineer's profile
+ * - Confidence: statistical confidence based on data volume
+ */
+import { computeDecomposition } from './metrics/decomposition.js';
+import { computeDebugCycles } from './metrics/debug-cycles.js';
+import { computeAILeverage } from './metrics/ai-leverage.js';
+import { computeSessionStructure } from './metrics/session-structure.js';
+import { computeOverallScore } from './scorer.js';
+// ── Benchmarks (early-stage estimates, refined as data grows) ──
+// Reference values that individual metrics are compared against when
+// building signature / watch-point messages. In this file the `*Ratio`
+// entries are interpolated as percentages (e.g. "benchmark: 18%") and
+// `avgPromptLength` is compared against a length reported in characters.
+// Only avgExchangesPerSession, avgPromptLength, highLevelRatio and
+// contextSetRatio are read in this file; the remaining entries are exported
+// for use elsewhere (NOTE(review): confirm consumers outside this module).
+export const BENCHMARKS = {
+  avgExchangesPerSession: 34.2,
+  avgPromptLength: 187,
+  avgTurnsToResolve: 3.8,
+  specificReportRatio: 62,
+  highLevelRatio: 18,
+  contextSetRatio: 35,
+  refinementRatio: 15,
+  reviewEndRatio: 28,
+};
+// ── Dimension score ranges (observed distribution) ──
+// Per-dimension { min, max } score bounds, keyed by the same dimension names
+// used on `metrics` / `result.scores`. Not referenced inside this file;
+// presumably consumed by display code — TODO confirm.
+export const DIM_RANGES = {
+  decomposition: { min: 15, max: 95 },
+  debugCycles: { min: 20, max: 98 },
+  aiLeverage: { min: 10, max: 92 },
+  sessionStructure: { min: 12, max: 88 },
+};
+// ══════════════════════════════════════════════
+// SIGNATURES — Distinctive patterns
+// ══════════════════════════════════════════════
+// Prompt classifiers used by computeSignatures. All are case-insensitive and
+// non-global, so `.test()` carries no `lastIndex` state between calls.
+
+// Any negation / exclusion wording ("don't", "never", "without", "skip", ...).
+const constraintPatterns = /\b(don'?t|do not|never|avoid|without|no |not |shouldn'?t|must not|skip|exclude)\b/i;
+// Anchored at the START of the prompt: the user asks for a review or plan
+// discussion before any code is written.
+const preflightPatterns = /^(before (we|you|i)|don'?t code|review (first|this|my|the plan)|let'?s (think|plan|discuss)|check my (approach|plan|thinking))/i;
+// Explicit request to write tests/specs before the implementation.
+const testFirstPatterns = /\b(write (the )?tests? (first|before)|test.?driven|TDD|spec first|start with (tests?|specs?))\b/i;
+// A prohibition word followed, later on the same line (`.` does not match
+// newlines), by an action verb — i.e. "don't ... add/create/use/...".
+const negativeConstraintPatterns = /\b(don'?t|do not|never|avoid|must not|shouldn'?t)\b.*\b(add|create|use|include|change|modify|touch|remove)\b/i;
+/**
+ * Detect distinctive positive prompting habits ("signatures").
+ *
+ * Scans every user prompt once to count pre-flight reviews, negative
+ * constraints, test-first requests and redirecting follow-ups, then combines
+ * those counts with already-computed metric details.
+ *
+ * @param {Array<object>} allSessions - Sessions; each is read for `exchanges`
+ *   (a missing array is treated as empty), whose items are read for `userPrompt`.
+ * @param {object} metrics - Metric results. Reads
+ *   `decomposition.details.avgExchangesPerSession`,
+ *   `debugCycles.details.vagueReports` / `.totalDebugSequences` and
+ *   `aiLeverage.details.highLevelRatio`.
+ * @returns {Array<{name: string, detail: string}>} At most 4 signatures, in
+ *   the fixed detection order used below.
+ */
+export function computeSignatures(allSessions, metrics) {
+  const signatures = [];
+  const d = metrics.decomposition.details;
+  const db = metrics.debugCycles.details;
+  const ai = metrics.aiLeverage.details;
+  // Wording that signals the user is redirecting or correcting the AI.
+  const redirectPattern = /\b(actually|wait|instead|change|no,?|not quite|modify|tweak)\b/i;
+  let totalPrompts = 0;
+  let constraintPrompts = 0;
+  let preflightSessions = 0;
+  let testFirstSessions = 0;
+  let modificationCount = 0;
+  let acceptCount = 0;
+  let sessionsWithExchanges = 0;
+  for (const session of allSessions) {
+    const exchanges = session.exchanges ?? [];
+    if (exchanges.length === 0) continue;
+    sessionsWithExchanges++;
+    // Check first prompt for preflight review
+    const firstPrompt = exchanges[0].userPrompt || '';
+    if (preflightPatterns.test(firstPrompt)) {
+      preflightSessions++;
+    }
+    let hasTestFirst = false;
+    for (let i = 0; i < exchanges.length; i++) {
+      const prompt = exchanges[i].userPrompt || '';
+      totalPrompts++;
+      if (constraintPatterns.test(prompt) && negativeConstraintPatterns.test(prompt)) {
+        constraintPrompts++;
+      }
+      if (testFirstPatterns.test(prompt)) {
+        hasTestFirst = true;
+      }
+      // Track modification vs acceptance; the opening prompt of a session is
+      // neither, since there is no AI output to react to yet.
+      if (i > 0) {
+        if (redirectPattern.test(prompt)) {
+          modificationCount++;
+        } else {
+          acceptCount++;
+        }
+      }
+    }
+    if (hasTestFirst) testFirstSessions++;
+  }
+  // Pre-flight reviews
+  const preflightRatio = sessionsWithExchanges > 0 ? preflightSessions / sessionsWithExchanges : 0;
+  if (preflightRatio > 0.15 && preflightSessions >= 3) {
+    signatures.push({
+      name: 'Pre-flight reviews',
+      detail: `You ask AI to review your plan before coding in ${Math.round(preflightRatio * 100)}% of sessions. Only 8% of engineers do this consistently. This correlates with fewer debug cycles.`,
+    });
+  }
+  // Constraint-first prompting
+  const constraintRatio = totalPrompts > 0 ? constraintPrompts / totalPrompts : 0;
+  if (constraintRatio > 0.1 && constraintPrompts >= 5) {
+    signatures.push({
+      name: 'Constraint-first prompting',
+      detail: `You specify what NOT to do in ${Math.round(constraintRatio * 100)}% of prompts. This is a hallmark of senior architectural thinking that prevents scope creep.`,
+    });
+  }
+  // Test-driven AI usage
+  const testFirstRatio = sessionsWithExchanges > 0 ? testFirstSessions / sessionsWithExchanges : 0;
+  if (testFirstRatio > 0.05 && testFirstSessions >= 2) {
+    signatures.push({
+      name: 'Test-driven AI usage',
+      detail: `You request tests before implementation in ${Math.round(testFirstRatio * 100)}% of sessions. Engineers who do this ship fewer bugs post-merge.`,
+    });
+  }
+  // Deep session marathons
+  if (d.avgExchangesPerSession > BENCHMARKS.avgExchangesPerSession * 2) {
+    signatures.push({
+      name: 'Marathon sessions',
+      detail: `Avg session depth of ${d.avgExchangesPerSession} exchanges is ${Math.round(d.avgExchangesPerSession / BENCHMARKS.avgExchangesPerSession)}x the benchmark (${BENCHMARKS.avgExchangesPerSession}). You sustain deep, focused work.`,
+    });
+  }
+  // Zero vague debugging
+  if (db.vagueReports === 0 && db.totalDebugSequences > 5) {
+    signatures.push({
+      name: 'Precision debugging',
+      detail: `Zero vague error reports across ${db.totalDebugSequences} debug sequences. Every bug report includes specific context. This is rare.`,
+    });
+  }
+  // High architectural ratio
+  if (ai.highLevelRatio > 30) {
+    signatures.push({
+      name: 'Strategic AI usage',
+      detail: `${ai.highLevelRatio}% of prompts are architectural or planning-level (benchmark: ${BENCHMARKS.highLevelRatio}%). You use AI as a thinking partner, not just a code generator.`,
+    });
+  }
+  // Critical reviewer
+  const totalFollowups = modificationCount + acceptCount;
+  const modRatio = totalFollowups > 0 ? modificationCount / totalFollowups : 0;
+  if (modRatio > 0.25 && modificationCount > 10) {
+    signatures.push({
+      name: 'Critical reviewer',
+      detail: `You modify or redirect AI output in ${Math.round(modRatio * 100)}% of follow-up prompts. This indicates active evaluation rather than passive acceptance.`,
+    });
+  }
+  return signatures.slice(0, 4); // Max 4 signatures
+}
+// ══════════════════════════════════════════════
+// WATCH POINTS — Anti-patterns
+// ══════════════════════════════════════════════
+/**
+ * Detect anti-patterns ("watch points") in how the engineer works with AI.
+ *
+ * @param {Array<object>} allSessions - Sessions; each is read for `project`
+ *   (falsy values are grouped under 'unknown') and `exchanges[].userPrompt`.
+ *   Sessions of a project are treated as follow-ups in the order they appear
+ *   in this array.
+ * @param {object} metrics - Metric results. Reads
+ *   `decomposition.details.avgPromptLength`, `debugCycles.details.longLoops`
+ *   and `sessionStructure.details.contextSetRatio`.
+ * @returns {Array<{name: string, detail: string}>} At most 3 watch points.
+ */
+export function computeWatchPoints(allSessions, metrics) {
+  const watchPoints = [];
+  const d = metrics.decomposition.details;
+  const db = metrics.debugCycles.details;
+  const ss = metrics.sessionStructure.details;
+  // Context amnesia — restarting from scratch on same project.
+  // A Map (rather than a plain object) keeps project names such as
+  // "constructor" or "__proto__" from colliding with Object.prototype members.
+  const projectSessions = new Map();
+  for (const s of allSessions) {
+    const p = s.project || 'unknown';
+    if (!projectSessions.has(p)) projectSessions.set(p, []);
+    projectSessions.get(p).push(s);
+  }
+  let contextRestarts = 0;
+  let totalFollowupSessions = 0;
+  for (const sessions of projectSessions.values()) {
+    // Every session after the first one on a project counts as a follow-up.
+    totalFollowupSessions += sessions.length - 1;
+    for (let i = 1; i < sessions.length; i++) {
+      const firstPrompt = sessions[i].exchanges?.[0]?.userPrompt || '';
+      // If first prompt doesn't reference previous work, it's a context restart.
+      // Short openers (<= 50 chars) are ignored as too brief to judge.
+      if (firstPrompt.length > 50 && !/\b(continuing|following up|as discussed|last time|previously|where we left|earlier)\b/i.test(firstPrompt)) {
+        contextRestarts++;
+      }
+    }
+  }
+  if (totalFollowupSessions > 3 && contextRestarts / totalFollowupSessions > 0.5) {
+    watchPoints.push({
+      name: 'Context amnesia',
+      detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient.`,
+    });
+  }
+  // Low modification rate — accepting AI output without review
+  const redirectPattern = /\b(actually|wait|instead|change|no,?|not quite|modify|tweak|hmm|but )\b/i;
+  let modCount = 0;
+  let followupCount = 0;
+  for (const session of allSessions) {
+    const exchanges = session.exchanges ?? [];
+    for (let i = 1; i < exchanges.length; i++) {
+      followupCount++;
+      const prompt = exchanges[i].userPrompt || '';
+      if (redirectPattern.test(prompt)) {
+        modCount++;
+      }
+    }
+  }
+  // With too few follow-ups the ratio is pinned to a neutral 0.5 so that it
+  // cannot trigger the warning below.
+  const modRatio = followupCount > 10 ? modCount / followupCount : 0.5;
+  if (modRatio < 0.15 && followupCount > 20) {
+    watchPoints.push({
+      name: 'Acceptance without review',
+      detail: `You accept AI output without modification in ${Math.round((1 - modRatio) * 100)}% of cases. Top engineers modify or redirect 30%+ of initial suggestions.`,
+    });
+  }
+  // Monologue prompting — excessively long first prompts
+  if (d.avgPromptLength > 2000) {
+    watchPoints.push({
+      name: 'Monologue prompting',
+      detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output.`,
+    });
+  }
+  // Low context-setting
+  if (ss.contextSetRatio < 20) {
+    watchPoints.push({
+      name: 'Missing context',
+      detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections.`,
+    });
+  }
+  // Extended debug spirals
+  if (db.longLoops > 2) {
+    watchPoints.push({
+      name: 'Debug spirals',
+      detail: `${db.longLoops} extended debug loops (>5 turns) detected. When stuck, try providing more specific error context or breaking the problem differently.`,
+    });
+  }
+  return watchPoints.slice(0, 3); // Max 3 watch points
+}
+// ══════════════════════════════════════════════
+// TRAJECTORY — Weekly score evolution
+// ══════════════════════════════════════════════
+/**
+ * Build a week-by-week score trajectory from timestamped sessions.
+ *
+ * Sessions are bucketed into Monday-to-Sunday weeks (local time). Each week
+ * with at least 2 sessions is scored by re-running the four metric functions
+ * and `computeOverallScore` on that week's sessions only.
+ *
+ * @param {Array<object>} allSessions - Sessions; only those with a truthy
+ *   `startTime` (anything `new Date()` accepts) are considered.
+ * @returns {null | {weeks: Array<{label: string, score: number, sessions: number}>,
+ *   delta: number, daysSpan: number, velocityLabel: string, velocityDetail: string}}
+ *   `null` when there are fewer than 5 timestamped sessions, less than 10
+ *   days between first and last session, or fewer than 2 scorable weeks.
+ */
+export function computeTrajectory(allSessions) {
+  const months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
+  // Group sessions by week (filter() returns a copy, so sorting is safe)
+  const sessionsWithTime = allSessions.filter(s => s.startTime);
+  if (sessionsWithTime.length < 5) return null;
+  sessionsWithTime.sort((a, b) => new Date(a.startTime) - new Date(b.startTime));
+  const firstDate = new Date(sessionsWithTime[0].startTime);
+  const lastDate = new Date(sessionsWithTime[sessionsWithTime.length - 1].startTime);
+  // Need at least 2 weeks of data
+  const daySpan = (lastDate - firstDate) / (1000 * 60 * 60 * 24);
+  if (daySpan < 10) return null;
+  // Create weekly buckets
+  const weeks = [];
+  let weekStart = new Date(firstDate);
+  weekStart.setHours(0, 0, 0, 0);
+  // Align to the Monday on or BEFORE the first session. getDay() is 0 for
+  // Sunday, so a naive "- getDay() + 1" would jump forward past a Sunday
+  // start and leave that day's sessions outside every bucket.
+  const daysSinceMonday = (weekStart.getDay() + 6) % 7;
+  weekStart.setDate(weekStart.getDate() - daysSinceMonday);
+  while (weekStart <= lastDate) {
+    const weekEnd = new Date(weekStart);
+    weekEnd.setDate(weekEnd.getDate() + 7);
+    const weekSessions = sessionsWithTime.filter(s => {
+      const t = new Date(s.startTime);
+      return t >= weekStart && t < weekEnd;
+    });
+    if (weekSessions.length >= 2) {
+      // Compute score for this week
+      const m = {
+        decomposition: computeDecomposition(weekSessions),
+        debugCycles: computeDebugCycles(weekSessions),
+        aiLeverage: computeAILeverage(weekSessions),
+        sessionStructure: computeSessionStructure(weekSessions),
+      };
+      const r = computeOverallScore(m);
+      // weekEnd is exclusive; the label shows the last day inside the week.
+      // Going through a Date keeps this correct across month boundaries
+      // (otherwise a week ending on the 1st would render as "...-0").
+      const lastDay = new Date(weekEnd);
+      lastDay.setDate(lastDay.getDate() - 1);
+      const startLabel = `${months[weekStart.getMonth()]} ${weekStart.getDate()}`;
+      const endLabel = lastDay.getMonth() === weekStart.getMonth()
+        ? `${lastDay.getDate()}`
+        : `${months[lastDay.getMonth()]} ${lastDay.getDate()}`;
+      weeks.push({
+        label: `${startLabel}-${endLabel}`,
+        score: r.overall,
+        sessions: weekSessions.length,
+      });
+    }
+    weekStart = new Date(weekStart);
+    weekStart.setDate(weekStart.getDate() + 7);
+  }
+  if (weeks.length < 2) return null;
+  // Compute learning velocity (delta averaged over the number of scored weeks)
+  const firstScore = weeks[0].score;
+  const lastScore = weeks[weeks.length - 1].score;
+  const delta = lastScore - firstScore;
+  const weeksCount = weeks.length;
+  const velocityPerWeek = delta / weeksCount;
+  let velocityLabel;
+  if (velocityPerWeek > 3) velocityLabel = 'FAST';
+  else if (velocityPerWeek > 1) velocityLabel = 'STEADY';
+  else if (velocityPerWeek > -1) velocityLabel = 'STABLE';
+  else velocityLabel = 'DECLINING';
+  return {
+    weeks,
+    delta,
+    daysSpan: Math.round(daySpan),
+    velocityLabel,
+    velocityDetail: delta !== 0
+      ? `${Math.abs(delta)} point ${delta > 0 ? 'improvement' : 'change'} over ${Math.round(daySpan)} days`
+      : `Stable over ${Math.round(daySpan)} days`,
+  };
+}
+// ══════════════════════════════════════════════
+// PROJECT COMPLEXITY — What did they build?
+// ══════════════════════════════════════════════
+// Keyword families used to classify what a project is about. `high` lists
+// infrastructure / architecture vocabulary, `medium` lists everyday
+// feature-work vocabulary.
+const complexitySignals = {
+  high: /\b(pipeline|distributed|real.?time|analytics|classification|machine learning|ml |auth|oauth|websocket|streaming|queue|worker|migration|microservice|kubernetes|docker|deployment|ci.?cd|infrastructure|database design|data model|schema design|api design|caching|rate limit)\b/i,
+  medium: /\b(api|crud|component|feature|integration|testing|refactor|database|query|endpoint|route|middleware|hook|state management|responsive|animation|chart|graph|dashboard)\b/i,
+};
+// Global clones used for extraction: String.prototype.match with a non-global
+// regex returns only the FIRST match (plus its capture group), so every
+// keyword after the first one in a prompt would otherwise be missed.
+// Only pass these to String.prototype.match, which resets lastIndex itself.
+const complexitySignalsGlobal = {
+  high: new RegExp(complexitySignals.high.source, 'gi'),
+  medium: new RegExp(complexitySignals.medium.source, 'gi'),
+};
+/**
+ * Classify each project's sophistication from its prompt vocabulary and volume.
+ *
+ * @param {Array<object>} allSessions - Sessions; reads `project` (falsy →
+ *   'unknown'), `exchangeCount` (falls back to `exchanges.length` when
+ *   absent), `startTime` and `exchanges[].userPrompt`.
+ * @returns {Array<{name: string, complexity: 'HIGH'|'MEDIUM'|'LOW',
+ *   sessions: number, exchanges: number, daysActive: number, signals: string[]}>}
+ *   Up to 5 projects, sorted by exchange count descending. Projects with
+ *   fewer than 3 exchanges are omitted; names longer than 28 characters are
+ *   shortened to '...' plus their last 25 characters.
+ */
+export function computeProjectComplexity(allSessions) {
+  // Map instead of a plain object so that project names like "constructor"
+  // cannot collide with Object.prototype members.
+  const projectData = new Map();
+  for (const s of allSessions) {
+    const p = s.project || 'unknown';
+    let data = projectData.get(p);
+    if (!data) {
+      data = { sessions: 0, exchanges: 0, daysActive: new Set(), highSignals: new Set(), medSignals: new Set() };
+      projectData.set(p, data);
+    }
+    const exchanges = s.exchanges ?? [];
+    data.sessions++;
+    data.exchanges += s.exchangeCount ?? exchanges.length;
+    if (s.startTime) {
+      const started = new Date(s.startTime);
+      // toISOString() throws a RangeError on an invalid date; skip those.
+      if (!Number.isNaN(started.getTime())) {
+        data.daysActive.add(started.toISOString().split('T')[0]);
+      }
+    }
+    for (const ex of exchanges) {
+      const prompt = ex.userPrompt || '';
+      // Extract complexity signals (all occurrences, de-duplicated by the Sets)
+      for (const m of prompt.match(complexitySignalsGlobal.high) ?? []) {
+        data.highSignals.add(m.toLowerCase().trim());
+      }
+      for (const m of prompt.match(complexitySignalsGlobal.medium) ?? []) {
+        data.medSignals.add(m.toLowerCase().trim());
+      }
+    }
+  }
+  const projects = [];
+  for (const [name, data] of projectData) {
+    if (data.exchanges < 3) continue; // Skip trivial projects
+    let complexity;
+    const signals = [...data.highSignals, ...data.medSignals].slice(0, 5);
+    if (data.highSignals.size >= 3 || (data.highSignals.size >= 1 && data.exchanges > 50)) {
+      complexity = 'HIGH';
+    } else if (data.medSignals.size >= 3 || data.highSignals.size >= 1 || data.exchanges > 30) {
+      complexity = 'MEDIUM';
+    } else {
+      complexity = 'LOW';
+    }
+    const shortName = name.length > 28 ? '...' + name.slice(-25) : name;
+    projects.push({
+      name: shortName,
+      complexity,
+      sessions: data.sessions,
+      exchanges: data.exchanges,
+      daysActive: data.daysActive.size,
+      signals,
+    });
+  }
+  // Sort by exchanges descending
+  projects.sort((a, b) => b.exchanges - a.exchanges);
+  return projects.slice(0, 5); // Top 5 projects
+}
+// ══════════════════════════════════════════════
+// ASSESSMENT — Narrative paragraph
+// ══════════════════════════════════════════════
+/**
+ * Compose a short narrative assessment of the engineer's profile.
+ *
+ * Builds up to four sentences: strongest dimension (plus the first signature,
+ * if any), the second-strongest dimension when it scores >= 65, the weakest
+ * dimension as a growth area when it scores < 65, and finally the archetype's
+ * `bestFor` text.
+ *
+ * @param {object} result - Scoring result; reads `scores.{decomposition,
+ *   debugCycles,aiLeverage,sessionStructure}` and `archetype.bestFor`.
+ * @param {object} metrics - Metric results; reads
+ *   `debugCycles.details.{avgTurnsToResolve,longLoops}` and
+ *   `sessionStructure.details.refinementRatio`.
+ * @param {Array<{name: string}>} signatures - Detected signatures; only the
+ *   first one's name is mentioned.
+ * @param {Array<object>} watchPoints - Accepted for interface compatibility;
+ *   not used when building the text.
+ * @returns {string} The assessment paragraph.
+ */
+export function generateAssessment(result, metrics, signatures, watchPoints) {
+  const { scores, archetype } = result;
+  const db = metrics.debugCycles.details;
+  const ss = metrics.sessionStructure.details;
+  // Find strongest and weakest dimensions
+  const dims = [
+    { key: 'decomposition', label: 'problem decomposition', score: scores.decomposition },
+    { key: 'debugCycles', label: 'debugging efficiency', score: scores.debugCycles },
+    { key: 'aiLeverage', label: 'AI leverage', score: scores.aiLeverage },
+    { key: 'sessionStructure', label: 'workflow discipline', score: scores.sessionStructure },
+  ];
+  dims.sort((a, b) => b.score - a.score);
+  const strongest = dims[0];
+  const weakest = dims[dims.length - 1];
+  // Build assessment parts
+  let assessment = `This engineer demonstrates ${dimQualitative(strongest.score)} ${strongest.label}`;
+  // Add signature mention if available
+  if (signatures.length > 0) {
+    assessment += ` with a distinctive pattern of ${signatures[0].name.toLowerCase()}`;
+  }
+  assessment += '.';
+  // Second sentence — second strength or debugging detail
+  if (dims[1].score >= 65) {
+    assessment += ` Their ${dims[1].label} is also ${dimQualitative(dims[1].score).toLowerCase()}`;
+    if (db.avgTurnsToResolve <= 2 && dims[1].key === 'debugCycles') {
+      assessment += ' \u2014 surgical and specific with ' + (db.longLoops === 0 ? 'zero' : 'minimal') + ' extended loops';
+    }
+    assessment += '.';
+  }
+  // Third sentence — growth area
+  if (weakest.score < 65) {
+    assessment += ` Primary growth opportunity is in ${weakest.label}`;
+    if (weakest.key === 'sessionStructure') {
+      assessment += ': context-setting and upfront planning are below benchmark';
+      if (ss.refinementRatio > 15) {
+        assessment += ', though iterative refinement partially compensates';
+      }
+    } else if (weakest.key === 'decomposition') {
+      assessment += ': more task breakdown and structured thinking would yield significant score improvement';
+    } else if (weakest.key === 'aiLeverage') {
+      assessment += ': using AI for architecture and planning, not just code generation, would increase impact';
+    } else {
+      assessment += ': stronger error reporting and systematic resolution would improve efficiency';
+    }
+    assessment += '.';
+  }
+  // Fourth sentence — best for. Skipped when the archetype carries no such
+  // text, so the paragraph never ends in a literal " undefined".
+  if (archetype?.bestFor) {
+    assessment += ' ' + archetype.bestFor;
+  }
+  return assessment;
+}
+/**
+ * Translate a numeric dimension score into its qualitative band.
+ *
+ * @param {number} score - Dimension score.
+ * @returns {string} 'Exceptional' (>= 80), 'Strong' (>= 65), 'Solid' (>= 50),
+ *   'Developing' (>= 35), otherwise 'Early-stage'.
+ */
+function dimQualitative(score) {
+  // Ordered from highest floor to lowest; the first floor reached wins.
+  const bands = [
+    [80, 'Exceptional'],
+    [65, 'Strong'],
+    [50, 'Solid'],
+    [35, 'Developing'],
+  ];
+  const band = bands.find(([floor]) => score >= floor);
+  return band ? band[1] : 'Early-stage';
+}
+// ══════════════════════════════════════════════
+// CONFIDENCE — Data volume indicator
+// ══════════════════════════════════════════════
+/**
+ * Rate how much trust the analysis deserves, based on data volume.
+ *
+ * Points are awarded in bands for session count, exchange count and number
+ * of tools, plus a 10-point bonus when there are at least 30 sessions and
+ * 300 exchanges. The total is capped at 100.
+ *
+ * @param {{totalSessions: number, totalExchanges: number, tools: Array}} sessionStats
+ * @returns {{score: number, level: 'HIGH'|'MODERATE'|'LOW'}} `HIGH` from 80,
+ *   `MODERATE` from 50, otherwise `LOW`.
+ */
+export function computeConfidence(sessionStats) {
+  const { totalSessions, totalExchanges, tools } = sessionStats;
+  const toolCount = tools.length;
+  // Returns the points of the first [threshold, points] band that `value`
+  // reaches, or `floor` when it reaches none.
+  const pointsFor = (value, bands, floor) => {
+    for (const [threshold, points] of bands) {
+      if (value >= threshold) return points;
+    }
+    return floor;
+  };
+  const sessionPoints = pointsFor(totalSessions, [[50, 40], [20, 30], [10, 20]], 10);
+  const exchangePoints = pointsFor(totalExchanges, [[500, 30], [200, 20], [50, 10]], 0);
+  const toolPoints = pointsFor(toolCount, [[3, 20], [2, 15]], 10);
+  // Bonus for enough data
+  const volumeBonus = totalSessions >= 30 && totalExchanges >= 300 ? 10 : 0;
+  const score = Math.min(100, sessionPoints + exchangePoints + toolPoints + volumeBonus);
+  let level = 'LOW';
+  if (score >= 80) {
+    level = 'HIGH';
+  } else if (score >= 50) {
+    level = 'MODERATE';
+  }
+  return { score, level };
+}