npm - @grainulation/harvest - Versions diffs - 1.0.1 → 1.0.2 - Mend

@grainulation/harvest 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/lib/calibration.js CHANGED Viewed

@@ -1,4 +1,4 @@
-'use strict';
+"use strict";
 /**
  * Prediction vs outcome scoring.
@@ -21,20 +21,23 @@ const EVIDENCE_RANK = {
 };
 function calibrate(sprints) {
-  const allClaims = sprints.flatMap(s => s.claims.map(c => ({ ...c, _sprint: s.name })));
+  const allClaims = sprints.flatMap((s) =>
+    s.claims.map((c) => ({ ...c, _sprint: s.name })),
+  );
-  const estimates = allClaims.filter(c => c.type === 'estimate');
-  const calibrations = allClaims.filter(c =>
-    c.id && (c.id.startsWith('cal') || c.type === 'calibration')
+  const estimates = allClaims.filter((c) => c.type === "estimate");
+  const calibrations = allClaims.filter(
+    (c) => c.id && (c.id.startsWith("cal") || c.type === "calibration"),
   );
   // Match calibrations to estimates
   const scored = [];
   for (const cal of calibrations) {
     const refs = cal.references || cal.refs || [];
-    const matchedEstimates = estimates.filter(e =>
-      refs.includes(e.id) ||
-      (cal.tags && e.tags && cal.tags.some(t => e.tags.includes(t)))
+    const matchedEstimates = estimates.filter(
+      (e) =>
+        refs.includes(e.id) ||
+        (cal.tags && e.tags && cal.tags.some((t) => e.tags.includes(t))),
     );
     for (const est of matchedEstimates) {
@@ -53,27 +56,30 @@ function calibrate(sprints) {
   }
   // Unmatched estimates -- predictions with no follow-up
-  const scoredEstimateIds = new Set(scored.map(s => s.estimateId));
-  const unmatched = estimates.filter(e => !scoredEstimateIds.has(e.id));
+  const scoredEstimateIds = new Set(scored.map((s) => s.estimateId));
+  const unmatched = estimates.filter((e) => !scoredEstimateIds.has(e.id));
   // Compute aggregate stats
-  const accurateCount = scored.filter(s => s.accurate === true).length;
-  const inaccurateCount = scored.filter(s => s.accurate === false).length;
-  const unchecked = scored.filter(s => s.accurate === null).length;
+  const accurateCount = scored.filter((s) => s.accurate === true).length;
+  const inaccurateCount = scored.filter((s) => s.accurate === false).length;
+  const unchecked = scored.filter((s) => s.accurate === null).length;
   const totalScored = accurateCount + inaccurateCount;
-  const accuracyRate = totalScored > 0
-    ? Math.round(accurateCount / totalScored * 100)
-    : null;
+  const accuracyRate =
+    totalScored > 0 ? Math.round((accurateCount / totalScored) * 100) : null;
   // Confidence calibration: group by confidence bucket
-  const buckets = { high: { total: 0, accurate: 0 }, medium: { total: 0, accurate: 0 }, low: { total: 0, accurate: 0 } };
+  const buckets = {
+    high: { total: 0, accurate: 0 },
+    medium: { total: 0, accurate: 0 },
+    low: { total: 0, accurate: 0 },
+  };
   for (const s of scored) {
     const conf = s.estimateConfidence;
-    let bucket = 'medium';
-    if (typeof conf === 'number') {
-      bucket = conf >= 0.7 ? 'high' : conf >= 0.4 ? 'medium' : 'low';
-    } else if (typeof conf === 'string') {
+    let bucket = "medium";
+    if (typeof conf === "number") {
+      bucket = conf >= 0.7 ? "high" : conf >= 0.4 ? "medium" : "low";
+    } else if (typeof conf === "string") {
       bucket = conf.toLowerCase();
     }
     if (buckets[bucket]) {
@@ -85,9 +91,15 @@ function calibrate(sprints) {
   const calibrationScore = Object.fromEntries(
     Object.entries(buckets)
       .filter(([, v]) => v.total > 0)
-      .map(([k, v]) => [k, Math.round(v.accurate / v.total * 100)])
+      .map(([k, v]) => [k, Math.round((v.accurate / v.total) * 100)]),
   );
+  // Brier score: mean squared error between predicted probability and outcome (0=perfect, 1=worst)
+  const brierData = computeBrierScore(scored);
+  // Calibration curve: bin predictions by confidence, compare to actual outcome rate
+  const calibrationCurve = computeCalibrationCurve(scored);
   return {
     summary: {
       totalEstimates: estimates.length,
@@ -95,49 +107,204 @@ function calibrate(sprints) {
       matched: scored.length,
       unmatched: unmatched.length,
       accuracyRate,
+      brierScore: brierData.score,
     },
     calibrationByConfidence: calibrationScore,
-    scored: scored.map(s => ({
+    calibrationCurve,
+    brierScore: brierData,
+    scored: scored.map((s) => ({
       estimateId: s.estimateId,
       calibrationId: s.calibrationId,
       sprint: s.sprint,
       accurate: s.accurate,
       delta: s.delta,
     })),
-    unmatchedEstimates: unmatched.map(e => ({
+    unmatchedEstimates: unmatched.map((e) => ({
       id: e.id,
       sprint: e._sprint,
       text: e.text || e.claim || e.description,
       age: e.created ? daysSince(e.created) : null,
     })),
-    insight: generateInsight(accuracyRate, calibrationScore, unmatched.length, estimates.length),
+    insight: generateInsight(
+      accuracyRate,
+      calibrationScore,
+      unmatched.length,
+      estimates.length,
+      brierData,
+      calibrationCurve,
+    ),
   };
 }
-function generateInsight(accuracy, byConfidence, unmatchedCount, totalEstimates) {
+/**
+ * Compute Brier score -- mean squared difference between predicted probability and outcome.
+ * Scale: 0 (perfect) to 1 (worst). Metaculus community achieves 0.10-0.20.
+ *
+ * Only entries whose `accurate` and `estimateConfidence` are both not `null`
+ * are used. Each confidence is converted to a probability with
+ * normalizeConfidence(); the outcome is 1 when `accurate` is truthy, else 0.
+ *
+ * @param {Array<Object>} scored - Entries carrying `accurate` and `estimateConfidence`.
+ * @returns {{score: (number|null), n: number, interpretation: (string|null)}}
+ *   `score` is rounded to three decimals, `n` is the number of entries used,
+ *   and `interpretation` is a text band chosen from the score. When no entry
+ *   qualifies, `score` and `interpretation` are null and `n` is 0.
+ */
+function computeBrierScore(scored) {
+  const withProbability = scored.filter(
+    (s) => s.accurate !== null && s.estimateConfidence !== null, // strict null test: `undefined` values still pass
+  );
+  if (withProbability.length === 0) {
+    return { score: null, n: 0, interpretation: null };
+  }
+  let sumSquaredError = 0;
+  for (const s of withProbability) {
+    const predicted = normalizeConfidence(s.estimateConfidence);
+    const outcome = s.accurate ? 1 : 0;
+    sumSquaredError += (predicted - outcome) ** 2;
+  }
+  const score =
+    Math.round((sumSquaredError / withProbability.length) * 1000) / 1000; // mean, rounded to 3 decimals
+  let interpretation;
+  if (score <= 0.1)
+    interpretation =
+      "Excellent calibration -- approaching expert forecaster levels.";
+  else if (score <= 0.2)
+    interpretation =
+      "Good calibration -- comparable to prediction market aggregates.";
+  else if (score <= 0.3)
+    interpretation =
+      "Moderate calibration -- room for improvement in confidence estimates.";
+  else
+    interpretation =
+      "Weak calibration -- predictions are poorly matched to outcomes.";
+  return { score, n: withProbability.length, interpretation };
+}
+/**
+ * Build calibration curve data: bin predictions into buckets, compare predicted vs actual rates.
+ * Perfect calibration follows the diagonal (predicted 70% → 70% actually happen).
+ *
+ * Entries whose `accurate` or `estimateConfidence` is `null` are ignored.
+ * Each remaining entry is placed in the bin where
+ * `min <= normalizeConfidence(estimateConfidence) < max`.
+ *
+ * Bias is decided by counting non-empty bins: "overconfident" when more bins
+ * have predicted > actual than the reverse, "underconfident" for the opposite,
+ * "mixed" when the two counts tie and are non-zero, and null otherwise.
+ *
+ * @param {Array<Object>} scored - Entries carrying `accurate` and `estimateConfidence`.
+ * @returns {{bins: Array<{label: string, predicted: number, actual: (number|null), count: number}>, bias: (string|null)}}
+ *   One record per bin (percentages as integers; `actual` is null for an
+ *   empty bin). Returns `{ bins: [], bias: null }` when no entry qualifies.
+ */
+function computeCalibrationCurve(scored) {
+  const BINS = [
+    { min: 0, max: 0.2, label: "0-20%" },
+    { min: 0.2, max: 0.4, label: "20-40%" },
+    { min: 0.4, max: 0.6, label: "40-60%" },
+    { min: 0.6, max: 0.8, label: "60-80%" },
+    { min: 0.8, max: 1.01, label: "80-100%" }, // max is above 1 so a confidence of exactly 1 passes the `< max` test below
+  ];
+  const withData = scored.filter(
+    (s) => s.accurate !== null && s.estimateConfidence !== null, // strict null test: `undefined` values still pass
+  );
+  if (withData.length === 0) return { bins: [], bias: null };
+  const bins = BINS.map((bin) => {
+    const inBin = withData.filter((s) => {
+      const conf = normalizeConfidence(s.estimateConfidence);
+      return conf >= bin.min && conf < bin.max;
+    });
+    const count = inBin.length;
+    const accurateCount = inBin.filter((s) => s.accurate).length;
+    const actualRate =
+      count > 0 ? Math.round((accurateCount / count) * 100) : null;
+    const midpoint = Math.round(((bin.min + bin.max) / 2) * 100); // NOTE(review): uses max 1.01 for the last bin, so this is ~90.5 and may round to 91 instead of 90 -- confirm intent
+    return {
+      label: bin.label,
+      predicted: midpoint,
+      actual: actualRate,
+      count,
+    };
+  });
+  // Overall bias direction
+  let overconfidentBins = 0;
+  let underconfidentBins = 0;
+  for (const bin of bins) {
+    if (bin.count === 0 || bin.actual === null) continue;
+    if (bin.predicted > bin.actual) overconfidentBins++;
+    else if (bin.predicted < bin.actual) underconfidentBins++;
+  }
+  let bias = null;
+  if (overconfidentBins > underconfidentBins) bias = "overconfident";
+  else if (underconfidentBins > overconfidentBins) bias = "underconfident";
+  else if (overconfidentBins > 0) bias = "mixed"; // counts are tied here, so both are non-zero
+  return { bins, bias };
+}
+/**
+ * Normalize confidence to 0-1 range.
+ *
+ * Numbers are clamped to [0, 1]. The strings "high", "medium" and "low"
+ * (compared case-insensitively) map to 0.8, 0.5 and 0.2. Any other value,
+ * including an unrecognized string, yields 0.5.
+ *
+ * @param {*} conf - Numeric probability, label string, or any other value.
+ * @returns {number} The normalized confidence.
+ */
+function normalizeConfidence(conf) {
+  if (typeof conf === "number") return Math.max(0, Math.min(1, conf)); // NOTE(review): NaN has type "number" and comes back as NaN -- confirm callers never supply it
+  if (typeof conf === "string") {
+    const lower = conf.toLowerCase();
+    if (lower === "high") return 0.8;
+    if (lower === "medium") return 0.5;
+    if (lower === "low") return 0.2;
+  }
+  return 0.5; // default
+}
+function generateInsight(
+  accuracy,
+  byConfidence,
+  unmatchedCount,
+  totalEstimates,
+  brierData,
+  calibrationCurve,
+) {
   const parts = [];
   if (accuracy !== null) {
     if (accuracy >= 80) {
-      parts.push(`Strong calibration: ${accuracy}% of scored predictions were accurate.`);
+      parts.push(
+        `Strong calibration: ${accuracy}% of scored predictions were accurate.`,
+      );
     } else if (accuracy >= 50) {
-      parts.push(`Moderate calibration: ${accuracy}% accuracy. Room for improvement.`);
+      parts.push(
+        `Moderate calibration: ${accuracy}% accuracy. Room for improvement.`,
+      );
     } else {
-      parts.push(`Weak calibration: only ${accuracy}% accuracy. Estimates may need more evidence before committing.`);
+      parts.push(
+        `Weak calibration: only ${accuracy}% accuracy. Estimates may need more evidence before committing.`,
+      );
     }
   }
   if (byConfidence.high !== undefined && byConfidence.low !== undefined) {
     if (byConfidence.high < byConfidence.low) {
-      parts.push('Overconfidence detected: high-confidence predictions are less accurate than low-confidence ones.');
+      parts.push(
+        "Overconfidence detected: high-confidence predictions are less accurate than low-confidence ones.",
+      );
     }
   }
   if (totalEstimates > 0 && unmatchedCount / totalEstimates > 0.5) {
-    parts.push(`${unmatchedCount} of ${totalEstimates} estimates have no calibration follow-up. Run /calibrate to close the loop.`);
+    parts.push(
+      `${unmatchedCount} of ${totalEstimates} estimates have no calibration follow-up. Run /calibrate to close the loop.`,
+    );
+  }
+  if (brierData && brierData.score !== null) {
+    parts.push(`Brier score: ${brierData.score} (${brierData.interpretation})`);
+  }
+  if (calibrationCurve && calibrationCurve.bias) {
+    if (calibrationCurve.bias === "overconfident") {
+      parts.push(
+        "Systematic overconfidence detected: your high-confidence predictions resolve less often than expected. Consider adding buffer to estimates.",
+      );
+    } else if (calibrationCurve.bias === "underconfident") {
+      parts.push(
+        "You tend to underestimate -- your predictions succeed more often than your confidence suggests. Trust your analysis more.",
+      );
+    }
   }
-  return parts.length > 0 ? parts.join(' ') : 'Not enough data to generate calibration insights.';
+  return parts.length > 0
+    ? parts.join(" ")
+    : "Not enough data to generate calibration insights.";
 }
 function daysSince(dateStr) {

package/lib/dashboard.js CHANGED Viewed

@@ -1,20 +1,24 @@
-'use strict';
+"use strict";
-const fs = require('node:fs');
-const path = require('node:path');
+const fs = require("node:fs");
+const path = require("node:path");
 /**
  * Slim a claims array for dashboard embedding (compact keys).
  *
  * Key map: i=id, t=type, tp=topic, c=content text, e=evidence, s=status,
  * p=phase_added, ts=timestamp, cf=conflicts_with, r=resolved_by, tg=tags.
  *
  * @param {Array<Object>} claims - Claim objects to compact.
  * @returns {Array<Object>} One compact object per claim, in input order.
  */
 function slim(claims) {
-  return claims.map(c => ({
-    i: c.id, t: c.type, tp: c.topic,
-    c: c.content || c.text || c.claim || c.description || '',
-    e: c.evidence, s: c.status,
-    p: c.phase_added, ts: c.timestamp || c.created || c.date,
+  return claims.map((c) => ({
+    i: c.id,
+    t: c.type,
+    tp: c.topic,
+    c: c.content || c.text || c.claim || c.description || "", // first truthy text field, else ""
+    e: c.evidence,
+    s: c.status,
+    p: c.phase_added,
+    ts: c.timestamp || c.created || c.date, // first truthy date-like field
     cf: (c.conflicts_with || []).length > 0 ? c.conflicts_with : undefined,
     r: c.resolved_by || undefined,
-    tg: (c.tags || []).length > 0 ? c.tags : undefined
+    tg: (c.tags || []).length > 0 ? c.tags : undefined, // undefined when tags is missing or empty
   }));
 }
@@ -23,7 +27,7 @@ function slim(claims) {
  */
 function loadClaims(filePath) {
   try {
-    return JSON.parse(fs.readFileSync(filePath, 'utf8'));
+    return JSON.parse(fs.readFileSync(filePath, "utf8"));
   } catch {
     return null;
   }
@@ -37,17 +41,21 @@ function findSprintFiles(targetDir) {
   const found = [];
   // Direct claims.json in target dir
-  const direct = path.join(targetDir, 'claims.json');
+  const direct = path.join(targetDir, "claims.json");
   if (fs.existsSync(direct)) {
-    found.push({ file: direct, name: path.basename(targetDir), cat: 'root' });
+    found.push({ file: direct, name: path.basename(targetDir), cat: "root" });
   }
   // Archive subdir (flat JSON files)
-  const archiveDir = path.join(targetDir, 'archive');
+  const archiveDir = path.join(targetDir, "archive");
   if (fs.existsSync(archiveDir) && fs.statSync(archiveDir).isDirectory()) {
     for (const f of fs.readdirSync(archiveDir)) {
-      if (f.endsWith('.json') && f.includes('claims')) {
-        found.push({ file: path.join(archiveDir, f), name: f.replace('.json', '').replace(/-/g, ' '), cat: 'archive' });
+      if (f.endsWith(".json") && f.includes("claims")) {
+        found.push({
+          file: path.join(archiveDir, f),
+          name: f.replace(".json", "").replace(/-/g, " "),
+          cat: "archive",
+        });
       }
     }
   }
@@ -57,26 +65,35 @@ function findSprintFiles(targetDir) {
     const entries = fs.readdirSync(targetDir, { withFileTypes: true });
     for (const entry of entries) {
       if (!entry.isDirectory()) continue;
-      if (entry.name.startsWith('.') || entry.name === 'archive' || entry.name === 'node_modules') continue;
+      if (
+        entry.name.startsWith(".") ||
+        entry.name === "archive" ||
+        entry.name === "node_modules"
+      )
+        continue;
       const childDir = path.join(targetDir, entry.name);
-      const childClaims = path.join(childDir, 'claims.json');
+      const childClaims = path.join(childDir, "claims.json");
       if (fs.existsSync(childClaims)) {
-        found.push({ file: childClaims, name: entry.name, cat: 'active' });
+        found.push({ file: childClaims, name: entry.name, cat: "active" });
       }
       // Second level
       try {
         const subEntries = fs.readdirSync(childDir, { withFileTypes: true });
         for (const sub of subEntries) {
           if (!sub.isDirectory()) continue;
-          if (sub.name.startsWith('.')) continue;
-          const subClaims = path.join(childDir, sub.name, 'claims.json');
+          if (sub.name.startsWith(".")) continue;
+          const subClaims = path.join(childDir, sub.name, "claims.json");
           if (fs.existsSync(subClaims)) {
-            found.push({ file: subClaims, name: sub.name, cat: 'active' });
+            found.push({ file: subClaims, name: sub.name, cat: "active" });
           }
         }
-      } catch { /* skip */ }
+      } catch {
+        /* skip */
+      }
     }
-  } catch { /* skip */ }
+  } catch {
+    /* skip */
+  }
   return found;
 }
@@ -95,10 +112,10 @@ function loadSprints(targetDir) {
     if (claims.length === 0) continue;
     sprints.push({
       n: src.name,
-      p: data.meta?.phase || 'unknown',
-      q: data.meta?.question || '',
-      cat: src.cat || 'active',
-      c: slim(claims)
+      p: data.meta?.phase || "unknown",
+      q: data.meta?.question || "",
+      cat: src.cat || "active",
+      c: slim(claims),
     });
   }
   return sprints;
@@ -110,17 +127,22 @@ function loadSprints(targetDir) {
  * @returns {string} Complete HTML string
  */
 function buildHtml(sprints) {
-  const templatePath = path.join(__dirname, '..', 'templates', 'dashboard.html');
-  const template = fs.readFileSync(templatePath, 'utf8');
-  const jsonData = JSON.stringify(sprints).replace(/<\/script/gi, '<\\/script');
-  return template.replace('__SPRINT_DATA__', jsonData);
+  const templatePath = path.join(
+    __dirname,
+    "..",
+    "templates",
+    "dashboard.html",
+  );
+  const template = fs.readFileSync(templatePath, "utf8"); // synchronous read; throws if the template file is missing
+  const jsonData = JSON.stringify(sprints).replace(/<\/script/gi, "<\\/script"); // presumably keeps embedded data from closing an enclosing script tag -- verify against the template
+  return template.replace("__SPRINT_DATA__", jsonData); // NOTE(review): replaces the first placeholder only, and a string replacement expands $-patterns ($&, $', $$) present in jsonData -- confirm claim text cannot contain them
 }
 /**
  * Return paths to all claims.json files for watching.
  *
  * Thin wrapper over findSprintFiles(): keeps only the `file` field of each
  * entry it returns.
  *
  * @param {string} targetDir - Directory passed through to findSprintFiles().
  * @returns {Array<string>} File paths, in the order findSprintFiles() reports them.
  */
 function claimsPaths(targetDir) {
-  return findSprintFiles(targetDir).map(s => s.file);
+  return findSprintFiles(targetDir).map((s) => s.file);
 }
 module.exports = { loadSprints, buildHtml, claimsPaths, findSprintFiles, slim };