@grainulation/harvest 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- 'use strict';
1
+ "use strict";
2
2
 
3
3
  /**
4
4
  * Prediction vs outcome scoring.
@@ -21,20 +21,23 @@ const EVIDENCE_RANK = {
21
21
  };
22
22
 
23
23
  function calibrate(sprints) {
24
- const allClaims = sprints.flatMap(s => s.claims.map(c => ({ ...c, _sprint: s.name })));
24
+ const allClaims = sprints.flatMap((s) =>
25
+ s.claims.map((c) => ({ ...c, _sprint: s.name })),
26
+ );
25
27
 
26
- const estimates = allClaims.filter(c => c.type === 'estimate');
27
- const calibrations = allClaims.filter(c =>
28
- c.id && (c.id.startsWith('cal') || c.type === 'calibration')
28
+ const estimates = allClaims.filter((c) => c.type === "estimate");
29
+ const calibrations = allClaims.filter(
30
+ (c) => c.id && (c.id.startsWith("cal") || c.type === "calibration"),
29
31
  );
30
32
 
31
33
  // Match calibrations to estimates
32
34
  const scored = [];
33
35
  for (const cal of calibrations) {
34
36
  const refs = cal.references || cal.refs || [];
35
- const matchedEstimates = estimates.filter(e =>
36
- refs.includes(e.id) ||
37
- (cal.tags && e.tags && cal.tags.some(t => e.tags.includes(t)))
37
+ const matchedEstimates = estimates.filter(
38
+ (e) =>
39
+ refs.includes(e.id) ||
40
+ (cal.tags && e.tags && cal.tags.some((t) => e.tags.includes(t))),
38
41
  );
39
42
 
40
43
  for (const est of matchedEstimates) {
@@ -53,27 +56,30 @@ function calibrate(sprints) {
53
56
  }
54
57
 
55
58
  // Unmatched estimates -- predictions with no follow-up
56
- const scoredEstimateIds = new Set(scored.map(s => s.estimateId));
57
- const unmatched = estimates.filter(e => !scoredEstimateIds.has(e.id));
59
+ const scoredEstimateIds = new Set(scored.map((s) => s.estimateId));
60
+ const unmatched = estimates.filter((e) => !scoredEstimateIds.has(e.id));
58
61
 
59
62
  // Compute aggregate stats
60
- const accurateCount = scored.filter(s => s.accurate === true).length;
61
- const inaccurateCount = scored.filter(s => s.accurate === false).length;
62
- const unchecked = scored.filter(s => s.accurate === null).length;
63
+ const accurateCount = scored.filter((s) => s.accurate === true).length;
64
+ const inaccurateCount = scored.filter((s) => s.accurate === false).length;
65
+ const unchecked = scored.filter((s) => s.accurate === null).length;
63
66
 
64
67
  const totalScored = accurateCount + inaccurateCount;
65
- const accuracyRate = totalScored > 0
66
- ? Math.round(accurateCount / totalScored * 100)
67
- : null;
68
+ const accuracyRate =
69
+ totalScored > 0 ? Math.round((accurateCount / totalScored) * 100) : null;
68
70
 
69
71
  // Confidence calibration: group by confidence bucket
70
- const buckets = { high: { total: 0, accurate: 0 }, medium: { total: 0, accurate: 0 }, low: { total: 0, accurate: 0 } };
72
+ const buckets = {
73
+ high: { total: 0, accurate: 0 },
74
+ medium: { total: 0, accurate: 0 },
75
+ low: { total: 0, accurate: 0 },
76
+ };
71
77
  for (const s of scored) {
72
78
  const conf = s.estimateConfidence;
73
- let bucket = 'medium';
74
- if (typeof conf === 'number') {
75
- bucket = conf >= 0.7 ? 'high' : conf >= 0.4 ? 'medium' : 'low';
76
- } else if (typeof conf === 'string') {
79
+ let bucket = "medium";
80
+ if (typeof conf === "number") {
81
+ bucket = conf >= 0.7 ? "high" : conf >= 0.4 ? "medium" : "low";
82
+ } else if (typeof conf === "string") {
77
83
  bucket = conf.toLowerCase();
78
84
  }
79
85
  if (buckets[bucket]) {
@@ -85,9 +91,15 @@ function calibrate(sprints) {
85
91
  const calibrationScore = Object.fromEntries(
86
92
  Object.entries(buckets)
87
93
  .filter(([, v]) => v.total > 0)
88
- .map(([k, v]) => [k, Math.round(v.accurate / v.total * 100)])
94
+ .map(([k, v]) => [k, Math.round((v.accurate / v.total) * 100)]),
89
95
  );
90
96
 
97
+ // Brier score: mean squared error between predicted probability and outcome (0=perfect, 1=worst)
98
+ const brierData = computeBrierScore(scored);
99
+
100
+ // Calibration curve: bin predictions by confidence, compare to actual outcome rate
101
+ const calibrationCurve = computeCalibrationCurve(scored);
102
+
91
103
  return {
92
104
  summary: {
93
105
  totalEstimates: estimates.length,
@@ -95,49 +107,204 @@ function calibrate(sprints) {
95
107
  matched: scored.length,
96
108
  unmatched: unmatched.length,
97
109
  accuracyRate,
110
+ brierScore: brierData.score,
98
111
  },
99
112
  calibrationByConfidence: calibrationScore,
100
- scored: scored.map(s => ({
113
+ calibrationCurve,
114
+ brierScore: brierData,
115
+ scored: scored.map((s) => ({
101
116
  estimateId: s.estimateId,
102
117
  calibrationId: s.calibrationId,
103
118
  sprint: s.sprint,
104
119
  accurate: s.accurate,
105
120
  delta: s.delta,
106
121
  })),
107
- unmatchedEstimates: unmatched.map(e => ({
122
+ unmatchedEstimates: unmatched.map((e) => ({
108
123
  id: e.id,
109
124
  sprint: e._sprint,
110
125
  text: e.text || e.claim || e.description,
111
126
  age: e.created ? daysSince(e.created) : null,
112
127
  })),
113
- insight: generateInsight(accuracyRate, calibrationScore, unmatched.length, estimates.length),
128
+ insight: generateInsight(
129
+ accuracyRate,
130
+ calibrationScore,
131
+ unmatched.length,
132
+ estimates.length,
133
+ brierData,
134
+ calibrationCurve,
135
+ ),
114
136
  };
115
137
  }
116
138
 
117
- function generateInsight(accuracy, byConfidence, unmatchedCount, totalEstimates) {
139
+ /**
140
+ * Compute Brier score -- mean squared difference between predicted probability and outcome.
141
+ * Scale: 0 (perfect) to 1 (worst). Metaculus community achieves 0.10-0.20.
142
+ */
143
+ function computeBrierScore(scored) {
144
+ const withProbability = scored.filter(
145
+ (s) => s.accurate !== null && s.estimateConfidence !== null,
146
+ );
147
+
148
+ if (withProbability.length === 0) {
149
+ return { score: null, n: 0, interpretation: null };
150
+ }
151
+
152
+ let sumSquaredError = 0;
153
+ for (const s of withProbability) {
154
+ const predicted = normalizeConfidence(s.estimateConfidence);
155
+ const outcome = s.accurate ? 1 : 0;
156
+ sumSquaredError += (predicted - outcome) ** 2;
157
+ }
158
+
159
+ const score =
160
+ Math.round((sumSquaredError / withProbability.length) * 1000) / 1000;
161
+
162
+ let interpretation;
163
+ if (score <= 0.1)
164
+ interpretation =
165
+ "Excellent calibration -- approaching expert forecaster levels.";
166
+ else if (score <= 0.2)
167
+ interpretation =
168
+ "Good calibration -- comparable to prediction market aggregates.";
169
+ else if (score <= 0.3)
170
+ interpretation =
171
+ "Moderate calibration -- room for improvement in confidence estimates.";
172
+ else
173
+ interpretation =
174
+ "Weak calibration -- predictions are poorly matched to outcomes.";
175
+
176
+ return { score, n: withProbability.length, interpretation };
177
+ }
178
+
179
+ /**
180
+ * Build calibration curve data: bin predictions into buckets, compare predicted vs actual rates.
181
+ * Perfect calibration follows the diagonal (predicted 70% → 70% actually happen).
182
+ */
183
+ function computeCalibrationCurve(scored) {
184
+ const BINS = [
185
+ { min: 0, max: 0.2, label: "0-20%" },
186
+ { min: 0.2, max: 0.4, label: "20-40%" },
187
+ { min: 0.4, max: 0.6, label: "40-60%" },
188
+ { min: 0.6, max: 0.8, label: "60-80%" },
189
+ { min: 0.8, max: 1.01, label: "80-100%" },
190
+ ];
191
+
192
+ const withData = scored.filter(
193
+ (s) => s.accurate !== null && s.estimateConfidence !== null,
194
+ );
195
+
196
+ if (withData.length === 0) return { bins: [], bias: null };
197
+
198
+ const bins = BINS.map((bin) => {
199
+ const inBin = withData.filter((s) => {
200
+ const conf = normalizeConfidence(s.estimateConfidence);
201
+ return conf >= bin.min && conf < bin.max;
202
+ });
203
+
204
+ const count = inBin.length;
205
+ const accurateCount = inBin.filter((s) => s.accurate).length;
206
+ const actualRate =
207
+ count > 0 ? Math.round((accurateCount / count) * 100) : null;
208
+ const midpoint = Math.round(((bin.min + bin.max) / 2) * 100);
209
+
210
+ return {
211
+ label: bin.label,
212
+ predicted: midpoint,
213
+ actual: actualRate,
214
+ count,
215
+ };
216
+ });
217
+
218
+ // Overall bias direction
219
+ let overconfidentBins = 0;
220
+ let underconfidentBins = 0;
221
+ for (const bin of bins) {
222
+ if (bin.count === 0 || bin.actual === null) continue;
223
+ if (bin.predicted > bin.actual) overconfidentBins++;
224
+ else if (bin.predicted < bin.actual) underconfidentBins++;
225
+ }
226
+
227
+ let bias = null;
228
+ if (overconfidentBins > underconfidentBins) bias = "overconfident";
229
+ else if (underconfidentBins > overconfidentBins) bias = "underconfident";
230
+ else if (overconfidentBins > 0) bias = "mixed";
231
+
232
+ return { bins, bias };
233
+ }
234
+
235
+ /**
236
+ * Normalize confidence to 0-1 range.
237
+ */
238
+ function normalizeConfidence(conf) {
239
+ if (typeof conf === "number") return Math.max(0, Math.min(1, conf));
240
+ if (typeof conf === "string") {
241
+ const lower = conf.toLowerCase();
242
+ if (lower === "high") return 0.8;
243
+ if (lower === "medium") return 0.5;
244
+ if (lower === "low") return 0.2;
245
+ }
246
+ return 0.5; // default
247
+ }
248
+
249
+ function generateInsight(
250
+ accuracy,
251
+ byConfidence,
252
+ unmatchedCount,
253
+ totalEstimates,
254
+ brierData,
255
+ calibrationCurve,
256
+ ) {
118
257
  const parts = [];
119
258
 
120
259
  if (accuracy !== null) {
121
260
  if (accuracy >= 80) {
122
- parts.push(`Strong calibration: ${accuracy}% of scored predictions were accurate.`);
261
+ parts.push(
262
+ `Strong calibration: ${accuracy}% of scored predictions were accurate.`,
263
+ );
123
264
  } else if (accuracy >= 50) {
124
- parts.push(`Moderate calibration: ${accuracy}% accuracy. Room for improvement.`);
265
+ parts.push(
266
+ `Moderate calibration: ${accuracy}% accuracy. Room for improvement.`,
267
+ );
125
268
  } else {
126
- parts.push(`Weak calibration: only ${accuracy}% accuracy. Estimates may need more evidence before committing.`);
269
+ parts.push(
270
+ `Weak calibration: only ${accuracy}% accuracy. Estimates may need more evidence before committing.`,
271
+ );
127
272
  }
128
273
  }
129
274
 
130
275
  if (byConfidence.high !== undefined && byConfidence.low !== undefined) {
131
276
  if (byConfidence.high < byConfidence.low) {
132
- parts.push('Overconfidence detected: high-confidence predictions are less accurate than low-confidence ones.');
277
+ parts.push(
278
+ "Overconfidence detected: high-confidence predictions are less accurate than low-confidence ones.",
279
+ );
133
280
  }
134
281
  }
135
282
 
136
283
  if (totalEstimates > 0 && unmatchedCount / totalEstimates > 0.5) {
137
- parts.push(`${unmatchedCount} of ${totalEstimates} estimates have no calibration follow-up. Run /calibrate to close the loop.`);
284
+ parts.push(
285
+ `${unmatchedCount} of ${totalEstimates} estimates have no calibration follow-up. Run /calibrate to close the loop.`,
286
+ );
287
+ }
288
+
289
+ if (brierData && brierData.score !== null) {
290
+ parts.push(`Brier score: ${brierData.score} (${brierData.interpretation})`);
291
+ }
292
+
293
+ if (calibrationCurve && calibrationCurve.bias) {
294
+ if (calibrationCurve.bias === "overconfident") {
295
+ parts.push(
296
+ "Systematic overconfidence detected: your high-confidence predictions resolve less often than expected. Consider adding buffer to estimates.",
297
+ );
298
+ } else if (calibrationCurve.bias === "underconfident") {
299
+ parts.push(
300
+ "You tend to underestimate -- your predictions succeed more often than your confidence suggests. Trust your analysis more.",
301
+ );
302
+ }
138
303
  }
139
304
 
140
- return parts.length > 0 ? parts.join(' ') : 'Not enough data to generate calibration insights.';
305
+ return parts.length > 0
306
+ ? parts.join(" ")
307
+ : "Not enough data to generate calibration insights.";
141
308
  }
142
309
 
143
310
  function daysSince(dateStr) {
package/lib/dashboard.js CHANGED
@@ -1,20 +1,24 @@
1
- 'use strict';
1
+ "use strict";
2
2
 
3
- const fs = require('node:fs');
4
- const path = require('node:path');
3
+ const fs = require("node:fs");
4
+ const path = require("node:path");
5
5
 
6
6
  /**
7
7
  * Slim a claims array for dashboard embedding (compact keys).
8
8
  */
9
9
  function slim(claims) {
10
- return claims.map(c => ({
11
- i: c.id, t: c.type, tp: c.topic,
12
- c: c.content || c.text || c.claim || c.description || '',
13
- e: c.evidence, s: c.status,
14
- p: c.phase_added, ts: c.timestamp || c.created || c.date,
10
+ return claims.map((c) => ({
11
+ i: c.id,
12
+ t: c.type,
13
+ tp: c.topic,
14
+ c: c.content || c.text || c.claim || c.description || "",
15
+ e: c.evidence,
16
+ s: c.status,
17
+ p: c.phase_added,
18
+ ts: c.timestamp || c.created || c.date,
15
19
  cf: (c.conflicts_with || []).length > 0 ? c.conflicts_with : undefined,
16
20
  r: c.resolved_by || undefined,
17
- tg: (c.tags || []).length > 0 ? c.tags : undefined
21
+ tg: (c.tags || []).length > 0 ? c.tags : undefined,
18
22
  }));
19
23
  }
20
24
 
@@ -23,7 +27,7 @@ function slim(claims) {
23
27
  */
24
28
  function loadClaims(filePath) {
25
29
  try {
26
- return JSON.parse(fs.readFileSync(filePath, 'utf8'));
30
+ return JSON.parse(fs.readFileSync(filePath, "utf8"));
27
31
  } catch {
28
32
  return null;
29
33
  }
@@ -37,17 +41,21 @@ function findSprintFiles(targetDir) {
37
41
  const found = [];
38
42
 
39
43
  // Direct claims.json in target dir
40
- const direct = path.join(targetDir, 'claims.json');
44
+ const direct = path.join(targetDir, "claims.json");
41
45
  if (fs.existsSync(direct)) {
42
- found.push({ file: direct, name: path.basename(targetDir), cat: 'root' });
46
+ found.push({ file: direct, name: path.basename(targetDir), cat: "root" });
43
47
  }
44
48
 
45
49
  // Archive subdir (flat JSON files)
46
- const archiveDir = path.join(targetDir, 'archive');
50
+ const archiveDir = path.join(targetDir, "archive");
47
51
  if (fs.existsSync(archiveDir) && fs.statSync(archiveDir).isDirectory()) {
48
52
  for (const f of fs.readdirSync(archiveDir)) {
49
- if (f.endsWith('.json') && f.includes('claims')) {
50
- found.push({ file: path.join(archiveDir, f), name: f.replace('.json', '').replace(/-/g, ' '), cat: 'archive' });
53
+ if (f.endsWith(".json") && f.includes("claims")) {
54
+ found.push({
55
+ file: path.join(archiveDir, f),
56
+ name: f.replace(".json", "").replace(/-/g, " "),
57
+ cat: "archive",
58
+ });
51
59
  }
52
60
  }
53
61
  }
@@ -57,26 +65,35 @@ function findSprintFiles(targetDir) {
57
65
  const entries = fs.readdirSync(targetDir, { withFileTypes: true });
58
66
  for (const entry of entries) {
59
67
  if (!entry.isDirectory()) continue;
60
- if (entry.name.startsWith('.') || entry.name === 'archive' || entry.name === 'node_modules') continue;
68
+ if (
69
+ entry.name.startsWith(".") ||
70
+ entry.name === "archive" ||
71
+ entry.name === "node_modules"
72
+ )
73
+ continue;
61
74
  const childDir = path.join(targetDir, entry.name);
62
- const childClaims = path.join(childDir, 'claims.json');
75
+ const childClaims = path.join(childDir, "claims.json");
63
76
  if (fs.existsSync(childClaims)) {
64
- found.push({ file: childClaims, name: entry.name, cat: 'active' });
77
+ found.push({ file: childClaims, name: entry.name, cat: "active" });
65
78
  }
66
79
  // Second level
67
80
  try {
68
81
  const subEntries = fs.readdirSync(childDir, { withFileTypes: true });
69
82
  for (const sub of subEntries) {
70
83
  if (!sub.isDirectory()) continue;
71
- if (sub.name.startsWith('.')) continue;
72
- const subClaims = path.join(childDir, sub.name, 'claims.json');
84
+ if (sub.name.startsWith(".")) continue;
85
+ const subClaims = path.join(childDir, sub.name, "claims.json");
73
86
  if (fs.existsSync(subClaims)) {
74
- found.push({ file: subClaims, name: sub.name, cat: 'active' });
87
+ found.push({ file: subClaims, name: sub.name, cat: "active" });
75
88
  }
76
89
  }
77
- } catch { /* skip */ }
90
+ } catch {
91
+ /* skip */
92
+ }
78
93
  }
79
- } catch { /* skip */ }
94
+ } catch {
95
+ /* skip */
96
+ }
80
97
 
81
98
  return found;
82
99
  }
@@ -95,10 +112,10 @@ function loadSprints(targetDir) {
95
112
  if (claims.length === 0) continue;
96
113
  sprints.push({
97
114
  n: src.name,
98
- p: data.meta?.phase || 'unknown',
99
- q: data.meta?.question || '',
100
- cat: src.cat || 'active',
101
- c: slim(claims)
115
+ p: data.meta?.phase || "unknown",
116
+ q: data.meta?.question || "",
117
+ cat: src.cat || "active",
118
+ c: slim(claims),
102
119
  });
103
120
  }
104
121
  return sprints;
@@ -110,17 +127,22 @@ function loadSprints(targetDir) {
110
127
  * @returns {string} Complete HTML string
111
128
  */
112
129
  function buildHtml(sprints) {
113
- const templatePath = path.join(__dirname, '..', 'templates', 'dashboard.html');
114
- const template = fs.readFileSync(templatePath, 'utf8');
115
- const jsonData = JSON.stringify(sprints).replace(/<\/script/gi, '<\\/script');
116
- return template.replace('__SPRINT_DATA__', jsonData);
130
+ const templatePath = path.join(
131
+ __dirname,
132
+ "..",
133
+ "templates",
134
+ "dashboard.html",
135
+ );
136
+ const template = fs.readFileSync(templatePath, "utf8");
137
+ const jsonData = JSON.stringify(sprints).replace(/<\/script/gi, "<\\/script");
138
+ return template.replace("__SPRINT_DATA__", jsonData);
117
139
  }
118
140
 
119
141
  /**
120
142
  * Return paths to all claims.json files for watching.
121
143
  */
122
144
  function claimsPaths(targetDir) {
123
- return findSprintFiles(targetDir).map(s => s.file);
145
+ return findSprintFiles(targetDir).map((s) => s.file);
124
146
  }
125
147
 
126
148
  module.exports = { loadSprints, buildHtml, claimsPaths, findSprintFiles, slim };