transparent-confidence 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/LICENSE +183 -0
- package/README.md +537 -0
- package/dist/index.cjs +561 -0
- package/dist/index.d.cts +256 -0
- package/dist/index.d.ts +256 -0
- package/dist/index.js +533 -0
- package/package.json +60 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,533 @@
|
|
|
1
|
+
// src/dimensions/authority.ts
|
|
2
|
+
/** Highest raw score the authority dimension can award. */
const MAX = 20;

/** Tier table applied when the config does not provide `authority.tiers`. */
const DEFAULT_TIERS = [
  { name: "Primary", rank: 10 },
  { name: "Secondary", rank: 20 },
  { name: "Supporting", rank: 30 },
];

/**
 * Scores how authoritative the retrieved candidates are.
 *
 * The lowest effective rank among the candidates selects a base score:
 * rank <= 10 gives 18, <= 20 gives 13, <= 30 gives 7, anything else gives 2.
 * One bonus point is added when any candidate has `isAmendment === true`, and
 * one more when the candidates fall into more than one rank bucket. The sum
 * is capped at MAX.
 *
 * @param {object} inputs - Scoring inputs; only `inputs.candidates` is read.
 * @param {object} config - Scorer config; `config.authority.tiers` replaces DEFAULT_TIERS.
 * @returns {{raw: number, max: number, normalized: number, explanation: string}}
 */
function scoreAuthority(inputs, config) {
  const candidateList = inputs.candidates;
  const tierTable = config.authority?.tiers ?? DEFAULT_TIERS;

  if (candidateList.length === 0) {
    return {
      raw: 0,
      max: MAX,
      normalized: 0,
      explanation: "No candidates retrieved \u2014 authority cannot be evaluated.",
    };
  }

  const ranks = candidateList.map((candidate) =>
    resolveRank(candidate.authorityRank, candidate.documentType, tierTable)
  );
  const bestRank = Math.min(...ranks);
  const notes = [];

  // Name of the first configured tier matching the predicate, else the fallback.
  const tierLabel = (inBand, fallback) => tierTable.find(inBand)?.name ?? fallback;

  // rankBucket uses the same 10/20/30 cut-offs as the base-score ladder.
  let basePoints;
  switch (rankBucket(bestRank)) {
    case "tier1": {
      basePoints = 18;
      const label = tierLabel((t) => t.rank <= 10, "Primary");
      notes.push(`Answer grounded in ${label} tier documents (rank \u2264 10) \u2014 highest authority.`);
      break;
    }
    case "tier2": {
      basePoints = 13;
      const label = tierLabel((t) => t.rank > 10 && t.rank <= 20, "Secondary");
      notes.push(`Answer grounded in ${label} tier documents (rank \u2264 20).`);
      break;
    }
    case "tier3": {
      basePoints = 7;
      const label = tierLabel((t) => t.rank > 20 && t.rank <= 30, "Supporting");
      notes.push(`Answer grounded in ${label} tier documents (rank \u2264 30) \u2014 lower authority.`);
      break;
    }
    default:
      basePoints = 2;
      notes.push("Answer grounded in unclassified or minimal-authority documents.");
  }

  let bonusPoints = 0;

  const includesAmendment = candidateList.some((candidate) => candidate.isAmendment === true);
  if (includesAmendment) {
    bonusPoints += 1;
    notes.push(
      "Amendment sections included \u2014 amended version controls over original language (+1)."
    );
  }

  const bucketsSeen = new Set(ranks.map((rank) => rankBucket(rank)));
  if (bucketsSeen.size > 1) {
    bonusPoints += 1;
    notes.push("Multiple authority tiers consulted (+1).");
  }

  const raw = Math.min(MAX, basePoints + bonusPoints);
  return {
    raw,
    max: MAX,
    normalized: Math.round(raw / MAX * 100),
    explanation: notes.join(" "),
  };
}
|
|
60
|
+
/**
 * Resolves the effective authority rank of one candidate.
 *
 * An explicit `authorityRank` wins. Otherwise the document type is matched,
 * case-insensitively, against each tier's optional `keywords` list and the
 * rank of the first matching tier is returned. Anything that cannot be
 * classified gets rank 99.
 *
 * @param {number|null|undefined} authorityRank - Explicit rank, if the caller supplied one.
 * @param {string|null|undefined} documentType - Document type label used for keyword matching.
 * @param {Array<{name: string, rank: number, keywords?: string[]}>} tiers - Tier table to match against.
 * @returns {number} The explicit rank, the matched tier's rank, or 99.
 */
function resolveRank(authorityRank, documentType, tiers) {
  // `!= null` rejects both undefined and null. A null rank previously leaked
  // through and was coerced to 0 by Math.min, i.e. scored as top authority.
  if (authorityRank != null) return authorityRank;

  // Non-string document types cannot be keyword-matched (and have no toLowerCase).
  if (typeof documentType !== "string" || documentType === "") return 99;

  const lower = documentType.toLowerCase();
  for (const tier of tiers) {
    if (tier.keywords?.some((kw) => lower.includes(kw.toLowerCase()))) {
      return tier.rank;
    }
  }
  return 99;
}
|
|
71
|
+
/**
 * Maps a numeric authority rank to a bucket label.
 *
 * @param {number} rank - Effective authority rank.
 * @returns {"tier1"|"tier2"|"tier3"|"unclassified"} "tier1" for rank <= 10,
 *   "tier2" for <= 20, "tier3" for <= 30, otherwise "unclassified".
 */
function rankBucket(rank) {
  // Ordered by ascending ceiling; the first ceiling the rank fits under wins.
  const ceilings = [
    [10, "tier1"],
    [20, "tier2"],
    [30, "tier3"],
  ];
  for (const [ceiling, bucket] of ceilings) {
    if (rank <= ceiling) {
      return bucket;
    }
  }
  return "unclassified";
}
|
|
77
|
+
|
|
78
|
+
// src/dimensions/consistency.ts
|
|
79
|
+
/** Highest raw score the consistency dimension can award. */
const MAX2 = 10;

/**
 * Scores how consistent the retrieved evidence is.
 *
 * Two parts are summed and clamped to [0, MAX2]:
 *  - spread points from the standard deviation of `combinedScore`
 *    (< 0.1 gives 8, < 0.2 gives 6, < 0.3 gives 4, otherwise 2; a single
 *    candidate gets a fixed 4);
 *  - a conflict adjustment taken from `conflictingCandidateCount` when it is
 *    provided (0 gives +2, 1 gives 0, anything else gives -2), otherwise from
 *    `hasConflict` (true gives -2, anything else gives +2).
 *
 * @param {object} inputs - Reads `candidates`, `conflictingCandidateCount` and `hasConflict`.
 * @returns {{raw: number, max: number, normalized: number, explanation: string}}
 */
function scoreConsistency(inputs) {
  const candidateList = inputs.candidates;

  if (candidateList.length === 0) {
    return {
      raw: 0,
      max: MAX2,
      normalized: 0,
      explanation: "No candidates retrieved \u2014 evidence consistency cannot be evaluated.",
    };
  }

  const notes = [];

  let spreadPoints;
  if (candidateList.length === 1) {
    spreadPoints = 4;
    notes.push("Single candidate retrieved \u2014 variance unmeasurable, neutral score assigned.");
  } else {
    const deviation = stdDev(candidateList.map((candidate) => candidate.combinedScore));
    const bands = [
      { below: 0.1, points: 8, verdict: "very tight retrieval consistency" },
      { below: 0.2, points: 6, verdict: "good retrieval consistency" },
      { below: 0.3, points: 4, verdict: "moderate retrieval consistency" },
    ];
    // No band matches when the deviation is >= 0.3 (or NaN): lowest score.
    const band =
      bands.find((entry) => deviation < entry.below) ??
      { points: 2, verdict: "scattered retrieval scores" };
    spreadPoints = band.points;
    notes.push(`Score std dev ${deviation.toFixed(3)} \u2014 ${band.verdict}.`);
  }

  const conflictCount = inputs.conflictingCandidateCount;
  let conflictPoints;
  if (conflictCount === undefined) {
    // No count supplied: fall back to the boolean flag.
    if (inputs.hasConflict === true) {
      conflictPoints = -2;
      notes.push("Conflicting information detected across candidates (\u22122).");
    } else {
      conflictPoints = 2;
      notes.push("No conflict detected (+2).");
    }
  } else {
    switch (conflictCount) {
      case 0:
        conflictPoints = 2;
        notes.push("No conflicting candidates \u2014 full agreement (+2).");
        break;
      case 1:
        conflictPoints = 0;
        notes.push("1 conflicting candidate detected.");
        break;
      default:
        conflictPoints = -2;
        notes.push(`${conflictCount} conflicting candidates detected (\u22122).`);
    }
  }

  const raw = Math.max(0, Math.min(MAX2, spreadPoints + conflictPoints));
  return {
    raw,
    max: MAX2,
    normalized: Math.round(raw / MAX2 * 100),
    explanation: notes.join(" "),
  };
}
|
|
139
|
+
/**
 * Population standard deviation (divides by n, not n - 1).
 *
 * @param {number[]} values - Samples to measure.
 * @returns {number} The standard deviation, or 0 when fewer than two values are given.
 */
function stdDev(values) {
  const count = values.length;
  if (count < 2) {
    return 0;
  }

  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  const mean = total / count;

  let squaredDistance = 0;
  values.forEach((value) => {
    squaredDistance += (value - mean) ** 2;
  });

  return Math.sqrt(squaredDistance / count);
}
|
|
146
|
+
|
|
147
|
+
// src/dimensions/corpus.ts
|
|
148
|
+
/** Highest raw score the corpus dimension can award. */
const MAX3 = 15;

/**
 * Scores corpus completeness from how many expected document types are loaded.
 *
 * The ratio `min(corpusDocCount, expectedDocCount) / expectedDocCount` selects
 * a base score (>= 1 gives 15, >= 0.8 gives 12, >= 0.6 gives 9, >= 0.4 gives 5,
 * >= 0.2 gives 2, otherwise 0). Three points are deducted, floored at 0, when
 * `missingRelevantType === true`.
 *
 * @param {object} inputs - Reads `corpusDocCount` and `missingRelevantType`.
 * @param {object} config - Reads `config.corpus.expectedDocCount` (defaults to 5).
 * @returns {{raw: number, max: number, normalized: number, explanation: string}}
 */
function scoreCorpus(inputs, config) {
  const fallbackExpected = 5;
  const configuredExpected = config.corpus?.expectedDocCount ?? fallbackExpected;
  // A zero, negative, NaN or infinite expectation would make the ratio below
  // NaN or meaningless, so such values fall back to the default.
  const expectedDocCount =
    configuredExpected > 0 && configuredExpected < Infinity
      ? configuredExpected
      : fallbackExpected;

  const { corpusDocCount, missingRelevantType } = inputs;

  // `== null` covers undefined and null; a null count used to be scored as an
  // empty corpus ("null of 5 types") instead of being reported as missing.
  if (corpusDocCount == null) {
    return {
      raw: 0,
      max: MAX3,
      normalized: 0,
      explanation: "corpusDocCount not provided. Supply the number of loaded document types to enable corpus scoring.",
    };
  }

  const parts = [];
  const ratio = Math.min(corpusDocCount, expectedDocCount) / expectedDocCount;

  let base;
  if (ratio >= 1) {
    base = 15;
    parts.push(`All ${expectedDocCount} expected document types present.`);
  } else if (ratio >= 0.8) {
    base = 12;
    parts.push(`${corpusDocCount} of ${expectedDocCount} expected document types present.`);
  } else if (ratio >= 0.6) {
    base = 9;
    parts.push(`${corpusDocCount} of ${expectedDocCount} expected document types present.`);
  } else if (ratio >= 0.4) {
    base = 5;
    parts.push(`${corpusDocCount} of ${expectedDocCount} expected document types present.`);
  } else if (ratio >= 0.2) {
    base = 2;
    parts.push(`Only ${corpusDocCount} of ${expectedDocCount} expected document types present.`);
  } else {
    base = 0;
    parts.push(`Corpus is empty or nearly empty (${corpusDocCount} of ${expectedDocCount} types).`);
  }

  let score = base;
  if (missingRelevantType === true) {
    score = Math.max(0, score - 3);
    parts.push(
      "A document type directly relevant to this question appears to be missing \u2014 answer may be incomplete (\u22123)."
    );
  }

  const raw = Math.max(0, Math.min(MAX3, score));
  return {
    raw,
    max: MAX3,
    normalized: Math.round(raw / MAX3 * 100),
    explanation: parts.join(" "),
  };
}
|
|
197
|
+
|
|
198
|
+
// src/dimensions/freshness.ts
|
|
199
|
+
/** Highest raw score the freshness dimension can award. */
const MAX4 = 15;

/** Freshness settings used for any value missing from `config.freshness`. */
const DEFAULTS = {
  maxAgeForFullScore: 90,
  penaltyPerMonth: 1.5,
  hardCutoffAge: 730,
};

/**
 * Scores how recent the retrieved documents are, based on the median age
 * (in days) of the candidates that carry a usable `lastUpdated` Date.
 *
 *  - No usable dates: raw 0 with an explanatory message.
 *  - Median age >= `hardCutoffAge`: raw 0.
 *  - Median age <= `maxAgeForFullScore`: full score.
 *  - Otherwise `penaltyPerMonth` points are deducted per 30 days beyond the
 *    full-score window.
 *
 * @param {object} inputs - Reads `inputs.candidates[].lastUpdated`.
 * @param {object} config - Reads `config.freshness` overrides for DEFAULTS.
 * @returns {{raw: number, max: number, normalized: number, explanation: string}}
 */
function scoreFreshness(inputs, config) {
  const cfg = {
    maxAgeForFullScore: config.freshness?.maxAgeForFullScore ?? DEFAULTS.maxAgeForFullScore,
    penaltyPerMonth: config.freshness?.penaltyPerMonth ?? DEFAULTS.penaltyPerMonth,
    hardCutoffAge: config.freshness?.hardCutoffAge ?? DEFAULTS.hardCutoffAge,
  };

  // An Invalid Date is still `instanceof Date` but its getTime() is NaN, which
  // would turn the median, and therefore the returned raw score, into NaN.
  const datedCandidates = inputs.candidates.filter(
    (c) => c.lastUpdated instanceof Date && !Number.isNaN(c.lastUpdated.getTime())
  );

  if (datedCandidates.length === 0) {
    return {
      raw: 0,
      max: MAX4,
      normalized: 0,
      explanation: "No lastUpdated dates provided on candidates. Supply Candidate.lastUpdated to enable freshness scoring.",
    };
  }

  const msPerDay = 1e3 * 60 * 60 * 24;
  const now = Date.now();
  const agesInDays = datedCandidates
    .map((c) => (now - c.lastUpdated.getTime()) / msPerDay)
    .sort((a, b) => a - b);
  const medianAge = median(agesInDays);

  const parts = [];
  parts.push(`Median document age: ${Math.round(medianAge)} days.`);

  if (medianAge >= cfg.hardCutoffAge) {
    return {
      raw: 0,
      max: MAX4,
      normalized: 0,
      explanation: `${parts.join(" ")} Documents exceed the ${cfg.hardCutoffAge}-day cutoff \u2014 freshness score is 0.`,
    };
  }

  if (medianAge <= cfg.maxAgeForFullScore) {
    parts.push(`Within the ${cfg.maxAgeForFullScore}-day full-score window.`);
    return {
      raw: MAX4,
      max: MAX4,
      normalized: 100,
      explanation: parts.join(" "),
    };
  }

  const daysOverWindow = medianAge - cfg.maxAgeForFullScore;
  const monthsOver = daysOverWindow / 30; // a "month" is a flat 30 days here
  const penalty = monthsOver * cfg.penaltyPerMonth;
  const raw = Math.max(0, Math.min(MAX4, MAX4 - penalty));
  const rawRounded = Math.round(raw * 10) / 10;
  parts.push(
    `${Math.round(daysOverWindow)} days beyond the ${cfg.maxAgeForFullScore}-day window \u2014 penalty: ${penalty.toFixed(1)} pts.`
  );

  // `raw` is reported as a whole number while `normalized` is derived from the
  // one-decimal value; this matches the existing output contract.
  return {
    raw: Math.round(rawRounded),
    max: MAX4,
    normalized: Math.round(rawRounded / MAX4 * 100),
    explanation: parts.join(" "),
  };
}

/**
 * Median of an array that is already sorted in ascending order.
 *
 * @param {number[]} sorted - Ascending values.
 * @returns {number} The middle value, the mean of the two middle values for an
 *   even length, or 0 for an empty array.
 */
function median(sorted) {
  const mid = Math.floor(sorted.length / 2);
  if (sorted.length % 2 === 0) {
    return ((sorted[mid - 1] ?? 0) + (sorted[mid] ?? 0)) / 2;
  }
  return sorted[mid] ?? 0;
}
|
|
261
|
+
|
|
262
|
+
// src/dimensions/grounding.ts
|
|
263
|
+
/** Highest raw score the grounding dimension can award. */
const MAX5 = 30;

/** Upper bound applied to the grounding score for each query complexity. */
const COMPLEXITY_CEILINGS = {
  direct: 30,
  inferential: 24,
  "multi-hop": 18,
  comparative: 16,
};

/**
 * Scores how well the source text grounds the answer.
 *
 * Steps, in order:
 *  1. `documentsSilent === true` short-circuits to raw 0.
 *  2. Base score from `confidenceLevel` / `ambiguityNotes`
 *     (high and unambiguous 30, high 21, medium 13, otherwise 5).
 *  3. Deductions: expert review (-3), external constraint (-2), conflict (-5),
 *     floored at 0.
 *  4. Cap by query complexity (not applied to "direct" or unknown values).
 *  5. Faithfulness penalty (< 0.5 gives -12, < 0.7 gives -7, < 0.9 gives -3).
 *  6. Citation bonus (2 citations +1, 3 or more +2), capped at MAX5.
 *
 * @param {object} inputs - Reads `documentsSilent`, `confidenceLevel`,
 *   `ambiguityNotes`, `requiresExpertReview`, `externalConstraintNote`,
 *   `hasConflict`, `queryComplexity`, `faithfulnessScore` and `citationCount`.
 * @returns {{raw: number, max: number, normalized: number, explanation: string}}
 */
function scoreGrounding(inputs) {
  const parts = [];

  if (inputs.documentsSilent === true) {
    return {
      raw: 0,
      max: MAX5,
      normalized: 0,
      explanation: "Source documents do not address this question.",
    };
  }

  let score;
  if (inputs.confidenceLevel === "high" && !inputs.ambiguityNotes) {
    score = 30;
    parts.push("Source text directly and unambiguously answers the question.");
  } else if (inputs.confidenceLevel === "high") {
    score = 21;
    parts.push(
      `Source text addresses the question but contains ambiguity: ${inputs.ambiguityNotes}`
    );
  } else if (inputs.confidenceLevel === "medium") {
    score = 13;
    if (inputs.ambiguityNotes) {
      parts.push(
        `Answer is inferrable from source text but not explicit. Ambiguity: ${inputs.ambiguityNotes}`
      );
    } else {
      parts.push("Answer is inferrable from source text but not explicitly stated.");
    }
  } else {
    score = 5;
    parts.push(
      "Source documents are insufficient, conflicting, or ambiguous on this question. Answer reflects best available inference."
    );
  }

  if (inputs.requiresExpertReview === true) {
    score -= 3;
    parts.push("Expert review recommended before acting on this answer (\u22123).");
  }
  if (inputs.externalConstraintNote) {
    score -= 2;
    parts.push(
      `External constraint may affect this answer: ${inputs.externalConstraintNote} (\u22122).`
    );
  }
  if (inputs.hasConflict === true) {
    score -= 5;
    parts.push("Conflicting information detected across retrieved sections (\u22125).");
  }
  score = Math.max(0, score);

  if (inputs.queryComplexity && inputs.queryComplexity !== "direct") {
    // An unrecognised complexity yields an undefined ceiling; the comparison
    // is then false and no cap is applied.
    const ceiling = COMPLEXITY_CEILINGS[inputs.queryComplexity];
    if (score > ceiling) {
      score = ceiling;
      parts.push(`Score capped at ${ceiling} for ${inputs.queryComplexity} question type.`);
    }
  }

  // Only a real number is treated as a faithfulness score. A null value used
  // to pass the `!== undefined` check, coerce to 0 in the comparison and then
  // throw on `.toFixed`.
  const fs = inputs.faithfulnessScore;
  if (typeof fs === "number" && !Number.isNaN(fs)) {
    if (fs < 0.5) {
      score = Math.max(0, score - 12);
      parts.push(
        `Faithfulness score ${fs.toFixed(2)} indicates significant hallucination risk (\u221212).`
      );
    } else if (fs < 0.7) {
      score = Math.max(0, score - 7);
      parts.push(`Faithfulness score ${fs.toFixed(2)} indicates moderate hallucination risk (\u22127).`);
    } else if (fs < 0.9) {
      score = Math.max(0, score - 3);
      parts.push(`Faithfulness score ${fs.toFixed(2)} indicates minor hallucination risk (\u22123).`);
    }
  }

  if (inputs.citationCount !== undefined && inputs.citationCount > 0) {
    const bonus = inputs.citationCount >= 3 ? 2 : inputs.citationCount === 2 ? 1 : 0;
    if (bonus > 0) {
      score = Math.min(MAX5, score + bonus);
      parts.push(`${inputs.citationCount} sections explicitly cited in answer (+${bonus}).`);
    }
  }

  const raw = Math.max(0, Math.min(MAX5, score));
  return {
    raw,
    max: MAX5,
    normalized: normalize(raw, MAX5),
    explanation: parts.join(" "),
  };
}

/**
 * Expresses `raw` as a whole-number percentage of `max`.
 *
 * @param {number} raw - Points earned.
 * @param {number} max - Points available.
 * @returns {number} `raw / max * 100`, rounded to the nearest integer.
 */
function normalize(raw, max) {
  return Math.round(raw / max * 100);
}
|
|
359
|
+
|
|
360
|
+
// src/dimensions/retrieval.ts
|
|
361
|
+
/** Highest raw score the retrieval dimension can award. */
const MAX6 = 25;

/**
 * Scores retrieval quality from four components, summed and capped at MAX6:
 *  - agreement: candidates with two or more positive `retrievalScores` entries
 *    (3 or more gives 15, 2 gives 12, 1 gives 8, none gives 3);
 *  - magnitude: mean of the top three `combinedScore` values, each multiplied
 *    by `extractionQuality` when that is set
 *    (>= 0.8 gives 8, >= 0.65 gives 6, >= 0.5 gives 4, >= 0.35 gives 2, else 0);
 *  - diversity: distinct `documentId` values (3 or more gives 3, 2 gives 1);
 *  - breadth: total candidates (5 or more gives 2, 3 or more gives 1).
 *
 * @param {object} inputs - Reads `inputs.candidates`.
 * @returns {{raw: number, max: number, normalized: number, explanation: string}}
 */
function scoreRetrieval(inputs) {
  const { candidates } = inputs;
  const parts = [];

  if (candidates.length === 0) {
    return {
      raw: 0,
      max: MAX6,
      normalized: 0,
      explanation: "No candidates were retrieved.",
    };
  }

  // A candidate without `retrievalScores` is simply unconfirmed;
  // Object.values(undefined) would otherwise throw a TypeError.
  const confirmed = candidates.filter(
    (c) => Object.values(c.retrievalScores ?? {}).filter((s) => s > 0).length >= 2
  );

  let agreementPts;
  if (confirmed.length >= 3) {
    agreementPts = 15;
    parts.push(`${confirmed.length} candidates confirmed by 2+ retrieval methods.`);
  } else if (confirmed.length === 2) {
    agreementPts = 12;
    parts.push("2 candidates confirmed by 2+ retrieval methods.");
  } else if (confirmed.length === 1) {
    agreementPts = 8;
    parts.push("1 candidate confirmed by 2+ retrieval methods.");
  } else {
    agreementPts = 3;
    parts.push(
      "No candidates confirmed by multiple retrieval methods \u2014 single-path retrieval only."
    );
  }

  // Copy before sorting so the caller's array order is left untouched.
  const top3 = [...candidates]
    .sort((a, b) => b.combinedScore - a.combinedScore)
    .slice(0, 3);
  const effectiveScores = top3.map(
    (c) => c.extractionQuality !== undefined ? c.combinedScore * c.extractionQuality : c.combinedScore
  );
  const avgScore = effectiveScores.reduce((sum, s) => sum + s, 0) / effectiveScores.length;

  let magnitudePts;
  if (avgScore >= 0.8) {
    magnitudePts = 8;
  } else if (avgScore >= 0.65) {
    magnitudePts = 6;
  } else if (avgScore >= 0.5) {
    magnitudePts = 4;
  } else if (avgScore >= 0.35) {
    magnitudePts = 2;
  } else {
    magnitudePts = 0;
  }
  parts.push(`Top-3 effective score avg: ${avgScore.toFixed(2)}.`);
  if (top3.some((c) => c.extractionQuality !== undefined)) {
    parts.push("Extraction quality applied as score multiplier.");
  }

  // Diversity is only assessed when at least one candidate carries a documentId.
  const withIds = candidates.filter((c) => c.documentId !== undefined);
  const uniqueDocIds = new Set(withIds.map((c) => c.documentId));
  let diversityPts = 0;
  if (withIds.length > 0) {
    if (uniqueDocIds.size >= 3) {
      diversityPts = 3;
      parts.push(`${uniqueDocIds.size} distinct source documents.`);
    } else if (uniqueDocIds.size === 2) {
      diversityPts = 1;
      parts.push("2 distinct source documents.");
    } else {
      parts.push("All candidates from a single source document.");
    }
  }

  let breadthPts = 0;
  if (candidates.length >= 5) {
    breadthPts = 2;
  } else if (candidates.length >= 3) {
    breadthPts = 1;
  }
  parts.push(`${candidates.length} total candidates.`);

  // The components can add up to 28, so the cap at MAX6 is load-bearing.
  const raw = Math.min(MAX6, agreementPts + magnitudePts + diversityPts + breadthPts);
  return {
    raw,
    max: MAX6,
    normalized: Math.round(raw / MAX6 * 100),
    explanation: parts.join(" "),
  };
}
|
|
443
|
+
|
|
444
|
+
// src/labels.ts
|
|
445
|
+
/**
 * Maps a 0-100 score to a label and colour.
 *
 * @param {number} score - Normalized score.
 * @returns {{label: string, color: string}} "Strong"/green for >= 85,
 *   "Moderate"/amber for >= 65, "Limited"/orange for >= 40, otherwise
 *   "Insufficient"/red.
 */
function deriveLabel(score) {
  // Ordered from the highest floor down; the first floor the score reaches wins.
  const bands = [
    { floor: 85, label: "Strong", color: "green" },
    { floor: 65, label: "Moderate", color: "amber" },
    { floor: 40, label: "Limited", color: "orange" },
  ];
  for (const { floor, label, color } of bands) {
    if (score >= floor) {
      return { label, color };
    }
  }
  return { label: "Insufficient", color: "red" };
}
|
|
451
|
+
/**
 * Builds the tier-1 summary from raw points and the points available.
 *
 * @param {number} raw - Points earned.
 * @param {number} max - Points available.
 * @param {boolean} [notAddressed=false] - When truthy, a fixed
 *   "Not Addressed" result is returned regardless of `raw` and `max`.
 * @returns {{score: number, label: string, color: string}|null} The summary,
 *   or null when `max` is not positive.
 */
function deriveTier1(raw, max, notAddressed = false) {
  if (!notAddressed) {
    if (max <= 0) {
      return null;
    }
    const percent = Math.round(raw / max * 100);
    const band = deriveLabel(percent);
    return { score: percent, label: band.label, color: band.color };
  }
  return { score: 0, label: "Not Addressed", color: "gray" };
}
|
|
460
|
+
/**
 * Builds the tier-2 summary from raw points and the points available.
 *
 * @param {number} raw - Points earned.
 * @param {number} max - Points available.
 * @returns {{score: number, label: string, color: string}|null} "Complete"/green
 *   for a percentage >= 85, "Good"/amber for >= 65, "Partial"/orange for >= 40,
 *   otherwise "Thin"/red; null when `max` is not positive.
 */
function deriveTier2(raw, max) {
  if (max <= 0) {
    return null;
  }
  const score = Math.round(raw / max * 100);
  // Ordered from the highest floor down; the first floor the score reaches wins.
  const bands = [
    { floor: 85, label: "Complete", color: "green" },
    { floor: 65, label: "Good", color: "amber" },
    { floor: 40, label: "Partial", color: "orange" },
  ];
  const match = bands.find((band) => score >= band.floor);
  if (match === undefined) {
    return { score, label: "Thin", color: "red" };
  }
  return { score, label: match.label, color: match.color };
}
|
|
468
|
+
|
|
469
|
+
// src/normalize.ts
|
|
470
|
+
/**
 * Converts raw points to a whole-number percentage clamped to [0, 100].
 *
 * @param {number} raw - Points earned.
 * @param {number} max - Points available.
 * @returns {number} The clamped, rounded percentage; 0 when `max` is not positive.
 */
function normalize2(raw, max) {
  if (max <= 0) {
    return 0;
  }
  const percent = raw / max * 100;
  const clamped = Math.max(0, Math.min(100, percent));
  return Math.round(clamped);
}
|
|
474
|
+
|
|
475
|
+
// src/scorer.ts
|
|
476
|
+
/** Points available from the always-on dimensions (grounding, retrieval, consistency). */
const CORE_MAX = 65;
/** Points the optional authority dimension adds when enabled. */
const AUTHORITY_MAX = 20;
/** Points the optional corpus dimension adds when enabled. */
const CORPUS_MAX = 15;
/** Points the optional freshness dimension adds when enabled. */
const FRESHNESS_MAX = 15;

/**
 * Computes the full confidence scorecard.
 *
 * Grounding, retrieval and consistency are always scored. Authority, corpus
 * and freshness are scored only when the matching key is present on `config`,
 * and each enabled extension raises the maximum possible total. The overall
 * total is the raw sum normalized to 0-100 against that maximum.
 *
 * @param {object} inputs - Scoring inputs passed to every dimension scorer.
 * @param {object} [config={}] - Optional `authority`, `corpus` and `freshness` sections.
 * @returns {object} `{ total, label, labelColor, tier1, tier2, dimensions, meta }`.
 */
function computeConfidence(inputs, config = {}) {
  const hasAuthority = config.authority !== undefined;
  const hasCorpus = config.corpus !== undefined;
  const hasFreshness = config.freshness !== undefined;

  const grounding = scoreGrounding(inputs);
  const retrieval = scoreRetrieval(inputs);
  const consistency = scoreConsistency(inputs);
  const authority = hasAuthority ? scoreAuthority(inputs, config) : undefined;
  const corpus = hasCorpus ? scoreCorpus(inputs, config) : undefined;
  const freshness = hasFreshness ? scoreFreshness(inputs, config) : undefined;

  const extensionFlags = { authority: hasAuthority, corpus: hasCorpus, freshness: hasFreshness };
  const activeExtensions = Object.keys(extensionFlags).filter((key) => extensionFlags[key]);

  // A disabled extension contributes neither points nor headroom.
  const pointsOf = (dimension) => dimension?.raw ?? 0;
  const authorityCeiling = hasAuthority ? AUTHORITY_MAX : 0;
  const corpusCeiling = hasCorpus ? CORPUS_MAX : 0;
  const freshnessCeiling = hasFreshness ? FRESHNESS_MAX : 0;

  // Tier 1 covers the core dimensions plus authority.
  const tier1Raw = grounding.raw + retrieval.raw + consistency.raw + pointsOf(authority);
  const tier1Max = CORE_MAX + authorityCeiling;
  const tier1 = deriveTier1(tier1Raw, tier1Max, inputs.documentsSilent === true);

  // Tier 2 covers corpus and freshness.
  const tier2Raw = pointsOf(corpus) + pointsOf(freshness);
  const tier2Max = corpusCeiling + freshnessCeiling;
  const tier2 = deriveTier2(tier2Raw, tier2Max);

  const maxPossible = tier1Max + corpusCeiling + freshnessCeiling;
  const rawTotal = tier1Raw + pointsOf(corpus) + pointsOf(freshness);
  const total = normalize2(rawTotal, maxPossible);
  const overall = deriveLabel(total);

  // Optional dimensions are added only when they were scored.
  const dimensions = { grounding, retrieval, consistency };
  if (authority !== undefined) {
    dimensions.authority = authority;
  }
  if (corpus !== undefined) {
    dimensions.corpus = corpus;
  }
  if (freshness !== undefined) {
    dimensions.freshness = freshness;
  }

  return {
    total,
    label: overall.label,
    labelColor: overall.color,
    tier1,
    tier2,
    dimensions,
    meta: {
      rawTotal,
      maxPossible,
      activeExtensions,
    },
  };
}
|
|
525
|
+
/**
 * Creates a scorer bound to a fixed configuration.
 *
 * @param {object} [config] - Configuration forwarded to `computeConfidence` on every call.
 * @returns {{compute: function(object): object}} An object whose `compute(inputs)`
 *   returns `computeConfidence(inputs, config)`.
 */
function createScorer(config) {
  const compute = (inputs) => computeConfidence(inputs, config);
  return { compute };
}
|
|
530
|
+
export {
|
|
531
|
+
computeConfidence,
|
|
532
|
+
createScorer
|
|
533
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "transparent-confidence",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Explainable confidence scoring for RAG system answers — structured 0–100 scorecard across retrieval, grounding, and consistency dimensions",
|
|
5
|
+
"author": "Eric Tetzlaff",
|
|
6
|
+
"license": "Apache-2.0",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"main": "./dist/index.cjs",
|
|
9
|
+
"module": "./dist/index.js",
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"exports": {
|
|
12
|
+
".": {
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"import": "./dist/index.js",
|
|
15
|
+
"require": "./dist/index.cjs"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"files": [
|
|
19
|
+
"dist",
|
|
20
|
+
"README.md",
|
|
21
|
+
"CHANGELOG.md"
|
|
22
|
+
],
|
|
23
|
+
"engines": {
|
|
24
|
+
"node": ">=20"
|
|
25
|
+
},
|
|
26
|
+
"keywords": [
|
|
27
|
+
"rag",
|
|
28
|
+
"retrieval-augmented-generation",
|
|
29
|
+
"confidence",
|
|
30
|
+
"scoring",
|
|
31
|
+
"llm",
|
|
32
|
+
"ai",
|
|
33
|
+
"explainability",
|
|
34
|
+
"evaluation",
|
|
35
|
+
"ragas",
|
|
36
|
+
"vector-search"
|
|
37
|
+
],
|
|
38
|
+
"repository": {
|
|
39
|
+
"type": "git",
|
|
40
|
+
"url": "git+https://github.com/emtcmca/transparent-confidence.git"
|
|
41
|
+
},
|
|
42
|
+
"scripts": {
|
|
43
|
+
"build": "tsup src/index.ts --format esm,cjs --dts --out-dir dist",
|
|
44
|
+
"test": "vitest run",
|
|
45
|
+
"test:watch": "vitest",
|
|
46
|
+
"coverage": "vitest run --coverage",
|
|
47
|
+
"lint": "biome check src/",
|
|
48
|
+
"lint:fix": "biome check --write src/",
|
|
49
|
+
"typecheck": "tsc --noEmit"
|
|
50
|
+
},
|
|
51
|
+
"devDependencies": {
|
|
52
|
+
"@biomejs/biome": "^2.4.16",
|
|
53
|
+
"@types/node": "^25.9.2",
|
|
54
|
+
"@vitest/coverage-v8": "^4.1.8",
|
|
55
|
+
"tsup": "^8.5.1",
|
|
56
|
+
"tsx": "^4.22.4",
|
|
57
|
+
"typescript": "^6.0.3",
|
|
58
|
+
"vitest": "^4.1.8"
|
|
59
|
+
}
|
|
60
|
+
}
|