npm - gravito-eval - Versions diffs - 0.1.0 - Mend

gravito-eval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/LICENSE +21 -0
package/README.md +137 -0
package/dist/cli/index.d.ts +14 -0
package/dist/cli/index.d.ts.map +1 -0
package/dist/cli/index.js +276 -0
package/dist/cli/index.js.map +1 -0
package/dist/src/adjudication/index.d.ts +36 -0
package/dist/src/adjudication/index.d.ts.map +1 -0
package/dist/src/adjudication/index.js +149 -0
package/dist/src/adjudication/index.js.map +1 -0
package/dist/src/calibration/index.d.ts +38 -0
package/dist/src/calibration/index.d.ts.map +1 -0
package/dist/src/calibration/index.js +104 -0
package/dist/src/calibration/index.js.map +1 -0
package/dist/src/confidence/index.d.ts +27 -0
package/dist/src/confidence/index.d.ts.map +1 -0
package/dist/src/confidence/index.js +168 -0
package/dist/src/confidence/index.js.map +1 -0
package/dist/src/index.d.ts +26 -0
package/dist/src/index.d.ts.map +1 -0
package/dist/src/index.js +47 -0
package/dist/src/index.js.map +1 -0
package/dist/src/matching/index.d.ts +37 -0
package/dist/src/matching/index.d.ts.map +1 -0
package/dist/src/matching/index.js +292 -0
package/dist/src/matching/index.js.map +1 -0
package/dist/src/metrics/index.d.ts +15 -0
package/dist/src/metrics/index.d.ts.map +1 -0
package/dist/src/metrics/index.js +177 -0
package/dist/src/metrics/index.js.map +1 -0
package/dist/src/telemetry/index.d.ts +10 -0
package/dist/src/telemetry/index.d.ts.map +1 -0
package/dist/src/telemetry/index.js +106 -0
package/dist/src/telemetry/index.js.map +1 -0
package/dist/src/types.d.ts +131 -0
package/dist/src/types.d.ts.map +1 -0
package/dist/src/types.js +28 -0
package/dist/src/types.js.map +1 -0
package/examples/basic/input.json +76 -0
package/examples/basic/run.ts +33 -0
package/package.json +50 -0

package/dist/src/calibration/index.d.ts ADDED Viewed

@@ -0,0 +1,38 @@
+/**
+ * Gravito Eval — Calibration Engine
+ *
+ * The main entry point for evaluating AI system alignment with human judgment.
+ * Combines matching, metrics, adjudication, and confidence scoring into
+ * a single, easy-to-use evaluation pipeline.
+ */
+import type { Finding, Adjudication, EvalResult } from "../types";
+import { type MultiPassOptions } from "../matching";
+export interface EvalOptions {
+    /** Custom matching thresholds; forwarded unchanged to `multiPassMatch`. */
+    matching?: MultiPassOptions;
+    /**
+     * Human adjudications for AI-only findings (if available).
+     * When this is anything other than null/undefined — including an empty
+     * array — it is used as-is and auto-adjudication is not run.
+     */
+    adjudications?: Adjudication[];
+    /**
+     * Auto-adjudicate AI-only findings when no human adjudications provided.
+     * Treated as enabled unless it is exactly `false`.
+     */
+    autoAdjudicate?: boolean;
+}
+/**
+ * Run a full evaluation of AI findings against human findings.
+ *
+ * This is the primary entry point for gravito-eval. It runs multi-pass
+ * matching, then derives detection, ranking and severity metrics from the
+ * match result. Novel-signal metrics and adjusted precision are populated
+ * only when at least one AI-only finding exists and at least one
+ * adjudication (supplied or auto-generated) is available; otherwise both
+ * are `undefined`.
+ *
+ * Verdict rules, checked in this order:
+ * - `INSUFFICIENT_DATA`: `detection.totalAI < 3` or `detection.totalHuman < 3`
+ * - `PASS`: recall >= 0.60 and precision >= 0.50
+ * - `PARTIAL`: recall >= 0.40 or precision >= 0.35
+ * - `FAIL`: anything else
+ *
+ * @param aiFindings - Findings produced by the AI system.
+ * @param humanFindings - Findings produced by human reviewers.
+ * @param options - Optional matching thresholds and adjudication settings.
+ * @returns Metrics, the flattened list of matched pairs (strict, then
+ *   cross-category, then conceptual), the unmatched findings on each side,
+ *   and the verdict.
+ *
+ * @example
+ * ```ts
+ * import { evaluate } from "gravito-eval";
+ *
+ * const result = evaluate(aiFindings, humanFindings);
+ * console.log(`Recall: ${(result.detection.recall * 100).toFixed(1)}%`);
+ * console.log(`Precision: ${(result.detection.precision * 100).toFixed(1)}%`);
+ * ```
+ */
+export declare function evaluate(aiFindings: Finding[], humanFindings: Finding[], options?: EvalOptions): EvalResult;
+export { multiPassMatch, toFlatMatchResult } from "../matching";
+export type { MultiPassOptions } from "../matching";
+export { computeDetectionMetrics, computeRankingMetrics, computeSeverityMetrics, wilsonInterval, } from "../metrics";
+export { computeNovelSignal, computeAdjustedPrecision, autoAdjudicate, batchAutoAdjudicate, } from "../adjudication";
+export { scoreConfidence, scoreFindings } from "../confidence";
+//# sourceMappingURL=index.d.ts.map

package/dist/src/calibration/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/calibration/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EACV,OAAO,EACP,YAAY,EACZ,UAAU,EAEX,MAAM,UAAU,CAAC;AAClB,OAAO,EAAkB,KAAK,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAcpE,MAAM,WAAW,WAAW;IAC1B,iCAAiC;IACjC,QAAQ,CAAC,EAAE,gBAAgB,CAAC;IAC5B,8DAA8D;IAC9D,aAAa,CAAC,EAAE,YAAY,EAAE,CAAC;IAC/B,4EAA4E;IAC5E,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,QAAQ,CACtB,UAAU,EAAE,OAAO,EAAE,EACrB,aAAa,EAAE,OAAO,EAAE,EACxB,OAAO,CAAC,EAAE,WAAW,GACpB,UAAU,CA8DZ;AAyBD,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAChE,YAAY,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EACL,uBAAuB,EACvB,qBAAqB,EACrB,sBAAsB,EACtB,cAAc,GACf,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,kBAAkB,EAClB,wBAAwB,EACxB,cAAc,EACd,mBAAmB,GACpB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC"}

package/dist/src/calibration/index.js ADDED Viewed

@@ -0,0 +1,104 @@
+"use strict";
+/**
+ * Gravito Eval — Calibration Engine
+ *
+ * The main entry point for evaluating AI system alignment with human judgment.
+ * Combines matching, metrics, adjudication, and confidence scoring into
+ * a single, easy-to-use evaluation pipeline.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.scoreFindings = exports.scoreConfidence = exports.batchAutoAdjudicate = exports.autoAdjudicate = exports.computeAdjustedPrecision = exports.computeNovelSignal = exports.wilsonInterval = exports.computeSeverityMetrics = exports.computeRankingMetrics = exports.computeDetectionMetrics = exports.toFlatMatchResult = exports.multiPassMatch = void 0;
+exports.evaluate = evaluate;
+const matching_1 = require("../matching");
+const metrics_1 = require("../metrics");
+const adjudication_1 = require("../adjudication");
/**
 * Evaluate a set of AI findings against the human reference set.
 *
 * Pipeline, in order: multi-pass matching, detection metrics, ranking
 * metrics, severity metrics, optional novel-signal analysis, verdict.
 *
 * Novel-signal metrics and adjusted precision stay `undefined` unless there
 * is at least one AI-only finding and at least one adjudication for it.
 * Caller-supplied `options.adjudications` are used whenever they are not
 * null/undefined; otherwise AI-only findings are auto-adjudicated unless
 * `options.autoAdjudicate` is exactly `false`.
 *
 * @example
 * ```ts
 * import { evaluate } from "gravito-eval";
 *
 * const result = evaluate(aiFindings, humanFindings);
 * console.log(`Recall: ${(result.detection.recall * 100).toFixed(1)}%`);
 * console.log(`Precision: ${(result.detection.precision * 100).toFixed(1)}%`);
 * ```
 */
function evaluate(aiFindings, humanFindings, options) {
    // Pair AI findings with human findings across all matching passes.
    const pairing = (0, matching_1.multiPassMatch)(aiFindings, humanFindings, options?.matching);
    // Core metrics derived from the pairing.
    const detection = (0, metrics_1.computeDetectionMetrics)(pairing, aiFindings.length, humanFindings.length);
    const ranking = (0, metrics_1.computeRankingMetrics)(aiFindings, humanFindings, pairing);
    const severity = (0, metrics_1.computeSeverityMetrics)(pairing);
    // Optional analysis of findings only the AI reported.
    let novelSignal;
    let adjustedPrecision;
    if (pairing.aiOnly.length > 0) {
        let adjudications = options?.adjudications;
        if (adjudications == null) {
            // No adjudications supplied: auto-adjudicate unless explicitly disabled.
            adjudications = options?.autoAdjudicate === false
                ? []
                : (0, adjudication_1.batchAutoAdjudicate)(pairing.aiOnly);
        }
        if (adjudications.length > 0) {
            novelSignal = (0, adjudication_1.computeNovelSignal)(pairing.aiOnly, adjudications);
            adjustedPrecision = (0, adjudication_1.computeAdjustedPrecision)(detection.matchedCount, novelSignal.validCount, detection.totalAI);
        }
    }
    const verdict = determineVerdict(detection, pairing);
    // Flatten pairs from every pass: strict, then cross-category, then conceptual.
    const matches = [
        ...pairing.strictMatches,
        ...pairing.crossCategoryMatches,
        ...pairing.conceptualMatches,
    ];
    const matchBreakdown = {
        strict: pairing.summary.strict_matched,
        crossCategory: pairing.summary.cross_category_matched,
        conceptual: pairing.summary.conceptual_matched,
    };
    return {
        detection,
        ranking,
        severity,
        novelSignal,
        matchBreakdown,
        matches,
        aiOnly: pairing.aiOnly,
        humanOnly: pairing.humanOnly,
        adjustedPrecision,
        verdict,
    };
}
+// ─── Verdict Logic ────────────────────────────────────────────────────────
/**
 * Translate detection metrics into a coarse verdict label.
 *
 * Rules, checked in order:
 * - fewer than 3 AI findings or fewer than 3 human findings: "INSUFFICIENT_DATA"
 * - recall >= 0.60 and precision >= 0.50: "PASS"
 * - recall >= 0.40 or precision >= 0.35: "PARTIAL"
 * - otherwise: "FAIL"
 *
 * @param detection object exposing `totalAI`, `totalHuman`, `recall`, `precision`
 * @param matchResult accepted for call-site compatibility; not read here
 * @returns one of "INSUFFICIENT_DATA", "PASS", "PARTIAL", "FAIL"
 */
function determineVerdict(detection, matchResult) {
    const tooFewSamples = detection.totalAI < 3 || detection.totalHuman < 3;
    if (tooFewSamples) {
        return "INSUFFICIENT_DATA";
    }
    const meetsPassBar = detection.recall >= 0.60 && detection.precision >= 0.50;
    if (meetsPassBar) {
        return "PASS";
    }
    const meetsPartialBar = detection.recall >= 0.40 || detection.precision >= 0.35;
    return meetsPartialBar ? "PARTIAL" : "FAIL";
}
+// ─── Re-exports ───────────────────────────────────────────────────────────
+var matching_2 = require("../matching");
+Object.defineProperty(exports, "multiPassMatch", { enumerable: true, get: function () { return matching_2.multiPassMatch; } });
+Object.defineProperty(exports, "toFlatMatchResult", { enumerable: true, get: function () { return matching_2.toFlatMatchResult; } });
+var metrics_2 = require("../metrics");
+Object.defineProperty(exports, "computeDetectionMetrics", { enumerable: true, get: function () { return metrics_2.computeDetectionMetrics; } });
+Object.defineProperty(exports, "computeRankingMetrics", { enumerable: true, get: function () { return metrics_2.computeRankingMetrics; } });
+Object.defineProperty(exports, "computeSeverityMetrics", { enumerable: true, get: function () { return metrics_2.computeSeverityMetrics; } });
+Object.defineProperty(exports, "wilsonInterval", { enumerable: true, get: function () { return metrics_2.wilsonInterval; } });
+var adjudication_2 = require("../adjudication");
+Object.defineProperty(exports, "computeNovelSignal", { enumerable: true, get: function () { return adjudication_2.computeNovelSignal; } });
+Object.defineProperty(exports, "computeAdjustedPrecision", { enumerable: true, get: function () { return adjudication_2.computeAdjustedPrecision; } });
+Object.defineProperty(exports, "autoAdjudicate", { enumerable: true, get: function () { return adjudication_2.autoAdjudicate; } });
+Object.defineProperty(exports, "batchAutoAdjudicate", { enumerable: true, get: function () { return adjudication_2.batchAutoAdjudicate; } });
+var confidence_1 = require("../confidence");
+Object.defineProperty(exports, "scoreConfidence", { enumerable: true, get: function () { return confidence_1.scoreConfidence; } });
+Object.defineProperty(exports, "scoreFindings", { enumerable: true, get: function () { return confidence_1.scoreFindings; } });
+//# sourceMappingURL=index.js.map

package/dist/src/calibration/index.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/calibration/index.ts"],"names":[],"mappings":";AAAA;;;;;;GAMG;;;AA6CH,4BAkEC;AAvGD,0CAAoE;AACpE,wCAIoB;AACpB,kDAIyB;AAazB;;;;;;;;;;;;;GAaG;AACH,SAAgB,QAAQ,CACtB,UAAqB,EACrB,aAAwB,EACxB,OAAqB;IAErB,8BAA8B;IAC9B,MAAM,WAAW,GAAG,IAAA,yBAAc,EAAC,UAAU,EAAE,aAAa,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;IAEjF,4BAA4B;IAC5B,MAAM,SAAS,GAAG,IAAA,iCAAuB,EACvC,WAAW,EACX,UAAU,CAAC,MAAM,EACjB,aAAa,CAAC,MAAM,CACrB,CAAC;IAEF,0BAA0B;IAC1B,MAAM,OAAO,GAAG,IAAA,+BAAqB,EAAC,UAAU,EAAE,aAAa,EAAE,WAAW,CAAC,CAAC;IAE9E,6BAA6B;IAC7B,MAAM,QAAQ,GAAG,IAAA,gCAAsB,EAAC,WAAW,CAAC,CAAC;IAErD,kCAAkC;IAClC,IAAI,WAAW,GAAG,SAAS,CAAC;IAC5B,IAAI,iBAAiB,GAAG,SAAS,CAAC;IAElC,IAAI,WAAW,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClC,MAAM,aAAa,GACjB,OAAO,EAAE,aAAa;YACtB,CAAC,OAAO,EAAE,cAAc,KAAK,KAAK,CAAC,CAAC,CAAC,IAAA,kCAAmB,EAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAErF,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,WAAW,GAAG,IAAA,iCAAkB,EAAC,WAAW,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;YACpE,iBAAiB,GAAG,IAAA,uCAAwB,EAC1C,SAAS,CAAC,YAAY,EACtB,WAAW,CAAC,UAAU,EACtB,SAAS,CAAC,OAAO,CAClB,CAAC;QACJ,CAAC;IACH,CAAC;IAED,kBAAkB;IAClB,MAAM,OAAO,GAAG,gBAAgB,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;IAEzD,0BAA0B;IAC1B,MAAM,UAAU,GAAG;QACjB,GAAG,WAAW,CAAC,aAAa;QAC5B,GAAG,WAAW,CAAC,oBAAoB;QACnC,GAAG,WAAW,CAAC,iBAAiB;KACjC,CAAC;IAEF,OAAO;QACL,SAAS;QACT,OAAO;QACP,QAAQ;QACR,WAAW;QACX,cAAc,EAAE;YACd,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,cAAc;YAC1C,aAAa,EAAE,WAAW,CAAC,OAAO,CAAC,sBAAsB;YACzD,UAAU,EAAE,WAAW,CAAC,OAAO,CAAC,kBAAkB;SACnD;QACD,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW,CAAC,MAAM;QAC1B,SAAS,EAAE,WAAW,CAAC,SAAS;QAChC,iBAAiB;QACjB,OAAO;KACR,CAAC;AACJ,CAAC;AAED,6EAA6E;AAE7E,SAAS,gBAAgB,CACvB,SAAqF,EACrF,WAAiC;IAEjC,IAAI,SAAS,CAAC,OAAO,GAAG,CAAC,IAAI,SAAS,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;QACtD,OAAO,mBAAmB,CAAC;IAC7B,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,IAAI,IAAI,IAAI,SAAS,CAAC,SAAS,IAAI,IAAI,EAAE,CAAC;QAC5D,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,IAAI,IAAI,IAAI,SAAS,CAAC,SAAS,IAAI,IAA
I,EAAE,CAAC;QAC5D,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6EAA6E;AAE7E,wCAAgE;AAAvD,0GAAA,cAAc,OAAA;AAAE,6GAAA,iBAAiB,OAAA;AAE1C,sCAKoB;AAJlB,kHAAA,uBAAuB,OAAA;AACvB,gHAAA,qBAAqB,OAAA;AACrB,iHAAA,sBAAsB,OAAA;AACtB,yGAAA,cAAc,OAAA;AAEhB,gDAKyB;AAJvB,kHAAA,kBAAkB,OAAA;AAClB,wHAAA,wBAAwB,OAAA;AACxB,8GAAA,cAAc,OAAA;AACd,mHAAA,mBAAmB,OAAA;AAErB,4CAA+D;AAAtD,6GAAA,eAAe,OAAA;AAAE,2GAAA,aAAa,OAAA"}

package/dist/src/confidence/index.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * Gravito Eval — Confidence Scoring
+ *
+ * Scores individual findings on a 0–1 confidence scale based on
+ * observable signal factors. Uses generic weights suitable for
+ * any AI evaluation domain.
+ *
+ * Applies nonlinear (sigmoid) scaling to spread scores away from the
+ * common clustering at 0.6–0.7. With the current weights and severity
+ * adjustments, final scores fall in roughly the 0.35–0.81 range.
+ */
+import type { Finding, ScoredFinding } from "../types";
+/**
+ * Score a single finding's confidence level.
+ *
+ * Returns a copy of the finding extended with `confidence` (rounded to
+ * three decimals), the `factors` used to compute it, an `isSubjective`
+ * flag and a `signalCount`.
+ *
+ * Returns a value between 0 and 1 where:
+ * - 0.0–0.3: Very low confidence (likely noise)
+ * - 0.3–0.5: Low confidence (needs review)
+ * - 0.5–0.7: Moderate confidence (plausible)
+ * - 0.7–0.85: High confidence (likely valid)
+ * - 0.85–1.0: Very high confidence (strong evidence)
+ *
+ * NOTE(review): with the current weights, scaling and severity adjustments
+ * the computed score appears to stay within roughly 0.35–0.81, so the
+ * outermost bands look unreachable — confirm whether that is intended.
+ *
+ * @param finding - The finding to score.
+ * @returns The finding plus its confidence score and supporting details.
+ */
+export declare function scoreConfidence(finding: Finding): ScoredFinding;
+/**
+ * Score a batch of findings.
+ *
+ * Applies `scoreConfidence` to every element and returns the results in
+ * input order.
+ *
+ * @param findings - Findings to score.
+ * @returns One scored finding per input finding.
+ */
+export declare function scoreFindings(findings: Finding[]): ScoredFinding[];
+//# sourceMappingURL=index.d.ts.map

package/dist/src/confidence/index.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/confidence/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAE,aAAa,EAAqB,MAAM,UAAU,CAAC;AAsI1E;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,OAAO,GAAG,aAAa,CAa/D;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG,aAAa,EAAE,CAElE"}

package/dist/src/confidence/index.js ADDED Viewed

@@ -0,0 +1,168 @@
+"use strict";
+/**
+ * Gravito Eval — Confidence Scoring
+ *
+ * Scores individual findings on a 0–1 confidence scale based on
+ * observable signal factors. Uses generic weights suitable for
+ * any AI evaluation domain.
+ *
+ * Applies nonlinear (sigmoid) scaling to spread scores away from the
+ * common clustering at 0.6–0.7. With the current weights and severity
+ * adjustments, final scores fall in roughly the 0.35–0.81 range.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.scoreConfidence = scoreConfidence;
+exports.scoreFindings = scoreFindings;
+// ─── Signal Analysis ──────────────────────────────────────────────────────
/**
 * Substrings that mark a description as subjective (matched case-insensitively,
 * anywhere in the text).
 */
const SUBJECTIVE_PATTERNS = [
    "feel",
    "seem",
    "appear",
    "might",
    "could",
    "possibly",
    "arguably",
    "subjective",
    "opinion",
    "aesthetic",
];
/**
 * Substrings that count as concrete, observable evidence (matched
 * case-insensitively, anywhere in the text).
 */
const EVIDENCE_PATTERNS = [
    "button",
    "link",
    "form",
    "image",
    "text",
    "header",
    "footer",
    "navigation",
    "color",
    "font",
    "size",
    "spacing",
    "contrast",
    "error",
    "missing",
    "broken",
    "incorrect",
    "404",
    "timeout",
    "slow",
    "pixel",
    "mobile",
    "desktop",
    "screen",
];
/** Whole-word quantifiers suggesting the issue occurs in more than one place. */
const REPETITION_PATTERN = /\b(multiple|several|many|all|every|each|throughout)\b/;
/**
 * Return the finding's description, or "" when it is missing or not a string.
 *
 * Every text heuristic below goes through this accessor so that a finding
 * without a description is scored as "no text" instead of throwing.
 */
function descriptionText(finding) {
    return typeof finding.description === "string" ? finding.description : "";
}
/**
 * Count how many of the five signals are present on a finding:
 * a description longer than 40 characters, a category, a severity,
 * a location, and a non-empty keyword list.
 *
 * @returns an integer from 0 to 5
 */
function countSignals(finding) {
    let count = 0;
    if (descriptionText(finding).length > 40)
        count++;
    if (finding.category)
        count++;
    if (finding.severity)
        count++;
    if (finding.location)
        count++;
    if (finding.keywords && finding.keywords.length > 0)
        count++;
    return count;
}
/**
 * True when the description contains any subjective-language substring.
 * A missing description is treated as not subjective.
 */
function isSubjective(finding) {
    const desc = descriptionText(finding).toLowerCase();
    return SUBJECTIVE_PATTERNS.some((p) => desc.includes(p));
}
/**
 * True when the description mentions any concrete-evidence substring.
 * A missing description is treated as having no evidence.
 */
function hasSpecificEvidence(finding) {
    const desc = descriptionText(finding).toLowerCase();
    return EVIDENCE_PATTERNS.some((p) => desc.includes(p));
}
/**
 * True when the finding looks like a recurring pattern: either it carries
 * three or more keywords, or its description uses a whole-word quantifier
 * such as "multiple" or "every".
 */
function hasPatternRepetition(finding) {
    if (finding.keywords && finding.keywords.length >= 3)
        return true;
    return REPETITION_PATTERN.test(descriptionText(finding).toLowerCase());
}
// ─── Factor Computation ───────────────────────────────────────────────────
/**
 * Derive the five confidence factors for a finding.
 *
 * - `signal_strength`: populated signals out of 5, capped at 1
 * - `cross_signal_support`: 0.8 with concrete evidence, else 0.3
 * - `pattern_repetition`: 0.7 when the issue looks recurring, else 0.2
 * - `rule_determinism`: 0.2 for subjective wording, else 0.7
 * - `clarity_of_evidence`: description length / 150, capped at 1
 *
 * Findings without a description are handled: text-based factors fall back
 * to their "absent" values and `clarity_of_evidence` is 0.
 */
function computeFactors(finding) {
    const signals = countSignals(finding);
    const maxSignals = 5;
    return {
        signal_strength: Math.min(signals / maxSignals, 1),
        cross_signal_support: hasSpecificEvidence(finding) ? 0.8 : 0.3,
        pattern_repetition: hasPatternRepetition(finding) ? 0.7 : 0.2,
        rule_determinism: isSubjective(finding) ? 0.2 : 0.7,
        clarity_of_evidence: Math.min(descriptionText(finding).length / 150, 1),
    };
}
+// ─── Confidence Computation ───────────────────────────────────────────────
/**
 * Domain-neutral weights applied to each confidence factor.
 * The five weights add up to 1.0 and are deliberately not tuned for any
 * particular domain.
 */
const GENERIC_WEIGHTS = {
    signal_strength: 0.20,
    cross_signal_support: 0.25,
    pattern_repetition: 0.15,
    rule_determinism: 0.20,
    clarity_of_evidence: 0.20,
};
/**
 * Weighted sum of the factor values, using GENERIC_WEIGHTS.
 *
 * @param factors object holding one numeric value per weight key
 * @returns the weighted sum; NaN if any weighted factor is missing
 */
function computeRawConfidence(factors) {
    return Object.entries(GENERIC_WEIGHTS).reduce((total, [name, weight]) => total + factors[name] * weight, 0);
}
/**
 * Pass a raw weighted score through a logistic curve.
 *
 * The input is re-centred on 0.5 and stretched by a factor of 2.5 before
 * the logistic function is applied; the result is then rescaled as
 * 0.15 + 0.80 * sigmoid. The values 0.15 and 0.95 are asymptotes that are
 * only approached for extreme inputs: for raw scores in [0, 1] the output
 * stays within roughly 0.33–0.77, and a raw score of 0.5 maps to 0.55.
 */
function applyNonlinearScaling(raw) {
    const floor = 0.15;
    const span = 0.80;
    const steepness = 2.5;
    const x = (raw - 0.5) * steepness;
    return floor + span * (1 / (1 + Math.exp(-x)));
}
/**
 * Additive confidence adjustment for each recognised severity label.
 *
 * Stored in a Map so that only these four keys can ever match. A plain
 * object lookup would also resolve inherited names such as "constructor"
 * or "__proto__", and the resulting non-numeric value would turn the
 * confidence into NaN.
 */
const SEVERITY_ADJUSTMENTS = new Map([
    ["critical", 0.10],
    ["high", 0.05],
    ["medium", 0.00],
    ["low", -0.05],
]);
/**
 * Apply severity bonus/penalty.
 *
 * @param confidence score to adjust
 * @param severity severity label; unrecognised or missing labels leave the
 *   score unchanged
 * @returns the adjusted score, clamped to the range [0, 1]
 */
function applySeverityAdjustment(confidence, severity) {
    const delta = SEVERITY_ADJUSTMENTS.get(severity) ?? 0;
    return Math.max(0, Math.min(1, confidence + delta));
}
+// ─── Public API ───────────────────────────────────────────────────────────
/**
 * Compute the confidence score for one finding.
 *
 * The finding's factors are combined into a raw weighted score, passed
 * through the nonlinear scaling step, adjusted for severity, and rounded
 * to three decimals. The input is not modified; a shallow copy carrying
 * the extra fields is returned.
 *
 * Intended reading of the score:
 * - 0.0–0.3: Very low confidence (likely noise)
 * - 0.3–0.5: Low confidence (needs review)
 * - 0.5–0.7: Moderate confidence (plausible)
 * - 0.7–0.85: High confidence (likely valid)
 * - 0.85–1.0: Very high confidence (strong evidence)
 *
 * @returns the finding plus `confidence`, `factors`, `isSubjective`
 *   and `signalCount`
 */
function scoreConfidence(finding) {
    const factors = computeFactors(finding);
    const adjusted = applySeverityAdjustment(applyNonlinearScaling(computeRawConfidence(factors)), finding.severity);
    const scored = { ...finding };
    scored.confidence = Math.round(adjusted * 1000) / 1000;
    scored.factors = factors;
    scored.isSubjective = isSubjective(finding);
    scored.signalCount = countSignals(finding);
    return scored;
}
/**
 * Score every finding in a list.
 *
 * @returns a new array holding one scored finding per input, in input order
 */
function scoreFindings(findings) {
    return findings.map((finding) => scoreConfidence(finding));
}
+//# sourceMappingURL=index.js.map

package/dist/src/confidence/index.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/confidence/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;AAkJH,0CAaC;AAKD,sCAEC;AAlKD,6EAA6E;AAE7E,SAAS,YAAY,CAAC,OAAgB;IACpC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,EAAE;QAAE,KAAK,EAAE,CAAC;IACpE,IAAI,OAAO,CAAC,QAAQ;QAAE,KAAK,EAAE,CAAC;IAC9B,IAAI,OAAO,CAAC,QAAQ;QAAE,KAAK,EAAE,CAAC;IAC9B,IAAI,OAAO,CAAC,QAAQ;QAAE,KAAK,EAAE,CAAC;IAC9B,IAAI,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC;QAAE,KAAK,EAAE,CAAC;IAC7D,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,YAAY,CAAC,OAAgB;IACpC,MAAM,kBAAkB,GAAG;QACzB,MAAM;QACN,MAAM;QACN,QAAQ;QACR,OAAO;QACP,OAAO;QACP,UAAU;QACV,UAAU;QACV,YAAY;QACZ,SAAS;QACT,WAAW;KACZ,CAAC;IACF,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC;IAC/C,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;AAC1D,CAAC;AAED,SAAS,mBAAmB,CAAC,OAAgB;IAC3C,MAAM,gBAAgB,GAAG;QACvB,QAAQ;QACR,MAAM;QACN,MAAM;QACN,OAAO;QACP,MAAM;QACN,QAAQ;QACR,QAAQ;QACR,YAAY;QACZ,OAAO;QACP,MAAM;QACN,MAAM;QACN,SAAS;QACT,UAAU;QACV,OAAO;QACP,SAAS;QACT,QAAQ;QACR,WAAW;QACX,KAAK;QACL,SAAS;QACT,MAAM;QACN,OAAO;QACP,QAAQ;QACR,SAAS;QACT,QAAQ;KACT,CAAC;IACF,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC;IAC/C,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,oBAAoB,CAAC,OAAgB;IAC5C,gDAAgD;IAChD,IAAI,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAClE,mDAAmD;IACnD,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC;IAC/C,OAAO,uDAAuD,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5E,CAAC;AAED,6EAA6E;AAE7E,SAAS,cAAc,CAAC,OAAgB;IACtC,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,CAAC,CAAC;IAErB,OAAO;QACL,eAAe,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,UAAU,EAAE,CAAC,CAAC;QAClD,oBAAoB,EAAE,mBAAmB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;QAC9D,kBAAkB,EAAE,oBAAoB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;QAC7D,gBAAgB,EAAE,YAAY,CAAC,OA
AO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;QACnD,mBAAmB,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC,CAAC;KACnE,CAAC;AACJ,CAAC;AAED,6EAA6E;AAE7E;;;GAGG;AACH,MAAM,eAAe,GAAG;IACtB,eAAe,EAAE,IAAI;IACrB,oBAAoB,EAAE,IAAI;IAC1B,kBAAkB,EAAE,IAAI;IACxB,gBAAgB,EAAE,IAAI;IACtB,mBAAmB,EAAE,IAAI;CAC1B,CAAC;AAEF,SAAS,oBAAoB,CAAC,OAA0B;IACtD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,EAAE,CAAC;QAC5D,GAAG,IAAI,OAAO,CAAC,GAA8B,CAAC,GAAG,MAAM,CAAC;IAC1D,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;GAGG;AACH,SAAS,qBAAqB,CAAC,GAAW;IACxC,gDAAgD;IAChD,MAAM,QAAQ,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;IACnC,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC9C,8BAA8B;IAC9B,OAAO,IAAI,GAAG,OAAO,GAAG,IAAI,CAAC;AAC/B,CAAC;AAED;;GAEG;AACH,SAAS,uBAAuB,CAAC,UAAkB,EAAE,QAAgB;IACnE,MAAM,WAAW,GAA2B;QAC1C,QAAQ,EAAE,IAAI;QACd,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,IAAI;QACZ,GAAG,EAAE,CAAC,IAAI;KACX,CAAC;IACF,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,CAAC,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;AAC7E,CAAC;AAED,6EAA6E;AAE7E;;;;;;;;;GASG;AACH,SAAgB,eAAe,CAAC,OAAgB;IAC9C,MAAM,OAAO,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IACxC,MAAM,GAAG,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,qBAAqB,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,uBAAuB,CAAC,MAAM,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IAEhE,OAAO;QACL,GAAG,OAAO;QACV,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,IAAI;QAC3C,OAAO;QACP,YAAY,EAAE,YAAY,CAAC,OAAO,CAAC;QACnC,WAAW,EAAE,YAAY,CAAC,OAAO,CAAC;KACnC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,aAAa,CAAC,QAAmB;IAC/C,OAAO,QAAQ,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;AACvC,CAAC"}

package/dist/src/index.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * Gravito Eval
+ *
+ * Measure how closely AI decisions match human judgment —
+ * and where they add new signal.
+ *
+ * @example
+ * ```ts
+ * import { evaluate } from "gravito-eval";
+ *
+ * const result = evaluate(aiFindings, humanFindings);
+ * console.log(`Recall: ${(result.detection.recall * 100).toFixed(1)}%`);
+ * console.log(`Precision: ${(result.detection.precision * 100).toFixed(1)}%`);
+ * console.log(`F1: ${(result.detection.f1 * 100).toFixed(1)}%`);
+ * ```
+ */
+export { evaluate } from "./calibration";
+export type { EvalOptions } from "./calibration";
+export { multiPassMatch, toFlatMatchResult, keywordSimilarity } from "./matching";
+export type { MultiPassOptions } from "./matching";
+export { computeDetectionMetrics, computeRankingMetrics, computeSeverityMetrics, wilsonInterval, } from "./metrics";
+export { computeNovelSignal, computeAdjustedPrecision, autoAdjudicate, batchAutoAdjudicate, } from "./adjudication";
+export { scoreConfidence, scoreFindings } from "./confidence";
+export { trackRun } from "./telemetry";
+export type { Finding, MatchPair, MatchResult, MultiPassMatchResult, Adjudication, AdjudicationLabel, DetectionMetrics, RankingMetrics, SeverityMetrics, NovelSignalMetrics, ConfidenceInterval, EvalResult, ScoredFinding, ConfidenceFactors, IssueCategory, SeverityLevel, } from "./types";
+//# sourceMappingURL=index.d.ts.map

package/dist/src/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAGH,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,YAAY,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAGjD,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAClF,YAAY,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAGnD,OAAO,EACL,uBAAuB,EACvB,qBAAqB,EACrB,sBAAsB,EACtB,cAAc,GACf,MAAM,WAAW,CAAC;AAGnB,OAAO,EACL,kBAAkB,EAClB,wBAAwB,EACxB,cAAc,EACd,mBAAmB,GACpB,MAAM,gBAAgB,CAAC;AAGxB,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAG9D,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,YAAY,EACV,OAAO,EACP,SAAS,EACT,WAAW,EACX,oBAAoB,EACpB,YAAY,EACZ,iBAAiB,EACjB,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,kBAAkB,EAClB,kBAAkB,EAClB,UAAU,EACV,aAAa,EACb,iBAAiB,EACjB,aAAa,EACb,aAAa,GACd,MAAM,SAAS,CAAC"}

package/dist/src/index.js ADDED Viewed

@@ -0,0 +1,47 @@
+"use strict";
+/**
+ * Gravito Eval
+ *
+ * Measure how closely AI decisions match human judgment —
+ * and where they add new signal.
+ *
+ * @example
+ * ```ts
+ * import { evaluate } from "gravito-eval";
+ *
+ * const result = evaluate(aiFindings, humanFindings);
+ * console.log(`Recall: ${(result.detection.recall * 100).toFixed(1)}%`);
+ * console.log(`Precision: ${(result.detection.precision * 100).toFixed(1)}%`);
+ * console.log(`F1: ${(result.detection.f1 * 100).toFixed(1)}%`);
+ * ```
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.trackRun = exports.scoreFindings = exports.scoreConfidence = exports.batchAutoAdjudicate = exports.autoAdjudicate = exports.computeAdjustedPrecision = exports.computeNovelSignal = exports.wilsonInterval = exports.computeSeverityMetrics = exports.computeRankingMetrics = exports.computeDetectionMetrics = exports.keywordSimilarity = exports.toFlatMatchResult = exports.multiPassMatch = exports.evaluate = void 0;
+// Main entry point
+var calibration_1 = require("./calibration");
+Object.defineProperty(exports, "evaluate", { enumerable: true, get: function () { return calibration_1.evaluate; } });
+// Matching
+var matching_1 = require("./matching");
+Object.defineProperty(exports, "multiPassMatch", { enumerable: true, get: function () { return matching_1.multiPassMatch; } });
+Object.defineProperty(exports, "toFlatMatchResult", { enumerable: true, get: function () { return matching_1.toFlatMatchResult; } });
+Object.defineProperty(exports, "keywordSimilarity", { enumerable: true, get: function () { return matching_1.keywordSimilarity; } });
+// Metrics
+var metrics_1 = require("./metrics");
+Object.defineProperty(exports, "computeDetectionMetrics", { enumerable: true, get: function () { return metrics_1.computeDetectionMetrics; } });
+Object.defineProperty(exports, "computeRankingMetrics", { enumerable: true, get: function () { return metrics_1.computeRankingMetrics; } });
+Object.defineProperty(exports, "computeSeverityMetrics", { enumerable: true, get: function () { return metrics_1.computeSeverityMetrics; } });
+Object.defineProperty(exports, "wilsonInterval", { enumerable: true, get: function () { return metrics_1.wilsonInterval; } });
+// Adjudication
+var adjudication_1 = require("./adjudication");
+Object.defineProperty(exports, "computeNovelSignal", { enumerable: true, get: function () { return adjudication_1.computeNovelSignal; } });
+Object.defineProperty(exports, "computeAdjustedPrecision", { enumerable: true, get: function () { return adjudication_1.computeAdjustedPrecision; } });
+Object.defineProperty(exports, "autoAdjudicate", { enumerable: true, get: function () { return adjudication_1.autoAdjudicate; } });
+Object.defineProperty(exports, "batchAutoAdjudicate", { enumerable: true, get: function () { return adjudication_1.batchAutoAdjudicate; } });
+// Confidence
+var confidence_1 = require("./confidence");
+Object.defineProperty(exports, "scoreConfidence", { enumerable: true, get: function () { return confidence_1.scoreConfidence; } });
+Object.defineProperty(exports, "scoreFindings", { enumerable: true, get: function () { return confidence_1.scoreFindings; } });
+// Telemetry
+var telemetry_1 = require("./telemetry");
+Object.defineProperty(exports, "trackRun", { enumerable: true, get: function () { return telemetry_1.trackRun; } });
+//# sourceMappingURL=index.js.map

package/dist/src/index.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;GAeG;;;AAEH,mBAAmB;AACnB,6CAAyC;AAAhC,uGAAA,QAAQ,OAAA;AAGjB,WAAW;AACX,uCAAkF;AAAzE,0GAAA,cAAc,OAAA;AAAE,6GAAA,iBAAiB,OAAA;AAAE,6GAAA,iBAAiB,OAAA;AAG7D,UAAU;AACV,qCAKmB;AAJjB,kHAAA,uBAAuB,OAAA;AACvB,gHAAA,qBAAqB,OAAA;AACrB,iHAAA,sBAAsB,OAAA;AACtB,yGAAA,cAAc,OAAA;AAGhB,eAAe;AACf,+CAKwB;AAJtB,kHAAA,kBAAkB,OAAA;AAClB,wHAAA,wBAAwB,OAAA;AACxB,8GAAA,cAAc,OAAA;AACd,mHAAA,mBAAmB,OAAA;AAGrB,aAAa;AACb,2CAA8D;AAArD,6GAAA,eAAe,OAAA;AAAE,2GAAA,aAAa,OAAA;AAEvC,YAAY;AACZ,yCAAuC;AAA9B,qGAAA,QAAQ,OAAA"}

package/dist/src/matching/index.d.ts ADDED Viewed

@@ -0,0 +1,37 @@
+/**
+ * Gravito Eval — Multi-Pass Semantic Matcher
+ *
+ * Matches AI findings against human findings using three passes:
+ * 1. Strict: same category + high keyword similarity (>0.75)
+ * 2. Cross-category: high similarity (>0.80) + category equivalence
+ * 3. Conceptual merge: cluster related findings, match cluster ↔ single issue
+ *
+ * Each pass uses greedy one-to-one matching — no double-counting.
+ */
+import type { Finding, MatchResult, MultiPassMatchResult } from "../types";
+/**
+ * Compute keyword similarity between two findings.
+ * Uses a hybrid approach:
+ * 1. Jaccard on full text (description + keywords)
+ * 2. Keyword-specific overlap (weighted higher)
+ * 3. Location similarity bonus
+ * Final score = max(jaccard, keywordOverlap) with location bonus
+ *
+ * NOTE(review): the score range is not visible from this declaration —
+ * presumably 0–1, given the thresholds it is compared against; confirm
+ * against the implementation.
+ *
+ * @param a - First finding to compare.
+ * @param b - Second finding to compare.
+ * @returns The similarity score for the pair.
+ */
+export declare function keywordSimilarity(a: Finding, b: Finding): number;
+export interface MultiPassOptions {
+    strictThreshold?: number; // similarity cut-off for pass 1 (strict) — presumably defaults to 0.75 per the module header; confirm
+    crossCategoryThreshold?: number; // similarity cut-off for pass 2 (cross-category) — presumably defaults to 0.80 per the module header; confirm
+    clusterThreshold?: number; // presumably controls clustering of related findings in pass 3 — confirm
+    conceptualMatchThreshold?: number; // presumably the cluster-to-issue match cut-off in pass 3 — confirm
+}
+/**
+ * Run multi-pass semantic matching between AI and human findings.
+ *
+ * Returns detailed results with match breakdown by pass type. The result
+ * exposes the pairs found by each pass (`strictMatches`,
+ * `crossCategoryMatches`, `conceptualMatches`), the unmatched findings on
+ * each side (`aiOnly`, `humanOnly`) and a `summary` of per-pass counts.
+ *
+ * @param aiFindings - Findings produced by the AI system.
+ * @param humanFindings - Findings produced by human reviewers.
+ * @param options - Optional threshold overrides.
+ * @returns The per-pass match result.
+ */
+export declare function multiPassMatch(aiFindings: Finding[], humanFindings: Finding[], options?: MultiPassOptions): MultiPassMatchResult;
+/**
+ * Convert MultiPassMatchResult to a flat MatchResult for simpler consumers.
+ *
+ * @param result - The per-pass result returned by `multiPassMatch`.
+ * @returns The same matching outcome in the flat `MatchResult` shape.
+ */
+export declare function toFlatMatchResult(result: MultiPassMatchResult): MatchResult;
+//# sourceMappingURL=index.d.ts.map

package/dist/src/matching/index.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/matching/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EACV,OAAO,EAEP,WAAW,EACX,oBAAoB,EAErB,MAAM,UAAU,CAAC;AA+BlB;;;;;;;GAOG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,GAAG,MAAM,CAqDhE;AA4MD,MAAM,WAAW,gBAAgB;IAC/B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAChC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,wBAAwB,CAAC,EAAE,MAAM,CAAC;CACnC;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAC5B,UAAU,EAAE,OAAO,EAAE,EACrB,aAAa,EAAE,OAAO,EAAE,EACxB,OAAO,CAAC,EAAE,gBAAgB,GACzB,oBAAoB,CAqDtB;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,oBAAoB,GAAG,WAAW,CAU3E"}