gravito-eval 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +137 -0
- package/dist/cli/index.d.ts +14 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +276 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/src/adjudication/index.d.ts +36 -0
- package/dist/src/adjudication/index.d.ts.map +1 -0
- package/dist/src/adjudication/index.js +149 -0
- package/dist/src/adjudication/index.js.map +1 -0
- package/dist/src/calibration/index.d.ts +38 -0
- package/dist/src/calibration/index.d.ts.map +1 -0
- package/dist/src/calibration/index.js +104 -0
- package/dist/src/calibration/index.js.map +1 -0
- package/dist/src/confidence/index.d.ts +27 -0
- package/dist/src/confidence/index.d.ts.map +1 -0
- package/dist/src/confidence/index.js +168 -0
- package/dist/src/confidence/index.js.map +1 -0
- package/dist/src/index.d.ts +26 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +47 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/matching/index.d.ts +37 -0
- package/dist/src/matching/index.d.ts.map +1 -0
- package/dist/src/matching/index.js +292 -0
- package/dist/src/matching/index.js.map +1 -0
- package/dist/src/metrics/index.d.ts +15 -0
- package/dist/src/metrics/index.d.ts.map +1 -0
- package/dist/src/metrics/index.js +177 -0
- package/dist/src/metrics/index.js.map +1 -0
- package/dist/src/telemetry/index.d.ts +10 -0
- package/dist/src/telemetry/index.d.ts.map +1 -0
- package/dist/src/telemetry/index.js +106 -0
- package/dist/src/telemetry/index.js.map +1 -0
- package/dist/src/types.d.ts +131 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +28 -0
- package/dist/src/types.js.map +1 -0
- package/examples/basic/input.json +76 -0
- package/examples/basic/run.ts +33 -0
- package/package.json +50 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Gravito
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Gravito Eval
|
|
2
|
+
|
|
3
|
+
Measure how closely your AI matches human judgment — and where it finds things humans missed.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
npx gravito-eval run ./examples/basic
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
Gravito Eval Results
|
|
11
|
+
|
|
12
|
+
Recall: 75%
|
|
13
|
+
Precision: 50%
|
|
14
|
+
F1: 60%
|
|
15
|
+
|
|
16
|
+
Top-3 Agreement: 100%
|
|
17
|
+
Novel Signal: 67% (validated)
|
|
18
|
+
|
|
19
|
+
Interpretation:
|
|
20
|
+
- Strong alignment with human judgment
|
|
21
|
+
- AI found significant issues humans missed
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## What this tells you
|
|
27
|
+
|
|
28
|
+
Your AI found 75% of what humans found (Recall).
|
|
29
|
+
Half of what it flagged was relevant (Precision).
|
|
30
|
+
And 67% of its unique findings were genuinely useful (Novel Signal).
|
|
31
|
+
|
|
32
|
+
That means your AI is catching real issues humans miss — but also generating some noise.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
npm install gravito-eval
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Or run directly:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
npx gravito-eval run ./your-data.json
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Input format
|
|
51
|
+
|
|
52
|
+
```json
|
|
53
|
+
{
|
|
54
|
+
"aiFindings": [
|
|
55
|
+
{ "id": "ai-1", "description": "Missing CTA", "category": "conversion", "severity": "high" }
|
|
56
|
+
],
|
|
57
|
+
"humanFindings": [
|
|
58
|
+
{ "id": "h-1", "description": "No clear action", "category": "conversion", "severity": "high" }
|
|
59
|
+
]
|
|
60
|
+
}
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Save as `input.json` in a directory, then run:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
gravito-eval run ./my-directory
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Flags
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
gravito-eval run <path> --explain # Show why each match was made
|
|
75
|
+
gravito-eval run <path> --json # Raw JSON output
|
|
76
|
+
gravito-eval run <path> --no-telemetry # Disable anonymous usage tracking
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Programmatic API
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
import { evaluate } from "gravito-eval";
|
|
85
|
+
|
|
86
|
+
const result = evaluate(aiFindings, humanFindings);
|
|
87
|
+
|
|
88
|
+
result.detection.recall // How much of what humans find does the AI catch?
|
|
89
|
+
result.detection.precision // How much of what the AI finds is actually relevant?
|
|
90
|
+
result.detection.f1 // Harmonic mean of precision and recall
|
|
91
|
+
result.novelSignal // What did the AI find that humans missed?
|
|
92
|
+
result.verdict // PASS | PARTIAL | FAIL | INSUFFICIENT_DATA
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## What this is for
|
|
98
|
+
|
|
99
|
+
- Evaluating LLM outputs against human baselines
|
|
100
|
+
- QA for AI agents (code review, content audit, compliance)
|
|
101
|
+
- Measuring whether your AI is useful, not just accurate
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## What this is NOT
|
|
106
|
+
|
|
107
|
+
This does not generate outputs, fix issues, or run workflows.
|
|
108
|
+
It **measures** and **evaluates**.
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Telemetry
|
|
113
|
+
|
|
114
|
+
Anonymous usage data (timestamp, version, command name) is collected to improve the tool.
|
|
115
|
+
No findings, file paths, or PII are collected.
|
|
116
|
+
|
|
117
|
+
Disable:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
GRAVITO_TELEMETRY=0 gravito-eval run ./data
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Respects `DO_NOT_TRACK=1`.
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Gravito
|
|
128
|
+
|
|
129
|
+
This is the open-source evaluation layer behind [Gravito](https://gravito.ai) — continuous AI governance that scans, calibrates, and self-corrects.
|
|
130
|
+
|
|
131
|
+
**Want this running continuously on your system?**
|
|
132
|
+
|
|
133
|
+
[Request a pilot →](https://gravito.ai/pilot)
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
MIT License
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Gravito Eval CLI
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* gravito-eval run <path> Run evaluation
|
|
7
|
+
* gravito-eval run <path> --explain Show detailed match reasoning
|
|
8
|
+
* gravito-eval run <path> --json Output raw JSON
|
|
9
|
+
* gravito-eval run <path> --no-telemetry Disable anonymous usage tracking
|
|
10
|
+
* gravito-eval --help Show help
|
|
11
|
+
* gravito-eval --version Show version
|
|
12
|
+
*/
|
|
13
|
+
export {};
|
|
14
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../cli/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;GAUG"}
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
/**
|
|
4
|
+
* Gravito Eval CLI
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* gravito-eval run <path> Run evaluation
|
|
8
|
+
* gravito-eval run <path> --explain Show detailed match reasoning
|
|
9
|
+
* gravito-eval run <path> --json Output raw JSON
|
|
10
|
+
* gravito-eval run <path> --no-telemetry Disable anonymous usage tracking
|
|
11
|
+
* gravito-eval --help Show help
|
|
12
|
+
* gravito-eval --version Show version
|
|
13
|
+
*/
|
|
14
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
17
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
18
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
19
|
+
}
|
|
20
|
+
Object.defineProperty(o, k2, desc);
|
|
21
|
+
}) : (function(o, m, k, k2) {
|
|
22
|
+
if (k2 === undefined) k2 = k;
|
|
23
|
+
o[k2] = m[k];
|
|
24
|
+
}));
|
|
25
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
26
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
27
|
+
}) : function(o, v) {
|
|
28
|
+
o["default"] = v;
|
|
29
|
+
});
|
|
30
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
31
|
+
var ownKeys = function(o) {
|
|
32
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
33
|
+
var ar = [];
|
|
34
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
35
|
+
return ar;
|
|
36
|
+
};
|
|
37
|
+
return ownKeys(o);
|
|
38
|
+
};
|
|
39
|
+
return function (mod) {
|
|
40
|
+
if (mod && mod.__esModule) return mod;
|
|
41
|
+
var result = {};
|
|
42
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
43
|
+
__setModuleDefault(result, mod);
|
|
44
|
+
return result;
|
|
45
|
+
};
|
|
46
|
+
})();
|
|
47
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
48
|
+
const fs = __importStar(require("fs"));
|
|
49
|
+
const path = __importStar(require("path"));
|
|
50
|
+
const calibration_1 = require("../src/calibration");
|
|
51
|
+
const telemetry_1 = require("../src/telemetry");
|
|
52
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────
|
|
53
|
+
/**
 * Format a ratio as a whole-number percentage string.
 *
 * @param {number} n - Ratio to format (e.g. 0.75 for 75%).
 * @returns {string} The rounded percentage followed by "%", or "n/a" when
 *   the scaled value is not a finite number (missing metric, NaN, ±Infinity),
 *   so the report never shows a literal "NaN%".
 */
function pct(n) {
    const scaled = Math.round(n * 100);
    // Guard after scaling so every input that used to format still formats
    // exactly as before; only the NaN/Infinity outcomes are replaced.
    if (!Number.isFinite(scaled)) {
        return "n/a";
    }
    return `${scaled}%`;
}
|
|
56
|
+
// ─── Output Formatting ────────────────────────────────────────────────────
|
|
57
|
+
/**
 * Print the human-readable evaluation summary to stdout.
 *
 * Writes the headline detection metrics, the top-3 ranking agreement, the
 * validated novel-signal rate (only when `result.novelSignal` is truthy),
 * the interpretation bullets, and the closing "Next Step" pointer.
 *
 * @param {object} result - Evaluation result. Reads `detection.recall`,
 *   `detection.precision`, `detection.f1`, `ranking.top3Overlap` and the
 *   optional `novelSignal.validatedNovelRate`.
 * @returns {void}
 */
function printResult(result) {
    const { detection, ranking } = result;
    const blank = () => console.log();
    const line = (text) => console.log(text);

    blank();
    line("Gravito Eval Results");
    blank();

    // Headline detection metrics.
    line(`Recall: ${pct(detection.recall)}`);
    line(`Precision: ${pct(detection.precision)}`);
    line(`F1: ${pct(detection.f1)}`);
    blank();

    line(`Top-3 Agreement: ${pct(ranking.top3Overlap)}`);
    const novel = result.novelSignal;
    if (novel) {
        line(`Novel Signal: ${pct(novel.validatedNovelRate)} (validated)`);
    }
    blank();

    line("Interpretation:");
    printInterpretation(result);
    blank();

    // Closing call to action.
    line("Next Step:");
    line("Want this running continuously and fixing issues automatically?");
    blank();
    line("→ Try Gravito: https://gravito.ai/pilot");
    blank();
}
|
|
81
|
+
/**
 * Print one-line, plain-language readings of the evaluation result.
 *
 * Always prints an alignment line chosen from recall (>= 0.7 strong,
 * >= 0.5 moderate, otherwise low). When `result.novelSignal` is truthy it
 * also prints a novel-signal line for a validated rate >= 0.4 or >= 0.2;
 * lower rates print nothing.
 *
 * @param {object} result - Evaluation result. Reads `detection.recall` and
 *   the optional `novelSignal.validatedNovelRate`.
 * @returns {void}
 */
function printInterpretation(result) {
    const { recall } = result.detection;
    const alignment = recall >= 0.7
        ? "- Strong alignment with human judgment"
        : recall >= 0.5
            ? "- Moderate alignment — some human findings missed"
            : "- Low alignment — many human findings missed";
    console.log(alignment);

    const novel = result.novelSignal;
    if (!novel) {
        return;
    }
    const validated = novel.validatedNovelRate;
    if (validated >= 0.4) {
        console.log("- AI found significant issues humans missed");
        return;
    }
    if (validated >= 0.2) {
        console.log("- AI found some issues humans missed");
    }
}
|
|
102
|
+
// ─── Explain Mode ─────────────────────────────────────────────────────────
|
|
103
|
+
/**
 * Print the detailed reasoning behind an evaluation (the `--explain` view).
 *
 * Three sections are written in order, each skipped when its list is empty:
 * matched AI/human pairs with match type and rounded similarity, AI-only
 * ("novel") findings, and human-only ("missed") findings.
 *
 * @param {object} result - Evaluation result. Reads `matches` (items with
 *   `aiIssue.description`, `humanIssue.description`, `matchType`,
 *   `similarity`), `aiOnly` and `humanOnly` (items with `description`).
 * @returns {void}
 */
function printExplain(result) {
    console.log("─── Detailed Reasoning ───");
    console.log();

    // Section 1: pairs the matcher linked together.
    const pairs = result.matches;
    if (pairs.length > 0) {
        console.log("Matched (AI ↔ Human):");
        pairs.forEach((pair) => {
            console.log();
            console.log(` AI: "${pair.aiIssue.description}"`);
            console.log(` Human: "${pair.humanIssue.description}"`);
            const similarityPct = Math.round(pair.similarity * 100);
            console.log(` Why: ${pair.matchType} match (${similarityPct}% similar)`);
        });
        console.log();
    }

    // Section 2: findings only the AI reported.
    const novel = result.aiOnly;
    if (novel.length > 0) {
        console.log("Novel (AI found, humans didn't):");
        novel.forEach((finding) => {
            console.log(` → "${finding.description}"`);
            console.log(` Why novel: No similar human finding found`);
        });
        console.log();
    }

    // Section 3: findings only the human reviewers reported.
    const missed = result.humanOnly;
    if (missed.length > 0) {
        console.log("Missed (humans found, AI didn't):");
        missed.forEach((finding) => {
            console.log(` ✗ "${finding.description}"`);
        });
        console.log();
    }
}
|
|
135
|
+
/**
 * Write the "invalid input" guidance to stderr: the expected JSON shape
 * followed by a runnable example command.
 *
 * @returns {void}
 */
function printInvalidInput() {
    // `null` marks a blank separator line (console.error with no arguments).
    const BLANK = null;
    const guidance = [
        `Invalid input.`,
        BLANK,
        `Expected:`,
        `{`,
        ` "aiFindings": [...],`,
        ` "humanFindings": [...]`,
        `}`,
        BLANK,
        `Run:`,
        `npx gravito-eval run ./examples/basic`,
    ];
    for (const entry of guidance) {
        if (entry === BLANK) {
            console.error();
        }
        else {
            console.error(entry);
        }
    }
}
|
|
147
|
+
/**
 * Check that parsed input has the top-level shape the evaluator needs.
 *
 * On failure the invalid-input guidance is printed and the process exits
 * with status 1. Individual findings are not inspected.
 *
 * @param {*} data - Parsed JSON input.
 * @returns {object} The same `data` object, once it is known to be an
 *   object whose `aiFindings` and `humanFindings` are both arrays.
 */
function validateData(data) {
    const isObject = Boolean(data) && typeof data === "object";
    if (!isObject) {
        printInvalidInput();
        process.exit(1);
    }
    const hasBothLists = Array.isArray(data.aiFindings) && Array.isArray(data.humanFindings);
    if (!hasBothLists) {
        printInvalidInput();
        process.exit(1);
    }
    return data;
}
|
|
158
|
+
function loadData(inputPath) {
|
|
159
|
+
const resolved = path.resolve(inputPath);
|
|
160
|
+
if (!fs.existsSync(resolved)) {
|
|
161
|
+
console.error(`Path not found: ${inputPath}`);
|
|
162
|
+
process.exit(1);
|
|
163
|
+
}
|
|
164
|
+
if (fs.statSync(resolved).isDirectory()) {
|
|
165
|
+
for (const name of ["input.json", "data.json"]) {
|
|
166
|
+
const file = path.join(resolved, name);
|
|
167
|
+
if (fs.existsSync(file)) {
|
|
168
|
+
const raw = JSON.parse(fs.readFileSync(file, "utf-8"));
|
|
169
|
+
return validateData(raw);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
const aiFile = path.join(resolved, "ai-findings.json");
|
|
173
|
+
const humanFile = path.join(resolved, "human-findings.json");
|
|
174
|
+
if (!fs.existsSync(aiFile) || !fs.existsSync(humanFile)) {
|
|
175
|
+
console.error(`No input.json found in ${inputPath}`);
|
|
176
|
+
console.error();
|
|
177
|
+
console.error(`Expected: input.json with { aiFindings, humanFindings }`);
|
|
178
|
+
console.error();
|
|
179
|
+
console.error(`Run:`);
|
|
180
|
+
console.error(`npx gravito-eval run ./examples/basic`);
|
|
181
|
+
process.exit(1);
|
|
182
|
+
}
|
|
183
|
+
const data = {
|
|
184
|
+
aiFindings: JSON.parse(fs.readFileSync(aiFile, "utf-8")),
|
|
185
|
+
humanFindings: JSON.parse(fs.readFileSync(humanFile, "utf-8")),
|
|
186
|
+
};
|
|
187
|
+
const adjFile = path.join(resolved, "adjudications.json");
|
|
188
|
+
if (fs.existsSync(adjFile)) {
|
|
189
|
+
data.adjudications = JSON.parse(fs.readFileSync(adjFile, "utf-8"));
|
|
190
|
+
}
|
|
191
|
+
return validateData(data);
|
|
192
|
+
}
|
|
193
|
+
if (!resolved.endsWith(".json")) {
|
|
194
|
+
console.error(`Expected a .json file or directory, got: ${inputPath}`);
|
|
195
|
+
process.exit(1);
|
|
196
|
+
}
|
|
197
|
+
const raw = JSON.parse(fs.readFileSync(resolved, "utf-8"));
|
|
198
|
+
return validateData(raw);
|
|
199
|
+
}
|
|
200
|
+
// ─── Main ─────────────────────────────────────────────────────────────────
|
|
201
|
+
/**
 * Print the CLI usage text (commands, accepted input, examples) to stdout.
 *
 * @returns {void}
 */
function showHelp() {
    const helpLines = [
        "Gravito Eval — Measure AI-human alignment",
        "",
        "Usage:",
        "gravito-eval run <path> Evaluate findings",
        "gravito-eval run <path> --explain Show detailed match reasoning",
        "gravito-eval run <path> --json Output raw JSON",
        "gravito-eval run <path> --no-telemetry Disable anonymous tracking",
        "",
        "Input:",
        "<path> can be a .json file or a directory containing input.json",
        "",
        "Examples:",
        "gravito-eval run ./examples/basic",
        "gravito-eval run ./my-audit.json",
        "gravito-eval run ./examples/basic --explain",
    ];
    // The text is framed by one leading and one trailing newline.
    console.log(`\n${helpLines.join("\n")}\n`);
}
|
|
220
|
+
/**
 * CLI entry point: parse arguments, load the input, run the evaluation and
 * print the result.
 *
 * Supported invocations:
 *   gravito-eval --help | -h
 *   gravito-eval --version | -v
 *   gravito-eval run <path> [--explain] [--json] [--no-telemetry]
 *
 * Flags may appear before or after <path>. Usage errors and evaluation
 * failures are reported on stderr with exit status 1.
 *
 * @returns {void}
 */
function main() {
    const args = process.argv.slice(2);
    if (args.length === 0 || args.includes("--help") || args.includes("-h")) {
        showHelp();
        process.exit(0);
    }
    if (args.includes("--version") || args.includes("-v")) {
        // Walk up from the compiled file until the package manifest is found.
        let pkgDir = __dirname;
        while (!fs.existsSync(path.join(pkgDir, "package.json"))) {
            const parent = path.dirname(pkgDir);
            if (parent === pkgDir)
                break; // reached the filesystem root
            pkgDir = parent;
        }
        const pkg = JSON.parse(fs.readFileSync(path.join(pkgDir, "package.json"), "utf-8"));
        console.log(pkg.version);
        process.exit(0);
    }
    if (args[0] !== "run") {
        console.error(`Unknown command: ${args[0]}`);
        console.error(`Run: gravito-eval --help`);
        process.exit(1);
    }
    // The path is the first non-flag argument after `run`, so
    // `run --json ./dir` works the same as `run ./dir --json`.
    const inputPath = args.slice(1).find((arg) => !arg.startsWith("-"));
    if (!inputPath) {
        console.error(`Missing path.`);
        console.error(`Usage: gravito-eval run <path>`);
        process.exit(1);
    }
    const jsonOutput = args.includes("--json");
    const explainMode = args.includes("--explain");
    // Honour the documented --no-telemetry flag by setting the documented
    // opt-out variable before any tracking call is made.
    // NOTE(review): the telemetry module is not visible from here; if it also
    // inspects argv itself this is redundant but harmless — confirm.
    if (args.includes("--no-telemetry")) {
        process.env.GRAVITO_TELEMETRY = "0";
    }
    // Fire-and-forget telemetry (non-blocking)
    (0, telemetry_1.trackRun)("run");
    try {
        const data = loadData(inputPath);
        const result = (0, calibration_1.evaluate)(data.aiFindings, data.humanFindings, {
            adjudications: data.adjudications,
            // Use the heuristics only when no human adjudications were supplied.
            autoAdjudicate: !data.adjudications,
        });
        if (jsonOutput) {
            console.log(JSON.stringify(result, null, 2));
        }
        else {
            printResult(result);
            if (explainMode) {
                printExplain(result);
            }
        }
    }
    catch (err) {
        console.error(`Error: ${err.message}`);
        process.exit(1);
    }
    // Force exit — telemetry HTTP should not keep process alive
    setTimeout(() => process.exit(0), 100);
}
|
|
275
|
+
main();
|
|
276
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../cli/index.ts"],"names":[],"mappings":";;AAEA;;;;;;;;;;GAUG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAC7B,oDAA8C;AAC9C,gDAA4C;AAG5C,6EAA6E;AAE7E,SAAS,GAAG,CAAC,CAAS;IACpB,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC;AACnC,CAAC;AAED,6EAA6E;AAE7E,SAAS,WAAW,CAAC,MAAkB;IACrC,MAAM,CAAC,GAAG,MAAM,CAAC,SAAS,CAAC;IAC3B,MAAM,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC;IAEzB,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;IACpC,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,OAAO,CAAC,GAAG,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACxC,OAAO,CAAC,GAAG,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IAC9C,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;IAChC,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,OAAO,CAAC,GAAG,CAAC,oBAAoB,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;IAEtD,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,kBAAkB,CAAC,cAAc,CAAC,CAAC;IACzF,CAAC;IAED,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;IAC/B,mBAAmB,CAAC,MAAM,CAAC,CAAC;IAC5B,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAC1B,OAAO,CAAC,GAAG,CAAC,iEAAiE,CAAC,CAAC;IAC/E,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC;AAED,SAAS,mBAAmB,CAAC,MAAkB;IAC7C,MAAM,CAAC,GAAG,MAAM,CAAC,SAAS,CAAC;IAE3B,IAAI,CAAC,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QACpB,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;IACxD,CAAC;SAAM,IAAI,CAAC,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QAC3B,OAAO,CAAC,GAAG,CAAC,mDAAmD,CAAC,CAAC;IACnE,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,8CAA8C,CAAC,CAAC;IAC9D,CAAC;IAED,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC,kBAAkB,CAAC;QACnD,IAAI,IAAI,IAAI,GAAG,EAAE,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;QAC7D,CAAC;aAAM,IAAI,IAAI,IAAI,GAAG,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;AACH,CAAC;AAED,6EAA6E;AAE7E,SAAS,YAA
Y,CAAC,MAAkB;IACtC,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;IAC1C,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,gBAAgB;IAChB,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9B,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACrC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAC/B,OAAO,CAAC,GAAG,EAAE,CAAC;YACd,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,WAAW,GAAG,CAAC,CAAC;YACnD,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,UAAU,CAAC,WAAW,GAAG,CAAC,CAAC;YACtD,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,SAAS,WAAW,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,GAAG,GAAG,CAAC,YAAY,CAAC,CAAC;QAC5F,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAED,iBAAiB;IACjB,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;QAChD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAC9B,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC;QAC/D,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAED,kBAAkB;IAClB,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;QACjD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YACjC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC;QACxC,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;AACH,CAAC;AAUD,SAAS,iBAAiB;IACxB,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAChC,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;IAC3B,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACnB,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;IACxC,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;IAC1C,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACnB,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACtB,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;AACzD,CAAC;AAED,SAAS,YAAY,CAAC,IAAS;IAC7B,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,iBAAiB,EAAE,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1E,iBAAiB,EAAE,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,OAAO,IAAgB,CAAC;AAC1B,CAAC;AAED
,SAAS,QAAQ,CAAC,SAAiB;IACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAEzC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,KAAK,CAAC,mBAAmB,SAAS,EAAE,CAAC,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;QACxC,KAAK,MAAM,IAAI,IAAI,CAAC,YAAY,EAAE,WAAW,CAAC,EAAE,CAAC;YAC/C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;YACvC,IAAI,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACxB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;gBACvD,OAAO,YAAY,CAAC,GAAG,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,kBAAkB,CAAC,CAAC;QACvD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,qBAAqB,CAAC,CAAC;QAE7D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YACxD,OAAO,CAAC,KAAK,CAAC,0BAA0B,SAAS,EAAE,CAAC,CAAC;YACrD,OAAO,CAAC,KAAK,EAAE,CAAC;YAChB,OAAO,CAAC,KAAK,CAAC,yDAAyD,CAAC,CAAC;YACzE,OAAO,CAAC,KAAK,EAAE,CAAC;YAChB,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YACtB,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;YACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,MAAM,IAAI,GAAa;YACrB,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACxD,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;SAC/D,CAAC;QAEF,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,oBAAoB,CAAC,CAAC;QAC1D,IAAI,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3B,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;QACrE,CAAC;QAED,OAAO,YAAY,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,OAAO,CAAC,KAAK,CAAC,4CAA4C,SAAS,EAAE,CAAC,CAAC;QACvE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAC3D,OAAO,YAAY,CAAC,GAAG,CAAC,CAAC;AAC3B,CAAC;AAED,6EAA6E;AAE7E,SAAS,QAAQ;IACf,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;CAgBb,CAAC,CAA
C;AACH,CAAC;AAED,SAAS,IAAI;IACX,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAEnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACxE,QAAQ,EAAE,CAAC;QACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACtD,IAAI,MAAM,GAAG,SAAS,CAAC;QACvB,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YACpC,IAAI,MAAM,KAAK,MAAM;gBAAE,MAAM;YAC7B,MAAM,GAAG,MAAM,CAAC;QAClB,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CACpB,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAC5D,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACzB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,KAAK,EAAE,CAAC;QACtB,OAAO,CAAC,KAAK,CAAC,oBAAoB,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC7C,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;QAC/B,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE/C,2CAA2C;IAC3C,IAAA,oBAAQ,EAAC,KAAK,CAAC,CAAC;IAEhB,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAE/B,MAAM,MAAM,GAAG,IAAA,sBAAQ,EAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,aAAa,EAAE;YAC3D,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,cAAc,EAAE,CAAC,IAAI,CAAC,aAAa;SACpC,CAAC,CAAC;QAEH,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,MAAM,CAAC,CAAC;YACpB,IAAI,WAAW,EAAE,CAAC;gBAChB,YAAY,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CA
AC,CAAC,CAAC;IAClB,CAAC;IAED,4DAA4D;IAC5D,UAAU,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AACzC,CAAC;AAED,IAAI,EAAE,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gravito Eval — Adjudication Engine
|
|
3
|
+
*
|
|
4
|
+
* Evaluates AI-only findings (those not matched to any human finding)
|
|
5
|
+
* to determine whether they represent genuine novel signal or noise.
|
|
6
|
+
*
|
|
7
|
+
* Adjudications can be provided by human reviewers or auto-generated
|
|
8
|
+
* using simple heuristics.
|
|
9
|
+
*/
|
|
10
|
+
import type { Finding, Adjudication, NovelSignalMetrics } from "../types";
|
|
11
|
+
/**
|
|
12
|
+
* Simple heuristic-based auto-adjudication for AI-only findings.
|
|
13
|
+
* This provides a baseline — human adjudication is always preferred.
|
|
14
|
+
*/
|
|
15
|
+
export declare function autoAdjudicate(finding: Finding): Adjudication;
|
|
16
|
+
/**
|
|
17
|
+
* Batch auto-adjudicate a list of AI-only findings.
|
|
18
|
+
*/
|
|
19
|
+
export declare function batchAutoAdjudicate(findings: Finding[]): Adjudication[];
|
|
20
|
+
/**
|
|
21
|
+
* Compute novel signal metrics from adjudicated AI-only findings.
|
|
22
|
+
*
|
|
23
|
+
* System strength interpretation:
|
|
24
|
+
* - WEAK: <15% valid → mostly noise
|
|
25
|
+
* - MODERATE: 15-25% valid → some signal
|
|
26
|
+
* - STRONG: 25-40% valid → meaningful additional value
|
|
27
|
+
* - DIFFERENTIATED: >40% valid → system finds things humans miss
|
|
28
|
+
*/
|
|
29
|
+
export declare function computeNovelSignal(aiOnlyFindings: Finding[], adjudications: Adjudication[]): NovelSignalMetrics;
|
|
30
|
+
/**
|
|
31
|
+
* Compute adjusted precision that accounts for validated novel signal.
|
|
32
|
+
*
|
|
33
|
+
* adjusted_precision = (matched + validated_novel) / total_ai_findings
|
|
34
|
+
*/
|
|
35
|
+
export declare function computeAdjustedPrecision(matchedCount: number, validNovelCount: number, totalAI: number): number;
|
|
36
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/adjudication/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EACV,OAAO,EACP,YAAY,EAEZ,kBAAkB,EACnB,MAAM,UAAU,CAAC;AAIlB;;;GAGG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,OAAO,GAAG,YAAY,CAuD7D;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG,YAAY,EAAE,CAEvE;AAID;;;;;;;;GAQG;AACH,wBAAgB,kBAAkB,CAChC,cAAc,EAAE,OAAO,EAAE,EACzB,aAAa,EAAE,YAAY,EAAE,GAC5B,kBAAkB,CAsDpB;AAED;;;;GAIG;AACH,wBAAgB,wBAAwB,CACtC,YAAY,EAAE,MAAM,EACpB,eAAe,EAAE,MAAM,EACvB,OAAO,EAAE,MAAM,GACd,MAAM,CAGR"}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Gravito Eval — Adjudication Engine
|
|
4
|
+
*
|
|
5
|
+
* Evaluates AI-only findings (those not matched to any human finding)
|
|
6
|
+
* to determine whether they represent genuine novel signal or noise.
|
|
7
|
+
*
|
|
8
|
+
* Adjudications can be provided by human reviewers or auto-generated
|
|
9
|
+
* using simple heuristics.
|
|
10
|
+
*/
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.autoAdjudicate = autoAdjudicate;
|
|
13
|
+
exports.batchAutoAdjudicate = batchAutoAdjudicate;
|
|
14
|
+
exports.computeNovelSignal = computeNovelSignal;
|
|
15
|
+
exports.computeAdjustedPrecision = computeAdjustedPrecision;
|
|
16
|
+
// ─── Auto-Adjudication Heuristics ─────────────────────────────────────────
|
|
17
|
+
/**
 * Simple heuristic-based auto-adjudication for AI-only findings.
 * This provides a baseline — human adjudication is always preferred.
 *
 * Rules, applied in order (lengths are of the lower-cased description):
 *   1. shorter than 30 characters                    → LOW_VALUE
 *   2. contains a generic "could be improved" phrase → LOW_VALUE
 *   3. severity high/critical and longer than 80     → VALID
 *   4. severity medium and longer than 60            → VALID
 *   5. anything else                                 → LOW_VALUE
 *
 * A finding whose `description` is missing or not a string is treated as
 * having an empty description (rule 1) instead of throwing.
 *
 * @param {object} finding - Finding with `id`, `description`, `severity`.
 * @returns {{findingId: *, label: string, reasoning: string}} Adjudication
 *   for the finding.
 */
function autoAdjudicate(finding) {
    // Input files are only shape-checked at the top level, so a finding
    // without a usable description can reach this point.
    const desc = typeof finding.description === "string"
        ? finding.description.toLowerCase()
        : "";
    const verdict = (label, reasoning) => ({
        findingId: finding.id,
        label,
        reasoning,
    });
    // Short, vague descriptions → LOW_VALUE
    if (desc.length < 30) {
        return verdict("LOW_VALUE", "Description too brief to be actionable");
    }
    // Generic/boilerplate patterns → LOW_VALUE
    const genericPatterns = [
        "could be improved",
        "might benefit from",
        "consider adding",
        "may want to",
        "general improvement",
    ];
    if (genericPatterns.some((p) => desc.includes(p))) {
        return verdict("LOW_VALUE", "Generic improvement suggestion without specific evidence");
    }
    // High severity with specific evidence → VALID
    if ((finding.severity === "high" || finding.severity === "critical") &&
        desc.length > 80) {
        return verdict("VALID", "High severity with detailed description suggests genuine issue");
    }
    // Medium severity with reasonable detail → VALID
    if (finding.severity === "medium" && desc.length > 60) {
        return verdict("VALID", "Medium severity with sufficient detail");
    }
    // Default: LOW_VALUE for low severity or insufficient detail
    return verdict("LOW_VALUE", "Insufficient severity or detail for confident validation");
}
|
|
70
|
+
/**
 * Apply the heuristic adjudicator to every AI-only finding in a list.
 *
 * @param {Array<object>} findings AI-only findings to adjudicate.
 * @returns {Array<object>} One adjudication per finding, in input order.
 */
function batchAutoAdjudicate(findings) {
    // Explicit single-argument callback: only the finding is forwarded,
    // not the index/array that Array.prototype.map also supplies.
    return findings.map((finding) => autoAdjudicate(finding));
}
|
|
76
|
+
// ─── Novel Signal Computation ─────────────────────────────────────────────
|
|
77
|
+
/**
 * Summarise how much genuine novel signal the AI-only findings contain.
 *
 * Each finding is looked up by id in the supplied adjudications. Findings
 * without an adjudication, or whose label is not one of VALID / INVALID /
 * DUPLICATE / LOW_VALUE, are left out of the counts. When several
 * adjudications share a findingId, the last one in the list is used.
 *
 * validatedNovelRate = validCount / (sum of the four counts), or 0 when
 * nothing was counted. System strength tiers (lower bounds inclusive):
 *   - WEAK:           rate < 0.15          → mostly noise
 *   - MODERATE:       0.15 <= rate < 0.25  → some signal
 *   - STRONG:         0.25 <= rate < 0.40  → meaningful additional value
 *   - DIFFERENTIATED: rate >= 0.40         → system finds things humans miss
 *
 * @param {Array<{ id: string }>} aiOnlyFindings Findings with no human match.
 * @param {Array<{ findingId: string, label: string }>} adjudications
 *   Verdicts keyed by finding id.
 * @returns {object} totalAiOnly, the four per-label counts,
 *   validatedNovelRate and systemStrength.
 */
function computeNovelSignal(aiOnlyFindings, adjudications) {
    // Index verdicts by finding id; later entries overwrite earlier ones.
    const verdictById = new Map();
    for (const { findingId, label } of adjudications) {
        verdictById.set(findingId, label);
    }

    // One counter per recognised label; anything else is not tallied.
    const tally = new Map([
        ["VALID", 0],
        ["INVALID", 0],
        ["DUPLICATE", 0],
        ["LOW_VALUE", 0],
    ]);
    for (const { id } of aiOnlyFindings) {
        const verdict = verdictById.get(id);
        if (tally.has(verdict)) {
            tally.set(verdict, tally.get(verdict) + 1);
        }
    }

    const validCount = tally.get("VALID");
    const invalidCount = tally.get("INVALID");
    const duplicateCount = tally.get("DUPLICATE");
    const lowValueCount = tally.get("LOW_VALUE");

    const adjudicatedTotal = validCount + invalidCount + duplicateCount + lowValueCount;
    let validatedNovelRate = 0;
    if (adjudicatedTotal > 0) {
        validatedNovelRate = validCount / adjudicatedTotal;
    }

    // Tiers ordered from highest floor to lowest; the first floor reached wins.
    const tiers = [
        [0.40, "DIFFERENTIATED"],
        [0.25, "STRONG"],
        [0.15, "MODERATE"],
    ];
    const reached = tiers.find(([floor]) => validatedNovelRate >= floor);
    const systemStrength = reached ? reached[1] : "WEAK";

    return {
        totalAiOnly: aiOnlyFindings.length,
        validCount,
        invalidCount,
        duplicateCount,
        lowValueCount,
        validatedNovelRate,
        systemStrength,
    };
}
|
|
139
|
+
/**
 * Precision that also credits AI-only findings validated as novel signal.
 *
 * adjusted_precision = (matched + validated_novel) / total_ai_findings
 *
 * @param {number} matchedCount AI findings matched to a human finding.
 * @param {number} validNovelCount AI-only findings adjudicated as valid.
 * @param {number} totalAI Total number of AI findings.
 * @returns {number} The adjusted precision, or 0 when totalAI is 0.
 */
function computeAdjustedPrecision(matchedCount, validNovelCount, totalAI) {
    const credited = matchedCount + validNovelCount;
    // Guard the division: with no AI findings the precision is defined as 0.
    return totalAI === 0 ? 0 : credited / totalAI;
}
|
|
149
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/adjudication/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;AAeH,wCAuDC;AAKD,kDAEC;AAaD,gDAyDC;AAOD,4DAOC;AAxJD,6EAA6E;AAE7E;;;GAGG;AACH,SAAgB,cAAc,CAAC,OAAgB;IAC7C,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC;IAE/C,wCAAwC;IACxC,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QACrB,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,EAAE;YACrB,KAAK,EAAE,WAAW;YAClB,SAAS,EAAE,wCAAwC;SACpD,CAAC;IACJ,CAAC;IAED,2CAA2C;IAC3C,MAAM,eAAe,GAAG;QACtB,mBAAmB;QACnB,oBAAoB;QACpB,iBAAiB;QACjB,aAAa;QACb,qBAAqB;KACtB,CAAC;IACF,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAClD,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,EAAE;YACrB,KAAK,EAAE,WAAW;YAClB,SAAS,EAAE,0DAA0D;SACtE,CAAC;IACJ,CAAC;IAED,+CAA+C;IAC/C,IACE,CAAC,OAAO,CAAC,QAAQ,KAAK,MAAM,IAAI,OAAO,CAAC,QAAQ,KAAK,UAAU,CAAC;QAChE,IAAI,CAAC,MAAM,GAAG,EAAE,EAChB,CAAC;QACD,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,EAAE;YACrB,KAAK,EAAE,OAAO;YACd,SAAS,EAAE,gEAAgE;SAC5E,CAAC;IACJ,CAAC;IAED,iDAAiD;IACjD,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QACtD,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,EAAE;YACrB,KAAK,EAAE,OAAO;YACd,SAAS,EAAE,wCAAwC;SACpD,CAAC;IACJ,CAAC;IAED,6DAA6D;IAC7D,OAAO;QACL,SAAS,EAAE,OAAO,CAAC,EAAE;QACrB,KAAK,EAAE,WAAW;QAClB,SAAS,EAAE,0DAA0D;KACtE,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,mBAAmB,CAAC,QAAmB;IACrD,OAAO,QAAQ,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;AACtC,CAAC;AAED,6EAA6E;AAE7E;;;;;;;;GAQG;AACH,SAAgB,kBAAkB,CAChC,cAAyB,EACzB,aAA6B;IAE7B,MAAM,MAAM,GAAG,IAAI,GAAG,EAA6B,CAAC;IACpD,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;QAChC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC;IACvC,CAAC;IAED,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,cAAc,GAAG,CAAC,CAAC;IACvB,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,OAAO,IAAI,cAAc,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACrC,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO;gBACV,UAAU,EAAE,CAAC;gBACb,MAAM;YACR,KAAK,SAAS;gB
ACZ,YAAY,EAAE,CAAC;gBACf,MAAM;YACR,KAAK,WAAW;gBACd,cAAc,EAAE,CAAC;gBACjB,MAAM;YACR,KAAK,WAAW;gBACd,aAAa,EAAE,CAAC;gBAChB,MAAM;QACV,CAAC;IACH,CAAC;IAED,MAAM,gBAAgB,GAAG,UAAU,GAAG,YAAY,GAAG,cAAc,GAAG,aAAa,CAAC;IACpF,MAAM,kBAAkB,GAAG,gBAAgB,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC;IAEpF,IAAI,cAAoD,CAAC;IACzD,IAAI,kBAAkB,IAAI,IAAI,EAAE,CAAC;QAC/B,cAAc,GAAG,gBAAgB,CAAC;IACpC,CAAC;SAAM,IAAI,kBAAkB,IAAI,IAAI,EAAE,CAAC;QACtC,cAAc,GAAG,QAAQ,CAAC;IAC5B,CAAC;SAAM,IAAI,kBAAkB,IAAI,IAAI,EAAE,CAAC;QACtC,cAAc,GAAG,UAAU,CAAC;IAC9B,CAAC;SAAM,CAAC;QACN,cAAc,GAAG,MAAM,CAAC;IAC1B,CAAC;IAED,OAAO;QACL,WAAW,EAAE,cAAc,CAAC,MAAM;QAClC,UAAU;QACV,YAAY;QACZ,cAAc;QACd,aAAa;QACb,kBAAkB;QAClB,cAAc;KACf,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAgB,wBAAwB,CACtC,YAAoB,EACpB,eAAuB,EACvB,OAAe;IAEf,IAAI,OAAO,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC5B,OAAO,CAAC,YAAY,GAAG,eAAe,CAAC,GAAG,OAAO,CAAC;AACpD,CAAC"}
|