switchboard-cli 0.1.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +122 -0
- package/bin/switchboard.mjs +49 -0
- package/calibration/engine/baseline.ts +93 -0
- package/calibration/engine/diagnosis.ts +191 -0
- package/calibration/engine/diff.ts +118 -0
- package/calibration/engine/escalation.ts +49 -0
- package/calibration/engine/ledger.ts +141 -0
- package/calibration/engine/trends.ts +141 -0
- package/calibration/external/rubric.yaml +32 -0
- package/calibration/external/scorer.ts +479 -0
- package/calibration/external/verdict-writer.ts +29 -0
- package/calibration/internal/harness.ts +697 -0
- package/calibration/internal/return-simulator.ts +270 -0
- package/calibration/internal/trace-collector.ts +78 -0
- package/calibration/internal/verdict-writer.ts +149 -0
- package/calibration/ledger/baselines/baseline-2026-04-09.yaml +23 -0
- package/calibration/ledger/history.yaml +18 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/diffs/adv-0bdc944b61d5.yaml +9 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/diffs/blind-16cdf0db1b43.yaml +9 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/diffs/blind-a6b2c8be67cc.yaml +9 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/manifest.yaml +8 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/seeds/adv-0bdc944b61d5.yaml +7 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/seeds/blind-16cdf0db1b43.yaml +8 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/seeds/blind-a6b2c8be67cc.yaml +8 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/summary.yaml +10 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/traces/adv-0bdc944b61d5.yaml +141 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/traces/blind-16cdf0db1b43.yaml +147 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/traces/blind-a6b2c8be67cc.yaml +147 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/verdicts-a/adv-0bdc944b61d5.yaml +24 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/verdicts-a/blind-16cdf0db1b43.yaml +24 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/verdicts-a/blind-a6b2c8be67cc.yaml +25 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/verdicts-b/adv-0bdc944b61d5.yaml +31 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/verdicts-b/blind-16cdf0db1b43.yaml +32 -0
- package/calibration/ledger/runs/2026-04-09T09-45-01-838Z/verdicts-b/blind-a6b2c8be67cc.yaml +32 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/diffs/adv-a0c9e2bfb0d6.yaml +9 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/diffs/blind-3e892f3a89ee.yaml +9 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/diffs/blind-958b2f9e6816.yaml +9 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/manifest.yaml +8 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/seeds/adv-a0c9e2bfb0d6.yaml +7 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/seeds/blind-3e892f3a89ee.yaml +8 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/seeds/blind-958b2f9e6816.yaml +8 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/summary.yaml +10 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/traces/adv-a0c9e2bfb0d6.yaml +141 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/traces/blind-3e892f3a89ee.yaml +147 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/traces/blind-958b2f9e6816.yaml +147 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/verdicts-a/adv-a0c9e2bfb0d6.yaml +24 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/verdicts-a/blind-3e892f3a89ee.yaml +23 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/verdicts-a/blind-958b2f9e6816.yaml +25 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/verdicts-b/adv-a0c9e2bfb0d6.yaml +31 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/verdicts-b/blind-3e892f3a89ee.yaml +32 -0
- package/calibration/ledger/runs/2026-04-09T10-02-57-143Z/verdicts-b/blind-958b2f9e6816.yaml +32 -0
- package/calibration/seeds/adversarial-generator.ts +159 -0
- package/calibration/seeds/blind-generator.ts +169 -0
- package/calibration/seeds/replay-loader.ts +117 -0
- package/calibration/skill/calibrate.ts +292 -0
- package/calibration/skill/cli-flags.ts +49 -0
- package/calibration/skill/report.ts +80 -0
- package/calibration/skill/review.ts +118 -0
- package/calibration/types.ts +292 -0
- package/package.json +46 -0
- package/src/commands/audit-codex.ts +266 -0
- package/src/commands/calibrate.ts +70 -0
- package/src/commands/compile.ts +117 -0
- package/src/commands/evaluate.ts +103 -0
- package/src/commands/ingest.ts +250 -0
- package/src/commands/init.ts +133 -0
- package/src/commands/packet.ts +408 -0
- package/src/commands/receipt.ts +305 -0
- package/src/commands/run-claude.ts +355 -0
- package/src/index.ts +43 -0
- package/src/lib/draft-return.ts +278 -0
- package/src/lib/drift-guard.ts +105 -0
- package/src/lib/errors.ts +61 -0
- package/src/lib/output.ts +43 -0
- package/src/lib/paths.ts +125 -0
- package/src/lib/proof.ts +262 -0
- package/src/lib/transport.ts +276 -0
- package/src/lib/yaml-io.ts +62 -0
- package/src/store/filesystem-store.ts +326 -0
package/README.md
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# @switchboard/cli
|
|
2
|
+
|
|
3
|
+
Portable governance substrate for AI workflows.
|
|
4
|
+
|
|
5
|
+
Compile specs, dispatch to Claude/ChatGPT/Cursor/Codex, ingest structured returns, issue trust receipts, evaluate independently, and calibrate your pipeline.
|
|
6
|
+
|
|
7
|
+
> **Alpha release.** Expect breaking changes. [Report issues](https://github.com/switchboard-foundation/switchboard/issues).
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
npm install -g @switchboard/cli
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Requires Node >= 18.
|
|
16
|
+
|
|
17
|
+
## Quick start
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Initialize a governed project
|
|
21
|
+
sb init
|
|
22
|
+
|
|
23
|
+
# Compile project contract + spec from canonical state
|
|
24
|
+
sb compile
|
|
25
|
+
|
|
26
|
+
# Generate a dispatch packet for a surface
|
|
27
|
+
sb packet claude # or: chatgpt, cursor, codex
|
|
28
|
+
|
|
29
|
+
# Launch governed dispatch to Claude Code
|
|
30
|
+
sb run claude
|
|
31
|
+
|
|
32
|
+
# Ingest the structured return
|
|
33
|
+
sb ingest
|
|
34
|
+
|
|
35
|
+
# Issue an immutable trust receipt
|
|
36
|
+
sb receipt
|
|
37
|
+
|
|
38
|
+
# Export receipt as standalone JSON
|
|
39
|
+
sb receipt --export json
|
|
40
|
+
|
|
41
|
+
# Run independent evaluation
|
|
42
|
+
sb evaluate
|
|
43
|
+
|
|
44
|
+
# Run the two-layer calibration harness
|
|
45
|
+
sb calibrate
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Pipeline
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
init -> compile -> packet -> run -> ingest -> receipt -> evaluate
|
|
52
|
+
|
|
|
53
|
+
calibrate
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Each step reads from and writes to `.switchboard/` in your project root. All state is file-based (YAML + Markdown). No database, no hosted service.
|
|
57
|
+
|
|
58
|
+
## Commands
|
|
59
|
+
|
|
60
|
+
| Command | What it does |
|
|
61
|
+
|---------|-------------|
|
|
62
|
+
| `sb init` | Scaffold `.switchboard/` directory with contract, working state, and spec |
|
|
63
|
+
| `sb compile` | Compile a loop contract from canonical state |
|
|
64
|
+
| `sb packet <surface>` | Generate a governed dispatch packet (SBX bundle) |
|
|
65
|
+
| `sb run claude` | Dispatch to Claude Code via Agent SDK with proof capture |
|
|
66
|
+
| `sb ingest` | Ingest structured SB_RETURN.yaml, run reconciliation + gates |
|
|
67
|
+
| `sb receipt` | Build immutable ReceiptIssuedV2 from persisted ingest artifacts |
|
|
68
|
+
| `sb receipt --export json` | Export receipt as standalone JSON file |
|
|
69
|
+
| `sb receipt --export yaml` | Export receipt as standalone YAML file |
|
|
70
|
+
| `sb evaluate` | Run independent evaluation on the latest receipt |
|
|
71
|
+
| `sb audit codex` | Generate Codex audit packet for independent review |
|
|
72
|
+
| `sb calibrate` | Run two-layer calibration harness |
|
|
73
|
+
|
|
74
|
+
## Calibration
|
|
75
|
+
|
|
76
|
+
The calibration harness tests the trust machinery itself:
|
|
77
|
+
|
|
78
|
+
- **Layer A** (internal): Runs seeds through the real governance pipeline with full trace access
|
|
79
|
+
- **Layer B** (external): Scores independently with zero imports from core — only sees raw output
|
|
80
|
+
- **Blind seeds**: Fresh ideas the system has never seen
|
|
81
|
+
- **Adversarial seeds**: Designed to break the machinery (scope traps, non-goal evasion, evidence gaps)
|
|
82
|
+
- **Living ledger**: Every run appends to an auditable history with trends
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# Default run: 5 blind + 3 adversarial seeds
|
|
86
|
+
sb calibrate
|
|
87
|
+
|
|
88
|
+
# Custom mix
|
|
89
|
+
sb calibrate --blind 10 --adversarial 5
|
|
90
|
+
|
|
91
|
+
# Stress test only
|
|
92
|
+
sb calibrate --adversarial-only 8
|
|
93
|
+
|
|
94
|
+
# Review pending escalations
|
|
95
|
+
sb calibrate --review
|
|
96
|
+
|
|
97
|
+
# Show calibration trends
|
|
98
|
+
sb calibrate --trends
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Trust machinery
|
|
102
|
+
|
|
103
|
+
The governance pipeline enforces:
|
|
104
|
+
|
|
105
|
+
- **Honesty guardrails**: Terms such as "certified", "guaranteed", and "no issues" are banned from receipts
|
|
106
|
+
- **Reconciliation**: Six drift checks (scope inflation, non-goal violation, goal drift, target-user shift, untracked decisions, evidence contradiction)
|
|
107
|
+
- **Gates**: Approval gates for scope changes, unparented decisions, weak evidence, trust boundary changes
|
|
108
|
+
- **Claim spread**: Deterministic measurement of how far executor claims drift from proven evidence
|
|
109
|
+
- **Receipts**: Immutable trust artifacts with claim assessments, verification results, and closure basis
|
|
110
|
+
|
|
111
|
+
## What this is NOT
|
|
112
|
+
|
|
113
|
+
- Not a chat wrapper
|
|
114
|
+
- Not an LLM gateway
|
|
115
|
+
- Not an eval platform
|
|
116
|
+
- Not an autonomous agent
|
|
117
|
+
|
|
118
|
+
It is a governance substrate. It preserves project coherence while you bounce work across AI surfaces.
|
|
119
|
+
|
|
120
|
+
## License
|
|
121
|
+
|
|
122
|
+
MIT
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Switchboard CLI bin shim.
|
|
5
|
+
*
|
|
6
|
+
* Spawns tsx on the real TypeScript entrypoint so the CLI is invocable
|
|
7
|
+
* without a build step. Works from any cwd — resolves paths relative
|
|
8
|
+
* to this file, not the caller.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { execFileSync } from "node:child_process";
|
|
12
|
+
import { fileURLToPath } from "node:url";
|
|
13
|
+
import { dirname, resolve } from "node:path";
|
|
14
|
+
|
|
15
|
+
// Directory containing this shim; every path below is anchored here so the
// CLI behaves the same regardless of the caller's working directory.
const here = dirname(fileURLToPath(import.meta.url));
const entry = resolve(here, "..", "src", "index.ts");

// tsx candidates in priority order: the copy installed next to this package,
// then whatever `tsx` resolves to on PATH.
const candidates = [resolve(here, "..", "node_modules", ".bin", "tsx"), "tsx"];

// A candidate counts as usable when `<candidate> --version` runs without throwing.
const canRun = (bin) => {
  try {
    execFileSync(bin, ["--version"], { stdio: "ignore" });
    return true;
  } catch {
    return false;
  }
};

// `find` stops at the first usable candidate, so later ones are never probed.
const tsxBin = candidates.find(canRun);

if (tsxBin === undefined) {
  console.error("switchboard: could not find tsx. Run `pnpm install` in the monorepo root.");
  process.exit(1);
}

try {
  // Forward the user's arguments untouched and share this process's stdio.
  const forwarded = [entry, ...process.argv.slice(2)];
  execFileSync(tsxBin, forwarded, { stdio: "inherit", cwd: process.cwd() });
} catch (err) {
  // execFileSync throws on a non-zero exit; the child has already printed its
  // own output, so only the exit code is propagated (1 when none is available).
  const exitCode = err.status ?? 1;
  process.exit(exitCode);
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { mkdirSync, writeFileSync, readFileSync, existsSync, readdirSync } from "fs";
|
|
2
|
+
import { join } from "path";
|
|
3
|
+
import YAML from "yaml";
|
|
4
|
+
|
|
5
|
+
import type { BaselineSnapshot } from "../types";
|
|
6
|
+
import { getAxisWeights } from "@switchboard/core";
|
|
7
|
+
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Threshold snapshot management
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
|
|
12
|
+
// Name of the subdirectory, joined onto the caller-supplied base path, in
// which baseline snapshot YAML files are written and read.
const BASELINES_DIR = "baselines";
|
|
13
|
+
|
|
14
|
+
/**
 * Snapshots the current axis weights reported by `getAxisWeights()` and
 * writes them to `<basePath>/baselines/<baseline_id>.yaml`.
 *
 * The id has the form `baseline-<YYYY-MM-DD>`, taken from the date part of
 * the ISO timestamp, so two captures on the same date share one file name.
 *
 * @param basePath - Directory under which the `baselines` folder lives; it
 *   is created (recursively) when missing.
 * @returns The snapshot that was written.
 */
export function captureBaseline(basePath: string): BaselineSnapshot {
  const targetDir = join(basePath, BASELINES_DIR);
  mkdirSync(targetDir, { recursive: true });

  const createdAt = new Date().toISOString();
  const [datePart] = createdAt.split("T");
  const baselineId = `baseline-${datePart}`;

  const axisWeights = getAxisWeights();

  // Gate thresholds mirror the axis weights one-to-one: each weight doubles
  // as the sensitivity boundary recorded for its axis.
  const gateThresholds: Record<string, number> = Object.fromEntries(
    Object.entries(axisWeights),
  );

  const snapshot: BaselineSnapshot = {
    baseline_id: baselineId,
    created_at: createdAt,
    axis_weights: axisWeights as Record<string, number>,
    gate_thresholds: gateThresholds,
    // Composite spread-score thresholds used for reconciliation decisions.
    reconcile_thresholds: {
      drift_warning: 0.3,
      drift_block: 0.6,
      reconcile_required: 0.5,
    },
    notes: "",
  };

  const targetFile = join(targetDir, `${baselineId}.yaml`);
  writeFileSync(targetFile, YAML.stringify(snapshot), "utf-8");

  return snapshot;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Returns the stored baseline with the greatest `created_at` value.
 *
 * @param basePath - Directory that contains the `baselines` folder.
 * @returns The newest snapshot, or `null` when no baselines are stored.
 */
export function readLatestBaseline(basePath: string): BaselineSnapshot | null {
  // Order newest-first by comparing the `created_at` strings, then take the head.
  const [newest] = listBaselines(basePath).sort((left, right) =>
    right.created_at.localeCompare(left.created_at),
  );
  return newest ?? null;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Loads every baseline snapshot stored as a `.yaml` file under
 * `<basePath>/baselines`.
 *
 * Files whose YAML content is not a mapping (for example an empty file, which
 * parses to `null`) are skipped, so callers never receive a `null` entry
 * that would crash on property access. Files that fail to parse still throw.
 *
 * @param basePath - Directory that contains the `baselines` folder.
 * @returns The parsed snapshots in directory-listing order; an empty array
 *   when the folder does not exist.
 */
export function listBaselines(basePath: string): BaselineSnapshot[] {
  const baselinesDir = join(basePath, BASELINES_DIR);
  if (!existsSync(baselinesDir)) return [];

  const snapshots: BaselineSnapshot[] = [];

  for (const file of readdirSync(baselinesDir)) {
    if (!file.endsWith(".yaml")) continue;

    const raw = readFileSync(join(baselinesDir, file), "utf-8");
    const parsed: unknown = YAML.parse(raw);

    // Only a YAML mapping can be a snapshot; null, scalars and sequences are
    // not usable and are left out.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) continue;

    // NOTE(review): the mapping's fields are not validated here — the cast
    // trusts that files in this folder were written by captureBaseline.
    snapshots.push(parsed as BaselineSnapshot);
  }

  return snapshots;
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import { randomBytes } from "crypto";
|
|
2
|
+
|
|
3
|
+
import type {
|
|
4
|
+
CalibrationSeed,
|
|
5
|
+
VerdictDiff,
|
|
6
|
+
PipelineTrace,
|
|
7
|
+
LayerAVerdict,
|
|
8
|
+
LayerBVerdict,
|
|
9
|
+
DiagnosisCard,
|
|
10
|
+
} from "../types";
|
|
11
|
+
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Diagnosis card generation — root cause analysis for disagreements
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
/**
 * Builds a diagnosis card for a seed on which Layer A (machinery) and
 * Layer B (independent scoring) disagreed, with a root-cause summary and a
 * recommended fix.
 *
 * `a_pass_b_fail` and `a_fail_b_pass` each get dedicated analysis; any other
 * category is analysed as `both_fail_different`. A threshold margin of `1`
 * means "no proximity data was available", in both directions.
 *
 * @param seed - Seed under diagnosis; its id, source and complexity target
 *   are reported on the card.
 * @param diff - Diff whose `category` selects the analysis and whose
 *   `return_mode` is copied onto the card.
 * @param trace - Pipeline trace; only `gates_triggered` is consulted.
 * @param verdictA - Layer A verdict (triggered gates, threshold proximity).
 * @param verdictB - Layer B verdict (dimension scores, composite score,
 *   justification).
 * @returns A new card with a random `diag-<16 hex chars>` id, status
 *   `"open"`, and every verification-plan flag set to `true`.
 */
export function generateDiagnosis(
  seed: CalibrationSeed,
  diff: VerdictDiff,
  trace: PipelineTrace,
  verdictA: LayerAVerdict,
  verdictB: LayerBVerdict,
): DiagnosisCard {
  const id = `diag-${randomBytes(8).toString("hex")}`;
  const now = new Date().toISOString();

  type ProxEntry = { threshold: number; actual: number; margin: number };
  type DimEntry = { score: number; justification: string };

  // Layer B dimensions scoring below 0.5. Evaluated lazily so that branches
  // which never look at dimensions do not touch `verdictB.dimensions`.
  const lowScoringDimensions = (): Array<[string, DimEntry]> =>
    Object.entries(verdictB.dimensions as Record<string, DimEntry>).filter(
      ([, dim]) => dim.score < 0.5,
    );

  // --- Root cause analysis ---
  let gateThatShouldHaveFired = "";
  let thresholdMargin = 0;
  let evidenceMissed = "";
  let detail = "";

  // --- Recommended fix ---
  let fixType: DiagnosisCard["recommended_fix"]["type"];
  let fixParameter = "";
  let fixCurrentValue = 0;
  let fixProposedValue = 0;
  let fixJustification = "";

  if (diff.category === "a_pass_b_fail") {
    // Machinery missed something — Layer A was too lenient.
    detail = `Layer A passed but Layer B failed. The trust machinery missed a problem ` +
      `that independent scoring caught. Seed source: ${seed.source}, ` +
      `complexity: ${seed.complexity_target}.`;

    // The gate with the smallest margin is the one that came closest to firing.
    const proximities = Object.entries(verdictA.threshold_proximity) as Array<[string, ProxEntry]>;
    proximities.sort(([, x], [, y]) => x.margin - y.margin);
    const closest = proximities[0];

    if (closest !== undefined) {
      const [closestGate, closestData] = closest;
      gateThatShouldHaveFired = closestGate;
      thresholdMargin = closestData.margin;

      // What Layer B caught that A missed.
      evidenceMissed = lowScoringDimensions()
        .map(([name, dim]) => `${name} (${dim.score.toFixed(2)}): ${dim.justification}`)
        .join("; ");
    } else {
      // No threshold proximity data — the machinery had no close calls.
      gateThatShouldHaveFired = "unknown";
      thresholdMargin = 1;
      evidenceMissed = `Layer B justification: ${verdictB.justification}`;
    }

    if (thresholdMargin < 0.1) {
      fixType = "threshold_tuning";
      fixParameter = gateThatShouldHaveFired;
      fixCurrentValue = verdictA.threshold_proximity[gateThatShouldHaveFired]?.threshold ?? 0;
      // Tighten the threshold — lower it so this gate fires.
      fixProposedValue = fixCurrentValue > 0 ? fixCurrentValue * 0.9 : 0.1;
      fixJustification =
        `Gate "${gateThatShouldHaveFired}" was within ${(thresholdMargin * 100).toFixed(1)}% ` +
        `of firing. Lowering threshold should catch this class of miss.`;
    } else if (trace.gates_triggered.length > 0) {
      // Margin is wide, yet gates did fire and the seed still passed.
      fixType = "scoring_dimension_gap";
      fixJustification =
        `Gates fired (${trace.gates_triggered.join(", ")}) but still passed. ` +
        `The scoring dimensions may not weight this failure mode heavily enough.`;
    } else {
      fixType = "new_gate";
      fixJustification =
        `No gates were close to firing (margin: ${(thresholdMargin * 100).toFixed(1)}%). ` +
        `A new gate may be needed to catch this class of failure.`;
    }
  } else if (diff.category === "a_fail_b_pass") {
    // Machinery was too strict — Layer A failed but B passed.
    detail = `Layer A failed but Layer B passed. The trust machinery was too strict. ` +
      `Gates triggered: [${verdictA.gates_triggered.join(", ")}]. ` +
      `Layer B composite: ${verdictB.composite_score.toFixed(3)}.`;

    // The first triggered gate is treated as the over-aggressive one.
    const [firstGate] = verdictA.gates_triggered;
    if (firstGate !== undefined) {
      gateThatShouldHaveFired = firstGate;
    }

    // Without proximity data for that gate the margin is unknown, not zero.
    // Use the same "no data" sentinel (1) as the a_pass_b_fail branch so the
    // card does not recommend tuning a threshold it has no numbers for.
    const triggeredProx = verdictA.threshold_proximity[gateThatShouldHaveFired];
    thresholdMargin = triggeredProx ? triggeredProx.margin : 1;

    evidenceMissed = `Layer B passed with composite ${verdictB.composite_score.toFixed(3)}. ` +
      `All dimensions above threshold.`;

    if (thresholdMargin < 0.1) {
      fixType = "threshold_tuning";
      fixParameter = gateThatShouldHaveFired;
      fixCurrentValue = triggeredProx?.threshold ?? 0;
      // Loosen the threshold — raise it so this gate doesn't fire.
      fixProposedValue = fixCurrentValue > 0 ? fixCurrentValue * 1.1 : 0.5;
      fixJustification =
        `Gate "${gateThatShouldHaveFired}" fired with margin ${(thresholdMargin * 100).toFixed(1)}%. ` +
        `Raising threshold should reduce false positives for this pattern.`;
    } else {
      fixType = "scoring_dimension_gap";
      fixJustification =
        `Layer A rejected with gates [${verdictA.gates_triggered.join(", ")}] but ` +
        `Layer B passed convincingly (composite: ${verdictB.composite_score.toFixed(3)}). ` +
        `Gate scoring may be miscalibrated for this seed type.`;
    }
  } else {
    // both_fail_different — the two layers failed for different reasons.
    detail = `Both layers failed but for different reasons. ` +
      `Layer A gates: [${verdictA.gates_triggered.join(", ")}]. ` +
      `Layer B composite: ${verdictB.composite_score.toFixed(3)}. ` +
      `The failure models diverge, suggesting a scoring dimension gap.`;

    const lowNames = lowScoringDimensions().map(([name]) => name);

    evidenceMissed = `Layer A focused on gates [${verdictA.gates_triggered.join(", ")}], ` +
      `Layer B flagged dimensions [${lowNames.join(", ")}].`;

    fixType = "scoring_dimension_gap";
    fixJustification =
      `Failure models diverge: machinery gates and independent scoring ` +
      `are catching different problems. Alignment of scoring dimensions needed.`;
  }

  return {
    id,
    seed_id: seed.seed_id,
    layer_a_verdict: verdictA.verdict,
    layer_b_verdict: verdictB.verdict,
    return_mode: diff.return_mode,
    root_cause: {
      gate_that_should_have_fired: gateThatShouldHaveFired,
      threshold_margin: thresholdMargin,
      evidence_missed: evidenceMissed,
      detail,
    },
    recommended_fix: {
      type: fixType,
      parameter: fixParameter,
      current_value: fixCurrentValue,
      proposed_value: fixProposedValue,
      justification: fixJustification,
    },
    verification_plan: {
      retest_this_seed: true,
      retest_full_suite: true,
      regression_check: true,
    },
    status: "open",
    created_at: now,
  };
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import type { LayerAVerdict, LayerBVerdict, VerdictDiff } from "../types";
|
|
2
|
+
|
|
3
|
+
// Return-mode label that computeDiff copies onto each diff as `return_mode`.
// NOTE(review): what "honest" and "drift" denote is not defined in this
// file — confirm against the code that produces the simulated returns.
type ReturnMode = "honest" | "drift";
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
// Verdict comparison — computes the diff between Layer A and Layer B verdicts
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
/**
 * Compares the machinery verdict (Layer A) with the independent verdict
 * (Layer B) for one seed and produces the resulting diff record.
 *
 * Every disagreement is escalated. An agreement is escalated only when a
 * fresh `Math.random()` draw falls below `sampleRate`.
 *
 * @param a - Layer A verdict; also supplies the diff's `seed_id`.
 * @param b - Layer B verdict.
 * @param returnMode - Copied verbatim onto the diff as `return_mode`.
 * @param sampleRate - Probability with which an agreement is escalated.
 * @returns The categorised diff with its reasoning text and escalation state.
 */
export function computeDiff(
  a: LayerAVerdict,
  b: LayerBVerdict,
  returnMode: ReturnMode,
  sampleRate: number,
): VerdictDiff {
  const { category, reasoning, isDisagreement } = classifyVerdictPair(a, b);

  // Short-circuiting keeps the random draw exclusive to agreements.
  const sampled = !isDisagreement && Math.random() < sampleRate;

  let escalationReason = "";
  if (isDisagreement) {
    escalationReason = `Disagreement: ${category}`;
  } else if (sampled) {
    escalationReason = `Sampled agreement for calibration (rate: ${sampleRate})`;
  }

  return {
    seed_id: a.seed_id,
    category,
    layer_a_verdict: a.verdict,
    layer_b_verdict: b.verdict,
    return_mode: returnMode,
    reasoning,
    is_disagreement: isDisagreement,
    escalated: isDisagreement || sampled,
    escalation_reason: escalationReason,
  };
}

/** Result of comparing one Layer A verdict with one Layer B verdict. */
interface VerdictClassification {
  category: VerdictDiff["category"];
  reasoning: string;
  isDisagreement: boolean;
}

/**
 * Assigns a diff category, explanatory text and disagreement flag to a pair
 * of verdicts. Any verdict other than "pass" or "fail" is handled by the
 * final fallback, which never counts as a disagreement.
 */
function classifyVerdictPair(a: LayerAVerdict, b: LayerBVerdict): VerdictClassification {
  const aPassed = a.verdict === "pass";
  const bPassed = b.verdict === "pass";
  const aFailed = a.verdict === "fail";
  const bFailed = b.verdict === "fail";

  if (aPassed && bPassed) {
    return {
      category: "agreement_pass",
      reasoning: "Both Layer A and Layer B agree: pass.",
      isDisagreement: false,
    };
  }

  if (aFailed && bFailed) {
    const gateList = [...new Set(a.gates_triggered)].join(", ");
    const scored = b.dimensions as Record<string, { score: number; justification: string }>;
    const lowNames: string[] = [];
    for (const [name, dim] of Object.entries(scored)) {
      if (dim.score < 0.5) lowNames.push(name);
    }

    // Both layers count as agreeing on the failure when A triggered at least
    // one gate and B has at least one dimension scoring below 0.5.
    const signalsOverlap = lowNames.length > 0 && gateList !== "" ? true : lowNames.length > 0 && new Set(a.gates_triggered).size > 0;

    if (signalsOverlap) {
      return {
        category: "agreement_fail",
        reasoning:
          `Both layers fail. Layer A triggered gates: [${gateList}]. ` +
          `Layer B low dimensions: [${lowNames.join(", ")}]. Failure signals overlap.`,
        isDisagreement: false,
      };
    }

    return {
      category: "both_fail_different",
      reasoning:
        `Both layers fail but for different reasons. ` +
        `Layer A gates: [${gateList}], justification: ${a.justification}. ` +
        `Layer B low dimensions: [${lowNames.join(", ")}], justification: ${b.justification}.`,
      isDisagreement: true,
    };
  }

  if (aPassed && bFailed) {
    // Layer A too lenient — machinery missed something.
    return {
      category: "a_pass_b_fail",
      reasoning:
        `Layer A passed but Layer B failed (composite: ${b.composite_score.toFixed(3)}). ` +
        `Machinery may be too lenient. Layer B justification: ${b.justification}`,
      isDisagreement: true,
    };
  }

  if (aFailed && bPassed) {
    // Layer A too strict.
    return {
      category: "a_fail_b_pass",
      reasoning:
        `Layer A failed but Layer B passed (composite: ${b.composite_score.toFixed(3)}). ` +
        `Machinery may be too strict. Layer A gates: [${a.gates_triggered.join(", ")}]. ` +
        `Layer A justification: ${a.justification}`,
      isDisagreement: true,
    };
  }

  // At least one verdict is neither pass nor fail: follow whichever signal
  // is present and treat the pair as an agreement.
  const leansPass = aPassed || bPassed;
  return {
    category: leansPass ? "agreement_pass" : "agreement_fail",
    reasoning: leansPass
      ? "One or both layers inconclusive, leaning pass."
      : "One or both layers inconclusive, leaning fail.",
    isDisagreement: false,
  };
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { randomBytes } from "crypto";
|
|
2
|
+
|
|
3
|
+
import type { VerdictDiff, DiagnosisCard, EscalationItem } from "../types";
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
// Human review queue — builds escalation items from diffs and diagnoses
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
/**
 * Turns the escalated diffs of a run into pending human-review items.
 *
 * A diff contributes an item only when its `escalated` flag is set. Each
 * item carries the diagnosis card that shares its `seed_id`, when one
 * exists; if several cards share a seed, the last one in `diagnoses` is used.
 *
 * @param diffs - Verdict diffs to scan, in queue order.
 * @param diagnoses - Diagnosis cards to attach by `seed_id`.
 * @returns One item per escalated diff, each with a random
 *   `esc-<16 hex chars>` id, judgment `"pending"`, and empty review fields.
 */
export function buildEscalationQueue(
  diffs: VerdictDiff[],
  diagnoses: DiagnosisCard[],
): EscalationItem[] {
  // seed_id -> diagnosis lookup; later cards replace earlier ones.
  const cardForSeed = new Map<string, DiagnosisCard>(
    diagnoses.map((card): [string, DiagnosisCard] => [card.seed_id, card]),
  );

  return diffs
    .filter((diff) => diff.escalated)
    .map((diff): EscalationItem => ({
      id: `esc-${randomBytes(8).toString("hex")}`,
      seed_id: diff.seed_id,
      diff,
      diagnosis: cardForSeed.get(diff.seed_id),
      human_judgment: "pending",
      human_reasoning: "",
      reviewed_at: "",
    }));
}
|