signalbrain 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- signalbrain-0.1.0/LICENSE +17 -0
- signalbrain-0.1.0/PKG-INFO +104 -0
- signalbrain-0.1.0/README.md +83 -0
- signalbrain-0.1.0/pyproject.toml +36 -0
- signalbrain-0.1.0/setup.cfg +4 -0
- signalbrain-0.1.0/src/agi_os_backend/__init__.py +0 -0
- signalbrain-0.1.0/src/agi_os_backend/governance/__init__.py +0 -0
- signalbrain-0.1.0/src/agi_os_backend/governance/calibration_autonomy_gate.py +1 -0
- signalbrain-0.1.0/src/agi_os_backend/governance/calibration_ledger_core.py +1 -0
- signalbrain-0.1.0/src/agi_os_backend/governance/calibration_same_pr_pin.py +1 -0
- signalbrain-0.1.0/src/signalbrain/__init__.py +3 -0
- signalbrain-0.1.0/src/signalbrain/cli.py +97 -0
- signalbrain-0.1.0/src/signalbrain/gate.py +104 -0
- signalbrain-0.1.0/src/signalbrain/governance/__init__.py +19 -0
- signalbrain-0.1.0/src/signalbrain/governance/calibration_autonomy_gate.py +184 -0
- signalbrain-0.1.0/src/signalbrain/governance/calibration_ledger_core.py +193 -0
- signalbrain-0.1.0/src/signalbrain/governance/calibration_same_pr_pin.py +112 -0
- signalbrain-0.1.0/src/signalbrain/guard.py +65 -0
- signalbrain-0.1.0/src/signalbrain/ledger.py +175 -0
- signalbrain-0.1.0/src/signalbrain/pins.py +115 -0
- signalbrain-0.1.0/src/signalbrain/receipt.py +200 -0
- signalbrain-0.1.0/src/signalbrain/scorer.py +171 -0
- signalbrain-0.1.0/src/signalbrain.egg-info/PKG-INFO +104 -0
- signalbrain-0.1.0/src/signalbrain.egg-info/SOURCES.txt +25 -0
- signalbrain-0.1.0/src/signalbrain.egg-info/dependency_links.txt +1 -0
- signalbrain-0.1.0/src/signalbrain.egg-info/entry_points.txt +2 -0
- signalbrain-0.1.0/src/signalbrain.egg-info/top_level.txt +2 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
Copyright 2026 SignalBrain
|
|
6
|
+
|
|
7
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
8
|
+
you may not use this file except in compliance with the License.
|
|
9
|
+
You may obtain a copy of the License at
|
|
10
|
+
|
|
11
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
|
|
13
|
+
Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
See the License for the specific language governing permissions and
|
|
17
|
+
limitations under the License.
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: signalbrain
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Trust layer for AI-modified software — receipts, ledger, calibrated autonomy
|
|
5
|
+
Author: SignalBrain
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://signalbrain.ai
|
|
8
|
+
Project-URL: Repository, https://github.com/whitestone1121-web/signalbrain
|
|
9
|
+
Project-URL: Documentation, https://github.com/whitestone1121-web/signalbrain/blob/main/docs/RECEIPT_SPEC.md
|
|
10
|
+
Project-URL: Incident, https://github.com/whitestone1121-web/signalbrain/blob/main/docs/incidents/2026-07-tooling-trust-streak-gaming.md
|
|
11
|
+
Keywords: ai,agents,calibration,trust,receipts,governance
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# SignalBrain
|
|
23
|
+
|
|
24
|
+
[Founding incident record: tooling trust-streak gaming (2026-07)](https://github.com/whitestone1121-web/signalbrain/blob/main/docs/incidents/2026-07-tooling-trust-streak-gaming.md)
|
|
25
|
+
|
|
26
|
+
**Trust layer for AI-modified software.**
|
|
27
|
+
|
|
28
|
+
Every company is letting agents change systems that matter. Every agent overstates what it did. SignalBrain is the referee: signed improvement receipts, objective re-score, and per-class calibrated trust — so autonomy is earned, not self-reported.
|
|
29
|
+
|
|
30
|
+
This repository is **Phase 0 v0.1**: the receipt spec, ledger math, scoring lane, anti-Goodhart machinery, and the founding incident record — extracted from the [Titan reference deployment](https://github.com/whitestone1121-web/neural-chat-v3) (R&D dummy that keeps trying to game its own ledger, in public).
|
|
31
|
+
|
|
32
|
+
## 60-second demo — run it, don't trust it
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install "git+https://github.com/whitestone1121-web/signalbrain"
|
|
36
|
+
bash demo/demo.sh
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Real output (scratch repo built on the fly — no mocks):
|
|
40
|
+
|
|
41
|
+
```text
|
|
42
|
+
▶ 1. An agent tries to score its own claim BEFORE anyone merged it
|
|
43
|
+
{"status": "refused_guard", "code": 3, "message": "... not on HEAD — score only human-merged receipts"}
|
|
44
|
+
refused: unmerged claims cannot enter the ledger. No agent grades its own homework.
|
|
45
|
+
|
|
46
|
+
▶ 2. A batch of receipts measured only by tests the agent wrote itself
|
|
47
|
+
ledger now holds 3 rows — every one classified: 3 "claim_kind": "invariant_pin"
|
|
48
|
+
{} (no class has ANY trust-eligible claims)
|
|
49
|
+
three green results, ZERO earned trust: held-by-construction pins are recorded, never counted.
|
|
50
|
+
|
|
51
|
+
▶ 3. An honest failure
|
|
52
|
+
"held": false
|
|
53
|
+
the agent said 0.9 confidence. The measurement said no. That gap is the product.
|
|
54
|
+
|
|
55
|
+
▶ 4. Ten claims that actually hold
|
|
56
|
+
"tooling": { "hit_rate": 1.0, "n": 10, "status": "auto-merge ELIGIBLE" }
|
|
57
|
+
earned by track record, revocable by evidence. Autonomy is graduated, never granted.
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Three layers
|
|
61
|
+
|
|
62
|
+
| Layer | What | Status |
|
|
63
|
+
|-------|------|--------|
|
|
64
|
+
| **Receipt** | Open standard — signed, re-runnable claims | [`docs/RECEIPT_SPEC.md`](docs/RECEIPT_SPEC.md) v0.1 |
|
|
65
|
+
| **Ledger** | Per-class trust from objectively re-scored receipts | `src/signalbrain/governance/` |
|
|
66
|
+
| **Refuter** | Adversarial verification + SPC (premium) | scripts + roadmap |
|
|
67
|
+
|
|
68
|
+
## Founding proof
|
|
69
|
+
|
|
70
|
+
Our own autonomous lane tried to pad its trust score to 100% ELIGIBLE in a local working tree. It never reached git. Full receipt-style incident record with reproduce commands:
|
|
71
|
+
|
|
72
|
+
[`docs/incidents/2026-07-tooling-trust-streak-gaming.md`](docs/incidents/2026-07-tooling-trust-streak-gaming.md)
|
|
73
|
+
|
|
74
|
+
Every number in that document is re-derivable from cited SHAs.
|
|
75
|
+
|
|
76
|
+
## Quick start
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
export PYTHONPATH=src:scripts
|
|
80
|
+
|
|
81
|
+
# Gate report (requires a ledger at docs/calibration/improvement_claim_ledger.jsonl)
|
|
82
|
+
python scripts/calibration_ledger.py docs/calibration/improvement_claim_ledger.jsonl \
|
|
83
|
+
--require-measured --by-class --window 10
|
|
84
|
+
|
|
85
|
+
# Score one merged receipt
|
|
86
|
+
bash scripts/calibration_score_receipt.sh docs/improvements/NNNN-name.md
|
|
87
|
+
|
|
88
|
+
# Contract suite (product spec)
|
|
89
|
+
pytest tests/contracts/ -q
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## v0.1 scope and known seams
|
|
93
|
+
|
|
94
|
+
See [`docs/PHASE0_EXTRACT_PLAN.md`](docs/PHASE0_EXTRACT_PLAN.md). This release copies the working Titan implementation; the six-week refactor (configurable paths, packaged CLI, GitHub Action) starts when three design-partner conversations exist.
|
|
95
|
+
|
|
96
|
+
**Compat note:** governance modules live under `signalbrain.governance`; `agi_os_backend.governance` shims preserve script import paths from the reference deployment.
|
|
97
|
+
|
|
98
|
+
## Design partner offer
|
|
99
|
+
|
|
100
|
+
We score your coding agents' claims against what actually merged. First caught overclaim is free — if we don't find one, you still get an audit. Contact: [signalbrain.ai](https://signalbrain.ai)
|
|
101
|
+
|
|
102
|
+
## License
|
|
103
|
+
|
|
104
|
+
Apache-2.0 — see LICENSE.
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# SignalBrain
|
|
2
|
+
|
|
3
|
+
[Founding incident record: tooling trust-streak gaming (2026-07)](https://github.com/whitestone1121-web/signalbrain/blob/main/docs/incidents/2026-07-tooling-trust-streak-gaming.md)
|
|
4
|
+
|
|
5
|
+
**Trust layer for AI-modified software.**
|
|
6
|
+
|
|
7
|
+
Every company is letting agents change systems that matter. Every agent overstates what it did. SignalBrain is the referee: signed improvement receipts, objective re-score, and per-class calibrated trust — so autonomy is earned, not self-reported.
|
|
8
|
+
|
|
9
|
+
This repository is **Phase 0 v0.1**: the receipt spec, ledger math, scoring lane, anti-Goodhart machinery, and the founding incident record — extracted from the [Titan reference deployment](https://github.com/whitestone1121-web/neural-chat-v3) (R&D dummy that keeps trying to game its own ledger, in public).
|
|
10
|
+
|
|
11
|
+
## 60-second demo — run it, don't trust it
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install "git+https://github.com/whitestone1121-web/signalbrain"
|
|
15
|
+
bash demo/demo.sh
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Real output (scratch repo built on the fly — no mocks):
|
|
19
|
+
|
|
20
|
+
```text
|
|
21
|
+
▶ 1. An agent tries to score its own claim BEFORE anyone merged it
|
|
22
|
+
{"status": "refused_guard", "code": 3, "message": "... not on HEAD — score only human-merged receipts"}
|
|
23
|
+
refused: unmerged claims cannot enter the ledger. No agent grades its own homework.
|
|
24
|
+
|
|
25
|
+
▶ 2. A batch of receipts measured only by tests the agent wrote itself
|
|
26
|
+
ledger now holds 3 rows — every one classified: 3 "claim_kind": "invariant_pin"
|
|
27
|
+
{} (no class has ANY trust-eligible claims)
|
|
28
|
+
three green results, ZERO earned trust: held-by-construction pins are recorded, never counted.
|
|
29
|
+
|
|
30
|
+
▶ 3. An honest failure
|
|
31
|
+
"held": false
|
|
32
|
+
the agent said 0.9 confidence. The measurement said no. That gap is the product.
|
|
33
|
+
|
|
34
|
+
▶ 4. Ten claims that actually hold
|
|
35
|
+
"tooling": { "hit_rate": 1.0, "n": 10, "status": "auto-merge ELIGIBLE" }
|
|
36
|
+
earned by track record, revocable by evidence. Autonomy is graduated, never granted.
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Three layers
|
|
40
|
+
|
|
41
|
+
| Layer | What | Status |
|
|
42
|
+
|-------|------|--------|
|
|
43
|
+
| **Receipt** | Open standard — signed, re-runnable claims | [`docs/RECEIPT_SPEC.md`](docs/RECEIPT_SPEC.md) v0.1 |
|
|
44
|
+
| **Ledger** | Per-class trust from objectively re-scored receipts | `src/signalbrain/governance/` |
|
|
45
|
+
| **Refuter** | Adversarial verification + SPC (premium) | scripts + roadmap |
|
|
46
|
+
|
|
47
|
+
## Founding proof
|
|
48
|
+
|
|
49
|
+
Our own autonomous lane tried to pad its trust score to 100% ELIGIBLE in a local working tree. It never reached git. Full receipt-style incident record with reproduce commands:
|
|
50
|
+
|
|
51
|
+
[`docs/incidents/2026-07-tooling-trust-streak-gaming.md`](docs/incidents/2026-07-tooling-trust-streak-gaming.md)
|
|
52
|
+
|
|
53
|
+
Every number in that document is re-derivable from cited SHAs.
|
|
54
|
+
|
|
55
|
+
## Quick start
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
export PYTHONPATH=src:scripts
|
|
59
|
+
|
|
60
|
+
# Gate report (requires a ledger at docs/calibration/improvement_claim_ledger.jsonl)
|
|
61
|
+
python scripts/calibration_ledger.py docs/calibration/improvement_claim_ledger.jsonl \
|
|
62
|
+
--require-measured --by-class --window 10
|
|
63
|
+
|
|
64
|
+
# Score one merged receipt
|
|
65
|
+
bash scripts/calibration_score_receipt.sh docs/improvements/NNNN-name.md
|
|
66
|
+
|
|
67
|
+
# Contract suite (product spec)
|
|
68
|
+
pytest tests/contracts/ -q
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## v0.1 scope and known seams
|
|
72
|
+
|
|
73
|
+
See [`docs/PHASE0_EXTRACT_PLAN.md`](docs/PHASE0_EXTRACT_PLAN.md). This release copies the working Titan implementation; the six-week refactor (configurable paths, packaged CLI, GitHub Action) starts when three design-partner conversations exist.
|
|
74
|
+
|
|
75
|
+
**Compat note:** governance modules live under `signalbrain.governance`; `agi_os_backend.governance` shims preserve script import paths from the reference deployment.
|
|
76
|
+
|
|
77
|
+
## Design partner offer
|
|
78
|
+
|
|
79
|
+
We score your coding agents' claims against what actually merged. First caught overclaim is free — if we don't find one, you still get an audit. Contact: [signalbrain.ai](https://signalbrain.ai)
|
|
80
|
+
|
|
81
|
+
## License
|
|
82
|
+
|
|
83
|
+
Apache-2.0 — see LICENSE.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "signalbrain"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Trust layer for AI-modified software — receipts, ledger, calibrated autonomy"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "Apache-2.0" }
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
authors = [{ name = "SignalBrain" }]
|
|
13
|
+
keywords = ["ai", "agents", "calibration", "trust", "receipts", "governance"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: Apache Software License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.scripts]
|
|
23
|
+
sb = "signalbrain.cli:main"
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://signalbrain.ai"
|
|
27
|
+
Repository = "https://github.com/whitestone1121-web/signalbrain"
|
|
28
|
+
Documentation = "https://github.com/whitestone1121-web/signalbrain/blob/main/docs/RECEIPT_SPEC.md"
|
|
29
|
+
Incident = "https://github.com/whitestone1121-web/signalbrain/blob/main/docs/incidents/2026-07-tooling-trust-streak-gaming.md"
|
|
30
|
+
|
|
31
|
+
[tool.setuptools.packages.find]
|
|
32
|
+
where = ["src"]
|
|
33
|
+
|
|
34
|
+
[tool.pytest.ini_options]
|
|
35
|
+
pythonpath = ["src", "scripts"]
|
|
36
|
+
testpaths = ["tests"]
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from signalbrain.governance.calibration_autonomy_gate import * # noqa: F403
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from signalbrain.governance.calibration_ledger_core import * # noqa: F403
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from signalbrain.governance.calibration_same_pr_pin import * # noqa: F403
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""sb — score receipts, read gates.
|
|
2
|
+
|
|
3
|
+
sb score <receipt.md ...> [--root .] [--ledger ledger.jsonl] [--ref origin/main]
|
|
4
|
+
[--rescore] [--allow-unmerged] [--timeout 180]
|
|
5
|
+
sb gate [--ledger ledger.jsonl] [--window N] [--by-class] [--recency-only]
|
|
6
|
+
sb check <receipt.md> [--root .] [--ref origin/main]
|
|
7
|
+
|
|
8
|
+
Exit codes: score → 0 if every receipt scored (held or not — honest failure is a
|
|
9
|
+
result), 3 if any was refused by the merged-receipt guard; gate → 0 TRUST, 1 GATE;
|
|
10
|
+
check → the guard's own code (0/3/4/5).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import json
|
|
17
|
+
import sys
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
from . import gate as gate_mod
|
|
21
|
+
from . import guard as guard_mod
|
|
22
|
+
from .scorer import ScoreConfig, score_receipt
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _cmd_score(args: argparse.Namespace) -> int:
    """Score every receipt named on the command line and print one JSON line each.

    Returns 3 when at least one receipt came back with status
    ``"refused_guard"``, otherwise 0 (a scored receipt counts as success
    whether or not its claim held).
    """
    settings = ScoreConfig(
        root=Path(args.root).resolve(),
        ledger_path=Path(args.ledger),
        merged_ref=args.ref,
        allow_unmerged=args.allow_unmerged,
        timeout_s=args.timeout,
        rescore=args.rescore,
    )

    any_refused = False
    for receipt_arg in args.receipts:
        outcome = score_receipt(Path(receipt_arg), settings)
        print(json.dumps(outcome))
        if outcome["status"] == "refused_guard":
            any_refused = True

    if any_refused:
        return 3
    return 0
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _cmd_gate(args: argparse.Namespace) -> int:
    """Print the trust gate as JSON and translate it into an exit code.

    With ``--by-class`` the per-class report is printed and the exit code is 0
    when at least one class is ``"auto-merge ELIGIBLE"``; otherwise the global
    gate verdict is printed and the exit code is 0 when widening is allowed.
    In both modes a closed gate exits 1.

    Command-line flags are layered on top of the process environment rather
    than replacing it, so environment fallbacks the flags do not mention (for
    example the minimum hit rate) stay in effect. When no flag is given the
    gate module is handed ``env=None`` and reads the environment itself.
    """
    import os  # local import: this module's import block does not provide ``os``

    ledger = Path(args.ledger)

    # A non-positive window means "not set", matching gate.window_from_env.
    window = args.window if args.window and args.window > 0 else None

    overrides: dict[str, str] = {}
    if args.recency_only:
        overrides[gate_mod.RECENCY_GATE_ENV] = "1"
    if window is not None:
        overrides[gate_mod.WINDOW_ENV] = str(window)

    env: dict[str, str] | None = None
    if overrides:
        env = {**os.environ, **overrides}
        if args.recency_only:
            # An explicit --recency-only must win over an inherited dual-gate
            # switch, which would otherwise force the dual gate back on.
            env.pop(gate_mod.DUAL_GATE_ENV, None)

    if args.by_class:
        status = gate_mod.per_class(ledger, window=window, env=env)
        print(json.dumps(status, indent=2))
        eligible = any(row.get("status") == "auto-merge ELIGIBLE" for row in status.values())
        return 0 if eligible else 1

    allowed, verdict = gate_mod.widening_allowed(ledger, env=env)
    print(json.dumps(verdict, indent=2))
    return 0 if allowed else 1
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _cmd_check(args: argparse.Namespace) -> int:
    """Run only the merged-receipt guard for one receipt.

    The guard's message is written to stderr and its ``code`` becomes the
    process exit code.
    """
    repo_root = Path(args.root).resolve()
    receipt_path = Path(args.receipt)
    outcome = guard_mod.check_merged(repo_root, receipt_path, merged_ref=args.ref)
    print(outcome.message, file=sys.stderr)
    return outcome.code
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` for the ``sb`` command and dispatch to the chosen subcommand.

    Three subcommands are registered (``score``, ``gate``, ``check``); each
    stores its handler under ``fn`` and the handler's return value is returned
    as the exit code. ``argv=None`` lets argparse read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        prog="sb",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    # sb score
    score = commands.add_parser("score", help="objectively score merged receipts into the ledger")
    score.add_argument("receipts", nargs="+")
    score.add_argument("--root", default=".")
    score.add_argument("--ledger", required=True)
    score.add_argument("--ref", default="origin/main")
    score.add_argument("--rescore", action="store_true")
    score.add_argument("--allow-unmerged", action="store_true", help="supervised experiments only")
    score.add_argument("--timeout", type=int, default=180)
    score.set_defaults(fn=_cmd_score)

    # sb gate
    gate = commands.add_parser("gate", help="print trust gates; exit 0 TRUST / 1 GATE")
    gate.add_argument("--ledger", required=True)
    gate.add_argument("--window", type=int, default=0)
    gate.add_argument("--by-class", action="store_true")
    gate.add_argument("--recency-only", action="store_true")
    gate.set_defaults(fn=_cmd_gate)

    # sb check
    check = commands.add_parser("check", help="merged-receipt guard only")
    check.add_argument("receipt")
    check.add_argument("--root", default=".")
    check.add_argument("--ref", default="origin/main")
    check.set_defaults(fn=_cmd_check)

    parsed = parser.parse_args(argv)
    handler = parsed.fn
    return handler(parsed)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
if __name__ == "__main__":
|
|
97
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Trust gates — global and per-class (SPEC §3).
|
|
2
|
+
|
|
3
|
+
Config-first port of neural-chat-v3 `calibration_autonomy_gate`. Env variables
|
|
4
|
+
use the SIGNALBRAIN_ prefix and are fallbacks; explicit arguments always win.
|
|
5
|
+
There is deliberately no bypass env: the only override anywhere is the scorer's
|
|
6
|
+
loud ``allow_unmerged``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from .ledger import (
|
|
16
|
+
DEFAULT_MIN_HIT_RATE,
|
|
17
|
+
DEFAULT_RECENCY_WINDOW,
|
|
18
|
+
class_status,
|
|
19
|
+
load_rows,
|
|
20
|
+
trust_verdict,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
WINDOW_ENV = "SIGNALBRAIN_WINDOW"
|
|
24
|
+
MIN_HIT_RATE_ENV = "SIGNALBRAIN_MIN_HIT_RATE"
|
|
25
|
+
RECENCY_GATE_ENV = "SIGNALBRAIN_RECENCY_GATE"
|
|
26
|
+
DUAL_GATE_ENV = "SIGNALBRAIN_DUAL_GATE"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _truthy(env: dict[str, str], key: str) -> bool:
|
|
30
|
+
return str(env.get(key, "")).strip().lower() in ("1", "true", "yes", "on")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def window_from_env(env: dict[str, str] | None = None) -> int | None:
    """Read the recency window size from ``WINDOW_ENV``.

    ``env=None`` means a snapshot of the process environment. Returns the
    parsed integer only when it is strictly positive; an unset, blank,
    non-integer or non-positive value yields ``None``.
    """
    source = dict(os.environ) if env is None else env
    text = str(source.get(WINDOW_ENV, "")).strip()
    if text == "":
        return None
    try:
        parsed = int(text)
    except ValueError:
        return None
    if parsed <= 0:
        return None
    return parsed
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def min_hit_rate_from_env(env: dict[str, str] | None = None) -> float:
    """Read the minimum hit rate from ``MIN_HIT_RATE_ENV``.

    ``env=None`` means a snapshot of the process environment. The value must
    parse as a float in the half-open interval (0, 1]; anything else, including
    an unset or empty variable, falls back to ``DEFAULT_MIN_HIT_RATE``.
    """
    source = dict(os.environ) if env is None else env
    configured = source.get(MIN_HIT_RATE_ENV, "")
    text = str(configured or DEFAULT_MIN_HIT_RATE).strip()
    try:
        rate = float(text)
    except ValueError:
        return DEFAULT_MIN_HIT_RATE
    if 0.0 < rate <= 1.0:
        return rate
    return DEFAULT_MIN_HIT_RATE
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def recency_only_from_env(env: dict[str, str] | None = None) -> bool:
    """Tell whether the operator opted into gating on the recency window alone.

    A truthy ``DUAL_GATE_ENV`` always wins and yields False; otherwise the
    result is the truthiness of ``RECENCY_GATE_ENV``. ``env=None`` means a
    snapshot of the process environment.
    """
    source = dict(os.environ) if env is None else env
    dual_forced = _truthy(source, DUAL_GATE_ENV)
    return (not dual_forced) and _truthy(source, RECENCY_GATE_ENV)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def per_class(ledger_path: Path, *, window: int | None = None, env: dict[str, str] | None = None) -> dict[str, dict[str, Any]]:
    """Return the per-class auto-merge status computed from the ledger.

    Window precedence is: explicit ``window`` argument, then the environment
    (via ``window_from_env``), then ``DEFAULT_RECENCY_WINDOW``. Reports and
    merge decisions are meant to share this single resolution so they can
    never disagree about which window was applied.
    """
    if window is not None:
        effective_window = window
    else:
        effective_window = window_from_env(env) or DEFAULT_RECENCY_WINDOW
    rows = load_rows(ledger_path)
    return class_status(rows, window=effective_window, require_measured=True)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def eligible_classes(ledger_path: Path, *, window: int | None = None, env: dict[str, str] | None = None) -> list[str]:
    """List, in sorted order, the classes whose status is ``"auto-merge ELIGIBLE"``.

    ``window`` and ``env`` are forwarded unchanged to ``per_class``.
    """
    report = per_class(ledger_path, window=window, env=env)
    names = [
        name
        for name, row in report.items()
        if row.get("status") == "auto-merge ELIGIBLE"
    ]
    names.sort()
    return names
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def widening_allowed(
    ledger_path: Path,
    *,
    env: dict[str, str] | None = None,
) -> tuple[bool, dict[str, Any]]:
    """Evaluate the global trust gate over the ledger.

    Two verdicts are computed with the same minimum hit rate: one over the
    full history (``window=None``) and one over the recency window (from the
    environment, else ``DEFAULT_RECENCY_WINDOW``). By default both must be
    ``"TRUST"``; when ``recency_only_from_env`` is true only the recency
    verdict decides.

    Returns ``(allowed, report)`` where ``report`` carries the final verdict,
    the reason, which gate was operative, both sub-verdicts and the window
    size used.
    """
    settings = dict(os.environ if env is None else env)
    threshold = min_hit_rate_from_env(settings)
    rows = load_rows(ledger_path)

    full = trust_verdict(rows, min_hit_rate=threshold, require_measured=True, window=None)
    recency_window = window_from_env(settings) or DEFAULT_RECENCY_WINDOW
    recency = trust_verdict(rows, min_hit_rate=threshold, require_measured=True, window=recency_window)

    recency_only = recency_only_from_env(settings)
    recency_trusted = recency.get("verdict") == "TRUST"
    if recency_only:
        allowed = recency_trusted
        reason = "recency-windowed measured gate (full history advisory; operator opt-in)"
        operative_gate = "recency_window"
    else:
        allowed = full.get("verdict") == "TRUST" and recency_trusted
        reason = "dual gate: full measured history and recency window must both TRUST"
        operative_gate = "dual"

    report = {
        "verdict": "TRUST" if allowed else "GATE",
        "reason": reason,
        "operative_gate": operative_gate,
        "full_history": full,
        "recency_window": recency,
        "recency_window_size": recency_window,
    }
    return allowed, report
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from signalbrain.governance.calibration_ledger_core import (
|
|
2
|
+
CLAIM_KIND_IMPROVEMENT,
|
|
3
|
+
CLAIM_KIND_INVARIANT_PIN,
|
|
4
|
+
calibration_verdict,
|
|
5
|
+
class_auto_merge_status,
|
|
6
|
+
filter_rows,
|
|
7
|
+
is_invariant_pin,
|
|
8
|
+
load_rows,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"CLAIM_KIND_IMPROVEMENT",
|
|
13
|
+
"CLAIM_KIND_INVARIANT_PIN",
|
|
14
|
+
"calibration_verdict",
|
|
15
|
+
"class_auto_merge_status",
|
|
16
|
+
"filter_rows",
|
|
17
|
+
"is_invariant_pin",
|
|
18
|
+
"load_rows",
|
|
19
|
+
]
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Calibration gate for widening autonomous authority.
|
|
2
|
+
|
|
3
|
+
Scores improvement-claim confidence vs outcome (held/not-held). Autonomy widening
|
|
4
|
+
(factory dispatch, research dispatch, mutation env flags) requires TRUST unless
|
|
5
|
+
an operator explicitly bypasses with TITAN_AUTONOMY_CALIBRATION_BYPASS=1.
|
|
6
|
+
|
|
7
|
+
By default the gate counts only ``scored_by == "measured"`` claims so self-reported
|
|
8
|
+
A/B ingest cannot inflate trust (see calibration_ingest_receipts.py).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from signalbrain.governance.calibration_ledger_core import (
|
|
18
|
+
AUTONOMY_RECENCY_WINDOW,
|
|
19
|
+
DEFAULT_MIN_HIT_RATE,
|
|
20
|
+
calibration_verdict,
|
|
21
|
+
class_auto_merge_status,
|
|
22
|
+
load_rows,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
DEFAULT_LEDGER_REL = "docs/calibration/improvement_claim_ledger.jsonl"
|
|
26
|
+
CALIBRATION_BYPASS_ENV = "TITAN_AUTONOMY_CALIBRATION_BYPASS"
|
|
27
|
+
LEDGER_PATH_ENV = "TITAN_CALIBRATION_LEDGER_PATH"
|
|
28
|
+
MIN_HIT_RATE_ENV = "TITAN_CALIBRATION_MIN_HIT_RATE"
|
|
29
|
+
REQUIRE_MEASURED_ENV = "TITAN_CALIBRATION_REQUIRE_MEASURED"
|
|
30
|
+
WINDOW_ENV = "TITAN_CALIBRATION_WINDOW"
|
|
31
|
+
CHANGE_CLASS_ENV = "TITAN_CALIBRATION_CHANGE_CLASS"
|
|
32
|
+
DUAL_GATE_ENV = "TITAN_CALIBRATION_DUAL_GATE"
|
|
33
|
+
RECENCY_GATE_ENV = "TITAN_CALIBRATION_RECENCY_GATE"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def default_ledger_path(root: Path | None = None) -> Path:
    """Resolve where the calibration ledger lives.

    A non-blank ``LEDGER_PATH_ENV`` in the process environment takes
    precedence and is returned as-is. Otherwise ``DEFAULT_LEDGER_REL`` is
    joined onto ``root``, or onto the directory three levels above this
    file's own directory when ``root`` is not given.
    """
    configured = os.getenv(LEDGER_PATH_ENV, "").strip()
    if configured:
        return Path(configured)
    if root:
        base = root
    else:
        base = Path(__file__).resolve().parents[3]
    return base / DEFAULT_LEDGER_REL
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def min_hit_rate_from_env(env: dict[str, str] | None = None) -> float:
    """Read the minimum hit rate from ``MIN_HIT_RATE_ENV``.

    ``env=None`` means the live process environment. Only a float in the
    half-open interval (0, 1] is accepted; an unset, empty, unparsable or
    out-of-range value yields ``DEFAULT_MIN_HIT_RATE``.
    """
    source = os.environ if env is None else env
    configured = source.get(MIN_HIT_RATE_ENV, "")
    text = str(configured or DEFAULT_MIN_HIT_RATE).strip()
    try:
        rate = float(text)
    except ValueError:
        return DEFAULT_MIN_HIT_RATE
    if 0.0 < rate <= 1.0:
        return rate
    return DEFAULT_MIN_HIT_RATE
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def require_measured_from_env(env: dict[str, str] | None = None) -> bool:
    """Tell whether only measured claims should count, per ``REQUIRE_MEASURED_ENV``.

    The variable defaults to ``"1"`` when unset, so the answer is True unless
    it is set to something other than ``1``/``true``/``yes``/``on``
    (case-insensitive, surrounding whitespace ignored). ``env=None`` means the
    live process environment.
    """
    source = os.environ if env is None else env
    flag = str(source.get(REQUIRE_MEASURED_ENV, "1")).strip().lower()
    return flag in ("1", "true", "yes", "on")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def window_from_env(env: dict[str, str] | None = None) -> int | None:
    """Read the operator-chosen recency window from ``WINDOW_ENV``.

    ``env=None`` means the live process environment. Returns the parsed
    integer only when it is strictly positive; an unset, blank, non-integer
    or non-positive value yields ``None``.
    """
    source = os.environ if env is None else env
    text = str(source.get(WINDOW_ENV, "")).strip()
    if text == "":
        return None
    try:
        parsed = int(text)
    except ValueError:
        return None
    if parsed <= 0:
        return None
    return parsed
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def change_class_from_env(env: dict[str, str] | None = None) -> str | None:
    """Return the stripped value of ``CHANGE_CLASS_ENV``, or ``None`` when unset or blank.

    ``env=None`` means the live process environment.
    """
    source = os.environ if env is None else env
    text = str(source.get(CHANGE_CLASS_ENV, "")).strip()
    return text if text else None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def calibration_bypass_enabled(env: dict[str, str] | None = None) -> bool:
    """Tell whether ``CALIBRATION_BYPASS_ENV`` is set to an affirmative value.

    Accepted spellings are ``1``, ``true``, ``yes`` and ``on``
    (case-insensitive, surrounding whitespace ignored); the variable defaults
    to ``"0"``. ``env=None`` means the live process environment.
    """
    source = os.environ if env is None else env
    flag = str(source.get(CALIBRATION_BYPASS_ENV, "0")).strip().lower()
    return flag in ("1", "true", "yes", "on")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def calibration_autonomy_verdict(
    ledger_path: Path,
    *,
    min_hit_rate: float = DEFAULT_MIN_HIT_RATE,
    require_measured: bool | None = None,
    window: int | None = None,
    change_class: str | None = None,
    exclude_goodhart: bool = True,
) -> dict[str, Any]:
    """Compute the calibration verdict for the ledger at ``ledger_path``.

    Args:
        ledger_path: JSONL ledger file to score.
        min_hit_rate: Threshold forwarded to ``calibration_verdict``.
        require_measured: Whether only measured claims count. ``None`` (the
            default) resolves through ``require_measured_from_env()``, which
            is True unless the operator turned it off. Previously ``None``
            was coerced with ``bool()`` to False, so a default call silently
            counted self-reported claims.
        window: Recency window forwarded to ``calibration_verdict``;
            ``None`` is passed through unchanged.
        change_class: Optional class filter forwarded to ``calibration_verdict``.
        exclude_goodhart: Forwarded to ``calibration_verdict``.

    Returns:
        The dict produced by ``calibration_verdict``. When the ledger file
        does not exist, a fixed ``"GATE"`` verdict with zero claims and the
        reason ``"calibration ledger missing"`` is returned instead.
    """
    if require_measured is None:
        # Fail closed: an unspecified setting must not widen what counts.
        require_measured = require_measured_from_env()
    measured_only = bool(require_measured)

    if not ledger_path.is_file():
        return {
            "claims": 0,
            "high_confidence_claims": 0,
            "high_confidence_hit_rate": 0.0,
            "min_hit_rate": min_hit_rate,
            "verdict": "GATE",
            "reason": "calibration ledger missing",
            "require_measured": measured_only,
        }

    rows = load_rows(ledger_path)
    return calibration_verdict(
        rows,
        min_hit_rate=min_hit_rate,
        require_measured=measured_only,
        window=window,
        change_class=change_class,
        exclude_goodhart=exclude_goodhart,
    )
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def per_class_auto_merge(ledger_path: Path, *, window: int | None = None) -> dict[str, dict[str, Any]]:
    """Return the per-class auto-merge status for the ledger at ``ledger_path``.

    Rows are loaded and handed to ``class_auto_merge_status`` with
    ``require_measured`` and ``exclude_goodhart`` both fixed to True;
    ``window`` is forwarded unchanged.
    """
    rows = load_rows(ledger_path)
    return class_auto_merge_status(
        rows,
        window=window,
        require_measured=True,
        exclude_goodhart=True,
    )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _truthy_env(env: dict[str, str], key: str) -> bool:
|
|
123
|
+
return str(env.get(key, "")).strip().lower() in ("1", "true", "yes", "on")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def recency_only_from_env(env: dict[str, str] | None = None) -> bool:
    """Tell whether the operative gate should use the recency window alone.

    A truthy ``DUAL_GATE_ENV`` always wins and yields False; otherwise the
    result is the truthiness of ``RECENCY_GATE_ENV``. ``env=None`` means the
    live process environment.
    """
    source = os.environ if env is None else env
    dual_forced = _truthy_env(source, DUAL_GATE_ENV)
    return (not dual_forced) and _truthy_env(source, RECENCY_GATE_ENV)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def dual_gate_from_env(env: dict[str, str] | None = None) -> bool:
    """Return True when both full-history and recency TRUST are required.

    This is simply the negation of ``recency_only_from_env(env)``, so the
    dual gate is in force whenever the operator has not opted into
    recency-only.
    """
    recency_only = recency_only_from_env(env)
    return not recency_only
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def autonomy_widening_allowed(
    root: Path,
    *,
    env: dict[str, str] | None = None,
    ledger_path: Path | None = None,
) -> tuple[bool, dict[str, Any]]:
    """Decide whether autonomous authority may be widened.

    An affirmative bypass variable short-circuits to ``(True, BYPASS)``.
    Otherwise two verdicts are computed with identical settings: one over the
    full history and one over the recency window (the operator's window from
    ``env``, else ``AUTONOMY_RECENCY_WINDOW``). Both must be ``"TRUST"``
    unless ``recency_only_from_env`` is true, in which case only the recency
    verdict decides.

    Gate settings are read from ``env`` (the process environment when
    ``None``). When ``ledger_path`` is not given the ledger location comes
    from ``default_ledger_path(root)``, which consults the process
    environment rather than ``env``.

    Returns ``(allowed, report)``.
    """
    settings = dict(os.environ if env is None else env)
    if calibration_bypass_enabled(settings):
        return True, {"verdict": "BYPASS", "reason": "operator bypass env set"}

    path = ledger_path or default_ledger_path(root)
    measured = require_measured_from_env(settings)
    threshold = min_hit_rate_from_env(settings)
    operator_window = window_from_env(settings)
    scope = change_class_from_env(settings)

    def _verdict_for(window: int | None) -> dict[str, Any]:
        # Both verdicts share every setting except the window.
        return calibration_autonomy_verdict(
            path,
            min_hit_rate=threshold,
            require_measured=measured,
            window=window,
            change_class=scope,
            exclude_goodhart=True,
        )

    full = _verdict_for(None)
    recency_window = AUTONOMY_RECENCY_WINDOW if operator_window is None else operator_window
    recency = _verdict_for(recency_window)

    recency_only = recency_only_from_env(settings)
    recency_trusted = recency.get("verdict") == "TRUST"
    if recency_only:
        allowed = recency_trusted
        reason = "recency-windowed measured gate (full history advisory only; operator opt-in)"
        operative_gate = "recency_window"
    else:
        allowed = full.get("verdict") == "TRUST" and recency_trusted
        reason = "dual gate: full measured history and recency window must both TRUST"
        operative_gate = "dual"

    report = {
        "verdict": "TRUST" if allowed else "GATE",
        "reason": reason,
        "operative_gate": operative_gate,
        "full_history": full,
        "recency_window": recency,
        "recency_window_size": recency_window,
        "require_measured": measured,
    }
    return allowed, report
|