proofbundle 0.4.1__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {proofbundle-0.4.1/src/proofbundle.egg-info → proofbundle-0.5.0}/PKG-INFO +29 -6
- {proofbundle-0.4.1 → proofbundle-0.5.0}/README.md +24 -5
- {proofbundle-0.4.1 → proofbundle-0.5.0}/pyproject.toml +8 -3
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/__init__.py +1 -1
- proofbundle-0.5.0/src/proofbundle/adapters/inspect_ai.py +65 -0
- proofbundle-0.5.0/src/proofbundle/intoto.py +63 -0
- proofbundle-0.5.0/src/proofbundle/sdjwt_issue.py +119 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0/src/proofbundle.egg-info}/PKG-INFO +29 -6
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle.egg-info/SOURCES.txt +4 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle.egg-info/requires.txt +5 -0
- proofbundle-0.5.0/tests/test_adapters.py +48 -0
- proofbundle-0.5.0/tests/test_intoto.py +63 -0
- proofbundle-0.5.0/tests/test_sdjwt_issue.py +98 -0
- proofbundle-0.4.1/src/proofbundle/adapters/inspect_ai.py +0 -36
- proofbundle-0.4.1/tests/test_adapters.py +0 -32
- {proofbundle-0.4.1 → proofbundle-0.5.0}/LICENSE +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/setup.cfg +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/adapters/__init__.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/adapters/lm_eval.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/bundle.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/cli.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/emit.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/errors.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/evalclaim.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/merkle.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/py.typed +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/sdjwt.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle/signature.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle.egg-info/dependency_links.txt +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle.egg-info/entry_points.txt +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/src/proofbundle.egg-info/top_level.txt +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_bundle.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_cli.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_cli_eval.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_emit.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_eval_claim_schema.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_evalclaim.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_merkle.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_merkle_property.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_rekor_interop.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_rfc6962_external_vectors.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_schema.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_sdjwt_reference.py +0 -0
- {proofbundle-0.4.1 → proofbundle-0.5.0}/tests/test_signature.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: proofbundle
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
|
|
5
5
|
Author: Konrad Gruszka
|
|
6
6
|
License: MIT
|
|
@@ -27,6 +27,8 @@ Provides-Extra: sdjwt
|
|
|
27
27
|
Provides-Extra: eval
|
|
28
28
|
Requires-Dist: rfc8785>=0.1.4; extra == "eval"
|
|
29
29
|
Provides-Extra: adapters
|
|
30
|
+
Provides-Extra: inspect
|
|
31
|
+
Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "inspect"
|
|
30
32
|
Provides-Extra: dev
|
|
31
33
|
Requires-Dist: pytest>=7; extra == "dev"
|
|
32
34
|
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
@@ -35,6 +37,8 @@ Requires-Dist: mypy>=1.8; extra == "dev"
|
|
|
35
37
|
Requires-Dist: build>=1; extra == "dev"
|
|
36
38
|
Requires-Dist: hypothesis>=6; extra == "dev"
|
|
37
39
|
Requires-Dist: rfc8785>=0.1.4; extra == "dev"
|
|
40
|
+
Requires-Dist: sd-jwt>=0.10; extra == "dev"
|
|
41
|
+
Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "dev"
|
|
38
42
|
Dynamic: license-file
|
|
39
43
|
|
|
40
44
|
<div align="center">
|
|
@@ -61,7 +65,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
|
|
|
61
65
|
|
|
62
66
|
**At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
|
|
63
67
|
verify` checks one self-contained `bundle.json` with three offline cryptographic
|
|
64
|
-
checks → `OK` or `FAILED`. No network, no daemon, no own crypto.
|
|
68
|
+
checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 62 tests.
|
|
65
69
|
|
|
66
70
|
## Contents
|
|
67
71
|
|
|
@@ -282,15 +286,34 @@ commitments — it does **not** prove the evaluation was well designed or that t
|
|
|
282
286
|
itself is correct. Those are human judgements; what it removes is the need to simply
|
|
283
287
|
trust the number.
|
|
284
288
|
|
|
289
|
+
### Since v0.5: framework adapter, in-toto, selective disclosure
|
|
290
|
+
|
|
291
|
+
- **inspect_ai adapter** (`pip install "proofbundle[inspect]"`) reads a UK AISI
|
|
292
|
+
[inspect_ai](https://github.com/UKGovernmentBEIS/inspect_ai) eval log via the stable
|
|
293
|
+
`read_eval_log` API (lazy import; the core stays dependency-free) and maps it to a claim.
|
|
294
|
+
`proofbundle.adapters.from_lm_eval_results` reads lm-evaluation-harness `results.json`
|
|
295
|
+
without importing anything.
|
|
296
|
+
- **in-toto Statement v1** — `proofbundle.intoto.to_intoto_statement(claim, root_b64=…)`
|
|
297
|
+
emits the receipt as an in-toto statement with a self-hosted predicate type. The subject
|
|
298
|
+
digest is an *honest salted commitment* under a custom key, never `sha256` (see
|
|
299
|
+
[PREDICATE.md](PREDICATE.md)).
|
|
300
|
+
- **SD-JWT issuance** (RFC 9901) — `proofbundle.sdjwt_issue.issue_sd_jwt(claim, signer,
|
|
301
|
+
root_b64=…, exact_score=…)` issues the receipt so a holder can disclose `passed` +
|
|
302
|
+
`threshold` while **withholding the exact score** and the identifier openings. The signed
|
|
303
|
+
bundle payload is the source of truth; the SD-JWT is a derived, bundle-bound view, verified
|
|
304
|
+
by proofbundle's own verifier **and** the `sd-jwt-python` reference.
|
|
305
|
+
|
|
285
306
|
## Roadmap
|
|
286
307
|
|
|
287
308
|
- **v0.1** — the offline verifier plus a real example bundle.
|
|
288
309
|
- **v0.2** — the emitter: `emit_bundle` / `proofbundle emit`.
|
|
289
310
|
- **v0.3** — external RFC 6962 conformance vectors + real Sigstore Rekor interop.
|
|
290
|
-
- **v0.4
|
|
291
|
-
|
|
292
|
-
- **v0.5** —
|
|
293
|
-
|
|
311
|
+
- **v0.4** — the eval-receipt emitter (`emit_eval_receipt` / `proofbundle emit-eval`),
|
|
312
|
+
salted commitments, issuer binding.
|
|
313
|
+
- **v0.5 (current release)** — inspect_ai adapter (stable API), in-toto Statement v1 view,
|
|
314
|
+
and SD-JWT **issuance** per RFC 9901 (selective disclosure of the exact score).
|
|
315
|
+
- **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
|
|
316
|
+
Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
|
|
294
317
|
|
|
295
318
|
## Contributing
|
|
296
319
|
|
|
@@ -22,7 +22,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
|
|
|
22
22
|
|
|
23
23
|
**At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
|
|
24
24
|
verify` checks one self-contained `bundle.json` with three offline cryptographic
|
|
25
|
-
checks → `OK` or `FAILED`. No network, no daemon, no own crypto.
|
|
25
|
+
checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 62 tests.
|
|
26
26
|
|
|
27
27
|
## Contents
|
|
28
28
|
|
|
@@ -243,15 +243,34 @@ commitments — it does **not** prove the evaluation was well designed or that t
|
|
|
243
243
|
itself is correct. Those are human judgements; what it removes is the need to simply
|
|
244
244
|
trust the number.
|
|
245
245
|
|
|
246
|
+
### Since v0.5: framework adapter, in-toto, selective disclosure
|
|
247
|
+
|
|
248
|
+
- **inspect_ai adapter** (`pip install "proofbundle[inspect]"`) reads a UK AISI
|
|
249
|
+
[inspect_ai](https://github.com/UKGovernmentBEIS/inspect_ai) eval log via the stable
|
|
250
|
+
`read_eval_log` API (lazy import; the core stays dependency-free) and maps it to a claim.
|
|
251
|
+
`proofbundle.adapters.from_lm_eval_results` reads lm-evaluation-harness `results.json`
|
|
252
|
+
without importing anything.
|
|
253
|
+
- **in-toto Statement v1** — `proofbundle.intoto.to_intoto_statement(claim, root_b64=…)`
|
|
254
|
+
emits the receipt as an in-toto statement with a self-hosted predicate type. The subject
|
|
255
|
+
digest is an *honest salted commitment* under a custom key, never `sha256` (see
|
|
256
|
+
[PREDICATE.md](PREDICATE.md)).
|
|
257
|
+
- **SD-JWT issuance** (RFC 9901) — `proofbundle.sdjwt_issue.issue_sd_jwt(claim, signer,
|
|
258
|
+
root_b64=…, exact_score=…)` issues the receipt so a holder can disclose `passed` +
|
|
259
|
+
`threshold` while **withholding the exact score** and the identifier openings. The signed
|
|
260
|
+
bundle payload is the source of truth; the SD-JWT is a derived, bundle-bound view, verified
|
|
261
|
+
by proofbundle's own verifier **and** the `sd-jwt-python` reference.
|
|
262
|
+
|
|
246
263
|
## Roadmap
|
|
247
264
|
|
|
248
265
|
- **v0.1** — the offline verifier plus a real example bundle.
|
|
249
266
|
- **v0.2** — the emitter: `emit_bundle` / `proofbundle emit`.
|
|
250
267
|
- **v0.3** — external RFC 6962 conformance vectors + real Sigstore Rekor interop.
|
|
251
|
-
- **v0.4
|
|
252
|
-
|
|
253
|
-
- **v0.5** —
|
|
254
|
-
|
|
268
|
+
- **v0.4** — the eval-receipt emitter (`emit_eval_receipt` / `proofbundle emit-eval`),
|
|
269
|
+
salted commitments, issuer binding.
|
|
270
|
+
- **v0.5 (current release)** — inspect_ai adapter (stable API), in-toto Statement v1 view,
|
|
271
|
+
and SD-JWT **issuance** per RFC 9901 (selective disclosure of the exact score).
|
|
272
|
+
- **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
|
|
273
|
+
Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
|
|
255
274
|
|
|
256
275
|
## Contributing
|
|
257
276
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "proofbundle"
|
|
7
|
-
version = "0.4.1"
|
|
7
|
+
version = "0.5.0"
|
|
8
8
|
description = "Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -43,9 +43,14 @@ sdjwt = []
|
|
|
43
43
|
# path (verify_bundle / decode_eval_claim) never canonicalizes — it checks stored bytes — so the
|
|
44
44
|
# verifier stays dependency-free. `pip install proofbundle[eval]` adds emit-side canonicalization.
|
|
45
45
|
eval = ["rfc8785>=0.1.4"]
|
|
46
|
-
#
|
|
46
|
+
# The lm-eval adapter reads exported results.json (no import) → pure stdlib.
|
|
47
47
|
adapters = []
|
|
48
|
-
|
|
48
|
+
# The inspect_ai adapter uses the STABLE read_eval_log API (lazy import). Pinned with an UPPER bound:
|
|
49
|
+
# the .eval format + pydantic schema change between versions (inspect_ai issue 834), and the fixture
|
|
50
|
+
# test is bound to this range. `pip install "proofbundle[inspect]"`.
|
|
51
|
+
inspect = ["inspect_ai>=0.3.100,<0.4"]
|
|
52
|
+
dev = ["pytest>=7", "ruff>=0.5", "jsonschema>=4", "mypy>=1.8", "build>=1", "hypothesis>=6",
|
|
53
|
+
"rfc8785>=0.1.4", "sd-jwt>=0.10", "inspect_ai>=0.3.100,<0.4"]
|
|
49
54
|
|
|
50
55
|
[project.urls]
|
|
51
56
|
Homepage = "https://b7n0de.com"
|
|
@@ -13,7 +13,7 @@ from .emit import emit_bundle, generate_signer
|
|
|
13
13
|
from .errors import Check, ProofBundleError, VerificationResult
|
|
14
14
|
from .merkle import verify_consistency, verify_inclusion
|
|
15
15
|
|
|
16
|
-
__version__ = "0.4.1"
|
|
16
|
+
__version__ = "0.5.0"
|
|
17
17
|
|
|
18
18
|
__all__ = [
|
|
19
19
|
"__version__",
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Adapter for UK AISI inspect_ai eval logs — via the STABLE API, optional extra `proofbundle[inspect]`.
|
|
2
|
+
|
|
3
|
+
Unlike the v0.4 file-based reader, this uses the stable `inspect_ai.log.read_eval_log(path,
|
|
4
|
+
header_only=True)` API (the `.eval` on-disk format + its pydantic schema change between versions, see
|
|
5
|
+
inspect_ai issue 834; the stable API is robust). inspect_ai is imported LAZILY inside the function, so
|
|
6
|
+
the proofbundle core stays dependency-free — only `pip install "proofbundle[inspect]"` pulls it.
|
|
7
|
+
|
|
8
|
+
Object model (inspect_ai): `log.eval.task` is the suite; `log.results.scores` is a list of EvalScore;
|
|
9
|
+
`EvalScore.metrics` is a dict name→EvalMetric; `EvalMetric.value` is the number. threshold, comparator
|
|
10
|
+
and thus `passed` are set by proofbundle, NOT read from the log. model_id/dataset_id become salted
|
|
11
|
+
commitments (never plaintext in the payload).
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
from ..evalclaim import build_eval_claim
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class InspectAdapterError(RuntimeError):
    """Adapter-level failure while turning an inspect_ai log into an eval claim.

    Raised when inspect_ai cannot be imported or the log does not have the structure the
    adapter expects, so callers get one clear error type rather than a bare AttributeError.
    """
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def from_inspect_ai_log(path, metric: str, *, comparator: str, threshold: str, timestamp: str,
                        model_salt: Optional[bytes] = None, dataset_salt: Optional[bytes] = None):
    """Turn one metric of an inspect_ai eval log into a proofbundle eval claim.

    The log is loaded with ``read_eval_log(str(path), header_only=True)``. inspect_ai is imported
    here, at call time, so importing this module never requires the optional dependency.

    Args:
        path: Location of the eval log; handed to inspect_ai as ``str(path)``.
        metric: Name looked up in each ``score.metrics`` mapping; the first score carrying it wins.
        comparator: Forwarded unchanged to ``build_eval_claim``.
        threshold: Forwarded unchanged to ``build_eval_claim``.
        timestamp: Forwarded unchanged to ``build_eval_claim``.
        model_salt: Optional salt, forwarded unchanged to ``build_eval_claim``.
        dataset_salt: Optional salt, forwarded unchanged to ``build_eval_claim``.

    Returns:
        The result of ``build_eval_claim``, which callers unpack as ``(claim, salts)``.

    Raises:
        InspectAdapterError: inspect_ai is not importable, the log cannot be read, the log has
            no ``eval``/``results`` section, or no usable value exists for ``metric``.
    """
    try:
        from inspect_ai.log import read_eval_log  # noqa: PLC0415 — lazy: keeps the core dependency-free
    except ImportError as exc:
        raise InspectAdapterError(
            "inspect_ai is required for this adapter — install with: pip install \"proofbundle[inspect]\"") from exc

    try:
        log = read_eval_log(str(path), header_only=True)
    except Exception as exc:  # noqa: BLE001 — surface any read/parse failure as a clear adapter error
        raise InspectAdapterError(f"could not read inspect_ai log {path!r}: {exc}") from exc

    header = getattr(log, "eval", None)
    results = getattr(log, "results", None)
    if header is None or results is None:
        raise InspectAdapterError("inspect_ai log missing .eval or .results (empty or malformed log)")

    # Lazily walk the scores and stop at the first one whose metrics mapping has the metric.
    score_entries = getattr(results, "scores", None) or []
    metric_maps = (getattr(entry, "metrics", None) or {} for entry in score_entries)
    matched = next((mapping[metric] for mapping in metric_maps if metric in mapping), None)
    value = getattr(matched, "value", None)
    if value is None:
        raise InspectAdapterError(f"metric {metric!r} not found in any score.metrics of the log")

    suite = str(getattr(header, "task", "inspect_ai"))
    model_id = str(getattr(header, "model", "unknown"))
    dataset = getattr(header, "dataset", None)
    # A log without a dataset name falls back to the suite name as the dataset identifier.
    dataset_id = str(getattr(dataset, "name", None) or suite)
    suite_version = str(getattr(header, "task_version", "1"))
    sample_count = int(getattr(results, "total_samples", 0) or 0)
    return build_eval_claim(
        suite=suite,
        suite_version=suite_version,
        metric=metric,
        comparator=comparator,
        threshold=threshold,
        score=repr(value),
        n=sample_count,
        model_id=model_id,
        dataset_id=dataset_id,
        issuer="",
        timestamp=timestamp,
        model_salt=model_salt,
        dataset_salt=dataset_salt,
    )
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""in-toto Statement v1 view of an eval receipt (self-hosted predicate type).
|
|
2
|
+
|
|
3
|
+
A self-hosted `predicateType` URI is fully in-toto-spec-conform and the right choice for a solo v0.x
|
|
4
|
+
(no official in-toto/attestation PR needed). See PREDICATE.md.
|
|
5
|
+
|
|
6
|
+
HONESTY (important): the `subject.digest` here is a SALTED COMMITMENT to the model identifier, NOT the
|
|
7
|
+
content hash of an artifact. Placing it under the standard `sha256` key would suggest an artifact hash
|
|
8
|
+
and mislead generic in-toto verifiers. in-toto permits arbitrary digest keys, so we use a unique custom
|
|
9
|
+
key `proofbundleModelCommitV1`; the `subject.name` is the descriptive `model-id-commitment`; and the
|
|
10
|
+
predicate mirrors the note in `subject_digest_note`. Full artifact digests come only once a model artifact
|
|
11
|
+
exists (deferred, see the roadmap).
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
# Value of the statement's "_type" field (in-toto Statement v1).
STATEMENT_TYPE = "https://in-toto.io/Statement/v1"
# Self-hosted predicate type URI for the eval-receipt predicate.
PREDICATE_TYPE = "https://b7n0de.com/proofbundle/eval-receipt/v0.1"
# Identifier written to predicate.verifier.id.
VERIFIER_ID = "https://b7n0de.com/proofbundle"
# Custom digest key for the subject: the value is a salted commitment, so it is kept off `sha256`.
MODEL_COMMIT_DIGEST_KEY = "proofbundleModelCommitV1"

# Human-readable warning mirrored into the predicate next to the subject digest.
_SUBJECT_DIGEST_NOTE = (
    "subject.digest is a salted commitment to the model identifier (key "
    + MODEL_COMMIT_DIGEST_KEY
    + "), NOT an artifact content hash — do not treat it as sha256."
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _commit_hex(commit: str) -> str:
    """Return what follows the first ``:`` of a commitment (the hex of ``sha256:<hex>``).

    A value that contains no ``:`` is returned unchanged.
    """
    _prefix, separator, remainder = commit.partition(":")
    return remainder if separator else commit
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def to_intoto_statement(claim: dict, *, root_b64: Optional[str] = None,
                        harness: Optional[dict] = None) -> dict:
    """Render an eval claim as an in-toto Statement v1 dict with the eval receipt as predicate.

    Args:
        claim: Eval claim; ``model_id_commit``, ``timestamp``, ``suite``, ``metric``,
            ``comparator``, ``threshold`` and ``passed`` are required, ``dataset_id_commit``
            is optional (emitted as ``None`` when absent).
        root_b64: When truthy, added as ``predicate.receipt.root_b64`` to tie the statement to
            the signed bundle's merkle root.
        harness: When truthy, attached as ``predicate.harness``
            (e.g. ``{"name": "inspect_ai", "version": "0.3.217"}``).

    Returns:
        The statement dict. Its single subject carries the model commitment under the custom
        digest key ``MODEL_COMMIT_DIGEST_KEY`` — never under ``sha256``.

    Raises:
        KeyError: a required claim field is missing.
    """
    subject = {
        "name": "model-id-commitment",
        "digest": {MODEL_COMMIT_DIGEST_KEY: _commit_hex(claim["model_id_commit"])},
    }
    threshold_claim = {
        field: claim[field] for field in ("metric", "comparator", "threshold", "passed")
    }
    predicate = {
        "verifier": {"id": VERIFIER_ID},
        "evaluatedAt": claim["timestamp"],
        "suite": claim["suite"],
        "claims": [threshold_claim],
        "datasetCommit": claim.get("dataset_id_commit"),
        "subject_digest_note": _SUBJECT_DIGEST_NOTE,
    }
    # Optional sections are appended only when the caller supplied a truthy value.
    if harness:
        predicate["harness"] = harness
    if root_b64:
        predicate["receipt"] = {"schema": "proofbundle/v0.1", "root_b64": root_b64}
    return {
        "_type": STATEMENT_TYPE,
        "subject": [subject],
        "predicateType": PREDICATE_TYPE,
        "predicate": predicate,
    }
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""SD-JWT issuance per RFC 9901 — the differentiation feature (v0.5).
|
|
2
|
+
|
|
3
|
+
Issue an eval receipt so a holder can disclose `passed` + `threshold` while WITHHOLDING the exact score
|
|
4
|
+
and the identifier openings. The existing verifier (proofbundle.sdjwt) stays; this adds issuance.
|
|
5
|
+
|
|
6
|
+
Source of truth: the signed canonical bundle payload (evalclaim) is the ONLY truth. This SD-JWT is a
|
|
7
|
+
derived view — its always-open claims are copied bit-exact from that payload, and it binds the bundle
|
|
8
|
+
anchor via `receipt.root_b64`. Sign the SD-JWT with the SAME Ed25519 key that signed the bundle (matching
|
|
9
|
+
the `issuer` field). A holder cannot lift a claim under a different key.
|
|
10
|
+
|
|
11
|
+
Always-open (plaintext JWT claims, NEVER a disclosure): passed, threshold, comparator, suite, issuer,
|
|
12
|
+
receipt.root_b64. Selectively-disclosable (via `_sd` + disclosures): the exact metric value, ci95, and
|
|
13
|
+
the identifier-commitment openings (identifier + salt).
|
|
14
|
+
|
|
15
|
+
RFC 9901 §4.2.4.1 digest byte-chain (the subtle, load-bearing detail): for each disclosable field, a
|
|
16
|
+
CSPRNG salt of ≥128 bit (base64url); the disclosure is base64url(UTF-8(JSON array [salt, name, value]));
|
|
17
|
+
the digest placed in `_sd` is **base64url(SHA-256(ASCII bytes of the base64url-ENCODED disclosure
|
|
18
|
+
string)))** — hashed over the ENCODED string, NOT over the JSON bytes. `_sd_alg` = "sha-256" at the top
|
|
19
|
+
level. The JWT is signed with EdDSA. Compact form is tilde-separated: JWT~disclosure1~...~ (trailing ~).
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import base64
|
|
24
|
+
import hashlib
|
|
25
|
+
import json
|
|
26
|
+
import os
|
|
27
|
+
from typing import Optional, Sequence
|
|
28
|
+
|
|
29
|
+
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
|
|
30
|
+
from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat
|
|
31
|
+
|
|
32
|
+
# Value written to the top-level `_sd_alg` claim: disclosure digests use SHA-256.
SD_ALG = "sha-256"
# Size in bytes of each per-disclosure random salt (128 bits).
_SALT_BYTES = 128 // 8
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _b64url(data: bytes) -> str:
    """Encode *data* as URL-safe base64 text with the trailing ``=`` padding removed."""
    padded_text = base64.urlsafe_b64encode(data).decode("ascii")
    return padded_text.rstrip("=")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _make_disclosure(name: str, value, salt_b64: str) -> tuple[str, str]:
    """Build one disclosure and its digest as ``(disclosure_b64url, digest_b64url)``.

    The disclosure is the base64url encoding of the UTF-8 JSON array ``[salt, name, value]``.
    The digest is the base64url SHA-256 of the ASCII bytes of that ENCODED disclosure string —
    not of the JSON bytes (RFC 9901 §4.2.4.1).
    """
    json_array = json.dumps([salt_b64, name, value])
    encoded_disclosure = _b64url(json_array.encode("utf-8"))
    sha = hashlib.sha256(encoded_disclosure.encode("ascii"))
    return encoded_disclosure, _b64url(sha.digest())
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def issue_sd_jwt(claim: dict, signer: Ed25519PrivateKey, *, root_b64: str,
                 exact_score: Optional[str] = None, ci95: Optional[Sequence[str]] = None,
                 model_id_opening: Optional[Sequence] = None,
                 dataset_id_opening: Optional[Sequence] = None) -> str:
    """Issue a compact SD-JWT for the eval claim, signed with `signer`.

    Always-open JWT claims: ``passed``, ``threshold``, ``comparator``, ``suite`` and ``issuer``
    (copied from `claim`) plus ``receipt.root_b64``. Every optional argument that is not ``None``
    becomes one selectively-disclosable claim: its digest goes into ``_sd`` and its disclosure
    is appended to the compact form.

    Args:
        claim: Eval claim providing the always-open fields.
        signer: Ed25519 key used to sign the JWT. It is meant to be the key named by
            ``claim['issuer']``; this function does not check that (see ``issuer_matches``).
        root_b64: Merkle root of the signed bundle, embedded as ``receipt.root_b64``.
        exact_score: Exact metric value to withhold behind a disclosure.
        ci95: Confidence-interval values to withhold (stored as a list).
        model_id_opening: (identifier, salt_hex) opening of the model commitment (stored as a list).
        dataset_id_opening: (identifier, salt_hex) opening of the dataset commitment (stored as a list).

    Returns:
        ``<JWT>~<disclosure 1>~...~`` with a trailing ``~`` (no key-binding JWT).

    Raises:
        KeyError: `claim` lacks one of the always-open fields.
    """
    always_open = {
        "passed": claim["passed"], "threshold": claim["threshold"],
        "comparator": claim["comparator"], "suite": claim["suite"],
        "issuer": claim["issuer"], "receipt": {"root_b64": root_b64},
    }
    disclosures: list[str] = []
    sd_digests: list[str] = []

    def _add(name: str, value):
        # Fresh random salt per disclosure, so equal values never yield equal digests.
        d, dig = _make_disclosure(name, value, _b64url(os.urandom(_SALT_BYTES)))
        disclosures.append(d)
        sd_digests.append(dig)

    if exact_score is not None:
        _add("exact_score", exact_score)
    if ci95 is not None:
        _add("ci95", list(ci95))
    if model_id_opening is not None:
        _add("model_id_opening", list(model_id_opening))
    if dataset_id_opening is not None:
        _add("dataset_id_opening", list(dataset_id_opening))

    payload = dict(always_open)
    if sd_digests:
        # RFC 9901 §4.2.4.1: the issuer MUST hide the original order of the claims in `_sd`.
        # Sorting the digests makes their position independent of the claim they belong to;
        # the order of the disclosures in the compact form carries no meaning for verification.
        payload["_sd"] = sorted(sd_digests)
    payload["_sd_alg"] = SD_ALG

    header = {"alg": "EdDSA", "typ": "sd-jwt"}
    signing_input = _b64url(json.dumps(header).encode("utf-8")) + "." + _b64url(json.dumps(payload).encode("utf-8"))
    signature = signer.sign(signing_input.encode("ascii"))
    jwt = signing_input + "." + _b64url(signature)

    # compact: JWT ~ disclosure1 ~ ... ~ (trailing tilde, no key-binding JWT in v0.5)
    return "~".join([jwt, *disclosures]) + "~"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def issuer_matches(claim: dict, signer: Ed25519PrivateKey) -> bool:
    """Report whether ``claim['issuer']`` names the public key of `signer`.

    The expected issuer string is ``"ed25519:"`` followed by the standard (padded) base64 of the
    raw public key bytes. A claim without an ``issuer`` entry never matches.
    """
    public_raw = signer.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw)
    expected_issuer = "ed25519:" + base64.b64encode(public_raw).decode("ascii")
    return claim.get("issuer") == expected_issuer
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _jwt_payload(compact: str) -> dict:
    """Decode the always-open JWT payload of a compact SD-JWT (the part before the first '~').

    The signature is NOT verified here; this only base64url-decodes and JSON-parses the payload.

    Raises:
        IndexError: the JWT part has no ``.``-separated payload segment.
        ValueError: the segment is not valid base64/UTF-8/JSON, or the decoded JSON is not an object.
    """
    jwt = compact.split("~", 1)[0]
    payload_b64 = jwt.split(".")[1]
    # base64url segments are carried unpadded; restore the padding before decoding.
    padded = payload_b64 + "=" * (-len(payload_b64) % 4)
    payload = json.loads(base64.urlsafe_b64decode(padded).decode("utf-8"))
    if not isinstance(payload, dict):
        # A JSON list/string/number/null is well-formed JSON but not a JWT claims set.
        raise ValueError("SD-JWT payload is not a JSON object")
    return payload


def check_binds_bundle(compact: str, claim: dict, root_b64: str) -> bool:
    """No-Fake binding: the SD-JWT's always-open claims MUST match the signed bundle payload bit-exact and
    bind its merkle root. A derived SD-JWT that diverges from its bundle source of truth is rejected.

    Only the binding is checked — the JWT signature is not verified by this function.

    Args:
        compact: Compact SD-JWT (``JWT~disclosure~...~``), possibly malformed.
        claim: The signed bundle's eval claim (source of truth).
        root_b64: Merkle root the SD-JWT must carry in ``receipt.root_b64``.

    Returns:
        True when ``passed``, ``threshold``, ``comparator``, ``suite`` and ``issuer`` equal the
        claim's values and ``receipt.root_b64`` equals `root_b64`; False otherwise, including for
        any SD-JWT whose payload cannot be decoded or is not shaped like a claims object.
    """
    try:
        p = _jwt_payload(compact)
    except (ValueError, KeyError, IndexError):
        return False
    for field in ("passed", "threshold", "comparator", "suite", "issuer"):
        if p.get(field) != claim[field]:
            return False
    receipt = p.get("receipt") or {}
    if not isinstance(receipt, dict):
        # A scalar or list "receipt" cannot bind a root; reject instead of crashing on .get().
        return False
    return receipt.get("root_b64") == root_b64
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: proofbundle
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
|
|
5
5
|
Author: Konrad Gruszka
|
|
6
6
|
License: MIT
|
|
@@ -27,6 +27,8 @@ Provides-Extra: sdjwt
|
|
|
27
27
|
Provides-Extra: eval
|
|
28
28
|
Requires-Dist: rfc8785>=0.1.4; extra == "eval"
|
|
29
29
|
Provides-Extra: adapters
|
|
30
|
+
Provides-Extra: inspect
|
|
31
|
+
Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "inspect"
|
|
30
32
|
Provides-Extra: dev
|
|
31
33
|
Requires-Dist: pytest>=7; extra == "dev"
|
|
32
34
|
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
@@ -35,6 +37,8 @@ Requires-Dist: mypy>=1.8; extra == "dev"
|
|
|
35
37
|
Requires-Dist: build>=1; extra == "dev"
|
|
36
38
|
Requires-Dist: hypothesis>=6; extra == "dev"
|
|
37
39
|
Requires-Dist: rfc8785>=0.1.4; extra == "dev"
|
|
40
|
+
Requires-Dist: sd-jwt>=0.10; extra == "dev"
|
|
41
|
+
Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "dev"
|
|
38
42
|
Dynamic: license-file
|
|
39
43
|
|
|
40
44
|
<div align="center">
|
|
@@ -61,7 +65,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
|
|
|
61
65
|
|
|
62
66
|
**At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
|
|
63
67
|
verify` checks one self-contained `bundle.json` with three offline cryptographic
|
|
64
|
-
checks → `OK` or `FAILED`. No network, no daemon, no own crypto.
|
|
68
|
+
checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 62 tests.
|
|
65
69
|
|
|
66
70
|
## Contents
|
|
67
71
|
|
|
@@ -282,15 +286,34 @@ commitments — it does **not** prove the evaluation was well designed or that t
|
|
|
282
286
|
itself is correct. Those are human judgements; what it removes is the need to simply
|
|
283
287
|
trust the number.
|
|
284
288
|
|
|
289
|
+
### Since v0.5: framework adapter, in-toto, selective disclosure
|
|
290
|
+
|
|
291
|
+
- **inspect_ai adapter** (`pip install "proofbundle[inspect]"`) reads a UK AISI
|
|
292
|
+
[inspect_ai](https://github.com/UKGovernmentBEIS/inspect_ai) eval log via the stable
|
|
293
|
+
`read_eval_log` API (lazy import; the core stays dependency-free) and maps it to a claim.
|
|
294
|
+
`proofbundle.adapters.from_lm_eval_results` reads lm-evaluation-harness `results.json`
|
|
295
|
+
without importing anything.
|
|
296
|
+
- **in-toto Statement v1** — `proofbundle.intoto.to_intoto_statement(claim, root_b64=…)`
|
|
297
|
+
emits the receipt as an in-toto statement with a self-hosted predicate type. The subject
|
|
298
|
+
digest is an *honest salted commitment* under a custom key, never `sha256` (see
|
|
299
|
+
[PREDICATE.md](PREDICATE.md)).
|
|
300
|
+
- **SD-JWT issuance** (RFC 9901) — `proofbundle.sdjwt_issue.issue_sd_jwt(claim, signer,
|
|
301
|
+
root_b64=…, exact_score=…)` issues the receipt so a holder can disclose `passed` +
|
|
302
|
+
`threshold` while **withholding the exact score** and the identifier openings. The signed
|
|
303
|
+
bundle payload is the source of truth; the SD-JWT is a derived, bundle-bound view, verified
|
|
304
|
+
by proofbundle's own verifier **and** the `sd-jwt-python` reference.
|
|
305
|
+
|
|
285
306
|
## Roadmap
|
|
286
307
|
|
|
287
308
|
- **v0.1** — the offline verifier plus a real example bundle.
|
|
288
309
|
- **v0.2** — the emitter: `emit_bundle` / `proofbundle emit`.
|
|
289
310
|
- **v0.3** — external RFC 6962 conformance vectors + real Sigstore Rekor interop.
|
|
290
|
-
- **v0.4
|
|
291
|
-
|
|
292
|
-
- **v0.5** —
|
|
293
|
-
|
|
311
|
+
- **v0.4** — the eval-receipt emitter (`emit_eval_receipt` / `proofbundle emit-eval`),
|
|
312
|
+
salted commitments, issuer binding.
|
|
313
|
+
- **v0.5 (current release)** — inspect_ai adapter (stable API), in-toto Statement v1 view,
|
|
314
|
+
and SD-JWT **issuance** per RFC 9901 (selective disclosure of the exact score).
|
|
315
|
+
- **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
|
|
316
|
+
Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
|
|
294
317
|
|
|
295
318
|
## Contributing
|
|
296
319
|
|
|
@@ -7,9 +7,11 @@ src/proofbundle/cli.py
|
|
|
7
7
|
src/proofbundle/emit.py
|
|
8
8
|
src/proofbundle/errors.py
|
|
9
9
|
src/proofbundle/evalclaim.py
|
|
10
|
+
src/proofbundle/intoto.py
|
|
10
11
|
src/proofbundle/merkle.py
|
|
11
12
|
src/proofbundle/py.typed
|
|
12
13
|
src/proofbundle/sdjwt.py
|
|
14
|
+
src/proofbundle/sdjwt_issue.py
|
|
13
15
|
src/proofbundle/signature.py
|
|
14
16
|
src/proofbundle.egg-info/PKG-INFO
|
|
15
17
|
src/proofbundle.egg-info/SOURCES.txt
|
|
@@ -27,10 +29,12 @@ tests/test_cli_eval.py
|
|
|
27
29
|
tests/test_emit.py
|
|
28
30
|
tests/test_eval_claim_schema.py
|
|
29
31
|
tests/test_evalclaim.py
|
|
32
|
+
tests/test_intoto.py
|
|
30
33
|
tests/test_merkle.py
|
|
31
34
|
tests/test_merkle_property.py
|
|
32
35
|
tests/test_rekor_interop.py
|
|
33
36
|
tests/test_rfc6962_external_vectors.py
|
|
34
37
|
tests/test_schema.py
|
|
38
|
+
tests/test_sdjwt_issue.py
|
|
35
39
|
tests/test_sdjwt_reference.py
|
|
36
40
|
tests/test_signature.py
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Adapters map real exported eval JSON to a valid claim (file-based, no framework import)."""
|
|
2
|
+
import unittest
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from proofbundle.adapters import from_inspect_ai_log, from_lm_eval_results
|
|
6
|
+
|
|
7
|
+
FX = Path(__file__).resolve().parent / "fixtures"
|
|
8
|
+
TS = "2026-07-01T12:00:00Z"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestAdapters(unittest.TestCase):
|
|
12
|
+
def test_lm_eval(self):
|
|
13
|
+
claim, salts = from_lm_eval_results(FX / "lm_eval_results.json", "hellaswag", "acc",
|
|
14
|
+
comparator=">=", threshold="0.70", timestamp=TS,
|
|
15
|
+
model_salt=b"0" * 16, dataset_salt=b"1" * 16)
|
|
16
|
+
self.assertEqual(claim["suite"], "hellaswag")
|
|
17
|
+
self.assertEqual(claim["threshold"], "0.70")
|
|
18
|
+
self.assertTrue(claim["passed"]) # 0.7534 >= 0.70
|
|
19
|
+
self.assertNotIn("acme/model-x", str(claim)) # id only as salted commitment
|
|
20
|
+
self.assertEqual(claim["n"], 10042)
|
|
21
|
+
|
|
22
|
+
def test_inspect_ai_stable_api(self):
|
|
23
|
+
# Real .eval log fixture, read via the stable inspect_ai.log.read_eval_log API (proofbundle[inspect]).
|
|
24
|
+
try:
|
|
25
|
+
import inspect_ai.log # noqa: F401
|
|
26
|
+
except ImportError:
|
|
27
|
+
self.skipTest("inspect_ai not installed (pip install proofbundle[inspect])")
|
|
28
|
+
claim, salts = from_inspect_ai_log(FX / "inspect_logs" / "safety_refusal_demo.eval", "accuracy",
|
|
29
|
+
comparator=">=", threshold="0.00", timestamp=TS,
|
|
30
|
+
model_salt=b"0" * 16, dataset_salt=b"1" * 16)
|
|
31
|
+
self.assertEqual(claim["suite"], "safety_refusal_demo")
|
|
32
|
+
self.assertTrue(claim["passed"]) # accuracy 0.0 >= 0.00
|
|
33
|
+
self.assertNotIn("mockllm/model", str(claim)) # model id only as salted commitment
|
|
34
|
+
|
|
35
|
+
def test_inspect_ai_missing_metric_clear_error(self):
|
|
36
|
+
from proofbundle.adapters.inspect_ai import InspectAdapterError
|
|
37
|
+
try:
|
|
38
|
+
import inspect_ai.log # noqa: F401
|
|
39
|
+
except ImportError:
|
|
40
|
+
self.skipTest("inspect_ai not installed")
|
|
41
|
+
with self.assertRaises(InspectAdapterError):
|
|
42
|
+
from_inspect_ai_log(FX / "inspect_logs" / "safety_refusal_demo.eval", "nonexistent_metric",
|
|
43
|
+
comparator=">=", threshold="0.5", timestamp=TS,
|
|
44
|
+
model_salt=b"0" * 16, dataset_salt=b"1" * 16)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
if __name__ == "__main__":
|
|
48
|
+
unittest.main()
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""in-toto Statement v1 view of an eval receipt — structurally valid + honest salted-commitment digest."""
|
|
2
|
+
import json
|
|
3
|
+
import unittest
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
import jsonschema
|
|
8
|
+
except ImportError:
|
|
9
|
+
jsonschema = None
|
|
10
|
+
|
|
11
|
+
from proofbundle.emit import generate_signer
|
|
12
|
+
from proofbundle.evalclaim import build_eval_claim, issuer_fingerprint
|
|
13
|
+
from proofbundle.intoto import MODEL_COMMIT_DIGEST_KEY, PREDICATE_TYPE, to_intoto_statement
|
|
14
|
+
|
|
15
|
+
ROOT = Path(__file__).resolve().parents[1]
|
|
16
|
+
TS = "2026-07-01T12:00:00Z"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _claim():
|
|
20
|
+
signer = generate_signer()
|
|
21
|
+
claim, _ = build_eval_claim(
|
|
22
|
+
suite="safety-refusal", suite_version="v1", metric="accuracy", comparator=">=",
|
|
23
|
+
threshold="0.65", score="0.92", n=500, model_id="acme/model-x", dataset_id="acme/set",
|
|
24
|
+
issuer=issuer_fingerprint(signer), timestamp=TS, model_salt=b"0" * 16, dataset_salt=b"1" * 16)
|
|
25
|
+
return claim
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TestInToto(unittest.TestCase):
|
|
29
|
+
def test_structure(self):
|
|
30
|
+
stmt = to_intoto_statement(_claim(), root_b64="cm9vdA==",
|
|
31
|
+
harness={"name": "inspect_ai", "version": "0.3.217"})
|
|
32
|
+
self.assertEqual(stmt["_type"], "https://in-toto.io/Statement/v1")
|
|
33
|
+
self.assertEqual(stmt["predicateType"], PREDICATE_TYPE)
|
|
34
|
+
self.assertEqual(len(stmt["subject"]), 1)
|
|
35
|
+
self.assertIn("digest", stmt["subject"][0])
|
|
36
|
+
# honest custom digest key, NOT "sha256" (a sha256 key would mislead generic verifiers into reading the salted commitment as an artifact hash)
|
|
37
|
+
self.assertIn(MODEL_COMMIT_DIGEST_KEY, stmt["subject"][0]["digest"])
|
|
38
|
+
self.assertNotIn("sha256", stmt["subject"][0]["digest"])
|
|
39
|
+
self.assertIn("salted commitment", stmt["predicate"]["subject_digest_note"])
|
|
40
|
+
self.assertEqual(stmt["predicate"]["receipt"]["root_b64"], "cm9vdA==")
|
|
41
|
+
|
|
42
|
+
def test_digest_is_commit_hex(self):
|
|
43
|
+
claim = _claim()
|
|
44
|
+
stmt = to_intoto_statement(claim)
|
|
45
|
+
expected_hex = claim["model_id_commit"].split(":", 1)[1]
|
|
46
|
+
self.assertEqual(stmt["subject"][0]["digest"][MODEL_COMMIT_DIGEST_KEY], expected_hex)
|
|
47
|
+
|
|
48
|
+
@unittest.skipIf(jsonschema is None, "jsonschema not installed (pip install proofbundle[dev])")
|
|
49
|
+
def test_validates_against_official_intoto_v1_schema(self):
|
|
50
|
+
schema = json.loads((ROOT / "schemas" / "in_toto_statement_v1.schema.json").read_text(encoding="utf-8"))
|
|
51
|
+
stmt = to_intoto_statement(_claim(), root_b64="cm9vdA==")
|
|
52
|
+
jsonschema.validate(instance=stmt, schema=schema) # raises if invalid
|
|
53
|
+
|
|
54
|
+
@unittest.skipIf(jsonschema is None, "jsonschema not installed")
|
|
55
|
+
def test_schema_rejects_missing_subject(self):
|
|
56
|
+
schema = json.loads((ROOT / "schemas" / "in_toto_statement_v1.schema.json").read_text(encoding="utf-8"))
|
|
57
|
+
bad = {"_type": "https://in-toto.io/Statement/v1", "predicateType": "x", "subject": []}
|
|
58
|
+
with self.assertRaises(jsonschema.ValidationError):
|
|
59
|
+
jsonschema.validate(instance=bad, schema=schema)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
if __name__ == "__main__":
|
|
63
|
+
unittest.main()
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""SD-JWT issuance (v0.5, RFC 9901) — own verifier + reference interop + red-tests. No-Fake."""
|
|
2
|
+
import base64
|
|
3
|
+
import json
|
|
4
|
+
import unittest
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat
|
|
8
|
+
|
|
9
|
+
from proofbundle.emit import generate_signer
|
|
10
|
+
from proofbundle.evalclaim import build_eval_claim, issuer_fingerprint
|
|
11
|
+
from proofbundle.sdjwt import verify_sd_jwt
|
|
12
|
+
from proofbundle.sdjwt_issue import (
|
|
13
|
+
_make_disclosure,
|
|
14
|
+
check_binds_bundle,
|
|
15
|
+
issue_sd_jwt,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
FX = Path(__file__).resolve().parent / "fixtures"
|
|
19
|
+
TS = "2026-07-01T12:00:00Z"
|
|
20
|
+
ROOT_B64 = "cm9vdA=="
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _claim(signer):
|
|
24
|
+
claim, _ = build_eval_claim(suite="safety", suite_version="v1", metric="accuracy", comparator=">=",
|
|
25
|
+
threshold="0.65", score="0.92", n=500, model_id="acme/model-x", dataset_id="acme/set",
|
|
26
|
+
issuer=issuer_fingerprint(signer), timestamp=TS, model_salt=b"0" * 16, dataset_salt=b"1" * 16)
|
|
27
|
+
return claim
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _raw_pub(signer):
|
|
31
|
+
return signer.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TestSdJwtIssue(unittest.TestCase):
|
|
35
|
+
def test_own_verifier_accepts(self):
|
|
36
|
+
signer = generate_signer()
|
|
37
|
+
compact = issue_sd_jwt(_claim(signer), signer, root_b64=ROOT_B64, exact_score="0.92", ci95=["0.90", "0.94"])
|
|
38
|
+
res = verify_sd_jwt(compact, _raw_pub(signer))
|
|
39
|
+
self.assertTrue(res["structure_ok"], res)
|
|
40
|
+
self.assertTrue(res["sig_ok"], res)
|
|
41
|
+
|
|
42
|
+
def test_reference_verifier_accepts(self):
|
|
43
|
+
try:
|
|
44
|
+
from jwcrypto.jwk import JWK
|
|
45
|
+
from sd_jwt.verifier import SDJWTVerifier
|
|
46
|
+
except ImportError:
|
|
47
|
+
self.skipTest("sd-jwt-python not installed (dev extra)")
|
|
48
|
+
signer = generate_signer()
|
|
49
|
+
compact = issue_sd_jwt(_claim(signer), signer, root_b64=ROOT_B64, exact_score="0.92")
|
|
50
|
+
jwk = JWK(kty="OKP", crv="Ed25519", x=base64.urlsafe_b64encode(_raw_pub(signer)).rstrip(b"=").decode())
|
|
51
|
+
payload = SDJWTVerifier(compact, lambda *_a, **_k: jwk).get_verified_payload()
|
|
52
|
+
self.assertEqual(payload["passed"], True) # always-open
|
|
53
|
+
self.assertEqual(payload["exact_score"], "0.92") # selectively disclosed
|
|
54
|
+
|
|
55
|
+
def test_digest_byte_chain_vector(self):
|
|
56
|
+
# RFC 9901 §4.2.3 (Hashing Disclosures): digest over the base64url-ENCODED disclosure string, not the JSON bytes.
|
|
57
|
+
v = json.loads((FX / "sdjwt_disclosure_vector.json").read_text(encoding="utf-8"))
|
|
58
|
+
d_b64, dig = _make_disclosure(v["name"], v["value"], v["salt_b64url"])
|
|
59
|
+
self.assertEqual(d_b64, v["disclosure_b64url"])
|
|
60
|
+
self.assertEqual(dig, v["expected_digest_b64url"])
|
|
61
|
+
|
|
62
|
+
def test_always_open_vs_selective(self):
|
|
63
|
+
signer = generate_signer()
|
|
64
|
+
compact = issue_sd_jwt(_claim(signer), signer, root_b64=ROOT_B64, exact_score="0.92")
|
|
65
|
+
jwt_payload = json.loads(base64.urlsafe_b64decode(
|
|
66
|
+
compact.split("~")[0].split(".")[1] + "==").decode("utf-8"))
|
|
67
|
+
# passed/threshold are plaintext; exact_score is NOT (only its digest is in _sd)
|
|
68
|
+
self.assertEqual(jwt_payload["passed"], True)
|
|
69
|
+
self.assertIn("threshold", jwt_payload)
|
|
70
|
+
self.assertNotIn("exact_score", jwt_payload)
|
|
71
|
+
self.assertIn("_sd", jwt_payload)
|
|
72
|
+
|
|
73
|
+
def test_binds_bundle(self):
|
|
74
|
+
signer = generate_signer()
|
|
75
|
+
claim = _claim(signer)
|
|
76
|
+
compact = issue_sd_jwt(claim, signer, root_b64=ROOT_B64, exact_score="0.92")
|
|
77
|
+
self.assertTrue(check_binds_bundle(compact, claim, ROOT_B64))
|
|
78
|
+
|
|
79
|
+
def test_divergence_red(self): # SD-JWT claims diverge from bundle → rejected
|
|
80
|
+
signer = generate_signer()
|
|
81
|
+
claim = _claim(signer)
|
|
82
|
+
compact = issue_sd_jwt(claim, signer, root_b64=ROOT_B64, exact_score="0.92")
|
|
83
|
+
diverged = dict(claim, passed=False) # bundle says passed=False, SD-JWT says True
|
|
84
|
+
self.assertFalse(check_binds_bundle(compact, diverged, ROOT_B64))
|
|
85
|
+
self.assertFalse(check_binds_bundle(compact, claim, "d3Jvbmc=")) # wrong root
|
|
86
|
+
|
|
87
|
+
def test_tamper_disclosure_red(self): # tampered disclosure → digest mismatch → own verifier fails
|
|
88
|
+
signer = generate_signer()
|
|
89
|
+
compact = issue_sd_jwt(_claim(signer), signer, root_b64=ROOT_B64, exact_score="0.92")
|
|
90
|
+
jwt, *disc = compact.rstrip("~").split("~")
|
|
91
|
+
tampered_d, _ = _make_disclosure("exact_score", "0.99", "AAAAAAAAAAAAAAAAAAAAAA") # not committed in _sd
|
|
92
|
+
tampered = "~".join([jwt, tampered_d]) + "~"
|
|
93
|
+
res = verify_sd_jwt(tampered, _raw_pub(signer))
|
|
94
|
+
self.assertFalse(res.get("structure_ok") and res.get("sig_ok") and "1 disclosure" in res.get("detail", ""))
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
if __name__ == "__main__":
|
|
98
|
+
unittest.main()
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
"""Adapter for UK AISI inspect_ai eval-log JSON (file-based, no framework import)."""
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
|
-
import json
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import Optional
|
|
7
|
-
|
|
8
|
-
from ..evalclaim import build_eval_claim
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def from_inspect_ai_log(path, metric: str, *, comparator: str, threshold: str, timestamp: str,
|
|
12
|
-
model_salt: Optional[bytes] = None, dataset_salt: Optional[bytes] = None):
|
|
13
|
-
"""Read an inspect_ai eval-log JSON and build an eval claim.
|
|
14
|
-
|
|
15
|
-
Expects: {"eval": {"task": ..., "model": ..., "dataset": {"name": ...}},
|
|
16
|
-
"results": {"total_samples": n, "scores": [{"metrics": {metric: {"value": <number>}}}]}}.
|
|
17
|
-
Returns (claim, salts).
|
|
18
|
-
"""
|
|
19
|
-
data = json.loads(Path(path).read_text(encoding="utf-8"))
|
|
20
|
-
ev = data.get("eval", {})
|
|
21
|
-
scores = data.get("results", {}).get("scores", [])
|
|
22
|
-
value = None
|
|
23
|
-
for s in scores:
|
|
24
|
-
m = s.get("metrics", {})
|
|
25
|
-
if metric in m:
|
|
26
|
-
value = m[metric].get("value")
|
|
27
|
-
break
|
|
28
|
-
if value is None:
|
|
29
|
-
raise ValueError(f"metric {metric!r} not found in inspect_ai scores")
|
|
30
|
-
n = int(data.get("results", {}).get("total_samples") or 0)
|
|
31
|
-
return build_eval_claim(
|
|
32
|
-
suite=str(ev.get("task", "inspect_ai")), suite_version=str(ev.get("task_version", "1")),
|
|
33
|
-
metric=metric, comparator=comparator, threshold=threshold, score=repr(value), n=n,
|
|
34
|
-
model_id=str(ev.get("model", "unknown")),
|
|
35
|
-
dataset_id=str(ev.get("dataset", {}).get("name", ev.get("task", "unknown"))),
|
|
36
|
-
issuer="", timestamp=timestamp, model_salt=model_salt, dataset_salt=dataset_salt)
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
"""Adapters map real exported eval JSON to a valid claim (file-based, no framework import)."""
|
|
2
|
-
import unittest
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
from proofbundle.adapters import from_inspect_ai_log, from_lm_eval_results
|
|
6
|
-
|
|
7
|
-
FX = Path(__file__).resolve().parent / "fixtures"
|
|
8
|
-
TS = "2026-07-01T12:00:00Z"
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class TestAdapters(unittest.TestCase):
|
|
12
|
-
def test_lm_eval(self):
|
|
13
|
-
claim, salts = from_lm_eval_results(FX / "lm_eval_results.json", "hellaswag", "acc",
|
|
14
|
-
comparator=">=", threshold="0.70", timestamp=TS,
|
|
15
|
-
model_salt=b"0" * 16, dataset_salt=b"1" * 16)
|
|
16
|
-
self.assertEqual(claim["suite"], "hellaswag")
|
|
17
|
-
self.assertEqual(claim["threshold"], "0.70")
|
|
18
|
-
self.assertTrue(claim["passed"]) # 0.7534 >= 0.70
|
|
19
|
-
self.assertNotIn("acme/model-x", str(claim)) # id only as salted commitment
|
|
20
|
-
self.assertEqual(claim["n"], 10042)
|
|
21
|
-
|
|
22
|
-
def test_inspect_ai(self):
|
|
23
|
-
claim, salts = from_inspect_ai_log(FX / "inspect_ai_log.json", "accuracy",
|
|
24
|
-
comparator=">=", threshold="0.80", timestamp=TS,
|
|
25
|
-
model_salt=b"0" * 16, dataset_salt=b"1" * 16)
|
|
26
|
-
self.assertEqual(claim["suite"], "safety_refusal")
|
|
27
|
-
self.assertTrue(claim["passed"]) # 0.92 >= 0.80
|
|
28
|
-
self.assertEqual(claim["n"], 500)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
if __name__ == "__main__":
|
|
32
|
-
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|