proofbundle 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. {proofbundle-0.4.0/src/proofbundle.egg-info → proofbundle-0.5.0}/PKG-INFO +31 -8
  2. {proofbundle-0.4.0 → proofbundle-0.5.0}/README.md +26 -7
  3. {proofbundle-0.4.0 → proofbundle-0.5.0}/pyproject.toml +8 -3
  4. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/__init__.py +1 -1
  5. proofbundle-0.5.0/src/proofbundle/adapters/inspect_ai.py +65 -0
  6. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/emit.py +2 -29
  7. proofbundle-0.5.0/src/proofbundle/intoto.py +63 -0
  8. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/sdjwt.py +1 -1
  9. proofbundle-0.5.0/src/proofbundle/sdjwt_issue.py +119 -0
  10. {proofbundle-0.4.0 → proofbundle-0.5.0/src/proofbundle.egg-info}/PKG-INFO +31 -8
  11. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle.egg-info/SOURCES.txt +4 -0
  12. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle.egg-info/requires.txt +5 -0
  13. proofbundle-0.5.0/tests/test_adapters.py +48 -0
  14. proofbundle-0.5.0/tests/test_intoto.py +63 -0
  15. proofbundle-0.5.0/tests/test_sdjwt_issue.py +98 -0
  16. proofbundle-0.4.0/src/proofbundle/adapters/inspect_ai.py +0 -36
  17. proofbundle-0.4.0/tests/test_adapters.py +0 -32
  18. {proofbundle-0.4.0 → proofbundle-0.5.0}/LICENSE +0 -0
  19. {proofbundle-0.4.0 → proofbundle-0.5.0}/setup.cfg +0 -0
  20. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/adapters/__init__.py +0 -0
  21. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/adapters/lm_eval.py +0 -0
  22. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/bundle.py +0 -0
  23. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/cli.py +0 -0
  24. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/errors.py +0 -0
  25. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/evalclaim.py +0 -0
  26. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/merkle.py +0 -0
  27. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/py.typed +0 -0
  28. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle/signature.py +0 -0
  29. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle.egg-info/dependency_links.txt +0 -0
  30. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle.egg-info/entry_points.txt +0 -0
  31. {proofbundle-0.4.0 → proofbundle-0.5.0}/src/proofbundle.egg-info/top_level.txt +0 -0
  32. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_bundle.py +0 -0
  33. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_cli.py +0 -0
  34. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_cli_eval.py +0 -0
  35. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_emit.py +0 -0
  36. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_eval_claim_schema.py +0 -0
  37. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_evalclaim.py +0 -0
  38. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_merkle.py +0 -0
  39. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_merkle_property.py +0 -0
  40. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_rekor_interop.py +0 -0
  41. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_rfc6962_external_vectors.py +0 -0
  42. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_schema.py +0 -0
  43. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_sdjwt_reference.py +0 -0
  44. {proofbundle-0.4.0 → proofbundle-0.5.0}/tests/test_signature.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: proofbundle
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
5
5
  Author: Konrad Gruszka
6
6
  License: MIT
@@ -27,6 +27,8 @@ Provides-Extra: sdjwt
27
27
  Provides-Extra: eval
28
28
  Requires-Dist: rfc8785>=0.1.4; extra == "eval"
29
29
  Provides-Extra: adapters
30
+ Provides-Extra: inspect
31
+ Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "inspect"
30
32
  Provides-Extra: dev
31
33
  Requires-Dist: pytest>=7; extra == "dev"
32
34
  Requires-Dist: ruff>=0.5; extra == "dev"
@@ -35,6 +37,8 @@ Requires-Dist: mypy>=1.8; extra == "dev"
35
37
  Requires-Dist: build>=1; extra == "dev"
36
38
  Requires-Dist: hypothesis>=6; extra == "dev"
37
39
  Requires-Dist: rfc8785>=0.1.4; extra == "dev"
40
+ Requires-Dist: sd-jwt>=0.10; extra == "dev"
41
+ Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "dev"
38
42
  Dynamic: license-file
39
43
 
40
44
  <div align="center">
@@ -61,7 +65,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
61
65
 
62
66
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
63
67
  verify` checks one self-contained `bundle.json` with three offline cryptographic
64
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 25 tests.
68
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 62 tests.
65
69
 
66
70
  ## Contents
67
71
 
@@ -241,12 +245,12 @@ string uses base64url as per the spec.
241
245
 
242
246
  ## Security notes and scope, stated honestly
243
247
 
244
- This is v0.1. It does exactly what it says and no more:
248
+ The scope is deliberately narrow. It does exactly what it says and no more:
245
249
 
246
250
  - Ed25519 signatures only, for both the payload and the optional SD-JWT issuer
247
251
  signature.
248
252
  - SD-JWT: the SD-JWT core is now [RFC 9901](https://datatracker.ietf.org/doc/rfc9901/)
249
- (Dec 2025); this verifies that every presented disclosure is committed in the
253
+ (November 2025); this verifies that every presented disclosure is committed in the
250
254
  issuer-signed payload, and the issuer signature (EdDSA) if a key is supplied. It
251
255
  does **not** verify a Key Binding JWT, an X.509 or trust-list chain, status
252
256
  lists, or `vct` type metadata. **SD-JWT VC** (the credential-type profile) is
@@ -282,15 +286,34 @@ commitments — it does **not** prove the evaluation was well designed or that t
282
286
  itself is correct. Those are human judgements; what it removes is the need to simply
283
287
  trust the number.
284
288
 
289
+ ### Since v0.5: framework adapter, in-toto, selective disclosure
290
+
291
+ - **inspect_ai adapter** (`pip install "proofbundle[inspect]"`) reads a UK AISI
292
+ [inspect_ai](https://github.com/UKGovernmentBEIS/inspect_ai) eval log via the stable
293
+ `read_eval_log` API (lazy import; the core stays dependency-free) and maps it to a claim.
294
+ `proofbundle.adapters.from_lm_eval_results` reads lm-evaluation-harness `results.json`
295
+ without importing anything.
296
+ - **in-toto Statement v1** — `proofbundle.intoto.to_intoto_statement(claim, root_b64=…)`
297
+ emits the receipt as an in-toto statement with a self-hosted predicate type. The subject
298
+ digest is an *honest salted commitment* under a custom key, never `sha256` (see
299
+ [PREDICATE.md](PREDICATE.md)).
300
+ - **SD-JWT issuance** (RFC 9901) — `proofbundle.sdjwt_issue.issue_sd_jwt(claim, signer,
301
+ root_b64=…, exact_score=…)` issues the receipt so a holder can disclose `passed` +
302
+ `threshold` while **withholding the exact score** and the identifier openings. The signed
303
+ bundle payload is the source of truth; the SD-JWT is a derived, bundle-bound view, verified
304
+ by proofbundle's own verifier **and** the `sd-jwt-python` reference.
305
+
285
306
  ## Roadmap
286
307
 
287
308
  - **v0.1** — the offline verifier plus a real example bundle.
288
309
  - **v0.2** — the emitter: `emit_bundle` / `proofbundle emit`.
289
310
  - **v0.3** — external RFC 6962 conformance vectors + real Sigstore Rekor interop.
290
- - **v0.4 (current release)** — the eval-receipt emitter (`emit_eval_receipt` /
291
- `proofbundle emit-eval`), salted commitments, issuer binding, file-based adapters.
292
- - **v0.5** — selective disclosure of the exact score via SD-JWT **issuance** (the issuer
293
- reveals identifier + salt on demand) and full SD-JWT VC conformance.
311
+ - **v0.4** — the eval-receipt emitter (`emit_eval_receipt` / `proofbundle emit-eval`),
312
+ salted commitments, issuer binding.
313
+ - **v0.5 (current release)** — inspect_ai adapter (stable API), in-toto Statement v1 view,
314
+ and SD-JWT **issuance** per RFC 9901 (selective disclosure of the exact score).
315
+ - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
316
+ Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
294
317
 
295
318
  ## Contributing
296
319
 
@@ -22,7 +22,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
22
22
 
23
23
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
24
24
  verify` checks one self-contained `bundle.json` with three offline cryptographic
25
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 25 tests.
25
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 62 tests.
26
26
 
27
27
  ## Contents
28
28
 
@@ -202,12 +202,12 @@ string uses base64url as per the spec.
202
202
 
203
203
  ## Security notes and scope, stated honestly
204
204
 
205
- This is v0.1. It does exactly what it says and no more:
205
+ The scope is deliberately narrow. It does exactly what it says and no more:
206
206
 
207
207
  - Ed25519 signatures only, for both the payload and the optional SD-JWT issuer
208
208
  signature.
209
209
  - SD-JWT: the SD-JWT core is now [RFC 9901](https://datatracker.ietf.org/doc/rfc9901/)
210
- (Dec 2025); this verifies that every presented disclosure is committed in the
210
+ (November 2025); this verifies that every presented disclosure is committed in the
211
211
  issuer-signed payload, and the issuer signature (EdDSA) if a key is supplied. It
212
212
  does **not** verify a Key Binding JWT, an X.509 or trust-list chain, status
213
213
  lists, or `vct` type metadata. **SD-JWT VC** (the credential-type profile) is
@@ -243,15 +243,34 @@ commitments — it does **not** prove the evaluation was well designed or that t
243
243
  itself is correct. Those are human judgements; what it removes is the need to simply
244
244
  trust the number.
245
245
 
246
+ ### Since v0.5: framework adapter, in-toto, selective disclosure
247
+
248
+ - **inspect_ai adapter** (`pip install "proofbundle[inspect]"`) reads a UK AISI
249
+ [inspect_ai](https://github.com/UKGovernmentBEIS/inspect_ai) eval log via the stable
250
+ `read_eval_log` API (lazy import; the core stays dependency-free) and maps it to a claim.
251
+ `proofbundle.adapters.from_lm_eval_results` reads lm-evaluation-harness `results.json`
252
+ without importing anything.
253
+ - **in-toto Statement v1** — `proofbundle.intoto.to_intoto_statement(claim, root_b64=…)`
254
+ emits the receipt as an in-toto statement with a self-hosted predicate type. The subject
255
+ digest is an *honest salted commitment* under a custom key, never `sha256` (see
256
+ [PREDICATE.md](PREDICATE.md)).
257
+ - **SD-JWT issuance** (RFC 9901) — `proofbundle.sdjwt_issue.issue_sd_jwt(claim, signer,
258
+ root_b64=…, exact_score=…)` issues the receipt so a holder can disclose `passed` +
259
+ `threshold` while **withholding the exact score** and the identifier openings. The signed
260
+ bundle payload is the source of truth; the SD-JWT is a derived, bundle-bound view, verified
261
+ by proofbundle's own verifier **and** the `sd-jwt-python` reference.
262
+
246
263
  ## Roadmap
247
264
 
248
265
  - **v0.1** — the offline verifier plus a real example bundle.
249
266
  - **v0.2** — the emitter: `emit_bundle` / `proofbundle emit`.
250
267
  - **v0.3** — external RFC 6962 conformance vectors + real Sigstore Rekor interop.
251
- - **v0.4 (current release)** — the eval-receipt emitter (`emit_eval_receipt` /
252
- `proofbundle emit-eval`), salted commitments, issuer binding, file-based adapters.
253
- - **v0.5** — selective disclosure of the exact score via SD-JWT **issuance** (the issuer
254
- reveals identifier + salt on demand) and full SD-JWT VC conformance.
268
+ - **v0.4** — the eval-receipt emitter (`emit_eval_receipt` / `proofbundle emit-eval`),
269
+ salted commitments, issuer binding.
270
+ - **v0.5 (current release)** — inspect_ai adapter (stable API), in-toto Statement v1 view,
271
+ and SD-JWT **issuance** per RFC 9901 (selective disclosure of the exact score).
272
+ - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
273
+ Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
255
274
 
256
275
  ## Contributing
257
276
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "proofbundle"
7
- version = "0.4.0"
7
+ version = "0.5.0"
8
8
  description = "Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -43,9 +43,14 @@ sdjwt = []
43
43
  # path (verify_bundle / decode_eval_claim) never canonicalizes — it checks stored bytes — so the
44
44
  # verifier stays dependency-free. `pip install proofbundle[eval]` adds emit-side canonicalization.
45
45
  eval = ["rfc8785>=0.1.4"]
46
- # Framework adapters read exported result JSON only (no framework import) → pure stdlib today.
46
+ # The lm-eval adapter reads exported results.json (no import) → pure stdlib.
47
47
  adapters = []
48
- dev = ["pytest>=7", "ruff>=0.5", "jsonschema>=4", "mypy>=1.8", "build>=1", "hypothesis>=6", "rfc8785>=0.1.4"]
48
+ # The inspect_ai adapter uses the STABLE read_eval_log API (lazy import). Pinned with an UPPER bound:
49
+ # the .eval format + pydantic schema change between versions (inspect_ai issue 834), and the fixture
50
+ # test is bound to this range. `pip install "proofbundle[inspect]"`.
51
+ inspect = ["inspect_ai>=0.3.100,<0.4"]
52
+ dev = ["pytest>=7", "ruff>=0.5", "jsonschema>=4", "mypy>=1.8", "build>=1", "hypothesis>=6",
53
+ "rfc8785>=0.1.4", "sd-jwt>=0.10", "inspect_ai>=0.3.100,<0.4"]
49
54
 
50
55
  [project.urls]
51
56
  Homepage = "https://b7n0de.com"
@@ -13,7 +13,7 @@ from .emit import emit_bundle, generate_signer
13
13
  from .errors import Check, ProofBundleError, VerificationResult
14
14
  from .merkle import verify_consistency, verify_inclusion
15
15
 
16
- __version__ = "0.4.0"
16
+ __version__ = "0.5.0"
17
17
 
18
18
  __all__ = [
19
19
  "__version__",
@@ -0,0 +1,65 @@
1
+ """Adapter for UK AISI inspect_ai eval logs — via the STABLE API, optional extra `proofbundle[inspect]`.
2
+
3
+ Unlike the v0.4 file-based reader, this uses the stable `inspect_ai.log.read_eval_log(path,
4
+ header_only=True)` API (the `.eval` on-disk format + its pydantic schema change between versions, see
5
+ inspect_ai issue 834; the stable API is robust). inspect_ai is imported LAZILY inside the function, so
6
+ the proofbundle core stays dependency-free — only `pip install "proofbundle[inspect]"` pulls it.
7
+
8
+ Object model (inspect_ai): `log.eval.task` is the suite; `log.results.scores` is a list of EvalScore;
9
+ `EvalScore.metrics` is a dict name→EvalMetric; `EvalMetric.value` is the number. threshold, comparator
10
+ and thus `passed` are set by proofbundle, NOT read from the log. model_id/dataset_id become salted
11
+ commitments (never plaintext in the payload).
12
+ """
13
+ from __future__ import annotations
14
+
15
+ from typing import Optional
16
+
17
+ from ..evalclaim import build_eval_claim
18
+
19
+
20
class InspectAdapterError(RuntimeError):
    """Adapter-level failure while turning an inspect_ai eval log into a claim.

    Used when inspect_ai cannot be imported, the log cannot be read, or the log lacks the
    attributes/metric the adapter needs — one clear error type instead of a bare AttributeError.
    """
22
+
23
+
24
def from_inspect_ai_log(path, metric: str, *, comparator: str, threshold: str, timestamp: str,
                        model_salt: Optional[bytes] = None, dataset_salt: Optional[bytes] = None):
    """Build an eval claim for `metric` from an inspect_ai eval log.

    The log is loaded with `inspect_ai.log.read_eval_log(str(path), header_only=True)`; inspect_ai is
    imported inside this function so importing the adapter module never requires it.

    The value is taken from the first entry of `log.results.scores` whose `metrics` mapping contains
    `metric`. `comparator`, `threshold` and `timestamp` come from the caller, not from the log.

    Returns whatever `build_eval_claim` returns (described by the module as a (claim, salts) pair).
    Raises InspectAdapterError when inspect_ai is not installed, the log cannot be read, `.eval` or
    `.results` is missing, or no usable value for `metric` is found.
    """
    try:
        from inspect_ai.log import read_eval_log  # noqa: PLC0415 — lazy: keeps the core dependency-free
    except ImportError as exc:
        raise InspectAdapterError(
            'inspect_ai is required for this adapter — install with: pip install "proofbundle[inspect]"'
        ) from exc

    try:
        log = read_eval_log(str(path), header_only=True)
    except Exception as exc:  # noqa: BLE001 — any read/parse failure becomes one clear adapter error
        raise InspectAdapterError(f"could not read inspect_ai log {path!r}: {exc}") from exc

    eval_spec = getattr(log, "eval", None)
    results = getattr(log, "results", None)
    if eval_spec is None or results is None:
        raise InspectAdapterError("inspect_ai log missing .eval or .results (empty or malformed log)")

    # First score entry that knows the metric wins; later entries are not consulted.
    value = None
    for entry in getattr(results, "scores", None) or ():
        table = getattr(entry, "metrics", None) or {}
        if metric not in table:
            continue
        value = getattr(table[metric], "value", None)
        break
    if value is None:
        raise InspectAdapterError(f"metric {metric!r} not found in any score.metrics of the log")

    suite = str(getattr(eval_spec, "task", "inspect_ai"))
    model_id = str(getattr(eval_spec, "model", "unknown"))
    # Fall back to the suite name when the log carries no dataset name.
    dataset_name = getattr(getattr(eval_spec, "dataset", None), "name", None)
    dataset_id = str(dataset_name or suite)
    suite_version = str(getattr(eval_spec, "task_version", "1"))
    sample_count = int(getattr(results, "total_samples", 0) or 0)

    return build_eval_claim(
        suite=suite,
        suite_version=suite_version,
        metric=metric,
        comparator=comparator,
        threshold=threshold,
        score=repr(value),
        n=sample_count,
        model_id=model_id,
        dataset_id=dataset_id,
        issuer="",
        timestamp=timestamp,
        model_salt=model_salt,
        dataset_salt=dataset_salt,
    )
@@ -5,9 +5,8 @@ Merkle tree, producing a bundle that ``verify_bundle`` accepts. This is the
5
5
  counterpart to the verifier: create the evidence here, check it anywhere with
6
6
  ``proofbundle verify``, fully offline.
7
7
 
8
- The v0.3 eval-receipt emitter (wrap one evaluation run into a signed,
9
- selectively disclosable receipt) is still a roadmap stub at the bottom of this
10
- module.
8
+ The eval-receipt emitter that builds on this (``emit_eval_receipt``) lives in
9
+ :mod:`proofbundle.evalclaim` since v0.4.
11
10
  """
12
11
 
13
12
  from __future__ import annotations
@@ -110,29 +109,3 @@ def emit_bundle(
110
109
  if sd_jwt_vc is not None:
111
110
  bundle["sd_jwt_vc"] = sd_jwt_vc
112
111
  return bundle
113
-
114
-
115
- # --------------------------------------------------------------------------
116
- # Roadmap stub, v0.3
117
- # --------------------------------------------------------------------------
118
-
119
-
120
- class NotYetImplemented(NotImplementedError):
121
- """Raised by roadmap functions that are planned but not implemented yet."""
122
-
123
-
124
- def emit_eval_receipt(*args, **kwargs): # pragma: no cover - roadmap stub
125
- """v0.3, the core differentiator.
126
-
127
- Wrap one evaluation framework run (Inspect AI, lm-evaluation-harness) into a
128
- signed receipt whose payload is a minimal, RFC 8785 canonicalized claim such
129
- as ``{"suite": "...", "threshold": 0.8, "passed": true}``, optionally wrapped
130
- as an SD-JWT VC so a holder can disclose "passed above threshold" without
131
- revealing the model, weights or dataset, carrying a cluster-bootstrap
132
- confidence interval, a multiple-testing correction and a preregistration
133
- hash. Built on top of :func:`emit_bundle`.
134
- """
135
- raise NotYetImplemented(
136
- "emit_eval_receipt lands in v0.3. Use emit_bundle for a generic signed, "
137
- "anchored bundle today."
138
- )
@@ -0,0 +1,63 @@
1
+ """in-toto Statement v1 view of an eval receipt (self-hosted predicate type).
2
+
3
+ A self-hosted `predicateType` URI fully conforms to the in-toto spec and is the right choice for a solo v0.x
4
+ (no official in-toto/attestation PR needed). See PREDICATE.md.
5
+
6
+ HONESTY (important): the `subject.digest` here is a SALTED COMMITMENT to the model identifier, NOT the
7
+ content hash of an artifact. Placing it under the standard `sha256` key would suggest an artifact hash
8
+ and mislead generic in-toto verifiers. in-toto permits arbitrary digest keys, so we use a unique custom
9
+ key `proofbundleModelCommitV1`; the `subject.name` is the descriptive `model-id-commitment`; and the
10
+ predicate mirrors the note in `subject_digest_note`. Full artifact digests come only once a model artifact
11
+ exists (deferred, see the roadmap).
12
+ """
13
+ from __future__ import annotations
14
+
15
+ from typing import Optional
16
+
17
+ STATEMENT_TYPE = "https://in-toto.io/Statement/v1"
18
+ PREDICATE_TYPE = "https://b7n0de.com/proofbundle/eval-receipt/v0.1"
19
+ VERIFIER_ID = "https://b7n0de.com/proofbundle"
20
+ MODEL_COMMIT_DIGEST_KEY = "proofbundleModelCommitV1"
21
+
22
+ _SUBJECT_DIGEST_NOTE = (
23
+ "subject.digest is a salted commitment to the model identifier (key "
24
+ f"{MODEL_COMMIT_DIGEST_KEY}), NOT an artifact content hash — do not treat it as sha256.")
25
+
26
+
27
def _commit_hex(commit: str) -> str:
    """Return the part after the first ':' of a `sha256:<hex>` commitment, or the input if it has no ':'."""
    _prefix, separator, remainder = commit.partition(":")
    if not separator:
        return commit
    return remainder
30
+
31
+
32
def to_intoto_statement(claim: dict, *, root_b64: Optional[str] = None,
                        harness: Optional[dict] = None) -> dict:
    """Render an eval claim as an in-toto Statement v1 dict.

    The single subject carries the claim's `model_id_commit` (prefix stripped by `_commit_hex`) under
    the custom digest key MODEL_COMMIT_DIGEST_KEY rather than `sha256`. The predicate copies
    timestamp, suite, metric, comparator, threshold and passed from `claim`; `dataset_id_commit` is
    optional and becomes `datasetCommit` (None when absent).

    `harness`, when truthy, is attached as `predicate.harness` (the same object, not a copy).
    `root_b64`, when truthy, is attached as `predicate.receipt` next to the schema tag.
    Raises KeyError if a required claim field is missing.
    """
    # Subject is built first so a missing `model_id_commit` is reported before other missing fields.
    subject_entry = {
        "name": "model-id-commitment",
        "digest": {MODEL_COMMIT_DIGEST_KEY: _commit_hex(claim["model_id_commit"])},
    }

    predicate = {
        "verifier": {"id": VERIFIER_ID},
        "evaluatedAt": claim["timestamp"],
        "suite": claim["suite"],
        "claims": [
            {field: claim[field] for field in ("metric", "comparator", "threshold", "passed")},
        ],
        "datasetCommit": claim.get("dataset_id_commit"),
        "subject_digest_note": _SUBJECT_DIGEST_NOTE,
    }
    if harness:
        predicate["harness"] = harness
    if root_b64:
        predicate["receipt"] = {"schema": "proofbundle/v0.1", "root_b64": root_b64}

    return {
        "_type": STATEMENT_TYPE,
        "subject": [subject_entry],
        "predicateType": PREDICATE_TYPE,
        "predicate": predicate,
    }
@@ -1,6 +1,6 @@
1
1
  """Minimal SD-JWT selective disclosure verification.
2
2
 
3
- The SD-JWT *core* is now a published standard, RFC 9901 (December 2025). This
3
+ The SD-JWT *core* is now a published standard, RFC 9901 (November 2025). This
4
4
  module verifies the heart of it: that every presented Disclosure hashes to a
5
5
  digest that is actually committed in the issuer-signed JWT payload, and, if an
6
6
  issuer public key is supplied and the algorithm is EdDSA, that the issuer
@@ -0,0 +1,119 @@
1
+ """SD-JWT issuance per RFC 9901 — the differentiation feature (v0.5).
2
+
3
+ Issue an eval receipt so a holder can disclose `passed` + `threshold` while WITHHOLDING the exact score
4
+ and the identifier openings. The existing verifier (proofbundle.sdjwt) stays; this adds issuance.
5
+
6
+ Source of truth: the signed canonical bundle payload (evalclaim) is the ONLY truth. This SD-JWT is a
7
+ derived view — its always-open claims are copied bit-exact from that payload, and it binds the bundle
8
+ anchor via `receipt.root_b64`. Sign the SD-JWT with the SAME Ed25519 key that signed the bundle (matching
9
+ the `issuer` field). A holder cannot lift a claim under a different key.
10
+
11
+ Always-open (plaintext JWT claims, NEVER a disclosure): passed, threshold, comparator, suite, issuer,
12
+ receipt.root_b64. Selectively-disclosable (via `_sd` + disclosures): the exact metric value, ci95, and
13
+ the identifier-commitment openings (identifier + salt).
14
+
15
+ RFC 9901 §4.2.4.1 digest byte-chain (the subtle, load-bearing detail): for each disclosable field, a
16
+ CSPRNG salt of ≥128 bit (base64url); the disclosure is base64url(UTF-8(JSON array [salt, name, value]));
17
+ the digest placed in `_sd` is **base64url(SHA-256(ASCII bytes of the base64url-ENCODED disclosure
18
+ string))** — hashed over the ENCODED string, NOT over the JSON bytes. `_sd_alg` = "sha-256" at the top
19
+ level. The JWT is signed with EdDSA. Compact form is tilde-separated: JWT~disclosure1~...~ (trailing ~).
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import base64
24
+ import hashlib
25
+ import json
26
+ import os
27
+ from typing import Optional, Sequence
28
+
29
+ from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
30
+ from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat
31
+
32
+ SD_ALG = "sha-256"
33
+ _SALT_BYTES = 16 # 128 bit
34
+
35
+
36
def _b64url(data: bytes) -> str:
    """Encode `data` as URL-safe base64 text with the trailing '=' padding removed."""
    encoded = base64.urlsafe_b64encode(data).decode("ascii")
    return encoded.rstrip("=")
38
+
39
+
40
def _make_disclosure(name: str, value, salt_b64: str) -> tuple[str, str]:
    """Build one disclosure and its digest; returns (disclosure_b64url, digest_b64url).

    The disclosure is the base64url of the UTF-8 JSON array [salt, name, value]. The digest is the
    base64url of SHA-256 over the ASCII bytes of that ENCODED disclosure string, not over the JSON.
    """
    encoded = _b64url(json.dumps([salt_b64, name, value]).encode("utf-8"))
    hasher = hashlib.sha256()
    hasher.update(encoded.encode("ascii"))  # hash input is the encoded text itself
    return encoded, _b64url(hasher.digest())
48
+
49
+
50
def issue_sd_jwt(claim: dict, signer: Ed25519PrivateKey, *, root_b64: str,
                 exact_score: Optional[str] = None, ci95: Optional[Sequence[str]] = None,
                 model_id_opening: Optional[Sequence] = None,
                 dataset_id_opening: Optional[Sequence] = None) -> str:
    """Issue a compact SD-JWT for the eval claim, signed with `signer`.

    Always-open JWT claims (copied from `claim`): passed, threshold, comparator, suite, issuer, plus
    `receipt.root_b64` from the `root_b64` argument. Each optional argument that is not None becomes
    one selectively-disclosable claim with a fresh random salt of `_SALT_BYTES` bytes:
    `exact_score`, `ci95`, `model_id_opening`, `dataset_id_opening` (sequences are stored as lists).

    The signer is expected to be the key named in claim['issuer']; this function does not check
    that — use `issuer_matches` beforehand when the binding matters.

    Returns the tilde-separated compact form `JWT~disclosure1~...~` with a trailing '~' and no
    key-binding JWT. Raises KeyError if `claim` lacks one of the always-open fields.
    """
    payload = {
        "passed": claim["passed"], "threshold": claim["threshold"],
        "comparator": claim["comparator"], "suite": claim["suite"],
        "issuer": claim["issuer"], "receipt": {"root_b64": root_b64},
    }

    optional_claims = (
        ("exact_score", exact_score),
        ("ci95", None if ci95 is None else list(ci95)),
        ("model_id_opening", None if model_id_opening is None else list(model_id_opening)),
        ("dataset_id_opening", None if dataset_id_opening is None else list(dataset_id_opening)),
    )

    disclosures: list[str] = []
    sd_digests: list[str] = []
    for name, value in optional_claims:
        if value is None:
            continue
        disclosure, digest = _make_disclosure(name, value, _b64url(os.urandom(_SALT_BYTES)))
        disclosures.append(disclosure)
        sd_digests.append(digest)

    if sd_digests:
        # The issuer must hide the original claim order inside `_sd`: with a fixed insertion order,
        # the position of one revealed digest tells a verifier which other optional claims exist.
        # Sorting the (salted, hence unpredictable) digests removes that signal.
        payload["_sd"] = sorted(sd_digests)
        payload["_sd_alg"] = SD_ALG

    header = {"alg": "EdDSA", "typ": "sd-jwt"}
    signing_input = (
        _b64url(json.dumps(header).encode("utf-8"))
        + "."
        + _b64url(json.dumps(payload).encode("utf-8"))
    )
    signature = signer.sign(signing_input.encode("ascii"))
    jwt = signing_input + "." + _b64url(signature)

    # compact: JWT ~ disclosure1 ~ ... ~ (trailing tilde, no key-binding JWT in v0.5)
    return "~".join([jwt, *disclosures]) + "~"
93
+
94
+
95
def issuer_matches(claim: dict, signer: Ed25519PrivateKey) -> bool:
    """Report whether claim['issuer'] names the signer's public key.

    The expected value is "ed25519:" followed by the standard (padded) base64 of the raw public
    key bytes; a claim without an `issuer` entry never matches.
    """
    public_raw = signer.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw)
    expected = "ed25519:" + base64.b64encode(public_raw).decode("ascii")
    return claim.get("issuer") == expected
99
+
100
+
101
def _jwt_payload(compact: str) -> dict:
    """Decode the always-open JWT payload of a compact SD-JWT (the part before the first '~').

    Raises IndexError when the JWT has no second dot-separated segment, and ValueError when the
    segment is not valid base64url/UTF-8/JSON or when the decoded JSON is not an object.
    """
    jwt = compact.split("~", 1)[0]
    payload_b64 = jwt.split(".")[1]
    padded = payload_b64 + "=" * (-len(payload_b64) % 4)  # restore the stripped '=' padding
    payload = json.loads(base64.urlsafe_b64decode(padded).decode("utf-8"))
    if not isinstance(payload, dict):
        # A JSON array/string/number is syntactically valid but can never be a JWT claims set.
        raise ValueError("SD-JWT payload is not a JSON object")
    return payload


def check_binds_bundle(compact: str, claim: dict, root_b64: str) -> bool:
    """No-Fake binding: the SD-JWT's always-open claims MUST match the signed bundle payload bit-exact
    and bind its merkle root. A derived SD-JWT that diverges from its bundle source of truth is rejected.

    Returns False, never raises, for a token whose payload cannot be decoded, is not a JSON object,
    or carries a `receipt` that is not an object. Raises KeyError only if `claim` itself lacks one
    of passed/threshold/comparator/suite/issuer.
    """
    try:
        p = _jwt_payload(compact)
    except (ValueError, KeyError, IndexError):
        return False

    # A hostile token may put any JSON type under `receipt`; only an object can carry root_b64.
    receipt = p.get("receipt")
    bound_root = receipt.get("root_b64") if isinstance(receipt, dict) else None

    return (p.get("passed") == claim["passed"] and p.get("threshold") == claim["threshold"]
            and p.get("comparator") == claim["comparator"] and p.get("suite") == claim["suite"]
            and p.get("issuer") == claim["issuer"]
            and bound_root == root_b64)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: proofbundle
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
5
5
  Author: Konrad Gruszka
6
6
  License: MIT
@@ -27,6 +27,8 @@ Provides-Extra: sdjwt
27
27
  Provides-Extra: eval
28
28
  Requires-Dist: rfc8785>=0.1.4; extra == "eval"
29
29
  Provides-Extra: adapters
30
+ Provides-Extra: inspect
31
+ Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "inspect"
30
32
  Provides-Extra: dev
31
33
  Requires-Dist: pytest>=7; extra == "dev"
32
34
  Requires-Dist: ruff>=0.5; extra == "dev"
@@ -35,6 +37,8 @@ Requires-Dist: mypy>=1.8; extra == "dev"
35
37
  Requires-Dist: build>=1; extra == "dev"
36
38
  Requires-Dist: hypothesis>=6; extra == "dev"
37
39
  Requires-Dist: rfc8785>=0.1.4; extra == "dev"
40
+ Requires-Dist: sd-jwt>=0.10; extra == "dev"
41
+ Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "dev"
38
42
  Dynamic: license-file
39
43
 
40
44
  <div align="center">
@@ -61,7 +65,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
61
65
 
62
66
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
63
67
  verify` checks one self-contained `bundle.json` with three offline cryptographic
64
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 25 tests.
68
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 62 tests.
65
69
 
66
70
  ## Contents
67
71
 
@@ -241,12 +245,12 @@ string uses base64url as per the spec.
241
245
 
242
246
  ## Security notes and scope, stated honestly
243
247
 
244
- This is v0.1. It does exactly what it says and no more:
248
+ The scope is deliberately narrow. It does exactly what it says and no more:
245
249
 
246
250
  - Ed25519 signatures only, for both the payload and the optional SD-JWT issuer
247
251
  signature.
248
252
  - SD-JWT: the SD-JWT core is now [RFC 9901](https://datatracker.ietf.org/doc/rfc9901/)
249
- (Dec 2025); this verifies that every presented disclosure is committed in the
253
+ (November 2025); this verifies that every presented disclosure is committed in the
250
254
  issuer-signed payload, and the issuer signature (EdDSA) if a key is supplied. It
251
255
  does **not** verify a Key Binding JWT, an X.509 or trust-list chain, status
252
256
  lists, or `vct` type metadata. **SD-JWT VC** (the credential-type profile) is
@@ -282,15 +286,34 @@ commitments — it does **not** prove the evaluation was well designed or that t
282
286
  itself is correct. Those are human judgements; what it removes is the need to simply
283
287
  trust the number.
284
288
 
289
+ ### Since v0.5: framework adapter, in-toto, selective disclosure
290
+
291
+ - **inspect_ai adapter** (`pip install "proofbundle[inspect]"`) reads a UK AISI
292
+ [inspect_ai](https://github.com/UKGovernmentBEIS/inspect_ai) eval log via the stable
293
+ `read_eval_log` API (lazy import; the core stays dependency-free) and maps it to a claim.
294
+ `proofbundle.adapters.from_lm_eval_results` reads lm-evaluation-harness `results.json`
295
+ without importing the framework.
296
+ - **in-toto Statement v1** — `proofbundle.intoto.to_intoto_statement(claim, root_b64=…)`
297
+ emits the receipt as an in-toto statement with a self-hosted predicate type. The subject
298
+ digest is an *honest salted commitment* under a custom key, never `sha256` (see
299
+ [PREDICATE.md](PREDICATE.md)).
300
+ - **SD-JWT issuance** (RFC 9901) — `proofbundle.sdjwt_issue.issue_sd_jwt(claim, signer,
301
+ root_b64=…, exact_score=…)` issues the receipt so a holder can disclose `passed` +
302
+ `threshold` while **withholding the exact score** and the identifier openings. The signed
303
+ bundle payload is the source of truth; the SD-JWT is a derived, bundle-bound view, verified
304
+ by proofbundle's own verifier **and** the `sd-jwt-python` reference.
305
+
285
306
  ## Roadmap
286
307
 
287
308
  - **v0.1** — the offline verifier plus a real example bundle.
288
309
  - **v0.2** — the emitter: `emit_bundle` / `proofbundle emit`.
289
310
  - **v0.3** — external RFC 6962 conformance vectors + real Sigstore Rekor interop.
290
- - **v0.4 (current release)** — the eval-receipt emitter (`emit_eval_receipt` /
291
- `proofbundle emit-eval`), salted commitments, issuer binding, file-based adapters.
292
- - **v0.5** — selective disclosure of the exact score via SD-JWT **issuance** (the issuer
293
- reveals identifier + salt on demand) and full SD-JWT VC conformance.
311
+ - **v0.4** — the eval-receipt emitter (`emit_eval_receipt` / `proofbundle emit-eval`),
312
+ salted commitments, issuer binding.
313
+ - **v0.5 (current release)** — inspect_ai adapter (stable API), in-toto Statement v1 view,
314
+ and SD-JWT **issuance** per RFC 9901 (selective disclosure of the exact score).
315
+ - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
316
+ Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
294
317
 
295
318
  ## Contributing
296
319
 
@@ -7,9 +7,11 @@ src/proofbundle/cli.py
7
7
  src/proofbundle/emit.py
8
8
  src/proofbundle/errors.py
9
9
  src/proofbundle/evalclaim.py
10
+ src/proofbundle/intoto.py
10
11
  src/proofbundle/merkle.py
11
12
  src/proofbundle/py.typed
12
13
  src/proofbundle/sdjwt.py
14
+ src/proofbundle/sdjwt_issue.py
13
15
  src/proofbundle/signature.py
14
16
  src/proofbundle.egg-info/PKG-INFO
15
17
  src/proofbundle.egg-info/SOURCES.txt
@@ -27,10 +29,12 @@ tests/test_cli_eval.py
27
29
  tests/test_emit.py
28
30
  tests/test_eval_claim_schema.py
29
31
  tests/test_evalclaim.py
32
+ tests/test_intoto.py
30
33
  tests/test_merkle.py
31
34
  tests/test_merkle_property.py
32
35
  tests/test_rekor_interop.py
33
36
  tests/test_rfc6962_external_vectors.py
34
37
  tests/test_schema.py
38
+ tests/test_sdjwt_issue.py
35
39
  tests/test_sdjwt_reference.py
36
40
  tests/test_signature.py
@@ -10,8 +10,13 @@ mypy>=1.8
10
10
  build>=1
11
11
  hypothesis>=6
12
12
  rfc8785>=0.1.4
13
+ sd-jwt>=0.10
14
+ inspect_ai<0.4,>=0.3.100
13
15
 
14
16
  [eval]
15
17
  rfc8785>=0.1.4
16
18
 
19
+ [inspect]
20
+ inspect_ai<0.4,>=0.3.100
21
+
17
22
  [sdjwt]
@@ -0,0 +1,48 @@
1
+ """Adapters map real exported eval results to a valid claim (lm-eval: file-based JSON; inspect_ai: via its stable log-reading API)."""
2
+ import unittest
3
+ from pathlib import Path
4
+
5
+ from proofbundle.adapters import from_inspect_ai_log, from_lm_eval_results
6
+
7
+ FX = Path(__file__).resolve().parent / "fixtures"
8
+ TS = "2026-07-01T12:00:00Z"
9
+
10
+
11
+ class TestAdapters(unittest.TestCase):
12
+ def test_lm_eval(self):
13
+ claim, salts = from_lm_eval_results(FX / "lm_eval_results.json", "hellaswag", "acc",
14
+ comparator=">=", threshold="0.70", timestamp=TS,
15
+ model_salt=b"0" * 16, dataset_salt=b"1" * 16)
16
+ self.assertEqual(claim["suite"], "hellaswag")
17
+ self.assertEqual(claim["threshold"], "0.70")
18
+ self.assertTrue(claim["passed"]) # 0.7534 >= 0.70
19
+ self.assertNotIn("acme/model-x", str(claim)) # id only as salted commitment
20
+ self.assertEqual(claim["n"], 10042)
21
+
22
+ def test_inspect_ai_stable_api(self):
23
+ # Real .eval log fixture, read via the stable inspect_ai.log.read_eval_log API (proofbundle[inspect]).
24
+ try:
25
+ import inspect_ai.log # noqa: F401
26
+ except ImportError:
27
+ self.skipTest("inspect_ai not installed (pip install proofbundle[inspect])")
28
+ claim, salts = from_inspect_ai_log(FX / "inspect_logs" / "safety_refusal_demo.eval", "accuracy",
29
+ comparator=">=", threshold="0.00", timestamp=TS,
30
+ model_salt=b"0" * 16, dataset_salt=b"1" * 16)
31
+ self.assertEqual(claim["suite"], "safety_refusal_demo")
32
+ self.assertTrue(claim["passed"]) # accuracy 0.0 >= 0.00
33
+ self.assertNotIn("mockllm/model", str(claim)) # model id only as salted commitment
34
+
35
+ def test_inspect_ai_missing_metric_clear_error(self):
36
+ from proofbundle.adapters.inspect_ai import InspectAdapterError
37
+ try:
38
+ import inspect_ai.log # noqa: F401
39
+ except ImportError:
40
+ self.skipTest("inspect_ai not installed")
41
+ with self.assertRaises(InspectAdapterError):
42
+ from_inspect_ai_log(FX / "inspect_logs" / "safety_refusal_demo.eval", "nonexistent_metric",
43
+ comparator=">=", threshold="0.5", timestamp=TS,
44
+ model_salt=b"0" * 16, dataset_salt=b"1" * 16)
45
+
46
+
47
+ if __name__ == "__main__":
48
+ unittest.main()
@@ -0,0 +1,63 @@
1
+ """in-toto Statement v1 view of an eval receipt — structurally valid + honest salted-commitment digest."""
2
+ import json
3
+ import unittest
4
+ from pathlib import Path
5
+
6
+ try:
7
+ import jsonschema
8
+ except ImportError:
9
+ jsonschema = None
10
+
11
+ from proofbundle.emit import generate_signer
12
+ from proofbundle.evalclaim import build_eval_claim, issuer_fingerprint
13
+ from proofbundle.intoto import MODEL_COMMIT_DIGEST_KEY, PREDICATE_TYPE, to_intoto_statement
14
+
15
+ ROOT = Path(__file__).resolve().parents[1]
16
+ TS = "2026-07-01T12:00:00Z"
17
+
18
+
19
+ def _claim():
20
+ signer = generate_signer()
21
+ claim, _ = build_eval_claim(
22
+ suite="safety-refusal", suite_version="v1", metric="accuracy", comparator=">=",
23
+ threshold="0.65", score="0.92", n=500, model_id="acme/model-x", dataset_id="acme/set",
24
+ issuer=issuer_fingerprint(signer), timestamp=TS, model_salt=b"0" * 16, dataset_salt=b"1" * 16)
25
+ return claim
26
+
27
+
28
+ class TestInToto(unittest.TestCase):
29
+ def test_structure(self):
30
+ stmt = to_intoto_statement(_claim(), root_b64="cm9vdA==",
31
+ harness={"name": "inspect_ai", "version": "0.3.217"})
32
+ self.assertEqual(stmt["_type"], "https://in-toto.io/Statement/v1")
33
+ self.assertEqual(stmt["predicateType"], PREDICATE_TYPE)
34
+ self.assertEqual(len(stmt["subject"]), 1)
35
+ self.assertIn("digest", stmt["subject"][0])
36
+ # honest custom digest key, NOT sha256 (would mislead generic verifiers about an artifact hash)
37
+ self.assertIn(MODEL_COMMIT_DIGEST_KEY, stmt["subject"][0]["digest"])
38
+ self.assertNotIn("sha256", stmt["subject"][0]["digest"])
39
+ self.assertIn("salted commitment", stmt["predicate"]["subject_digest_note"])
40
+ self.assertEqual(stmt["predicate"]["receipt"]["root_b64"], "cm9vdA==")
41
+
42
+ def test_digest_is_commit_hex(self):
43
+ claim = _claim()
44
+ stmt = to_intoto_statement(claim)
45
+ expected_hex = claim["model_id_commit"].split(":", 1)[1]
46
+ self.assertEqual(stmt["subject"][0]["digest"][MODEL_COMMIT_DIGEST_KEY], expected_hex)
47
+
48
+ @unittest.skipIf(jsonschema is None, "jsonschema not installed (pip install proofbundle[dev])")
49
+ def test_validates_against_official_intoto_v1_schema(self):
50
+ schema = json.loads((ROOT / "schemas" / "in_toto_statement_v1.schema.json").read_text(encoding="utf-8"))
51
+ stmt = to_intoto_statement(_claim(), root_b64="cm9vdA==")
52
+ jsonschema.validate(instance=stmt, schema=schema) # raises if invalid
53
+
54
+ @unittest.skipIf(jsonschema is None, "jsonschema not installed")
55
+ def test_schema_rejects_missing_subject(self):
56
+ schema = json.loads((ROOT / "schemas" / "in_toto_statement_v1.schema.json").read_text(encoding="utf-8"))
57
+ bad = {"_type": "https://in-toto.io/Statement/v1", "predicateType": "x", "subject": []}
58
+ with self.assertRaises(jsonschema.ValidationError):
59
+ jsonschema.validate(instance=bad, schema=schema)
60
+
61
+
62
+ if __name__ == "__main__":
63
+ unittest.main()
@@ -0,0 +1,98 @@
1
+ """SD-JWT issuance (v0.5, RFC 9901) — own verifier + reference interop + red-tests. No-Fake."""
2
+ import base64
3
+ import json
4
+ import unittest
5
+ from pathlib import Path
6
+
7
+ from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat
8
+
9
+ from proofbundle.emit import generate_signer
10
+ from proofbundle.evalclaim import build_eval_claim, issuer_fingerprint
11
+ from proofbundle.sdjwt import verify_sd_jwt
12
+ from proofbundle.sdjwt_issue import (
13
+ _make_disclosure,
14
+ check_binds_bundle,
15
+ issue_sd_jwt,
16
+ )
17
+
18
+ FX = Path(__file__).resolve().parent / "fixtures"
19
+ TS = "2026-07-01T12:00:00Z"
20
+ ROOT_B64 = "cm9vdA=="
21
+
22
+
23
+ def _claim(signer):
24
+ claim, _ = build_eval_claim(suite="safety", suite_version="v1", metric="accuracy", comparator=">=",
25
+ threshold="0.65", score="0.92", n=500, model_id="acme/model-x", dataset_id="acme/set",
26
+ issuer=issuer_fingerprint(signer), timestamp=TS, model_salt=b"0" * 16, dataset_salt=b"1" * 16)
27
+ return claim
28
+
29
+
30
+ def _raw_pub(signer):
31
+ return signer.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw)
32
+
33
+
34
+ class TestSdJwtIssue(unittest.TestCase):
35
+ def test_own_verifier_accepts(self):
36
+ signer = generate_signer()
37
+ compact = issue_sd_jwt(_claim(signer), signer, root_b64=ROOT_B64, exact_score="0.92", ci95=["0.90", "0.94"])
38
+ res = verify_sd_jwt(compact, _raw_pub(signer))
39
+ self.assertTrue(res["structure_ok"], res)
40
+ self.assertTrue(res["sig_ok"], res)
41
+
42
+ def test_reference_verifier_accepts(self):
43
+ try:
44
+ from jwcrypto.jwk import JWK
45
+ from sd_jwt.verifier import SDJWTVerifier
46
+ except ImportError:
47
+ self.skipTest("sd-jwt-python not installed (dev extra)")
48
+ signer = generate_signer()
49
+ compact = issue_sd_jwt(_claim(signer), signer, root_b64=ROOT_B64, exact_score="0.92")
50
+ jwk = JWK(kty="OKP", crv="Ed25519", x=base64.urlsafe_b64encode(_raw_pub(signer)).rstrip(b"=").decode())
51
+ payload = SDJWTVerifier(compact, lambda *_a, **_k: jwk).get_verified_payload()
52
+ self.assertEqual(payload["passed"], True) # always-open
53
+ self.assertEqual(payload["exact_score"], "0.92") # selectively disclosed
54
+
55
+ def test_digest_byte_chain_vector(self):
56
+ # RFC 9901 §4.2.4.1: digest over the base64url-ENCODED disclosure string, not the JSON bytes.
57
+ v = json.loads((FX / "sdjwt_disclosure_vector.json").read_text(encoding="utf-8"))
58
+ d_b64, dig = _make_disclosure(v["name"], v["value"], v["salt_b64url"])
59
+ self.assertEqual(d_b64, v["disclosure_b64url"])
60
+ self.assertEqual(dig, v["expected_digest_b64url"])
61
+
62
+ def test_always_open_vs_selective(self):
63
+ signer = generate_signer()
64
+ compact = issue_sd_jwt(_claim(signer), signer, root_b64=ROOT_B64, exact_score="0.92")
65
+ jwt_payload = json.loads(base64.urlsafe_b64decode(
66
+ compact.split("~")[0].split(".")[1] + "==").decode("utf-8"))
67
+ # passed/threshold are plaintext; exact_score is NOT (only its digest is in _sd)
68
+ self.assertEqual(jwt_payload["passed"], True)
69
+ self.assertIn("threshold", jwt_payload)
70
+ self.assertNotIn("exact_score", jwt_payload)
71
+ self.assertIn("_sd", jwt_payload)
72
+
73
+ def test_binds_bundle(self):
74
+ signer = generate_signer()
75
+ claim = _claim(signer)
76
+ compact = issue_sd_jwt(claim, signer, root_b64=ROOT_B64, exact_score="0.92")
77
+ self.assertTrue(check_binds_bundle(compact, claim, ROOT_B64))
78
+
79
+ def test_divergence_red(self): # SD-JWT claims diverge from bundle → rejected
80
+ signer = generate_signer()
81
+ claim = _claim(signer)
82
+ compact = issue_sd_jwt(claim, signer, root_b64=ROOT_B64, exact_score="0.92")
83
+ diverged = dict(claim, passed=False) # bundle says passed=False, SD-JWT says True
84
+ self.assertFalse(check_binds_bundle(compact, diverged, ROOT_B64))
85
+ self.assertFalse(check_binds_bundle(compact, claim, "d3Jvbmc=")) # wrong root
86
+
87
+ def test_tamper_disclosure_red(self): # tampered disclosure → digest mismatch → own verifier fails
88
+ signer = generate_signer()
89
+ compact = issue_sd_jwt(_claim(signer), signer, root_b64=ROOT_B64, exact_score="0.92")
90
+ jwt, *disc = compact.rstrip("~").split("~")
91
+ tampered_d, _ = _make_disclosure("exact_score", "0.99", "AAAAAAAAAAAAAAAAAAAAAA") # not committed in _sd
92
+ tampered = "~".join([jwt, tampered_d]) + "~"
93
+ res = verify_sd_jwt(tampered, _raw_pub(signer))
94
+ self.assertFalse(res.get("structure_ok") and res.get("sig_ok") and "1 disclosure" in res.get("detail", ""))
95
+
96
+
97
+ if __name__ == "__main__":
98
+ unittest.main()
@@ -1,36 +0,0 @@
1
- """Adapter for UK AISI inspect_ai eval-log JSON (file-based, no framework import)."""
2
- from __future__ import annotations
3
-
4
- import json
5
- from pathlib import Path
6
- from typing import Optional
7
-
8
- from ..evalclaim import build_eval_claim
9
-
10
-
11
- def from_inspect_ai_log(path, metric: str, *, comparator: str, threshold: str, timestamp: str,
12
- model_salt: Optional[bytes] = None, dataset_salt: Optional[bytes] = None):
13
- """Read an inspect_ai eval-log JSON and build an eval claim.
14
-
15
- Expects: {"eval": {"task": ..., "model": ..., "dataset": {"name": ...}},
16
- "results": {"total_samples": n, "scores": [{"metrics": {metric: {"value": <number>}}}]}}.
17
- Returns (claim, salts).
18
- """
19
- data = json.loads(Path(path).read_text(encoding="utf-8"))
20
- ev = data.get("eval", {})
21
- scores = data.get("results", {}).get("scores", [])
22
- value = None
23
- for s in scores:
24
- m = s.get("metrics", {})
25
- if metric in m:
26
- value = m[metric].get("value")
27
- break
28
- if value is None:
29
- raise ValueError(f"metric {metric!r} not found in inspect_ai scores")
30
- n = int(data.get("results", {}).get("total_samples") or 0)
31
- return build_eval_claim(
32
- suite=str(ev.get("task", "inspect_ai")), suite_version=str(ev.get("task_version", "1")),
33
- metric=metric, comparator=comparator, threshold=threshold, score=repr(value), n=n,
34
- model_id=str(ev.get("model", "unknown")),
35
- dataset_id=str(ev.get("dataset", {}).get("name", ev.get("task", "unknown"))),
36
- issuer="", timestamp=timestamp, model_salt=model_salt, dataset_salt=dataset_salt)
@@ -1,32 +0,0 @@
1
- """Adapters map real exported eval JSON to a valid claim (file-based, no framework import)."""
2
- import unittest
3
- from pathlib import Path
4
-
5
- from proofbundle.adapters import from_inspect_ai_log, from_lm_eval_results
6
-
7
- FX = Path(__file__).resolve().parent / "fixtures"
8
- TS = "2026-07-01T12:00:00Z"
9
-
10
-
11
- class TestAdapters(unittest.TestCase):
12
- def test_lm_eval(self):
13
- claim, salts = from_lm_eval_results(FX / "lm_eval_results.json", "hellaswag", "acc",
14
- comparator=">=", threshold="0.70", timestamp=TS,
15
- model_salt=b"0" * 16, dataset_salt=b"1" * 16)
16
- self.assertEqual(claim["suite"], "hellaswag")
17
- self.assertEqual(claim["threshold"], "0.70")
18
- self.assertTrue(claim["passed"]) # 0.7534 >= 0.70
19
- self.assertNotIn("acme/model-x", str(claim)) # id only as salted commitment
20
- self.assertEqual(claim["n"], 10042)
21
-
22
- def test_inspect_ai(self):
23
- claim, salts = from_inspect_ai_log(FX / "inspect_ai_log.json", "accuracy",
24
- comparator=">=", threshold="0.80", timestamp=TS,
25
- model_salt=b"0" * 16, dataset_salt=b"1" * 16)
26
- self.assertEqual(claim["suite"], "safety_refusal")
27
- self.assertTrue(claim["passed"]) # 0.92 >= 0.80
28
- self.assertEqual(claim["n"], 500)
29
-
30
-
31
- if __name__ == "__main__":
32
- unittest.main()
File without changes
File without changes