proofbundle 0.6.0__tar.gz → 0.7.1__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {proofbundle-0.6.0/src/proofbundle.egg-info → proofbundle-0.7.1}/PKG-INFO +13 -7
  2. {proofbundle-0.6.0 → proofbundle-0.7.1}/README.md +10 -4
  3. {proofbundle-0.6.0 → proofbundle-0.7.1}/pyproject.toml +5 -4
  4. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/__init__.py +1 -1
  5. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/adapters/inspect_ai.py +15 -1
  6. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/bundle.py +46 -6
  7. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/evalclaim.py +17 -6
  8. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/intoto.py +17 -16
  9. {proofbundle-0.6.0 → proofbundle-0.7.1/src/proofbundle.egg-info}/PKG-INFO +13 -7
  10. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle.egg-info/SOURCES.txt +1 -0
  11. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle.egg-info/requires.txt +4 -0
  12. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_adapters.py +2 -0
  13. proofbundle-0.7.1/tests/test_bundle_robustness.py +74 -0
  14. {proofbundle-0.6.0 → proofbundle-0.7.1}/LICENSE +0 -0
  15. {proofbundle-0.6.0 → proofbundle-0.7.1}/setup.cfg +0 -0
  16. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/adapters/__init__.py +0 -0
  17. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/adapters/lm_eval.py +0 -0
  18. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/cli.py +0 -0
  19. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/emit.py +0 -0
  20. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/errors.py +0 -0
  21. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/merkle.py +0 -0
  22. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/py.typed +0 -0
  23. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/sdjwt.py +0 -0
  24. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/sdjwt_issue.py +0 -0
  25. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle/signature.py +0 -0
  26. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle.egg-info/dependency_links.txt +0 -0
  27. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle.egg-info/entry_points.txt +0 -0
  28. {proofbundle-0.6.0 → proofbundle-0.7.1}/src/proofbundle.egg-info/top_level.txt +0 -0
  29. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_bundle.py +0 -0
  30. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_cli.py +0 -0
  31. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_cli_eval.py +0 -0
  32. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_emit.py +0 -0
  33. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_eval_claim_schema.py +0 -0
  34. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_evalclaim.py +0 -0
  35. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_intoto.py +0 -0
  36. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_merkle.py +0 -0
  37. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_merkle_property.py +0 -0
  38. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_rekor_interop.py +0 -0
  39. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_rfc6962_external_vectors.py +0 -0
  40. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_schema.py +0 -0
  41. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_sdjwt_issue.py +0 -0
  42. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_sdjwt_reference.py +0 -0
  43. {proofbundle-0.6.0 → proofbundle-0.7.1}/tests/test_signature.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: proofbundle
3
- Version: 0.6.0
3
+ Version: 0.7.1
4
4
  Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
5
5
  Author: Konrad Gruszka
6
6
  License: MIT
@@ -28,7 +28,7 @@ Provides-Extra: eval
28
28
  Requires-Dist: rfc8785>=0.1.4; extra == "eval"
29
29
  Provides-Extra: adapters
30
30
  Provides-Extra: inspect
31
- Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "inspect"
31
+ Requires-Dist: inspect_ai<0.4,>=0.3.100; python_version >= "3.10" and extra == "inspect"
32
32
  Provides-Extra: dev
33
33
  Requires-Dist: pytest>=7; extra == "dev"
34
34
  Requires-Dist: ruff>=0.5; extra == "dev"
@@ -38,7 +38,7 @@ Requires-Dist: build>=1; extra == "dev"
38
38
  Requires-Dist: hypothesis>=6; extra == "dev"
39
39
  Requires-Dist: rfc8785>=0.1.4; extra == "dev"
40
40
  Requires-Dist: sd-jwt>=0.10; extra == "dev"
41
- Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "dev"
41
+ Requires-Dist: inspect_ai<0.4,>=0.3.100; python_version >= "3.10" and extra == "dev"
42
42
  Dynamic: license-file
43
43
 
44
44
  <div align="center">
@@ -62,12 +62,15 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
62
62
  [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
63
63
  [![SLSA build provenance](https://img.shields.io/badge/SLSA-build_provenance-D6248A.svg)](https://slsa.dev)
64
64
  [![PyPI attestations](https://img.shields.io/badge/PyPI-attestations_(PEP_740)-D6248A.svg)](https://pypi.org/project/proofbundle/)
65
+ <!-- DOI badge placeholder: enable Zenodo archiving for this repo, then add the Zenodo concept-DOI
66
+ badge here (and the DOI to CITATION.cff) once Zenodo assigns one on the next release. No DOI has
67
+ been assigned yet (no archived record exists at build time) — tracked in the human checklist. -->
65
68
 
66
69
  </div>
67
70
 
68
71
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
69
72
  verify` checks one self-contained `bundle.json` with three offline cryptographic
70
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 63 tests.
73
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
71
74
 
72
75
  ## Contents
73
76
 
@@ -290,7 +293,8 @@ trust the number.
290
293
 
291
294
  ### A verification layer for trustworthy eval logs
292
295
 
293
- The UK AISI inspect_ai team names an open gap ([arXiv:2507.06893](https://arxiv.org/abs/2507.06893)):
296
+ The maintainers of inspect_evals (Arcadia Impact, funded by the UK AI Safety Institute) name an open
297
+ gap ([arXiv:2507.06893](https://arxiv.org/abs/2507.06893)):
294
298
  a database of trustworthy evaluation results with proper provenance tracking. proofbundle is the
295
299
  missing **signature + selective-disclosure layer** for exactly that — complementary to metadata
296
300
  aggregation (Every Eval Ever) and documentation taxonomies (Eval Factsheets), not a competitor.
@@ -322,8 +326,10 @@ attestation — see [SECURITY.md](SECURITY.md).
322
326
  - **v0.4** — the eval-receipt emitter (`emit_eval_receipt` / `proofbundle emit-eval`),
323
327
  salted commitments, issuer binding.
324
328
  - **v0.5** — inspect_ai adapter (stable API), in-toto Statement v1 view, SD-JWT **issuance** (RFC 9901).
325
- - **v0.6 (current release)** — a second eval adapter (lm-evaluation-harness, real format + provenance),
326
- INTEROP.md, CITATION.cff, PEP 740 attestations documented.
329
+ - **v0.6** — a second eval adapter (lm-evaluation-harness, real format + provenance), INTEROP.md,
330
+ CITATION.cff, PEP 740 attestations documented.
331
+ - **v0.7 (current release)** — citability polish: ORCID in CITATION.cff, a Zenodo DOI placeholder
332
+ (assigned on release), and a draft in-toto ML-eval predicate proposal.
327
333
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
328
334
  Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
329
335
 
@@ -19,12 +19,15 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
19
19
  [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
20
20
  [![SLSA build provenance](https://img.shields.io/badge/SLSA-build_provenance-D6248A.svg)](https://slsa.dev)
21
21
  [![PyPI attestations](https://img.shields.io/badge/PyPI-attestations_(PEP_740)-D6248A.svg)](https://pypi.org/project/proofbundle/)
22
+ <!-- DOI badge placeholder: enable Zenodo archiving for this repo, then add the Zenodo concept-DOI
23
+ badge here (and the DOI to CITATION.cff) once Zenodo assigns one on the next release. No DOI has
24
+ been assigned yet (no archived record exists at build time) — tracked in the human checklist. -->
22
25
 
23
26
  </div>
24
27
 
25
28
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
26
29
  verify` checks one self-contained `bundle.json` with three offline cryptographic
27
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 63 tests.
30
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
28
31
 
29
32
  ## Contents
30
33
 
@@ -247,7 +250,8 @@ trust the number.
247
250
 
248
251
  ### A verification layer for trustworthy eval logs
249
252
 
250
- The UK AISI inspect_ai team names an open gap ([arXiv:2507.06893](https://arxiv.org/abs/2507.06893)):
253
+ The maintainers of inspect_evals (Arcadia Impact, funded by the UK AI Safety Institute) name an open
254
+ gap ([arXiv:2507.06893](https://arxiv.org/abs/2507.06893)):
251
255
  a database of trustworthy evaluation results with proper provenance tracking. proofbundle is the
252
256
  missing **signature + selective-disclosure layer** for exactly that — complementary to metadata
253
257
  aggregation (Every Eval Ever) and documentation taxonomies (Eval Factsheets), not a competitor.
@@ -279,8 +283,10 @@ attestation — see [SECURITY.md](SECURITY.md).
279
283
  - **v0.4** — the eval-receipt emitter (`emit_eval_receipt` / `proofbundle emit-eval`),
280
284
  salted commitments, issuer binding.
281
285
  - **v0.5** — inspect_ai adapter (stable API), in-toto Statement v1 view, SD-JWT **issuance** (RFC 9901).
282
- - **v0.6 (current release)** — a second eval adapter (lm-evaluation-harness, real format + provenance),
283
- INTEROP.md, CITATION.cff, PEP 740 attestations documented.
286
+ - **v0.6** — a second eval adapter (lm-evaluation-harness, real format + provenance), INTEROP.md,
287
+ CITATION.cff, PEP 740 attestations documented.
288
+ - **v0.7 (current release)** — citability polish: ORCID in CITATION.cff, a Zenodo DOI placeholder
289
+ (assigned on release), and a draft in-toto ML-eval predicate proposal.
284
290
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
285
291
  Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
286
292
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "proofbundle"
7
- version = "0.6.0"
7
+ version = "0.7.1"
8
8
  description = "Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -47,10 +47,11 @@ eval = ["rfc8785>=0.1.4"]
47
47
  adapters = []
48
48
  # The inspect_ai adapter uses the STABLE read_eval_log API (lazy import). Pinned with an UPPER bound:
49
49
  # the .eval format + pydantic schema change between versions (inspect_ai issue 834), and the fixture
50
- # test is bound to this range. `pip install "proofbundle[inspect]"`.
51
- inspect = ["inspect_ai>=0.3.100,<0.4"]
50
+ # test is bound to this range. inspect_ai requires Python >= 3.10, so the marker gates it out on 3.9
51
+ # (base + [eval]/[sdjwt] still work on 3.9; the inspect adapter test skips there). Fixes the red 3.9 CI.
52
+ inspect = ['inspect_ai>=0.3.100,<0.4; python_version >= "3.10"']
52
53
  dev = ["pytest>=7", "ruff>=0.5", "jsonschema>=4", "mypy>=1.8", "build>=1", "hypothesis>=6",
53
- "rfc8785>=0.1.4", "sd-jwt>=0.10", "inspect_ai>=0.3.100,<0.4"]
54
+ "rfc8785>=0.1.4", "sd-jwt>=0.10", 'inspect_ai>=0.3.100,<0.4; python_version >= "3.10"']
54
55
 
55
56
  [project.urls]
56
57
  Homepage = "https://b7n0de.com"
@@ -13,7 +13,7 @@ from .emit import emit_bundle, generate_signer
13
13
  from .errors import Check, ProofBundleError, VerificationResult
14
14
  from .merkle import verify_consistency, verify_inclusion
15
15
 
16
- __version__ = "0.6.0"
16
+ __version__ = "0.7.1"
17
17
 
18
18
  __all__ = [
19
19
  "__version__",
@@ -57,9 +57,23 @@ def from_inspect_ai_log(path, metric: str, *, comparator: str, threshold: str, t
57
57
  model_id = str(getattr(ev, "model", "unknown"))
58
58
  dataset = getattr(ev, "dataset", None)
59
59
  dataset_id = str(getattr(dataset, "name", None) or suite)
60
+
61
+ # Provenance parity with the lm-eval adapter: inspect_ai exposes the same run provenance for free.
62
+ provenance = {"harness": "inspect_ai"}
63
+ revision = getattr(ev, "revision", None)
64
+ commit = getattr(revision, "commit", None)
65
+ if commit:
66
+ provenance["git_hash"] = str(commit)
67
+ packages = getattr(ev, "packages", None) or {}
68
+ if isinstance(packages, dict) and packages.get("inspect_ai"):
69
+ provenance["harness_version"] = str(packages["inspect_ai"])
70
+ tv = getattr(ev, "task_version", None)
71
+ if tv is not None:
72
+ provenance["task_version"] = str(tv)
73
+
60
74
  return build_eval_claim(
61
75
  suite=suite, suite_version=str(getattr(ev, "task_version", "1")),
62
76
  metric=metric, comparator=comparator, threshold=threshold, score=repr(value),
63
77
  n=int(getattr(results, "total_samples", 0) or 0),
64
78
  model_id=model_id, dataset_id=dataset_id, issuer="", timestamp=timestamp,
65
- model_salt=model_salt, dataset_salt=dataset_salt)
79
+ provenance=provenance, model_salt=model_salt, dataset_salt=dataset_salt)
@@ -12,7 +12,11 @@ checks, fully offline and without any running log server:
12
12
  The verifier treats ``payload`` as opaque bytes: it proves *that these exact
13
13
  bytes were signed and anchored*, not what they mean. That keeps v0.1 small and
14
14
  correct. Turning a reproducible eval run into such a payload is the job of the
15
- emitter (see ``emit.py``, roadmap).
15
+ eval-receipt emitter (see :mod:`proofbundle.evalclaim`, since v0.4).
16
+
17
+ Malformed input (wrong types, missing or unknown fields) is rejected with a
18
+ ``BundleFormatError`` — never a raw traceback — so a caller gets the documented
19
+ malformed exit code, not a crash.
16
20
  """
17
21
 
18
22
  from __future__ import annotations
@@ -30,6 +34,13 @@ __all__ = ["SCHEMA", "verify_bundle", "load_bundle"]
30
34
 
31
35
  SCHEMA = "proofbundle/v0.1"
32
36
 
37
+ # Allowed keys per object — SPEC.md §3: a verifier MUST reject unknown fields (schema is
38
+ # additionalProperties: false). Enforced here so the code matches its own normative spec.
39
+ _TOP_KEYS = {"schema", "payload_b64", "signature", "merkle", "sd_jwt_vc"}
40
+ _SIG_KEYS = {"alg", "public_key_b64", "sig_b64"}
41
+ _MERKLE_KEYS = {"hash_alg", "leaf_index", "tree_size", "inclusion_proof_b64", "root_b64"}
42
+ _SD_KEYS = {"compact", "issuer_public_key_b64"}
43
+
33
44
 
34
45
  def _b64d(value: str, field: str) -> bytes:
35
46
  try:
@@ -44,6 +55,27 @@ def _require(obj: dict, key: str, field: str):
44
55
  return obj[key]
45
56
 
46
57
 
58
+ def _require_dict(obj, field: str) -> dict:
59
+ """The value must be a JSON object — a string/list/number is malformed, not a crash."""
60
+ if not isinstance(obj, dict):
61
+ raise BundleFormatError(f"field {field} must be a JSON object")
62
+ return obj
63
+
64
+
65
+ def _require_int(obj: dict, key: str, field: str) -> int:
66
+ """The value must be a JSON integer — reject floats (SPEC §2) and non-numeric strings/None."""
67
+ val = _require(obj, key, field)
68
+ if isinstance(val, bool) or not isinstance(val, int): # bool is an int subclass; a float/str/None is not
69
+ raise BundleFormatError(f"field {field} must be an integer, got {type(val).__name__}")
70
+ return val
71
+
72
+
73
+ def _reject_unknown(obj: dict, allowed: set, field: str) -> None:
74
+ extra = set(obj) - allowed
75
+ if extra:
76
+ raise BundleFormatError(f"unknown field(s) in {field}: {sorted(extra)}")
77
+
78
+
47
79
  def load_bundle(path: str) -> dict:
48
80
  """Read and JSON-parse a bundle file."""
49
81
  with open(path, "r", encoding="utf-8") as handle:
@@ -60,12 +92,14 @@ def verify_bundle(bundle: Union[dict, str]) -> VerificationResult:
60
92
  schema = bundle.get("schema")
61
93
  if schema != SCHEMA:
62
94
  raise UnsupportedError(f"unsupported schema {schema!r}, expected {SCHEMA!r}")
95
+ _reject_unknown(bundle, _TOP_KEYS, "bundle")
63
96
 
64
97
  result = VerificationResult()
65
98
  payload = _b64d(_require(bundle, "payload_b64", "payload_b64"), "payload_b64")
66
99
 
67
100
  # 1. signature over the payload
68
- sig = _require(bundle, "signature", "signature")
101
+ sig = _require_dict(_require(bundle, "signature", "signature"), "signature")
102
+ _reject_unknown(sig, _SIG_KEYS, "signature")
69
103
  alg = sig.get("alg")
70
104
  if alg != "ed25519":
71
105
  raise UnsupportedError(f"signature alg {alg!r} not supported in v0.1")
@@ -75,13 +109,17 @@ def verify_bundle(bundle: Union[dict, str]) -> VerificationResult:
75
109
  result.add("ed25519-signature", sig_ok, "payload signed by stated key" if sig_ok else "invalid signature")
76
110
 
77
111
  # 2. merkle inclusion of the payload
78
- mk = _require(bundle, "merkle", "merkle")
112
+ mk = _require_dict(_require(bundle, "merkle", "merkle"), "merkle")
113
+ _reject_unknown(mk, _MERKLE_KEYS, "merkle")
79
114
  hash_alg = mk.get("hash_alg", "sha256-rfc6962")
80
115
  if hash_alg != "sha256-rfc6962":
81
116
  raise UnsupportedError(f"merkle hash_alg {hash_alg!r} not supported in v0.1")
82
- leaf_index = int(_require(mk, "leaf_index", "merkle.leaf_index"))
83
- tree_size = int(_require(mk, "tree_size", "merkle.tree_size"))
84
- proof = [_b64d(p, "merkle.inclusion_proof_b64[]") for p in mk.get("inclusion_proof_b64", [])]
117
+ leaf_index = _require_int(mk, "leaf_index", "merkle.leaf_index")
118
+ tree_size = _require_int(mk, "tree_size", "merkle.tree_size")
119
+ proof_list = _require(mk, "inclusion_proof_b64", "merkle.inclusion_proof_b64") # required per SPEC §5
120
+ if not isinstance(proof_list, list):
121
+ raise BundleFormatError("field merkle.inclusion_proof_b64 must be a list")
122
+ proof = [_b64d(p, "merkle.inclusion_proof_b64[]") for p in proof_list]
85
123
  root = _b64d(_require(mk, "root_b64", "merkle.root_b64"), "merkle.root_b64")
86
124
  incl_ok = merkle.verify_inclusion(payload, leaf_index, tree_size, proof, root)
87
125
  result.add(
@@ -93,6 +131,8 @@ def verify_bundle(bundle: Union[dict, str]) -> VerificationResult:
93
131
  # 3. optional SD-JWT selective disclosure credential
94
132
  sd = bundle.get("sd_jwt_vc")
95
133
  if sd is not None:
134
+ sd = _require_dict(sd, "sd_jwt_vc")
135
+ _reject_unknown(sd, _SD_KEYS, "sd_jwt_vc")
96
136
  compact = _require(sd, "compact", "sd_jwt_vc.compact")
97
137
  issuer_pub = None
98
138
  if sd.get("issuer_public_key_b64"):
@@ -21,6 +21,7 @@ import base64
21
21
  import hashlib
22
22
  import json
23
23
  import os
24
+ import re
24
25
  import unicodedata
25
26
  from typing import Optional, Sequence
26
27
 
@@ -34,6 +35,8 @@ EVAL_CLAIM_SCHEMA = "proofbundle/eval-claim/v0.1"
34
35
  COMMIT_ALG = "sha256-salted-v1"
35
36
  _COMPARATORS = {">=", ">", "<=", "<"}
36
37
  _MAX_SAFE_INT = 2 ** 53 - 1
38
+ # The published eval-claim schema's decimal pattern for threshold/score (no exponent, no sign+, no spaces).
39
+ _DECIMAL_RE = re.compile(r"^-?[0-9]+(\.[0-9]+)?$")
37
40
  # The exact key set of an eval claim; decode/validate reject anything else.
38
41
  _REQUIRED = {"schema", "suite", "suite_version", "metric", "comparator", "threshold",
39
42
  "passed", "n", "model_id_commit", "dataset_id_commit", "commit_alg", "issuer", "timestamp"}
@@ -103,7 +106,12 @@ def canonicalize(claim: dict) -> bytes:
103
106
  for the UTF-16 code-unit key sort + compact UTF-8 serialization.
104
107
  """
105
108
  _reject_non_jcs(claim)
106
- import rfc8785 # noqa: PLC0415 — lazy: only the emit path pulls the JCS dependency
109
+ try:
110
+ import rfc8785 # noqa: PLC0415 — lazy: only the emit path pulls the JCS dependency
111
+ except ImportError as e:
112
+ raise EvalClaimError(
113
+ "emitting eval receipts needs an RFC 8785 canonicalizer — install with: "
114
+ "pip install \"proofbundle[eval]\"") from e
107
115
  try:
108
116
  return rfc8785.dumps(claim)
109
117
  except (rfc8785.FloatDomainError, rfc8785.IntegerDomainError, rfc8785.CanonicalizationError) as e:
@@ -137,14 +145,17 @@ def build_eval_claim(*, suite: str, suite_version: str, metric: str, comparator:
137
145
  """
138
146
  if comparator not in _COMPARATORS:
139
147
  raise EvalClaimError(f"comparator must be one of {sorted(_COMPARATORS)}")
148
+ # threshold/score must match the PUBLISHED schema's decimal pattern exactly — reject "1e2",
149
+ # "Infinity", "+5", " 5 " etc. that Decimal() would accept but jsonschema rejects (schema-conformance).
140
150
  for name, val in (("threshold", threshold), ("score", score)):
141
151
  if not isinstance(val, str):
142
152
  raise EvalClaimError(f"{name} must be a decimal STRING, not {type(val).__name__}")
143
- from decimal import Decimal, InvalidOperation # noqa: PLC0415
144
- try:
145
- s, t = Decimal(score), Decimal(threshold)
146
- except InvalidOperation as e:
147
- raise EvalClaimError(f"threshold/score are not valid decimals: {e}") from e
153
+ if not _DECIMAL_RE.match(val):
154
+ raise EvalClaimError(f"{name} must be a plain decimal string (^-?[0-9]+(\\.[0-9]+)?$), got {val!r}")
155
+ if not isinstance(n, int) or isinstance(n, bool) or n < 0 or n > _MAX_SAFE_INT:
156
+ raise EvalClaimError(f"n must be a non-negative integer <= 2**53-1, got {n!r}")
157
+ from decimal import Decimal # noqa: PLC0415
158
+ s, t = Decimal(score), Decimal(threshold)
148
159
  passed = {">=": s >= t, ">": s > t, "<=": s <= t, "<": s < t}[comparator]
149
160
  m_salt = model_salt if model_salt is not None else os.urandom(16)
150
161
  d_salt = dataset_salt if dataset_salt is not None else os.urandom(16)
@@ -12,7 +12,7 @@ exists (deferred, see the roadmap).
12
12
  """
13
13
  from __future__ import annotations
14
14
 
15
- from typing import Optional
15
+ from typing import Any, Optional
16
16
 
17
17
  STATEMENT_TYPE = "https://in-toto.io/Statement/v1"
18
18
  PREDICATE_TYPE = "https://b7n0de.com/proofbundle/eval-receipt/v0.1"
@@ -37,6 +37,21 @@ def to_intoto_statement(claim: dict, *, root_b64: Optional[str] = None,
37
37
  (e.g. {"name": "inspect_ai", "version": "0.3.217"}) is optional. The subject digest is the model
38
38
  commitment under a custom key (never `sha256`).
39
39
  """
40
+ predicate: dict[str, Any] = {
41
+ "verifier": {"id": VERIFIER_ID},
42
+ "evaluatedAt": claim["timestamp"],
43
+ "suite": claim["suite"],
44
+ "claims": [{
45
+ "metric": claim["metric"], "comparator": claim["comparator"],
46
+ "threshold": claim["threshold"], "passed": claim["passed"],
47
+ }],
48
+ "datasetCommit": claim.get("dataset_id_commit"),
49
+ "subject_digest_note": _SUBJECT_DIGEST_NOTE,
50
+ }
51
+ if harness:
52
+ predicate["harness"] = harness
53
+ if root_b64:
54
+ predicate["receipt"] = {"schema": "proofbundle/v0.1", "root_b64": root_b64}
40
55
  statement = {
41
56
  "_type": STATEMENT_TYPE,
42
57
  "subject": [{
@@ -44,20 +59,6 @@ def to_intoto_statement(claim: dict, *, root_b64: Optional[str] = None,
44
59
  "digest": {MODEL_COMMIT_DIGEST_KEY: _commit_hex(claim["model_id_commit"])},
45
60
  }],
46
61
  "predicateType": PREDICATE_TYPE,
47
- "predicate": {
48
- "verifier": {"id": VERIFIER_ID},
49
- "evaluatedAt": claim["timestamp"],
50
- "suite": claim["suite"],
51
- "claims": [{
52
- "metric": claim["metric"], "comparator": claim["comparator"],
53
- "threshold": claim["threshold"], "passed": claim["passed"],
54
- }],
55
- "datasetCommit": claim.get("dataset_id_commit"),
56
- "subject_digest_note": _SUBJECT_DIGEST_NOTE,
57
- },
62
+ "predicate": predicate,
58
63
  }
59
- if harness:
60
- statement["predicate"]["harness"] = harness
61
- if root_b64:
62
- statement["predicate"]["receipt"] = {"schema": "proofbundle/v0.1", "root_b64": root_b64}
63
64
  return statement
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: proofbundle
3
- Version: 0.6.0
3
+ Version: 0.7.1
4
4
  Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
5
5
  Author: Konrad Gruszka
6
6
  License: MIT
@@ -28,7 +28,7 @@ Provides-Extra: eval
28
28
  Requires-Dist: rfc8785>=0.1.4; extra == "eval"
29
29
  Provides-Extra: adapters
30
30
  Provides-Extra: inspect
31
- Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "inspect"
31
+ Requires-Dist: inspect_ai<0.4,>=0.3.100; python_version >= "3.10" and extra == "inspect"
32
32
  Provides-Extra: dev
33
33
  Requires-Dist: pytest>=7; extra == "dev"
34
34
  Requires-Dist: ruff>=0.5; extra == "dev"
@@ -38,7 +38,7 @@ Requires-Dist: build>=1; extra == "dev"
38
38
  Requires-Dist: hypothesis>=6; extra == "dev"
39
39
  Requires-Dist: rfc8785>=0.1.4; extra == "dev"
40
40
  Requires-Dist: sd-jwt>=0.10; extra == "dev"
41
- Requires-Dist: inspect_ai<0.4,>=0.3.100; extra == "dev"
41
+ Requires-Dist: inspect_ai<0.4,>=0.3.100; python_version >= "3.10" and extra == "dev"
42
42
  Dynamic: license-file
43
43
 
44
44
  <div align="center">
@@ -62,12 +62,15 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
62
62
  [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
63
63
  [![SLSA build provenance](https://img.shields.io/badge/SLSA-build_provenance-D6248A.svg)](https://slsa.dev)
64
64
  [![PyPI attestations](https://img.shields.io/badge/PyPI-attestations_(PEP_740)-D6248A.svg)](https://pypi.org/project/proofbundle/)
65
+ <!-- DOI badge placeholder: enable Zenodo archiving for this repo, then add the Zenodo concept-DOI
66
+ badge here (and the DOI to CITATION.cff) once Zenodo assigns one on the next release. No DOI has
67
+ been assigned yet (no archived record exists at build time) — tracked in the human checklist. -->
65
68
 
66
69
  </div>
67
70
 
68
71
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
69
72
  verify` checks one self-contained `bundle.json` with three offline cryptographic
70
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 63 tests.
73
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
71
74
 
72
75
  ## Contents
73
76
 
@@ -290,7 +293,8 @@ trust the number.
290
293
 
291
294
  ### A verification layer for trustworthy eval logs
292
295
 
293
- The UK AISI inspect_ai team names an open gap ([arXiv:2507.06893](https://arxiv.org/abs/2507.06893)):
296
+ The maintainers of inspect_evals (Arcadia Impact, funded by the UK AI Safety Institute) name an open
297
+ gap ([arXiv:2507.06893](https://arxiv.org/abs/2507.06893)):
294
298
  a database of trustworthy evaluation results with proper provenance tracking. proofbundle is the
295
299
  missing **signature + selective-disclosure layer** for exactly that — complementary to metadata
296
300
  aggregation (Every Eval Ever) and documentation taxonomies (Eval Factsheets), not a competitor.
@@ -322,8 +326,10 @@ attestation — see [SECURITY.md](SECURITY.md).
322
326
  - **v0.4** — the eval-receipt emitter (`emit_eval_receipt` / `proofbundle emit-eval`),
323
327
  salted commitments, issuer binding.
324
328
  - **v0.5** — inspect_ai adapter (stable API), in-toto Statement v1 view, SD-JWT **issuance** (RFC 9901).
325
- - **v0.6 (current release)** — a second eval adapter (lm-evaluation-harness, real format + provenance),
326
- INTEROP.md, CITATION.cff, PEP 740 attestations documented.
329
+ - **v0.6** — a second eval adapter (lm-evaluation-harness, real format + provenance), INTEROP.md,
330
+ CITATION.cff, PEP 740 attestations documented.
331
+ - **v0.7 (current release)** — citability polish: ORCID in CITATION.cff, a Zenodo DOI placeholder
332
+ (assigned on release), and a draft in-toto ML-eval predicate proposal.
327
333
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
328
334
  Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
329
335
 
@@ -24,6 +24,7 @@ src/proofbundle/adapters/inspect_ai.py
24
24
  src/proofbundle/adapters/lm_eval.py
25
25
  tests/test_adapters.py
26
26
  tests/test_bundle.py
27
+ tests/test_bundle_robustness.py
27
28
  tests/test_cli.py
28
29
  tests/test_cli_eval.py
29
30
  tests/test_emit.py
@@ -11,12 +11,16 @@ build>=1
11
11
  hypothesis>=6
12
12
  rfc8785>=0.1.4
13
13
  sd-jwt>=0.10
14
+
15
+ [dev:python_version >= "3.10"]
14
16
  inspect_ai<0.4,>=0.3.100
15
17
 
16
18
  [eval]
17
19
  rfc8785>=0.1.4
18
20
 
19
21
  [inspect]
22
+
23
+ [inspect:python_version >= "3.10"]
20
24
  inspect_ai<0.4,>=0.3.100
21
25
 
22
26
  [sdjwt]
@@ -39,6 +39,8 @@ class TestAdapters(unittest.TestCase):
39
39
  self.assertEqual(claim["suite"], "safety_refusal_demo")
40
40
  self.assertTrue(claim["passed"]) # accuracy 0.0 >= 0.00
41
41
  self.assertNotIn("mockllm/model", str(claim)) # model id only as salted commitment
42
+ self.assertEqual(claim["provenance"]["harness"], "inspect_ai") # provenance parity with lm-eval
43
+ self.assertIn("harness_version", claim["provenance"])
42
44
 
43
45
  def test_inspect_ai_missing_metric_clear_error(self):
44
46
  from proofbundle.adapters.inspect_ai import InspectAdapterError
@@ -0,0 +1,74 @@
1
+ """Malformed-input robustness of verify_bundle + build_eval_claim (holistic-review findings, 0.7.1).
2
+
3
+ The verifier's contract is OK/FAILED/malformed — never a raw traceback. build_eval_claim must not emit a
4
+ receipt that fails its own published schema. One red-test per finding."""
5
+ import copy
6
+ import unittest
7
+
8
+ from proofbundle import verify_bundle
9
+ from proofbundle.emit import emit_bundle, generate_signer
10
+ from proofbundle.errors import BundleFormatError
11
+ from proofbundle.evalclaim import EvalClaimError, build_eval_claim
12
+
13
+
14
+ def _bundle():
15
+ return emit_bundle(b"payload", generate_signer())
16
+
17
+
18
+ def _mut(mut):
19
+ b = copy.deepcopy(_bundle())
20
+ mut(b)
21
+ return b
22
+
23
+
24
+ class TestBundleRobustness(unittest.TestCase):
25
+ def test_leaf_index_non_numeric_raises_format_error(self): # D1
26
+ with self.assertRaises(BundleFormatError):
27
+ verify_bundle(_mut(lambda b: b["merkle"].__setitem__("leaf_index", "abc")))
28
+
29
+ def test_signature_non_object_raises_format_error(self): # D2
30
+ with self.assertRaises(BundleFormatError):
31
+ verify_bundle(_mut(lambda b: b.__setitem__("signature", "notadict")))
32
+ with self.assertRaises(BundleFormatError):
33
+ verify_bundle(_mut(lambda b: b.__setitem__("merkle", ["x"])))
34
+
35
+ def test_tree_size_float_rejected(self): # D3 (SPEC §2: integers only)
36
+ with self.assertRaises(BundleFormatError):
37
+ verify_bundle(_mut(lambda b: b["merkle"].__setitem__("tree_size", 1.5)))
38
+
39
+ def test_missing_inclusion_proof_rejected(self): # D4 (SPEC §5: required)
40
+ with self.assertRaises(BundleFormatError):
41
+ verify_bundle(_mut(lambda b: b["merkle"].pop("inclusion_proof_b64")))
42
+
43
+ def test_unknown_fields_rejected(self): # SPEC §3: additionalProperties false
44
+ with self.assertRaises(BundleFormatError):
45
+ verify_bundle(_mut(lambda b: b.__setitem__("evil", "x")))
46
+ with self.assertRaises(BundleFormatError):
47
+ verify_bundle(_mut(lambda b: b["signature"].__setitem__("evil", "x")))
48
+ with self.assertRaises(BundleFormatError):
49
+ verify_bundle(_mut(lambda b: b["merkle"].__setitem__("evil", "x")))
50
+
51
+ def test_well_formed_still_ok(self): # no false positive
52
+ self.assertTrue(verify_bundle(_bundle()).ok)
53
+
54
+
55
+ class TestEvalClaimSchemaConformance(unittest.TestCase):
56
+ def _build(self, **kw):
57
+ base = dict(suite="s", suite_version="v1", metric="acc", comparator=">=", threshold="0.8",
58
+ score="0.9", n=1, model_id="m", dataset_id="d", issuer="",
59
+ timestamp="2026-07-01T12:00:00Z", model_salt=b"0" * 16, dataset_salt=b"1" * 16)
60
+ base.update(kw)
61
+ return build_eval_claim(**base)
62
+
63
+ def test_negative_n_rejected(self): # schema minimum 0
64
+ with self.assertRaises(EvalClaimError):
65
+ self._build(n=-5)
66
+
67
+ def test_exponent_and_sign_threshold_rejected(self): # schema decimal pattern
68
+ for bad in ("1e2", "Infinity", "+5", " 0.9 "):
69
+ with self.assertRaises(EvalClaimError):
70
+ self._build(threshold=bad)
71
+
72
+ def test_plain_decimal_accepted(self):
73
+ claim, _ = self._build(threshold="0.80", score="0.92")
74
+ self.assertTrue(claim["passed"])
File without changes
File without changes