proofbundle 0.8.1__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {proofbundle-0.8.1/src/proofbundle.egg-info → proofbundle-1.0.0}/PKG-INFO +71 -22
  2. {proofbundle-0.8.1 → proofbundle-1.0.0}/README.md +66 -19
  3. {proofbundle-0.8.1 → proofbundle-1.0.0}/pyproject.toml +13 -4
  4. proofbundle-1.0.0/src/proofbundle/__init__.py +56 -0
  5. proofbundle-1.0.0/src/proofbundle/_inspect_registry.py +3 -0
  6. proofbundle-1.0.0/src/proofbundle/_integration.py +84 -0
  7. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/adapters/__init__.py +2 -1
  8. proofbundle-1.0.0/src/proofbundle/adapters/eee.py +175 -0
  9. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/adapters/inspect_ai.py +32 -13
  10. proofbundle-1.0.0/src/proofbundle/checkpoint.py +157 -0
  11. proofbundle-1.0.0/src/proofbundle/dsse.py +110 -0
  12. proofbundle-1.0.0/src/proofbundle/eee_eval_schema.json +769 -0
  13. proofbundle-1.0.0/src/proofbundle/inspect_hook.py +63 -0
  14. proofbundle-1.0.0/src/proofbundle/intoto.py +182 -0
  15. proofbundle-1.0.0/src/proofbundle/pytest_plugin.py +67 -0
  16. {proofbundle-0.8.1 → proofbundle-1.0.0/src/proofbundle.egg-info}/PKG-INFO +71 -22
  17. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle.egg-info/SOURCES.txt +13 -0
  18. proofbundle-1.0.0/src/proofbundle.egg-info/entry_points.txt +8 -0
  19. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle.egg-info/requires.txt +5 -2
  20. proofbundle-1.0.0/tests/test_checkpoint.py +69 -0
  21. proofbundle-1.0.0/tests/test_eee.py +67 -0
  22. proofbundle-1.0.0/tests/test_inspect_hook.py +57 -0
  23. proofbundle-1.0.0/tests/test_intoto_dsse.py +83 -0
  24. proofbundle-1.0.0/tests/test_pytest_plugin.py +62 -0
  25. proofbundle-0.8.1/src/proofbundle/__init__.py +0 -30
  26. proofbundle-0.8.1/src/proofbundle/intoto.py +0 -64
  27. proofbundle-0.8.1/src/proofbundle.egg-info/entry_points.txt +0 -2
  28. {proofbundle-0.8.1 → proofbundle-1.0.0}/LICENSE +0 -0
  29. {proofbundle-0.8.1 → proofbundle-1.0.0}/setup.cfg +0 -0
  30. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/adapters/lm_eval.py +0 -0
  31. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/bundle.py +0 -0
  32. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/cli.py +0 -0
  33. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/emit.py +0 -0
  34. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/errors.py +0 -0
  35. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/evalclaim.py +0 -0
  36. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/merkle.py +0 -0
  37. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/py.typed +0 -0
  38. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/sdjwt.py +0 -0
  39. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/sdjwt_issue.py +0 -0
  40. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle/signature.py +0 -0
  41. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle.egg-info/dependency_links.txt +0 -0
  42. {proofbundle-0.8.1 → proofbundle-1.0.0}/src/proofbundle.egg-info/top_level.txt +0 -0
  43. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_adapters.py +0 -0
  44. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_bundle.py +0 -0
  45. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_bundle_robustness.py +0 -0
  46. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_cli.py +0 -0
  47. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_cli_eval.py +0 -0
  48. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_emit.py +0 -0
  49. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_eval_claim_schema.py +0 -0
  50. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_evalclaim.py +0 -0
  51. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_examples.py +0 -0
  52. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_intoto.py +0 -0
  53. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_merkle.py +0 -0
  54. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_merkle_property.py +0 -0
  55. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_rekor_interop.py +0 -0
  56. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_rfc6962_external_vectors.py +0 -0
  57. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_schema.py +0 -0
  58. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_sdjwt_issue.py +0 -0
  59. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_sdjwt_reference.py +0 -0
  60. {proofbundle-0.8.1 → proofbundle-1.0.0}/tests/test_signature.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: proofbundle
3
- Version: 0.8.1
3
+ Version: 1.0.0
4
4
  Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
5
5
  Author: Konrad Gruszka
6
6
  License: MIT
@@ -27,8 +27,10 @@ Provides-Extra: sdjwt
27
27
  Provides-Extra: eval
28
28
  Requires-Dist: rfc8785>=0.1.4; extra == "eval"
29
29
  Provides-Extra: adapters
30
+ Provides-Extra: pytest
31
+ Requires-Dist: pytest>=7; extra == "pytest"
30
32
  Provides-Extra: inspect
31
- Requires-Dist: inspect_ai<0.4,>=0.3.100; python_version >= "3.10" and extra == "inspect"
33
+ Requires-Dist: inspect_ai<0.4,>=0.3.112; python_version >= "3.10" and extra == "inspect"
32
34
  Provides-Extra: dev
33
35
  Requires-Dist: pytest>=7; extra == "dev"
34
36
  Requires-Dist: ruff>=0.5; extra == "dev"
@@ -38,7 +40,7 @@ Requires-Dist: build>=1; extra == "dev"
38
40
  Requires-Dist: hypothesis>=6; extra == "dev"
39
41
  Requires-Dist: rfc8785>=0.1.4; extra == "dev"
40
42
  Requires-Dist: sd-jwt>=0.10; extra == "dev"
41
- Requires-Dist: inspect_ai<0.4,>=0.3.100; python_version >= "3.10" and extra == "dev"
43
+ Requires-Dist: inspect_ai<0.4,>=0.3.112; python_version >= "3.10" and extra == "dev"
42
44
  Dynamic: license-file
43
45
 
44
46
  <div align="center">
@@ -50,9 +52,10 @@ Dynamic: license-file
50
52
 
51
53
  <h1>proofbundle</h1>
52
54
 
53
- **Emit and verify, fully offline, portable evidence that a piece of data was
54
- signed and anchored in a tamper-evident log and optionally carries a
55
- selectively disclosable credential. Pure Python, no server, no daemon, one JSON file.**
55
+ **An offline verifier for AI eval receipts. Standards-native: Ed25519 signature,
56
+ RFC 6962 transparency-log Merkle anchoring, optional SD-JWT (RFC 9901) selective
57
+ disclosure, aligned to the in-toto test-result predicate. One portable JSON file,
58
+ no server, no network.**
56
59
 
57
60
  [![CI](https://github.com/b7n0de/proofbundle/actions/workflows/ci.yml/badge.svg)](https://github.com/b7n0de/proofbundle/actions/workflows/ci.yml)
58
61
  [![PyPI](https://img.shields.io/pypi/v/proofbundle.svg?color=D6248A&cacheSeconds=3600)](https://pypi.org/project/proofbundle/)
@@ -70,7 +73,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
70
73
 
71
74
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
72
75
  verify` checks one self-contained `bundle.json` with three offline cryptographic
73
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 74 tests.
76
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 102 tests.
74
77
 
75
78
  ## Contents
76
79
 
@@ -80,6 +83,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 74 tests.
80
83
  - [Install](#install)
81
84
  - [Quickstart](#quickstart)
82
85
  - [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
86
+ - [Integrations](#integrations--a-signed-receipt-of-your-eval-or-test-run-automatically-v10)
83
87
  - [Interoperability](#interoperability)
84
88
  - [Bundle format](#bundle-format-proofbundlev01)
85
89
  - [Eval receipts](#eval-receipts)
@@ -225,6 +229,25 @@ that the *artifact* is signed and offline-verifiable, with model and dataset kep
225
229
  See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
226
230
  [`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
227
231
 
232
+ ## Integrations — a signed receipt of your eval or test run, automatically (v1.0)
233
+
234
+ Since v1.0, proofbundle can **auto-emit** a signed receipt of an **inspect_ai eval** or a **pytest run** via
235
+ each framework's native plugin API — installed and ready, but strictly **opt-in** (it emits only when you set
236
+ `PROOFBUNDLE_EMIT=1` or pass a flag; never silently, never failing your run):
237
+
238
+ ```bash
239
+ pip install "proofbundle[inspect,eval]" && PROOFBUNDLE_EMIT=1 inspect eval task.py --model mockllm/model
240
+ pip install "proofbundle[pytest,eval]" && PROOFBUNDLE_EMIT=1 pytest
241
+ ```
242
+
243
+ The distinguishing angle is exactly this opt-in **auto-emit of an Ed25519-signed receipt via the framework's
244
+ own plugin** (an inspect_ai end-of-task hook + a pytest11 plugin), on top of the standards stack. Named
245
+ fairly: [ai-audit-trail](https://pypi.org/project/ai-audit-trail/) records *runtime* agent Decision Receipts
246
+ (FastAPI/LangChain, ISO 42001), a different layer; [ValiChord](https://github.com/topeuph-ai/ValiChord)
247
+ builds attestation bundles from inspect_ai logs *post-hoc* (its v1 library is JCS + SHA-256 Merkle + HMAC
248
+ challenge-response, **not digitally signed** — signatures are v2 scope). See
249
+ [INTEGRATIONS.md](INTEGRATIONS.md) (+ a prepared composite GitHub Action under [`action/`](action/action.yml)).
250
+
228
251
  ## Interoperability
229
252
 
230
253
  proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
@@ -325,24 +348,46 @@ SD-JWT selective disclosure over one portable file, offline.
325
348
  The maintainers of inspect_evals (Arcadia Impact, funded by the UK AI Safety Institute) name an open
326
349
  gap ([arXiv:2507.06893](https://arxiv.org/abs/2507.06893)):
327
350
  a database of trustworthy evaluation results with proper provenance tracking. proofbundle is the
328
- missing **signature + selective-disclosure layer** for exactly that — complementary to metadata
329
- aggregation (Every Eval Ever) and documentation taxonomies (Eval Factsheets), not a competitor.
330
- See [INTEROP.md](INTEROP.md) for how it maps to OpenSSF Model Signing, CycloneDX ML-BOM, and in-toto.
331
-
332
- - **Two framework adapters** `pip install "proofbundle[inspect]"` reads a UK AISI
351
+ missing **signature + selective-disclosure layer** for exactly that.
352
+
353
+ **How it fits: standards-native, and honest about the neighbours.** proofbundle attests that a *claimed*
354
+ evaluation result is authentic, tamper-evident, and selectively disclosable. It does **not** attest that
355
+ the evaluation was computed correctly or that results were not cherry-picked — proving faithful
356
+ computation is the domain of TEE approaches such as
357
+ [Attestable Audits](https://arxiv.org/abs/2506.23706). It is complementary to its neighbours, named
358
+ fairly: [Every Eval Ever](https://github.com/evaleval/every_eval_ever) standardizes eval *metadata* but
359
+ adds no cryptography (proofbundle ships an EEE→receipt converter);
360
+ [OpenSSF Model Signing](https://github.com/ossf/model-signing-spec) signs *model weights*, not eval
361
+ results; [ValiChord](https://github.com/topeuph-ai/ValiChord) plans blind peer consensus and a
362
+ Holochain attested log (v2 scope); its current v1 attestation library uses a simple SHA-256 Merkle tree with
363
+ no digital signature, no SD-JWT, no in-toto. proofbundle is the lightweight, **standards-native** piece between them:
364
+ a portable receipt a third party verifies offline, with selective disclosure so an auditor can prove a
365
+ threshold was met without revealing the model or the data. See [INTEROP.md](INTEROP.md).
366
+
367
+ - **Three framework bridges** — `pip install "proofbundle[inspect]"` reads a UK AISI
333
368
  [inspect_ai](https://github.com/UKGovernmentBEIS/inspect_ai) eval log via the stable `read_eval_log`
334
369
  API (lazy import). `proofbundle.adapters.from_lm_eval_results` reads a real EleutherAI
335
370
  [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) `results_*.json` (the
336
- genuine `acc,none` filter-suffix format) and captures run provenance — no framework import either way.
337
- - **in-toto Statement v1**`proofbundle.intoto.to_intoto_statement(claim, root_b64=…)`
338
- emits the receipt as an in-toto statement with a self-hosted predicate type. The subject
339
- digest is an *honest salted commitment* under a custom key, never `sha256` (see
340
- [PREDICATE.md](PREDICATE.md)).
341
- - **SD-JWT issuance** (RFC 9901) `proofbundle.sdjwt_issue.issue_sd_jwt(claim, signer,
371
+ genuine `acc,none` filter-suffix format). **`proofbundle.adapters.from_eee_dataset`** (v0.9) reads an
372
+ Every Eval Ever v0.2.2 aggregate JSON and builds a signed receipt validated against the vendored EEE
373
+ schema, with **no runtime import** of `every_eval_ever` (it needs Python 3.12; proofbundle stays 3.9+).
374
+ - **in-toto test-result export, DSSE-signed** (v0.9) — `proofbundle.intoto.export_intoto_dsse(claim,
375
+ signer)` emits the receipt as a DSSE-signed in-toto Statement v1 with the **generic
376
+ `test-result/v0.1` predicate** (result PASSED/FAILED, `configuration` ResourceDescriptors), so a generic
377
+ in-toto verifier understands it. It ships alongside the self-hosted-predicate `to_intoto_statement` (see
378
+ [PREDICATE.md](PREDICATE.md)). Metric details live in `annotations` (test-result has no native metric
379
+ field); the model/dataset stay salted commitments, never `sha256`.
380
+ - **C2SP tlog-checkpoint** (v0.9) — `proofbundle.checkpoint.sign_checkpoint(origin, tree_size, root, …)`
381
+ emits a valid [C2SP](https://github.com/C2SP/C2SP/blob/main/tlog-checkpoint.md) signed note over the
382
+ RFC 6962 Merkle root, making a receipt witness-network / transparency-log compatible. Pure serialization
383
+ over the Ed25519 key already in use — no new crypto.
384
+ - **SD-JWT issuance** (RFC 9901, verified Nov 2025) — `proofbundle.sdjwt_issue.issue_sd_jwt(claim, signer,
342
385
  root_b64=…, exact_score=…)` issues the receipt so a holder can disclose `passed` +
343
- `threshold` while **withholding the exact score** and the identifier openings. The signed
344
- bundle payload is the source of truth; the SD-JWT is a derived, bundle-bound view, verified
345
- by proofbundle's own verifier **and** the `sd-jwt-python` reference.
386
+ `threshold` while **withholding the exact score** and the identifier openings. The digest mechanic is
387
+ RFC 9901 §4.2.3 (base64url of SHA-256 over the base64url-encoded Disclosure), cross-checked against the
388
+ `sd-jwt-python` reference.
389
+ The signed bundle payload is always the source of truth; the SD-JWT and the in-toto export are derived,
390
+ bundle-bound views.
346
391
 
347
392
  Every release ships **PEP 740 attestations** (Trusted Publishing) + an SLSA build-provenance
348
393
  attestation — see [SECURITY.md](SECURITY.md).
@@ -359,8 +404,12 @@ attestation — see [SECURITY.md](SECURITY.md).
359
404
  CITATION.cff, PEP 740 attestations documented.
360
405
  - **v0.7** — citability polish (ORCID, Zenodo DOI placeholder, in-toto proposal draft); v0.7.1 hardened
361
406
  verifier robustness + CI on Python 3.9 after a holistic review.
362
- - **v0.8 (current release)** — an offline `make demo` (real eval log -> signed receipt -> verified),
407
+ - **v0.8** — an offline `make demo` (real eval log -> signed receipt -> verified),
363
408
  a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
409
+ - **v0.9** — the standards moat: a DSSE-signed in-toto `test-result` export, a C2SP tlog-checkpoint over
410
+ the RFC 6962 root, an Every Eval Ever converter, and standards-native repositioning.
411
+ - **v1.0 (current release)** — distribution: opt-in framework integrations that auto-emit a signed receipt
412
+ of an inspect_ai eval (end-of-task hook) or a pytest run (pytest11 plugin), plus a composite GitHub Action.
364
413
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
365
414
  Key-Binding JWT, status lists / revocation, an official in-toto PR, a full DSSE / in-toto client.
366
415
 
@@ -7,9 +7,10 @@
7
7
 
8
8
  <h1>proofbundle</h1>
9
9
 
10
- **Emit and verify, fully offline, portable evidence that a piece of data was
11
- signed and anchored in a tamper-evident log and optionally carries a
12
- selectively disclosable credential. Pure Python, no server, no daemon, one JSON file.**
10
+ **An offline verifier for AI eval receipts. Standards-native: Ed25519 signature,
11
+ RFC 6962 transparency-log Merkle anchoring, optional SD-JWT (RFC 9901) selective
12
+ disclosure, aligned to the in-toto test-result predicate. One portable JSON file,
13
+ no server, no network.**
13
14
 
14
15
  [![CI](https://github.com/b7n0de/proofbundle/actions/workflows/ci.yml/badge.svg)](https://github.com/b7n0de/proofbundle/actions/workflows/ci.yml)
15
16
  [![PyPI](https://img.shields.io/pypi/v/proofbundle.svg?color=D6248A&cacheSeconds=3600)](https://pypi.org/project/proofbundle/)
@@ -27,7 +28,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
27
28
 
28
29
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
29
30
  verify` checks one self-contained `bundle.json` with three offline cryptographic
30
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 74 tests.
31
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 102 tests.
31
32
 
32
33
  ## Contents
33
34
 
@@ -37,6 +38,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 74 tests.
37
38
  - [Install](#install)
38
39
  - [Quickstart](#quickstart)
39
40
  - [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
41
+ - [Integrations](#integrations--a-signed-receipt-of-your-eval-or-test-run-automatically-v10)
40
42
  - [Interoperability](#interoperability)
41
43
  - [Bundle format](#bundle-format-proofbundlev01)
42
44
  - [Eval receipts](#eval-receipts)
@@ -182,6 +184,25 @@ that the *artifact* is signed and offline-verifiable, with model and dataset kep
182
184
  See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
183
185
  [`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
184
186
 
187
+ ## Integrations — a signed receipt of your eval or test run, automatically (v1.0)
188
+
189
+ Since v1.0, proofbundle can **auto-emit** a signed receipt of an **inspect_ai eval** or a **pytest run** via
190
+ each framework's native plugin API — installed and ready, but strictly **opt-in** (it emits only when you set
191
+ `PROOFBUNDLE_EMIT=1` or pass a flag; never silently, never failing your run):
192
+
193
+ ```bash
194
+ pip install "proofbundle[inspect,eval]" && PROOFBUNDLE_EMIT=1 inspect eval task.py --model mockllm/model
195
+ pip install "proofbundle[pytest,eval]" && PROOFBUNDLE_EMIT=1 pytest
196
+ ```
197
+
198
+ The distinguishing angle is exactly this opt-in **auto-emit of an Ed25519-signed receipt via the framework's
199
+ own plugin** (an inspect_ai end-of-task hook + a pytest11 plugin), on top of the standards stack. Named
200
+ fairly: [ai-audit-trail](https://pypi.org/project/ai-audit-trail/) records *runtime* agent Decision Receipts
201
+ (FastAPI/LangChain, ISO 42001), a different layer; [ValiChord](https://github.com/topeuph-ai/ValiChord)
202
+ builds attestation bundles from inspect_ai logs *post-hoc* (its v1 library is JCS + SHA-256 Merkle + HMAC
203
+ challenge-response, **not digitally signed** — signatures are v2 scope). See
204
+ [INTEGRATIONS.md](INTEGRATIONS.md) (+ a prepared composite GitHub Action under [`action/`](action/action.yml)).
205
+
185
206
  ## Interoperability
186
207
 
187
208
  proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
@@ -282,24 +303,46 @@ SD-JWT selective disclosure over one portable file, offline.
282
303
  The maintainers of inspect_evals (Arcadia Impact, funded by the UK AI Safety Institute) name an open
283
304
  gap ([arXiv:2507.06893](https://arxiv.org/abs/2507.06893)):
284
305
  a database of trustworthy evaluation results with proper provenance tracking. proofbundle is the
285
- missing **signature + selective-disclosure layer** for exactly that — complementary to metadata
286
- aggregation (Every Eval Ever) and documentation taxonomies (Eval Factsheets), not a competitor.
287
- See [INTEROP.md](INTEROP.md) for how it maps to OpenSSF Model Signing, CycloneDX ML-BOM, and in-toto.
288
-
289
- - **Two framework adapters** `pip install "proofbundle[inspect]"` reads a UK AISI
306
+ missing **signature + selective-disclosure layer** for exactly that.
307
+
308
+ **How it fits: standards-native, and honest about the neighbours.** proofbundle attests that a *claimed*
309
+ evaluation result is authentic, tamper-evident, and selectively disclosable. It does **not** attest that
310
+ the evaluation was computed correctly or that results were not cherry-picked — proving faithful
311
+ computation is the domain of TEE approaches such as
312
+ [Attestable Audits](https://arxiv.org/abs/2506.23706). It is complementary to its neighbours, named
313
+ fairly: [Every Eval Ever](https://github.com/evaleval/every_eval_ever) standardizes eval *metadata* but
314
+ adds no cryptography (proofbundle ships an EEE→receipt converter);
315
+ [OpenSSF Model Signing](https://github.com/ossf/model-signing-spec) signs *model weights*, not eval
316
+ results; [ValiChord](https://github.com/topeuph-ai/ValiChord) plans blind peer consensus and a
317
+ Holochain attested log (v2 scope); its current v1 attestation library uses a simple SHA-256 Merkle tree with
318
+ no digital signature, no SD-JWT, no in-toto. proofbundle is the lightweight, **standards-native** piece between them:
319
+ a portable receipt a third party verifies offline, with selective disclosure so an auditor can prove a
320
+ threshold was met without revealing the model or the data. See [INTEROP.md](INTEROP.md).
321
+
322
+ - **Three framework bridges** — `pip install "proofbundle[inspect]"` reads a UK AISI
290
323
  [inspect_ai](https://github.com/UKGovernmentBEIS/inspect_ai) eval log via the stable `read_eval_log`
291
324
  API (lazy import). `proofbundle.adapters.from_lm_eval_results` reads a real EleutherAI
292
325
  [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) `results_*.json` (the
293
- genuine `acc,none` filter-suffix format) and captures run provenance — no framework import either way.
294
- - **in-toto Statement v1**`proofbundle.intoto.to_intoto_statement(claim, root_b64=…)`
295
- emits the receipt as an in-toto statement with a self-hosted predicate type. The subject
296
- digest is an *honest salted commitment* under a custom key, never `sha256` (see
297
- [PREDICATE.md](PREDICATE.md)).
298
- - **SD-JWT issuance** (RFC 9901) `proofbundle.sdjwt_issue.issue_sd_jwt(claim, signer,
326
+ genuine `acc,none` filter-suffix format). **`proofbundle.adapters.from_eee_dataset`** (v0.9) reads an
327
+ Every Eval Ever v0.2.2 aggregate JSON and builds a signed receipt validated against the vendored EEE
328
+ schema, with **no runtime import** of `every_eval_ever` (it needs Python 3.12; proofbundle stays 3.9+).
329
+ - **in-toto test-result export, DSSE-signed** (v0.9) — `proofbundle.intoto.export_intoto_dsse(claim,
330
+ signer)` emits the receipt as a DSSE-signed in-toto Statement v1 with the **generic
331
+ `test-result/v0.1` predicate** (result PASSED/FAILED, `configuration` ResourceDescriptors), so a generic
332
+ in-toto verifier understands it. It ships alongside the self-hosted-predicate `to_intoto_statement` (see
333
+ [PREDICATE.md](PREDICATE.md)). Metric details live in `annotations` (test-result has no native metric
334
+ field); the model/dataset stay salted commitments, never `sha256`.
335
+ - **C2SP tlog-checkpoint** (v0.9) — `proofbundle.checkpoint.sign_checkpoint(origin, tree_size, root, …)`
336
+ emits a valid [C2SP](https://github.com/C2SP/C2SP/blob/main/tlog-checkpoint.md) signed note over the
337
+ RFC 6962 Merkle root, making a receipt witness-network / transparency-log compatible. Pure serialization
338
+ over the Ed25519 key already in use — no new crypto.
339
+ - **SD-JWT issuance** (RFC 9901, verified Nov 2025) — `proofbundle.sdjwt_issue.issue_sd_jwt(claim, signer,
299
340
  root_b64=…, exact_score=…)` issues the receipt so a holder can disclose `passed` +
300
- `threshold` while **withholding the exact score** and the identifier openings. The signed
301
- bundle payload is the source of truth; the SD-JWT is a derived, bundle-bound view, verified
302
- by proofbundle's own verifier **and** the `sd-jwt-python` reference.
341
+ `threshold` while **withholding the exact score** and the identifier openings. The digest mechanic is
342
+ RFC 9901 §4.2.3 (base64url of SHA-256 over the base64url-encoded Disclosure), cross-checked against the
343
+ `sd-jwt-python` reference.
344
+ The signed bundle payload is always the source of truth; the SD-JWT and the in-toto export are derived,
345
+ bundle-bound views.
303
346
 
304
347
  Every release ships **PEP 740 attestations** (Trusted Publishing) + an SLSA build-provenance
305
348
  attestation — see [SECURITY.md](SECURITY.md).
@@ -316,8 +359,12 @@ attestation — see [SECURITY.md](SECURITY.md).
316
359
  CITATION.cff, PEP 740 attestations documented.
317
360
  - **v0.7** — citability polish (ORCID, Zenodo DOI placeholder, in-toto proposal draft); v0.7.1 hardened
318
361
  verifier robustness + CI on Python 3.9 after a holistic review.
319
- - **v0.8 (current release)** — an offline `make demo` (real eval log -> signed receipt -> verified),
362
+ - **v0.8** — an offline `make demo` (real eval log -> signed receipt -> verified),
320
363
  a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
364
+ - **v0.9** — the standards moat: a DSSE-signed in-toto `test-result` export, a C2SP tlog-checkpoint over
365
+ the RFC 6962 root, an Every Eval Ever converter, and standards-native repositioning.
366
+ - **v1.0 (current release)** — distribution: opt-in framework integrations that auto-emit a signed receipt
367
+ of an inspect_ai eval (end-of-task hook) or a pytest run (pytest11 plugin), plus a composite GitHub Action.
321
368
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
322
369
  Key-Binding JWT, status lists / revocation, an official in-toto PR, a full DSSE / in-toto client.
323
370
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "proofbundle"
7
- version = "0.8.1"
7
+ version = "1.0.0"
8
8
  description = "Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -45,13 +45,16 @@ sdjwt = []
45
45
  eval = ["rfc8785>=0.1.4"]
46
46
  # The lm-eval adapter reads exported results.json (no import) → pure stdlib.
47
47
  adapters = []
48
+ # The pytest plugin (opt-in test-run receipt via the pytest11 entry-point) — pytest is an optional extra,
49
+ # never a core dependency. Floor 7.0: the hooks/stats API is signature-stable across pytest 7/8/9.
50
+ pytest = ["pytest>=7"]
48
51
  # The inspect_ai adapter uses the STABLE read_eval_log API (lazy import). Pinned with an UPPER bound:
49
52
  # the .eval format + pydantic schema change between versions (inspect_ai issue 834), and the fixture
50
53
  # test is bound to this range. inspect_ai requires Python >= 3.10, so the marker gates it out on 3.9
51
54
  # (base + [eval]/[sdjwt] still work on 3.9; the inspect adapter test skips there). Fixes the red 3.9 CI.
52
- inspect = ['inspect_ai>=0.3.100,<0.4; python_version >= "3.10"']
55
+ inspect = ['inspect_ai>=0.3.112,<0.4; python_version >= "3.10"']
53
56
  dev = ["pytest>=7", "ruff>=0.5", "jsonschema>=4", "mypy>=1.8", "build>=1", "hypothesis>=6",
54
- "rfc8785>=0.1.4", "sd-jwt>=0.10", 'inspect_ai>=0.3.100,<0.4; python_version >= "3.10"']
57
+ "rfc8785>=0.1.4", "sd-jwt>=0.10", 'inspect_ai>=0.3.112,<0.4; python_version >= "3.10"']
55
58
 
56
59
  [project.urls]
57
60
  Homepage = "https://b7n0de.com"
@@ -63,11 +66,17 @@ Documentation = "https://github.com/b7n0de/proofbundle#readme"
63
66
  [project.scripts]
64
67
  proofbundle = "proofbundle.cli:main"
65
68
 
69
+ # Framework integrations (opt-in auto-emit; gated on PROOFBUNDLE_EMIT / a flag, never silent).
70
+ [project.entry-points.inspect_ai]
71
+ proofbundle = "proofbundle._inspect_registry"
72
+ [project.entry-points.pytest11]
73
+ proofbundle = "proofbundle.pytest_plugin"
74
+
66
75
  [tool.setuptools.packages.find]
67
76
  where = ["src"]
68
77
 
69
78
  [tool.setuptools.package-data]
70
- proofbundle = ["py.typed"]
79
+ proofbundle = ["py.typed", "eee_eval_schema.json"]
71
80
 
72
81
  [tool.ruff]
73
82
  line-length = 100
@@ -0,0 +1,56 @@
1
+ """proofbundle — emit and verify portable, offline cryptographic evidence bundles for AI eval receipts.
2
+
3
+ Verify, fully offline and in pure Python, that a payload was Ed25519 signed and anchored under an RFC 6962
4
+ Merkle root, with optional SD-JWT selective disclosure — plus opt-in framework integrations that auto-emit a
5
+ signed receipt of an inspect_ai eval or a pytest run.
6
+
7
+ The public API is loaded LAZILY (PEP 562): ``import proofbundle`` — and, via the entry points, loading the
8
+ pytest plugin / inspect_ai hook — does NOT pull the crypto core until a name like ``verify_bundle`` is
9
+ actually used. ``from proofbundle import verify_bundle`` works exactly as before; it just imports the backing
10
+ module on first access. This keeps the framework integrations light at startup.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ from typing import TYPE_CHECKING
15
+
16
+ __version__ = "1.0.0"
17
+
18
+ __all__ = [
19
+ "__version__",
20
+ "SCHEMA",
21
+ "verify_bundle",
22
+ "load_bundle",
23
+ "emit_bundle",
24
+ "generate_signer",
25
+ "verify_inclusion",
26
+ "verify_consistency",
27
+ "VerificationResult",
28
+ "Check",
29
+ "ProofBundleError",
30
+ ]
31
+
32
+ # name → backing submodule (relative). Loaded on first attribute access.
33
+ _LAZY = {
34
+ "SCHEMA": ".bundle", "load_bundle": ".bundle", "verify_bundle": ".bundle",
35
+ "emit_bundle": ".emit", "generate_signer": ".emit",
36
+ "Check": ".errors", "ProofBundleError": ".errors", "VerificationResult": ".errors",
37
+ "verify_consistency": ".merkle", "verify_inclusion": ".merkle",
38
+ }
39
+
40
+ if TYPE_CHECKING: # static analysers + IDEs see the real names/types; runtime stays lazy
41
+ from .bundle import SCHEMA, load_bundle, verify_bundle
42
+ from .emit import emit_bundle, generate_signer
43
+ from .errors import Check, ProofBundleError, VerificationResult
44
+ from .merkle import verify_consistency, verify_inclusion
45
+
46
+
47
+ def __getattr__(name: str):
48
+ module = _LAZY.get(name)
49
+ if module is None:
50
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
51
+ import importlib # noqa: PLC0415
52
+ return getattr(importlib.import_module(module, __name__), name)
53
+
54
+
55
+ def __dir__():
56
+ return sorted(__all__)
@@ -0,0 +1,3 @@
1
+ """Entry-point target for the inspect_ai hook group. Importing this module registers ProofbundleHooks as an
2
+ import side-effect. Kept intentionally minimal (no crypto) so inspect's startup discovery stays fast."""
3
+ from .inspect_hook import ProofbundleHooks # noqa: F401 — import side-effect registers the hook
@@ -0,0 +1,84 @@
1
+ """Shared opt-in helper for the framework integrations (inspect_ai hook, pytest plugin) — v1.0.
2
+
3
+ THE TOP RULE (opt-in safety): an integration must NEVER silently write a file or alter a host run. It emits
4
+ a receipt ONLY when the user explicitly turns it on — the ``PROOFBUNDLE_EMIT=1`` environment variable, or a
5
+ framework flag that maps to it. A security tool that surprises you loses trust. Every function here is a
6
+ no-op unless emission is enabled, catches its own errors (an integration must never fail the host run), and
7
+ imports the crypto lazily (this module is only imported from inside a hook body, never at framework startup).
8
+
9
+ Configuration (all optional, all env):
10
+ PROOFBUNDLE_EMIT "1" to enable emission (the master opt-in). Anything else = disabled.
11
+ PROOFBUNDLE_KEY path to a 32-byte raw Ed25519 seed to sign with. If unset, an EPHEMERAL key is
12
+ generated (a warning is printed; the receipt is self-verifiable but not tied to a
13
+ durable identity).
14
+ PROOFBUNDLE_OUT output path: a file, or a directory (the default file name is written into it).
15
+ Default: the default file name in the current directory.
16
+ PROOFBUNDLE_METRIC which metric to bind (else the integration's first/most-relevant metric).
17
+ PROOFBUNDLE_COMPARATOR ">=" | ">" | "<=" | "<" (default ">=").
18
+ PROOFBUNDLE_THRESHOLD decimal string (default "0") — the pass/fail threshold to assert.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import os
23
+ from pathlib import Path
24
+ from typing import Optional
25
+
26
# Fallback comparator and threshold: emit_config() uses these when PROOFBUNDLE_COMPARATOR /
# PROOFBUNDLE_THRESHOLD are unset or empty. Both are kept as strings, like the env values they replace.
DEFAULT_COMPARATOR = ">="
DEFAULT_THRESHOLD = "0"
28
+
29
+
30
def emit_enabled(flag: bool = False) -> bool:
    """Decide whether receipt emission has been explicitly switched on.

    A truthy ``flag`` (the framework-level opt-in) wins immediately and is returned as given.
    Otherwise emission is enabled only when the ``PROOFBUNDLE_EMIT`` environment variable is
    exactly ``"1"``; any other value, or an unset variable, means disabled.
    """
    if flag:
        return flag
    return os.environ.get("PROOFBUNDLE_EMIT") == "1"
33
+
34
+
35
def emit_config() -> dict:
    """Collect the emission settings from the environment.

    Returns a dict with three keys:
      ``metric``      value of ``PROOFBUNDLE_METRIC``, or ``None`` when the variable is unset.
      ``comparator``  value of ``PROOFBUNDLE_COMPARATOR``, or ``DEFAULT_COMPARATOR`` if unset/empty.
      ``threshold``   value of ``PROOFBUNDLE_THRESHOLD``, or ``DEFAULT_THRESHOLD`` if unset/empty.
    Values are returned as the raw strings found in the environment; nothing is validated here.
    """
    read = os.environ.get
    config = {"metric": read("PROOFBUNDLE_METRIC")}
    config["comparator"] = read("PROOFBUNDLE_COMPARATOR") or DEFAULT_COMPARATOR
    config["threshold"] = read("PROOFBUNDLE_THRESHOLD") or DEFAULT_THRESHOLD
    return config
42
+
43
+
44
def _resolve_signer():
    """Pick the signer for a receipt and report whether it is ephemeral.

    Returns a ``(signer, is_ephemeral)`` pair. When ``PROOFBUNDLE_KEY`` is set to a non-empty value,
    it is passed to ``load_signer`` and the flag is ``False``; otherwise a fresh signer comes from
    ``generate_signer`` and the flag is ``True``.
    """
    # Imported here rather than at module level, so the emit module is only loaded on an actual emit.
    from .emit import generate_signer, load_signer  # noqa: PLC0415 — lazy: only on actual emit

    configured_key = os.environ.get("PROOFBUNDLE_KEY")
    if not configured_key:
        return generate_signer(), True
    return load_signer(configured_key), False
51
+
52
+
53
def _output_path(default_name: str) -> Path:
    """Resolve where the receipt file should be written.

    Args:
        default_name: file name to use when ``PROOFBUNDLE_OUT`` is unset or names a directory.

    Returns:
        ``<cwd>/<default_name>`` when ``PROOFBUNDLE_OUT`` is unset or empty;
        ``<PROOFBUNDLE_OUT>/<default_name>`` when the value is an existing directory or ends with a
        path separator; otherwise the ``PROOFBUNDLE_OUT`` value itself, taken as a file path.
    """
    out = os.environ.get("PROOFBUNDLE_OUT")
    if not out:
        return Path.cwd() / default_name
    p = Path(out)
    # A trailing separator marks a directory even when it does not exist yet. os.altsep is checked
    # as well: on Windows os.sep is "\\" and "/" is the alternate separator, so "receipts/" would
    # otherwise be mistaken for a file path. os.altsep is None on POSIX, hence the filter.
    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)
    if p.is_dir() or out.endswith(separators):
        return p / default_name
    return p
62
+
63
+
64
def emit_claim_receipt(claim: dict, default_name: str) -> Optional[str]:
    """Turn ``claim`` into a signed eval receipt and write it to disk as indented JSON.

    The caller is expected to have checked ``emit_enabled`` already; this function does not.
    The destination comes from ``_output_path(default_name)`` and missing parent directories are
    created. Progress and problems are reported with ``print``.

    Returns:
        The written path as a string, or ``None`` if anything failed. Every ``Exception`` is caught
        and reported instead of propagating, so the host run is never broken by emission.
    """
    try:
        from .evalclaim import emit_eval_receipt  # noqa: PLC0415 — lazy
        import json  # noqa: PLC0415

        signer, is_ephemeral = _resolve_signer()
        if is_ephemeral:
            print("[proofbundle] PROOFBUNDLE_KEY not set — signing with an EPHEMERAL key "
                  "(receipt is self-verifiable but not bound to a durable identity).")
        receipt = emit_eval_receipt(claim, signer)
        target = _output_path(default_name)
        target.parent.mkdir(parents=True, exist_ok=True)
        payload = json.dumps(receipt, indent=2)
        target.write_text(payload, encoding="utf-8")
        print(f"[proofbundle] wrote signed eval receipt → {target}")
        return str(target)
    except Exception as exc:  # noqa: BLE001 — never let emission break the host run
        print(f"[proofbundle] receipt emission skipped ({type(exc).__name__}: {exc})")
        return None
@@ -5,6 +5,7 @@ add no runtime dependency. The output-format mapping is bound to a framework ver
5
5
  each fixture in tests/fixtures documents its source + version.
6
6
  """
7
7
  from .inspect_ai import from_inspect_ai_log
8
+ from .eee import from_eee_dataset
8
9
  from .lm_eval import from_lm_eval_results
9
10
 
10
- __all__ = ["from_lm_eval_results", "from_inspect_ai_log"]
11
+ __all__ = ["from_lm_eval_results", "from_inspect_ai_log", "from_eee_dataset"]