proofbundle 0.9.0__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {proofbundle-0.9.0/src/proofbundle.egg-info → proofbundle-1.0.0}/PKG-INFO +33 -9
  2. {proofbundle-0.9.0 → proofbundle-1.0.0}/README.md +28 -6
  3. {proofbundle-0.9.0 → proofbundle-1.0.0}/pyproject.toml +12 -3
  4. proofbundle-1.0.0/src/proofbundle/__init__.py +56 -0
  5. proofbundle-1.0.0/src/proofbundle/_inspect_registry.py +3 -0
  6. proofbundle-1.0.0/src/proofbundle/_integration.py +84 -0
  7. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/adapters/inspect_ai.py +32 -13
  8. proofbundle-1.0.0/src/proofbundle/inspect_hook.py +63 -0
  9. proofbundle-1.0.0/src/proofbundle/pytest_plugin.py +67 -0
  10. {proofbundle-0.9.0 → proofbundle-1.0.0/src/proofbundle.egg-info}/PKG-INFO +33 -9
  11. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle.egg-info/SOURCES.txt +6 -0
  12. proofbundle-1.0.0/src/proofbundle.egg-info/entry_points.txt +8 -0
  13. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle.egg-info/requires.txt +5 -2
  14. proofbundle-1.0.0/tests/test_inspect_hook.py +57 -0
  15. proofbundle-1.0.0/tests/test_pytest_plugin.py +62 -0
  16. proofbundle-0.9.0/src/proofbundle/__init__.py +0 -30
  17. proofbundle-0.9.0/src/proofbundle.egg-info/entry_points.txt +0 -2
  18. {proofbundle-0.9.0 → proofbundle-1.0.0}/LICENSE +0 -0
  19. {proofbundle-0.9.0 → proofbundle-1.0.0}/setup.cfg +0 -0
  20. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/adapters/__init__.py +0 -0
  21. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/adapters/eee.py +0 -0
  22. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/adapters/lm_eval.py +0 -0
  23. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/bundle.py +0 -0
  24. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/checkpoint.py +0 -0
  25. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/cli.py +0 -0
  26. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/dsse.py +0 -0
  27. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/eee_eval_schema.json +0 -0
  28. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/emit.py +0 -0
  29. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/errors.py +0 -0
  30. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/evalclaim.py +0 -0
  31. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/intoto.py +0 -0
  32. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/merkle.py +0 -0
  33. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/py.typed +0 -0
  34. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/sdjwt.py +0 -0
  35. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/sdjwt_issue.py +0 -0
  36. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle/signature.py +0 -0
  37. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle.egg-info/dependency_links.txt +0 -0
  38. {proofbundle-0.9.0 → proofbundle-1.0.0}/src/proofbundle.egg-info/top_level.txt +0 -0
  39. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_adapters.py +0 -0
  40. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_bundle.py +0 -0
  41. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_bundle_robustness.py +0 -0
  42. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_checkpoint.py +0 -0
  43. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_cli.py +0 -0
  44. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_cli_eval.py +0 -0
  45. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_eee.py +0 -0
  46. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_emit.py +0 -0
  47. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_eval_claim_schema.py +0 -0
  48. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_evalclaim.py +0 -0
  49. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_examples.py +0 -0
  50. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_intoto.py +0 -0
  51. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_intoto_dsse.py +0 -0
  52. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_merkle.py +0 -0
  53. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_merkle_property.py +0 -0
  54. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_rekor_interop.py +0 -0
  55. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_rfc6962_external_vectors.py +0 -0
  56. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_schema.py +0 -0
  57. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_sdjwt_issue.py +0 -0
  58. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_sdjwt_reference.py +0 -0
  59. {proofbundle-0.9.0 → proofbundle-1.0.0}/tests/test_signature.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: proofbundle
3
- Version: 0.9.0
3
+ Version: 1.0.0
4
4
  Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
5
5
  Author: Konrad Gruszka
6
6
  License: MIT
@@ -27,8 +27,10 @@ Provides-Extra: sdjwt
27
27
  Provides-Extra: eval
28
28
  Requires-Dist: rfc8785>=0.1.4; extra == "eval"
29
29
  Provides-Extra: adapters
30
+ Provides-Extra: pytest
31
+ Requires-Dist: pytest>=7; extra == "pytest"
30
32
  Provides-Extra: inspect
31
- Requires-Dist: inspect_ai<0.4,>=0.3.100; python_version >= "3.10" and extra == "inspect"
33
+ Requires-Dist: inspect_ai<0.4,>=0.3.112; python_version >= "3.10" and extra == "inspect"
32
34
  Provides-Extra: dev
33
35
  Requires-Dist: pytest>=7; extra == "dev"
34
36
  Requires-Dist: ruff>=0.5; extra == "dev"
@@ -38,7 +40,7 @@ Requires-Dist: build>=1; extra == "dev"
38
40
  Requires-Dist: hypothesis>=6; extra == "dev"
39
41
  Requires-Dist: rfc8785>=0.1.4; extra == "dev"
40
42
  Requires-Dist: sd-jwt>=0.10; extra == "dev"
41
- Requires-Dist: inspect_ai<0.4,>=0.3.100; python_version >= "3.10" and extra == "dev"
43
+ Requires-Dist: inspect_ai<0.4,>=0.3.112; python_version >= "3.10" and extra == "dev"
42
44
  Dynamic: license-file
43
45
 
44
46
  <div align="center">
@@ -71,7 +73,7 @@ no server, no network.**
71
73
 
72
74
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
73
75
  verify` checks one self-contained `bundle.json` with three offline cryptographic
74
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 96 tests.
76
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 102 tests.
75
77
 
76
78
  ## Contents
77
79
 
@@ -81,6 +83,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 96 tests.
81
83
  - [Install](#install)
82
84
  - [Quickstart](#quickstart)
83
85
  - [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
86
+ - [Integrations](#integrations--a-signed-receipt-of-your-eval-or-test-run-automatically-v10)
84
87
  - [Interoperability](#interoperability)
85
88
  - [Bundle format](#bundle-format-proofbundlev01)
86
89
  - [Eval receipts](#eval-receipts)
@@ -226,6 +229,25 @@ that the *artifact* is signed and offline-verifiable, with model and dataset kep
226
229
  See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
227
230
  [`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
228
231
 
232
+ ## Integrations — a signed receipt of your eval or test run, automatically (v1.0)
233
+
234
+ Since v1.0, proofbundle can **auto-emit** a signed receipt of an **inspect_ai eval** or a **pytest run** via
235
+ each framework's native plugin API — installed and ready, but strictly **opt-in** (it emits only when you set
236
+ `PROOFBUNDLE_EMIT=1` or pass a flag; never silently, never failing your run):
237
+
238
+ ```bash
239
+ pip install "proofbundle[inspect,eval]" && PROOFBUNDLE_EMIT=1 inspect eval task.py --model mockllm/model
240
+ pip install "proofbundle[pytest,eval]" && PROOFBUNDLE_EMIT=1 pytest
241
+ ```
242
+
243
+ The distinguishing angle is exactly this opt-in **auto-emit of an Ed25519-signed receipt via the framework's
244
+ own plugin** (an inspect_ai end-of-task hook + a pytest11 plugin), on top of the standards stack. Named
245
+ fairly: [ai-audit-trail](https://pypi.org/project/ai-audit-trail/) records *runtime* agent Decision Receipts
246
+ (FastAPI/LangChain, ISO 42001), a different layer; [ValiChord](https://github.com/topeuph-ai/ValiChord)
247
+ builds attestation bundles from inspect_ai logs *post-hoc* (its v1 library is JCS + SHA-256 Merkle + HMAC
248
+ challenge-response, **not digitally signed** — signatures are v2 scope). See
249
+ [INTEGRATIONS.md](INTEGRATIONS.md) (+ a prepared composite GitHub Action under [`action/`](action/action.yml)).
250
+
229
251
  ## Interoperability
230
252
 
231
253
  proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
@@ -336,9 +358,9 @@ computation is the domain of TEE approaches such as
336
358
  fairly: [Every Eval Ever](https://github.com/evaleval/every_eval_ever) standardizes eval *metadata* but
337
359
  adds no cryptography (proofbundle ships an EEE→receipt converter);
338
360
  [OpenSSF Model Signing](https://github.com/ossf/model-signing-spec) signs *model weights*, not eval
339
- results; [ValiChord](https://github.com/topeuph-ai/ValiChord) provides blind peer consensus and an
340
- attested log on a Holochain network (its v1 attestation library uses a simple SHA-256 Merkle tree, no
341
- signature, no SD-JWT, no in-toto). proofbundle is the lightweight, **standards-native** piece between them:
361
+ results; [ValiChord](https://github.com/topeuph-ai/ValiChord) plans blind peer consensus and a
362
+ Holochain attested log (v2 scope); its current v1 attestation library uses a simple SHA-256 Merkle tree with
363
+ no digital signature, no SD-JWT, no in-toto. proofbundle is the lightweight, **standards-native** piece between them:
342
364
  a portable receipt a third party verifies offline, with selective disclosure so an auditor can prove a
343
365
  threshold was met without revealing the model or the data. See [INTEROP.md](INTEROP.md).
344
366
 
@@ -384,8 +406,10 @@ attestation — see [SECURITY.md](SECURITY.md).
384
406
  verifier robustness + CI on Python 3.9 after a holistic review.
385
407
  - **v0.8** — an offline `make demo` (real eval log -> signed receipt -> verified),
386
408
  a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
387
- - **v0.9 (current release)** — the standards moat: a DSSE-signed in-toto `test-result` export, a C2SP
388
- tlog-checkpoint over the RFC 6962 root, an Every Eval Ever converter, and standards-native repositioning.
409
+ - **v0.9** — the standards moat: a DSSE-signed in-toto `test-result` export, a C2SP tlog-checkpoint over
410
+ the RFC 6962 root, an Every Eval Ever converter, and standards-native repositioning.
411
+ - **v1.0 (current release)** — distribution: opt-in framework integrations that auto-emit a signed receipt
412
+ of an inspect_ai eval (end-of-task hook) or a pytest run (pytest11 plugin), plus a composite GitHub Action.
389
413
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
390
414
  Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
391
415
 
@@ -28,7 +28,7 @@ no server, no network.**
28
28
 
29
29
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
30
30
  verify` checks one self-contained `bundle.json` with three offline cryptographic
31
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 96 tests.
31
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 102 tests.
32
32
 
33
33
  ## Contents
34
34
 
@@ -38,6 +38,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 96 tests.
38
38
  - [Install](#install)
39
39
  - [Quickstart](#quickstart)
40
40
  - [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
41
+ - [Integrations](#integrations--a-signed-receipt-of-your-eval-or-test-run-automatically-v10)
41
42
  - [Interoperability](#interoperability)
42
43
  - [Bundle format](#bundle-format-proofbundlev01)
43
44
  - [Eval receipts](#eval-receipts)
@@ -183,6 +184,25 @@ that the *artifact* is signed and offline-verifiable, with model and dataset kep
183
184
  See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
184
185
  [`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
185
186
 
187
+ ## Integrations — a signed receipt of your eval or test run, automatically (v1.0)
188
+
189
+ Since v1.0, proofbundle can **auto-emit** a signed receipt of an **inspect_ai eval** or a **pytest run** via
190
+ each framework's native plugin API — installed and ready, but strictly **opt-in** (it emits only when you set
191
+ `PROOFBUNDLE_EMIT=1` or pass a flag; never silently, never failing your run):
192
+
193
+ ```bash
194
+ pip install "proofbundle[inspect,eval]" && PROOFBUNDLE_EMIT=1 inspect eval task.py --model mockllm/model
195
+ pip install "proofbundle[pytest,eval]" && PROOFBUNDLE_EMIT=1 pytest
196
+ ```
197
+
198
+ The distinguishing angle is exactly this opt-in **auto-emit of an Ed25519-signed receipt via the framework's
199
+ own plugin** (an inspect_ai end-of-task hook + a pytest11 plugin), on top of the standards stack. Named
200
+ fairly: [ai-audit-trail](https://pypi.org/project/ai-audit-trail/) records *runtime* agent Decision Receipts
201
+ (FastAPI/LangChain, ISO 42001), a different layer; [ValiChord](https://github.com/topeuph-ai/ValiChord)
202
+ builds attestation bundles from inspect_ai logs *post-hoc* (its v1 library is JCS + SHA-256 Merkle + HMAC
203
+ challenge-response, **not digitally signed** — signatures are v2 scope). See
204
+ [INTEGRATIONS.md](INTEGRATIONS.md) (+ a prepared composite GitHub Action under [`action/`](action/action.yml)).
205
+
186
206
  ## Interoperability
187
207
 
188
208
  proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
@@ -293,9 +313,9 @@ computation is the domain of TEE approaches such as
293
313
  fairly: [Every Eval Ever](https://github.com/evaleval/every_eval_ever) standardizes eval *metadata* but
294
314
  adds no cryptography (proofbundle ships an EEE→receipt converter);
295
315
  [OpenSSF Model Signing](https://github.com/ossf/model-signing-spec) signs *model weights*, not eval
296
- results; [ValiChord](https://github.com/topeuph-ai/ValiChord) provides blind peer consensus and an
297
- attested log on a Holochain network (its v1 attestation library uses a simple SHA-256 Merkle tree, no
298
- signature, no SD-JWT, no in-toto). proofbundle is the lightweight, **standards-native** piece between them:
316
+ results; [ValiChord](https://github.com/topeuph-ai/ValiChord) plans blind peer consensus and a
317
+ Holochain attested log (v2 scope); its current v1 attestation library uses a simple SHA-256 Merkle tree with
318
+ no digital signature, no SD-JWT, no in-toto. proofbundle is the lightweight, **standards-native** piece between them:
299
319
  a portable receipt a third party verifies offline, with selective disclosure so an auditor can prove a
300
320
  threshold was met without revealing the model or the data. See [INTEROP.md](INTEROP.md).
301
321
 
@@ -341,8 +361,10 @@ attestation — see [SECURITY.md](SECURITY.md).
341
361
  verifier robustness + CI on Python 3.9 after a holistic review.
342
362
  - **v0.8** — an offline `make demo` (real eval log -> signed receipt -> verified),
343
363
  a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
344
- - **v0.9 (current release)** — the standards moat: a DSSE-signed in-toto `test-result` export, a C2SP
345
- tlog-checkpoint over the RFC 6962 root, an Every Eval Ever converter, and standards-native repositioning.
364
+ - **v0.9** — the standards moat: a DSSE-signed in-toto `test-result` export, a C2SP tlog-checkpoint over
365
+ the RFC 6962 root, an Every Eval Ever converter, and standards-native repositioning.
366
+ - **v1.0 (current release)** — distribution: opt-in framework integrations that auto-emit a signed receipt
367
+ of an inspect_ai eval (end-of-task hook) or a pytest run (pytest11 plugin), plus a composite GitHub Action.
346
368
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
347
369
  Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
348
370
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "proofbundle"
7
- version = "0.9.0"
7
+ version = "1.0.0"
8
8
  description = "Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -45,13 +45,16 @@ sdjwt = []
45
45
  eval = ["rfc8785>=0.1.4"]
46
46
  # The lm-eval adapter reads exported results.json (no import) → pure stdlib.
47
47
  adapters = []
48
+ # The pytest plugin (opt-in test-run receipt via the pytest11 entry-point) — pytest is an optional extra,
49
+ # never a core dependency. Floor 7.0: the hooks/stats API is signature-stable across pytest 7/8/9.
50
+ pytest = ["pytest>=7"]
48
51
  # The inspect_ai adapter uses the STABLE read_eval_log API (lazy import). Pinned with an UPPER bound:
49
52
  # the .eval format + pydantic schema change between versions (inspect_ai issue 834), and the fixture
50
53
  # test is bound to this range. inspect_ai requires Python >= 3.10, so the marker gates it out on 3.9
51
54
  # (base + [eval]/[sdjwt] still work on 3.9; the inspect adapter test skips there). Fixes the red 3.9 CI.
52
- inspect = ['inspect_ai>=0.3.100,<0.4; python_version >= "3.10"']
55
+ inspect = ['inspect_ai>=0.3.112,<0.4; python_version >= "3.10"']
53
56
  dev = ["pytest>=7", "ruff>=0.5", "jsonschema>=4", "mypy>=1.8", "build>=1", "hypothesis>=6",
54
- "rfc8785>=0.1.4", "sd-jwt>=0.10", 'inspect_ai>=0.3.100,<0.4; python_version >= "3.10"']
57
+ "rfc8785>=0.1.4", "sd-jwt>=0.10", 'inspect_ai>=0.3.112,<0.4; python_version >= "3.10"']
55
58
 
56
59
  [project.urls]
57
60
  Homepage = "https://b7n0de.com"
@@ -63,6 +66,12 @@ Documentation = "https://github.com/b7n0de/proofbundle#readme"
63
66
  [project.scripts]
64
67
  proofbundle = "proofbundle.cli:main"
65
68
 
69
+ # Framework integrations (opt-in auto-emit; gated on PROOFBUNDLE_EMIT / a flag, never silent).
70
+ [project.entry-points.inspect_ai]
71
+ proofbundle = "proofbundle._inspect_registry"
72
+ [project.entry-points.pytest11]
73
+ proofbundle = "proofbundle.pytest_plugin"
74
+
66
75
  [tool.setuptools.packages.find]
67
76
  where = ["src"]
68
77
 
@@ -0,0 +1,56 @@
1
+ """proofbundle — emit and verify portable, offline cryptographic evidence bundles for AI eval receipts.
2
+
3
+ Verify, fully offline and in pure Python, that a payload was Ed25519 signed and anchored under an RFC 6962
4
+ Merkle root, with optional SD-JWT selective disclosure — plus opt-in framework integrations that auto-emit a
5
+ signed receipt of an inspect_ai eval or a pytest run.
6
+
7
+ The public API is loaded LAZILY (PEP 562): ``import proofbundle`` — and, via the entry points, loading the
8
+ pytest plugin / inspect_ai hook — does NOT pull the crypto core until a name like ``verify_bundle`` is
9
+ actually used. ``from proofbundle import verify_bundle`` works exactly as before; it just imports the backing
10
+ module on first access. This keeps the framework integrations light at startup.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ from typing import TYPE_CHECKING
15
+
16
+ __version__ = "1.0.0"
17
+
18
+ __all__ = [
19
+ "__version__",
20
+ "SCHEMA",
21
+ "verify_bundle",
22
+ "load_bundle",
23
+ "emit_bundle",
24
+ "generate_signer",
25
+ "verify_inclusion",
26
+ "verify_consistency",
27
+ "VerificationResult",
28
+ "Check",
29
+ "ProofBundleError",
30
+ ]
31
+
32
+ # name → backing submodule (relative). Loaded on first attribute access.
33
+ _LAZY = {
34
+ "SCHEMA": ".bundle", "load_bundle": ".bundle", "verify_bundle": ".bundle",
35
+ "emit_bundle": ".emit", "generate_signer": ".emit",
36
+ "Check": ".errors", "ProofBundleError": ".errors", "VerificationResult": ".errors",
37
+ "verify_consistency": ".merkle", "verify_inclusion": ".merkle",
38
+ }
39
+
40
+ if TYPE_CHECKING: # static analysers + IDEs see the real names/types; runtime stays lazy
41
+ from .bundle import SCHEMA, load_bundle, verify_bundle
42
+ from .emit import emit_bundle, generate_signer
43
+ from .errors import Check, ProofBundleError, VerificationResult
44
+ from .merkle import verify_consistency, verify_inclusion
45
+
46
+
47
+ def __getattr__(name: str):
48
+ module = _LAZY.get(name)
49
+ if module is None:
50
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
51
+ import importlib # noqa: PLC0415
52
+ return getattr(importlib.import_module(module, __name__), name)
53
+
54
+
55
+ def __dir__():
56
+ return sorted(__all__)
@@ -0,0 +1,3 @@
1
+ """Entry-point target for the inspect_ai hook group. Importing this module registers ProofbundleHooks as an
2
+ import side-effect. Kept intentionally minimal (no crypto) so inspect's startup discovery stays fast."""
3
+ from .inspect_hook import ProofbundleHooks # noqa: F401 — import side-effect registers the hook
@@ -0,0 +1,84 @@
1
+ """Shared opt-in helper for the framework integrations (inspect_ai hook, pytest plugin) — v1.0.
2
+
3
+ THE TOP RULE (opt-in safety): an integration must NEVER silently write a file or alter a host run. It emits
4
+ a receipt ONLY when the user explicitly turns it on — the ``PROOFBUNDLE_EMIT=1`` environment variable, or a
5
+ framework flag that maps to it. A security tool that surprises you loses trust. Every function here is a
6
+ no-op unless emission is enabled, catches its own errors (an integration must never fail the host run), and
7
+ imports the crypto lazily (this module is only imported from inside a hook body, never at framework startup).
8
+
9
+ Configuration (all optional, all env):
10
+ PROOFBUNDLE_EMIT "1" to enable emission (the master opt-in). Anything else = disabled.
11
+ PROOFBUNDLE_KEY path to a 32-byte raw Ed25519 seed to sign with. If unset, an EPHEMERAL key is
12
+ generated (a warning is printed; the receipt is self-verifiable but not tied to a
13
+ durable identity).
14
+ PROOFBUNDLE_OUT output path: a file, or a directory (the default file name is written into it).
15
+ Default: the default file name in the current directory.
16
+ PROOFBUNDLE_METRIC which metric to bind (else the integration's first/most-relevant metric).
17
+ PROOFBUNDLE_COMPARATOR ">=" | ">" | "<=" | "<" (default ">=").
18
+ PROOFBUNDLE_THRESHOLD decimal string (default "0") — the pass/fail threshold to assert.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import os
23
+ from pathlib import Path
24
+ from typing import Optional
25
+
26
+ DEFAULT_COMPARATOR = ">="
27
+ DEFAULT_THRESHOLD = "0"
28
+
29
+
30
+ def emit_enabled(flag: bool = False) -> bool:
31
+ """The master opt-in gate. True only if PROOFBUNDLE_EMIT == "1" OR an explicit framework flag is set."""
32
+ return flag or os.environ.get("PROOFBUNDLE_EMIT") == "1"
33
+
34
+
35
+ def emit_config() -> dict:
36
+ """Read the (metric, comparator, threshold) emission config from the environment, with safe defaults."""
37
+ return {
38
+ "metric": os.environ.get("PROOFBUNDLE_METRIC"),
39
+ "comparator": os.environ.get("PROOFBUNDLE_COMPARATOR") or DEFAULT_COMPARATOR,
40
+ "threshold": os.environ.get("PROOFBUNDLE_THRESHOLD") or DEFAULT_THRESHOLD,
41
+ }
42
+
43
+
44
+ def _resolve_signer():
45
+ """Return (signer, is_ephemeral). Loads PROOFBUNDLE_KEY if set, else generates an ephemeral key."""
46
+ from .emit import generate_signer, load_signer # noqa: PLC0415 — lazy: only on actual emit
47
+ key_path = os.environ.get("PROOFBUNDLE_KEY")
48
+ if key_path:
49
+ return load_signer(key_path), False
50
+ return generate_signer(), True
51
+
52
+
53
+ def _output_path(default_name: str) -> Path:
54
+ """Resolve the output file path from PROOFBUNDLE_OUT (file or directory) or the default name in cwd."""
55
+ out = os.environ.get("PROOFBUNDLE_OUT")
56
+ if not out:
57
+ return Path.cwd() / default_name
58
+ p = Path(out)
59
+ if p.is_dir() or out.endswith(os.sep):
60
+ return p / default_name
61
+ return p
62
+
63
+
64
+ def emit_claim_receipt(claim: dict, default_name: str) -> Optional[str]:
65
+ """Sign ``claim`` into an eval receipt and write it to the resolved output path. Returns the path, or
66
+ None on any failure (an integration must never raise into the host run). Assumes emission is enabled
67
+ (the caller checks ``emit_enabled`` first)."""
68
+ try:
69
+ from .evalclaim import emit_eval_receipt # noqa: PLC0415 — lazy
70
+ import json # noqa: PLC0415
71
+
72
+ signer, ephemeral = _resolve_signer()
73
+ if ephemeral:
74
+ print("[proofbundle] PROOFBUNDLE_KEY not set — signing with an EPHEMERAL key "
75
+ "(receipt is self-verifiable but not bound to a durable identity).")
76
+ bundle = emit_eval_receipt(claim, signer)
77
+ out = _output_path(default_name)
78
+ out.parent.mkdir(parents=True, exist_ok=True)
79
+ out.write_text(json.dumps(bundle, indent=2), encoding="utf-8")
80
+ print(f"[proofbundle] wrote signed eval receipt → {out}")
81
+ return str(out)
82
+ except Exception as e: # noqa: BLE001 — never let emission break the host run
83
+ print(f"[proofbundle] receipt emission skipped ({type(e).__name__}: {e})")
84
+ return None
@@ -21,23 +21,42 @@ class InspectAdapterError(RuntimeError):
21
21
  """Raised when inspect_ai is missing or the log lacks the expected structure (no bare AttributeError)."""
22
22
 
23
23
 
24
+ def _score_str(value) -> str:
25
+ """Render a metric value as a PLAIN decimal string (no scientific notation) that build_eval_claim
26
+ accepts. ``repr(float)`` emits '1e-05'/'1e+20' for very small/large values, which the claim's decimal
27
+ pattern rejects — so numbers are formatted fixed-point (like the pytest plugin's ``_fmt``)."""
28
+ if isinstance(value, bool):
29
+ return "1" if value else "0"
30
+ if isinstance(value, int):
31
+ return str(value)
32
+ if isinstance(value, float):
33
+ if value != value or value in (float("inf"), float("-inf")):
34
+ raise InspectAdapterError("metric value must be finite")
35
+ return format(value, ".12f").rstrip("0").rstrip(".") or "0"
36
+ return str(value)
37
+
38
+
24
39
  def from_inspect_ai_log(path, metric: str, *, comparator: str, threshold: str, timestamp: str,
25
40
  model_salt: Optional[bytes] = None, dataset_salt: Optional[bytes] = None):
26
41
  """Read an inspect_ai eval log via the stable API and build an eval claim for `metric`.
27
42
 
28
- Returns (claim, salts). Raises InspectAdapterError if inspect_ai is unavailable or the log is
29
- missing the expected attributes — a clear error instead of an opaque AttributeError.
43
+ ``path`` may be a path/str to a ``.eval`` log OR an already-loaded EvalLog object (e.g. the inspect_ai
44
+ hook's ``data.log``). Returns (claim, salts). Raises InspectAdapterError if inspect_ai is unavailable
45
+ or the log is missing the expected attributes — a clear error instead of an opaque AttributeError.
30
46
  """
31
- try:
32
- from inspect_ai.log import read_eval_log # noqa: PLC0415 — lazy: keeps the core dependency-free
33
- except ImportError as e:
34
- raise InspectAdapterError(
35
- "inspect_ai is required for this adapter — install with: pip install \"proofbundle[inspect]\"") from e
36
-
37
- try:
38
- log = read_eval_log(str(path), header_only=True)
39
- except Exception as e: # noqa: BLE001 — surface any read/parse failure as a clear adapter error
40
- raise InspectAdapterError(f"could not read inspect_ai log {path!r}: {e}") from e
47
+ # An already-loaded EvalLog (has .eval + .results) is used directly — no re-read from disk.
48
+ if hasattr(path, "eval") and hasattr(path, "results"):
49
+ log = path
50
+ else:
51
+ try:
52
+ from inspect_ai.log import read_eval_log # noqa: PLC0415 — lazy: keeps the core dependency-free
53
+ except ImportError as e:
54
+ raise InspectAdapterError(
55
+ "inspect_ai is required for this adapter — install with: pip install \"proofbundle[inspect]\"") from e
56
+ try:
57
+ log = read_eval_log(str(path), header_only=True)
58
+ except Exception as e: # noqa: BLE001 — surface any read/parse failure as a clear adapter error
59
+ raise InspectAdapterError(f"could not read inspect_ai log {path!r}: {e}") from e
41
60
 
42
61
  ev = getattr(log, "eval", None)
43
62
  results = getattr(log, "results", None)
@@ -73,7 +92,7 @@ def from_inspect_ai_log(path, metric: str, *, comparator: str, threshold: str, t
73
92
 
74
93
  return build_eval_claim(
75
94
  suite=suite, suite_version=str(getattr(ev, "task_version", "1")),
76
- metric=metric, comparator=comparator, threshold=threshold, score=repr(value),
95
+ metric=metric, comparator=comparator, threshold=threshold, score=_score_str(value),
77
96
  n=int(getattr(results, "total_samples", 0) or 0),
78
97
  model_id=model_id, dataset_id=dataset_id, issuer="", timestamp=timestamp,
79
98
  provenance=provenance, model_salt=model_salt, dataset_salt=dataset_salt)
@@ -0,0 +1,63 @@
1
+ """inspect_ai end-of-task hook — opt-in auto-emit of a signed eval receipt (v1.0).
2
+
3
+ Registered via the ``inspect_ai`` entry-point group (see ``_inspect_registry``). When a task ends, and ONLY
4
+ when the user has opted in (``PROOFBUNDLE_EMIT=1``), this builds a signed proofbundle receipt from the eval
5
+ log and writes it out — never silently, never failing the eval. The heavy crypto is imported lazily inside
6
+ the hook body, so merely installing proofbundle does not slow inspect's startup.
7
+
8
+ Requires ``inspect_ai>=0.3.112`` (the generic lifecycle hooks + ``on_task_end``). Reuses the existing
9
+ ``from_inspect_ai_log`` adapter — ``data.log`` is already an EvalLog, so no re-read of the .eval file for a
10
+ normal ``eval()``. For an ``eval_set()`` the log may be header-only; then we fall back to reading the log
11
+ from ``data.log.location`` to recover the results.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import os
16
+
17
+ from inspect_ai.hooks import Hooks, hooks
18
+
19
+
20
+ def _emit_enabled() -> bool:
21
+ # kept inline + cheap: enabled() is consulted before every hook invocation (potentially per sample)
22
+ return os.environ.get("PROOFBUNDLE_EMIT") == "1"
23
+
24
+
25
+ def _first_metric(log) -> str:
26
+ """The first score's first metric name from an EvalLog, as a sensible default binding target."""
27
+ results = getattr(log, "results", None)
28
+ for score in (getattr(results, "scores", None) or []):
29
+ metrics = getattr(score, "metrics", None) or {}
30
+ for name in metrics:
31
+ return name
32
+ return "accuracy"
33
+
34
+
35
+ @hooks(name="proofbundle_hooks", description="Emit a signed proofbundle eval receipt at end of task (opt-in via PROOFBUNDLE_EMIT)")
36
+ class ProofbundleHooks(Hooks):
37
+ def enabled(self) -> bool:
38
+ return _emit_enabled()
39
+
40
+ async def on_task_end(self, data) -> None:
41
+ if not _emit_enabled():
42
+ return
43
+ try:
44
+ from datetime import datetime, timezone # noqa: PLC0415
45
+
46
+ from ._integration import emit_claim_receipt, emit_config # noqa: PLC0415
47
+ from .adapters.inspect_ai import from_inspect_ai_log # noqa: PLC0415
48
+
49
+ log = data.log
50
+ # header-only (eval_set) fallback: if results are missing, re-read the full log from its location
51
+ if getattr(log, "results", None) is None and getattr(log, "location", None):
52
+ from inspect_ai.log import read_eval_log # noqa: PLC0415
53
+ log = read_eval_log(str(log.location))
54
+
55
+ cfg = emit_config()
56
+ metric = cfg["metric"] or _first_metric(log)
57
+ claim, _ = from_inspect_ai_log(log, metric, comparator=cfg["comparator"],
58
+ threshold=cfg["threshold"],
59
+ timestamp=datetime.now(timezone.utc).isoformat())
60
+ eval_id = getattr(data, "eval_id", None) or "eval"
61
+ emit_claim_receipt(claim, f"proofbundle_receipt_{eval_id}.json")
62
+ except Exception as e: # noqa: BLE001 — an integration must never fail the host eval
63
+ print(f"[proofbundle] inspect_ai receipt emission skipped ({type(e).__name__}: {e})")
@@ -0,0 +1,67 @@
1
+ """pytest plugin — opt-in auto-emit of a signed receipt of the test run (v1.0).
2
+
3
+ Registered via the ``pytest11`` entry-point, so pytest loads it automatically at startup. It emits a signed
4
+ proofbundle receipt of the run ONLY when the user opts in — the ``--proofbundle`` flag or ``PROOFBUNDLE_EMIT=1``
5
+ — never on a normal run, never failing the run. This module must import cheaply (it loads at every pytest
6
+ startup), so the crypto is imported lazily inside the terminal-summary hook, not at module top.
7
+
8
+ Counts come from ``terminalreporter.stats`` (the canonical count source): a dict outcome→list. Only the
9
+ pass/fail outcomes (passed/failed/error) form the denominator; skipped/xfailed/xpassed are recorded in
10
+ provenance. ``error`` (collect/setup/teardown failures) is kept separate from ``failed`` (call-phase).
11
+ """
12
+ from __future__ import annotations
13
+
14
+
15
def pytest_addoption(parser):
    """Register the ``--proofbundle`` command-line flag in its own option group."""
    help_text = "emit a signed proofbundle receipt of the test run (also enabled by PROOFBUNDLE_EMIT=1)"
    options = parser.getgroup("proofbundle", "proofbundle signed test-run receipt")
    options.addoption("--proofbundle", action="store_true", dest="proofbundle", default=False,
                      help=help_text)
19
+
20
+
21
def _fmt(x: float) -> str:
    """Render *x* with at most six decimals, dropping trailing zeros and a dangling decimal point."""
    fixed = f"{x:.6f}"
    trimmed = fixed.rstrip("0").rstrip(".")
    if trimmed:
        return trimmed
    return "0"
23
+
24
+
25
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Emit a signed receipt of the finished test run when the user has opted in.

    Opt-in is decided by ``emit_enabled`` from the ``--proofbundle`` option value. Counts are taken from
    ``terminalreporter.stats``; the signed score is the fraction of distinct tests that passed cleanly.
    Any exception raised while building or emitting the receipt is caught and reported on stdout.
    """
    flag = bool(config.getoption("proofbundle", default=False))
    from ._integration import emit_enabled  # noqa: PLC0415
    if not emit_enabled(flag):
        return
    try:
        # Lazy imports: this module loads at every pytest startup, so heavy work stays in here.
        from datetime import datetime, timezone  # noqa: PLC0415

        from ._integration import emit_claim_receipt, emit_config  # noqa: PLC0415
        from .evalclaim import build_eval_claim  # noqa: PLC0415

        stats = terminalreporter.stats
        counts = {}
        for outcome in ("passed", "failed", "error", "skipped", "xfailed", "xpassed"):
            counts[outcome] = len(stats.get(outcome, []))

        # Count UNIQUE tests by node id, not the sum of reports: a single test can produce both a call-phase
        # report (passed/failed) AND a separate setup/teardown 'error' report, so summing over-counts. A test
        # is a clean pass only if it passed AND has no failure/error report; ran = distinct nodes with an
        # outcome. This keeps the signed pass_rate + n honest for fixture-teardown-erroring suites.
        def _node_ids(outcome):
            found = set()
            for report in stats.get(outcome, []):
                # Reports without a nodeid fall back to object identity.
                found.add(getattr(report, "nodeid", id(report)))
            return found

        passed_ids = _node_ids("passed")
        failed_ids = _node_ids("failed")
        error_ids = _node_ids("error")
        clean_passed = passed_ids - failed_ids - error_ids
        ran = len(passed_ids | failed_ids | error_ids)
        if ran == 0:
            print("[proofbundle] no pass/fail tests to attest — receipt skipped")
            return
        pass_rate = len(clean_passed) / ran

        cfg = emit_config()
        rootname = getattr(getattr(config, "rootpath", None), "name", None) or "pytest"
        provenance = {
            "harness": "pytest",
            "exit_status": int(exitstatus),
            "tests_ran": ran,
            "tests_passed": len(clean_passed),
        }
        for outcome, total in counts.items():
            provenance[f"n_{outcome}"] = total

        claim, _ = build_eval_claim(
            suite="pytest",
            suite_version=str(getattr(__import__("pytest"), "__version__", "unknown")),
            metric=cfg["metric"] or "pass_rate",
            comparator=cfg["comparator"],
            threshold=cfg["threshold"],
            score=_fmt(pass_rate),
            n=ran,
            model_id=str(getattr(config, "rootpath", rootname)),
            dataset_id="pytest-suite",
            issuer="",
            timestamp=datetime.now(timezone.utc).isoformat(),
            provenance=provenance,
        )
        emit_claim_receipt(claim, "proofbundle_pytest_receipt.json")
    except Exception as e:  # noqa: BLE001 — an integration must never fail the host test run
        print(f"[proofbundle] pytest receipt emission skipped ({type(e).__name__}: {e})")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: proofbundle
3
- Version: 0.9.0
3
+ Version: 1.0.0
4
4
  Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
5
5
  Author: Konrad Gruszka
6
6
  License: MIT
@@ -27,8 +27,10 @@ Provides-Extra: sdjwt
27
27
  Provides-Extra: eval
28
28
  Requires-Dist: rfc8785>=0.1.4; extra == "eval"
29
29
  Provides-Extra: adapters
30
+ Provides-Extra: pytest
31
+ Requires-Dist: pytest>=7; extra == "pytest"
30
32
  Provides-Extra: inspect
31
- Requires-Dist: inspect_ai<0.4,>=0.3.100; python_version >= "3.10" and extra == "inspect"
33
+ Requires-Dist: inspect_ai<0.4,>=0.3.112; python_version >= "3.10" and extra == "inspect"
32
34
  Provides-Extra: dev
33
35
  Requires-Dist: pytest>=7; extra == "dev"
34
36
  Requires-Dist: ruff>=0.5; extra == "dev"
@@ -38,7 +40,7 @@ Requires-Dist: build>=1; extra == "dev"
38
40
  Requires-Dist: hypothesis>=6; extra == "dev"
39
41
  Requires-Dist: rfc8785>=0.1.4; extra == "dev"
40
42
  Requires-Dist: sd-jwt>=0.10; extra == "dev"
41
- Requires-Dist: inspect_ai<0.4,>=0.3.100; python_version >= "3.10" and extra == "dev"
43
+ Requires-Dist: inspect_ai<0.4,>=0.3.112; python_version >= "3.10" and extra == "dev"
42
44
  Dynamic: license-file
43
45
 
44
46
  <div align="center">
@@ -71,7 +73,7 @@ no server, no network.**
71
73
 
72
74
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
73
75
  verify` checks one self-contained `bundle.json` with three offline cryptographic
74
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 96 tests.
76
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 102 tests.
75
77
 
76
78
  ## Contents
77
79
 
@@ -81,6 +83,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 96 tests.
81
83
  - [Install](#install)
82
84
  - [Quickstart](#quickstart)
83
85
  - [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
86
+ - [Integrations](#integrations--a-signed-receipt-of-your-eval-or-test-run-automatically-v10)
84
87
  - [Interoperability](#interoperability)
85
88
  - [Bundle format](#bundle-format-proofbundlev01)
86
89
  - [Eval receipts](#eval-receipts)
@@ -226,6 +229,25 @@ that the *artifact* is signed and offline-verifiable, with model and dataset kep
226
229
  See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
227
230
  [`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
228
231
 
232
+ ## Integrations — a signed receipt of your eval or test run, automatically (v1.0)
233
+
234
+ Since v1.0, proofbundle can **auto-emit** a signed receipt of an **inspect_ai eval** or a **pytest run** via
235
+ each framework's native plugin API — installed and ready, but strictly **opt-in** (it emits only when you set
236
+ `PROOFBUNDLE_EMIT=1` or pass a flag; never silently, never failing your run):
237
+
238
+ ```bash
239
+ pip install "proofbundle[inspect,eval]" && PROOFBUNDLE_EMIT=1 inspect eval task.py --model mockllm/model
240
+ pip install "proofbundle[pytest,eval]" && PROOFBUNDLE_EMIT=1 pytest
241
+ ```
242
+
243
+ The distinguishing angle is exactly this opt-in **auto-emit of an Ed25519-signed receipt via the framework's
244
+ own plugin** (an inspect_ai end-of-task hook + a pytest11 plugin), on top of the standards stack. Named
245
+ fairly: [ai-audit-trail](https://pypi.org/project/ai-audit-trail/) records *runtime* agent Decision Receipts
246
+ (FastAPI/LangChain, ISO 42001), a different layer; [ValiChord](https://github.com/topeuph-ai/ValiChord)
247
+ builds attestation bundles from inspect_ai logs *post-hoc* (its v1 library is JCS + SHA-256 Merkle + HMAC
248
+ challenge-response, **not digitally signed** — signatures are v2 scope). See
249
+ [INTEGRATIONS.md](INTEGRATIONS.md) (+ a prepared composite GitHub Action under [`action/`](action/action.yml)).
250
+
229
251
  ## Interoperability
230
252
 
231
253
  proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
@@ -336,9 +358,9 @@ computation is the domain of TEE approaches such as
336
358
  fairly: [Every Eval Ever](https://github.com/evaleval/every_eval_ever) standardizes eval *metadata* but
337
359
  adds no cryptography (proofbundle ships an EEE→receipt converter);
338
360
  [OpenSSF Model Signing](https://github.com/ossf/model-signing-spec) signs *model weights*, not eval
339
- results; [ValiChord](https://github.com/topeuph-ai/ValiChord) provides blind peer consensus and an
340
- attested log on a Holochain network (its v1 attestation library uses a simple SHA-256 Merkle tree, no
341
- signature, no SD-JWT, no in-toto). proofbundle is the lightweight, **standards-native** piece between them:
361
+ results; [ValiChord](https://github.com/topeuph-ai/ValiChord) plans blind peer consensus and a
362
+ Holochain attested log (v2 scope); its current v1 attestation library uses a simple SHA-256 Merkle tree with
363
+ no digital signature, no SD-JWT, no in-toto. proofbundle is the lightweight, **standards-native** piece between them:
342
364
  a portable receipt a third party verifies offline, with selective disclosure so an auditor can prove a
343
365
  threshold was met without revealing the model or the data. See [INTEROP.md](INTEROP.md).
344
366
 
@@ -384,8 +406,10 @@ attestation — see [SECURITY.md](SECURITY.md).
384
406
  verifier robustness + CI on Python 3.9 after a holistic review.
385
407
  - **v0.8** — an offline `make demo` (real eval log -> signed receipt -> verified),
386
408
  a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
387
- - **v0.9 (current release)** — the standards moat: a DSSE-signed in-toto `test-result` export, a C2SP
388
- tlog-checkpoint over the RFC 6962 root, an Every Eval Ever converter, and standards-native repositioning.
409
+ - **v0.9** — the standards moat: a DSSE-signed in-toto `test-result` export, a C2SP tlog-checkpoint over
410
+ the RFC 6962 root, an Every Eval Ever converter, and standards-native repositioning.
411
+ - **v1.0 (current release)** — distribution: opt-in framework integrations that auto-emit a signed receipt
412
+ of an inspect_ai eval (end-of-task hook) or a pytest run (pytest11 plugin), plus a composite GitHub Action.
389
413
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
390
414
  Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
391
415
 
@@ -2,6 +2,8 @@ LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
4
  src/proofbundle/__init__.py
5
+ src/proofbundle/_inspect_registry.py
6
+ src/proofbundle/_integration.py
5
7
  src/proofbundle/bundle.py
6
8
  src/proofbundle/checkpoint.py
7
9
  src/proofbundle/cli.py
@@ -10,9 +12,11 @@ src/proofbundle/eee_eval_schema.json
10
12
  src/proofbundle/emit.py
11
13
  src/proofbundle/errors.py
12
14
  src/proofbundle/evalclaim.py
15
+ src/proofbundle/inspect_hook.py
13
16
  src/proofbundle/intoto.py
14
17
  src/proofbundle/merkle.py
15
18
  src/proofbundle/py.typed
19
+ src/proofbundle/pytest_plugin.py
16
20
  src/proofbundle/sdjwt.py
17
21
  src/proofbundle/sdjwt_issue.py
18
22
  src/proofbundle/signature.py
@@ -37,10 +41,12 @@ tests/test_emit.py
37
41
  tests/test_eval_claim_schema.py
38
42
  tests/test_evalclaim.py
39
43
  tests/test_examples.py
44
+ tests/test_inspect_hook.py
40
45
  tests/test_intoto.py
41
46
  tests/test_intoto_dsse.py
42
47
  tests/test_merkle.py
43
48
  tests/test_merkle_property.py
49
+ tests/test_pytest_plugin.py
44
50
  tests/test_rekor_interop.py
45
51
  tests/test_rfc6962_external_vectors.py
46
52
  tests/test_schema.py
@@ -0,0 +1,8 @@
1
+ [console_scripts]
2
+ proofbundle = proofbundle.cli:main
3
+
4
+ [inspect_ai]
5
+ proofbundle = proofbundle._inspect_registry
6
+
7
+ [pytest11]
8
+ proofbundle = proofbundle.pytest_plugin
@@ -13,7 +13,7 @@ rfc8785>=0.1.4
13
13
  sd-jwt>=0.10
14
14
 
15
15
  [dev:python_version >= "3.10"]
16
- inspect_ai<0.4,>=0.3.100
16
+ inspect_ai<0.4,>=0.3.112
17
17
 
18
18
  [eval]
19
19
  rfc8785>=0.1.4
@@ -21,6 +21,9 @@ rfc8785>=0.1.4
21
21
  [inspect]
22
22
 
23
23
  [inspect:python_version >= "3.10"]
24
- inspect_ai<0.4,>=0.3.100
24
+ inspect_ai<0.4,>=0.3.112
25
+
26
+ [pytest]
27
+ pytest>=7
25
28
 
26
29
  [sdjwt]
@@ -0,0 +1,57 @@
1
+ """inspect_ai hook (v1.0): opt-in safety + signed receipt from a real EvalLog (data.log)."""
2
+ import asyncio
3
+ import json
4
+ import os
5
+ import types
6
+ import unittest
7
+ from pathlib import Path
8
+ from tempfile import TemporaryDirectory
9
+
10
+ FX = Path(__file__).resolve().parent / "fixtures" / "inspect_logs" / "safety_refusal_demo.eval"
11
+
12
+
13
class TestInspectHook(unittest.TestCase):
    """Opt-in behaviour of the inspect_ai hook, exercised against a real EvalLog fixture.

    The tests drive the hook through ``PROOFBUNDLE_*`` environment variables. The variables are
    snapshotted in ``setUp`` and restored by a cleanup, so a failing assertion cannot leak state
    (e.g. an output directory that no longer exists) into other tests or the surrounding process.
    """

    _ENV_KEYS = ("PROOFBUNDLE_EMIT", "PROOFBUNDLE_OUT", "PROOFBUNDLE_THRESHOLD")

    @classmethod
    def _snapshot_env(cls):
        """Return the current value (or None when unset) of every variable these tests touch."""
        return {key: os.environ.get(key) for key in cls._ENV_KEYS}

    @staticmethod
    def _restore_env(saved):
        """Put the environment back exactly as captured by ``_snapshot_env``."""
        for key, value in saved.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value

    def setUp(self):
        # Registered before the skip check: cleanups run even when setUp skips or a test fails.
        self.addCleanup(self._restore_env, self._snapshot_env())
        try:
            from inspect_ai.log import read_eval_log  # noqa: F401
        except ImportError:
            self.skipTest("inspect_ai not installed (pip install proofbundle[inspect])")

    def _data(self):
        """Build a stand-in for the hook's task-end payload around the header-only fixture log."""
        from inspect_ai.log import read_eval_log
        log = read_eval_log(str(FX), header_only=True)
        return types.SimpleNamespace(log=log, eval_id="demo", run_id="r", eval_set_id=None)

    def test_opt_in_off_no_receipt(self):
        from proofbundle.inspect_hook import ProofbundleHooks
        with TemporaryDirectory() as d:
            os.environ.pop("PROOFBUNDLE_EMIT", None)
            os.environ["PROOFBUNDLE_OUT"] = d
            asyncio.run(ProofbundleHooks().on_task_end(self._data()))
            self.assertEqual(list(Path(d).glob("*.json")), [])

    def test_opt_in_emits_verifiable_receipt(self):
        from proofbundle import verify_bundle
        from proofbundle.evalclaim import decode_eval_claim
        from proofbundle.inspect_hook import ProofbundleHooks
        with TemporaryDirectory() as d:
            os.environ["PROOFBUNDLE_EMIT"] = "1"
            os.environ["PROOFBUNDLE_OUT"] = d
            os.environ["PROOFBUNDLE_THRESHOLD"] = "0"
            asyncio.run(ProofbundleHooks().on_task_end(self._data()))
            files = list(Path(d).glob("*.json"))
            self.assertEqual(len(files), 1)
            b = json.loads(files[0].read_text())
            self.assertTrue(verify_bundle(b).ok)
            dec = decode_eval_claim(b)
            self.assertEqual(dec["suite"], "safety_refusal_demo")
            self.assertNotIn("mockllm", json.dumps(dec))  # model stays a salted commitment

    def test_enabled_reflects_env(self):
        from proofbundle.inspect_hook import ProofbundleHooks
        os.environ["PROOFBUNDLE_EMIT"] = "1"
        self.assertTrue(ProofbundleHooks().enabled())
        os.environ["PROOFBUNDLE_EMIT"] = "0"
        self.assertFalse(ProofbundleHooks().enabled())
@@ -0,0 +1,62 @@
1
+ """pytest plugin (v1.0): opt-in safety + signed receipt of the test run from terminalreporter.stats."""
2
+ import json
3
+ import types
4
+ import unittest
5
+ from pathlib import Path
6
+ from tempfile import TemporaryDirectory
7
+
8
+ from proofbundle import verify_bundle
9
+ from proofbundle.evalclaim import decode_eval_claim
10
+ from proofbundle.pytest_plugin import pytest_terminal_summary
11
+
12
+
13
def _reporter(passed=3, failed=0, error=0):
    """Build a stand-in terminal reporter whose ``stats`` holds the requested number of reports.

    Every report is a DISTINCT object. The plugin de-duplicates reports by ``nodeid`` and falls back to
    ``id()`` for objects without one, so repeating a single object (``[object()] * n``) would collapse
    n reports into one and skew the computed pass rate. Outcomes with a zero count are left out of
    ``stats`` entirely.
    """
    requested = {"passed": passed, "failed": failed, "error": error}
    stats = {
        outcome: [object() for _ in range(count)]
        for outcome, count in requested.items()
        if count
    }
    return types.SimpleNamespace(stats=stats)
22
+
23
+
24
def _config(flag, rootpath="myproj"):
    """Build a minimal stand-in for pytest's ``config`` object.

    ``getoption`` always answers with *flag*. ``rootpath`` is a real ``pathlib.Path`` so that both
    ``.name`` and ``str()`` return *rootpath*. A ``__str__`` stored as an instance attribute on a
    ``SimpleNamespace`` is ignored by ``str()``, because special methods are looked up on the type,
    not the instance.
    """
    def getoption(name, default=False):
        return flag

    return types.SimpleNamespace(getoption=getoption, rootpath=Path(rootpath))
27
+
28
+
29
class TestPytestPlugin(unittest.TestCase):
    """Opt-in behaviour of the pytest plugin's terminal-summary hook, driven with stand-in objects.

    The tests steer the plugin through ``PROOFBUNDLE_*`` environment variables. The variables are
    snapshotted in ``setUp`` and restored by a cleanup, so neither a failing assertion nor a finished
    test leaves them modified (e.g. pointing at a temporary directory that no longer exists).
    """

    _ENV_KEYS = ("PROOFBUNDLE_EMIT", "PROOFBUNDLE_OUT", "PROOFBUNDLE_THRESHOLD")

    def setUp(self):
        import os
        saved = {key: os.environ.get(key) for key in self._ENV_KEYS}

        def _restore():
            for key, value in saved.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value

        # Cleanups run whether the test passes, fails or errors.
        self.addCleanup(_restore)

    def test_opt_in_off_no_receipt(self):
        with TemporaryDirectory() as d:
            import os
            os.environ.pop("PROOFBUNDLE_EMIT", None)
            os.environ["PROOFBUNDLE_OUT"] = d
            pytest_terminal_summary(_reporter(), 0, _config(False))
            self.assertEqual([f for f in Path(d).iterdir()], [])

    def test_opt_in_flag_emits_receipt(self):
        with TemporaryDirectory() as d:
            import os
            os.environ["PROOFBUNDLE_OUT"] = d
            os.environ["PROOFBUNDLE_THRESHOLD"] = "0.5"
            os.environ.pop("PROOFBUNDLE_EMIT", None)
            pytest_terminal_summary(_reporter(passed=3, failed=1), 1, _config(True))
            files = list(Path(d).glob("*.json"))
            self.assertEqual(len(files), 1)
            b = json.loads(files[0].read_text())
            self.assertTrue(verify_bundle(b).ok)
            d2 = decode_eval_claim(b)
            self.assertEqual(d2["suite"], "pytest")
            self.assertEqual(d2["metric"], "pass_rate")
            self.assertTrue(d2["passed"])  # 3/4 = 0.75 >= 0.5
            self.assertEqual(d2["provenance"]["n_failed"], 1)

    def test_no_tests_no_receipt(self):
        with TemporaryDirectory() as d:
            import os
            os.environ["PROOFBUNDLE_EMIT"] = "1"
            os.environ["PROOFBUNDLE_OUT"] = d
            pytest_terminal_summary(_reporter(passed=0), 5, _config(False))
            self.assertEqual(list(Path(d).glob("*.json")), [])
@@ -1,30 +0,0 @@
1
- """proofbundle, an offline verifier for portable cryptographic evidence bundles.
2
-
3
- Verify, fully offline and in pure Python, that a payload was Ed25519 signed and
4
- anchored under an RFC 6962 Merkle root, with optional SD-JWT selective
5
- disclosure. The verification half of a signed, third-party-verifiable evidence
6
- receipt.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- from .bundle import SCHEMA, load_bundle, verify_bundle
12
- from .emit import emit_bundle, generate_signer
13
- from .errors import Check, ProofBundleError, VerificationResult
14
- from .merkle import verify_consistency, verify_inclusion
15
-
16
- __version__ = "0.9.0"
17
-
18
- __all__ = [
19
- "__version__",
20
- "SCHEMA",
21
- "verify_bundle",
22
- "load_bundle",
23
- "emit_bundle",
24
- "generate_signer",
25
- "verify_inclusion",
26
- "verify_consistency",
27
- "VerificationResult",
28
- "Check",
29
- "ProofBundleError",
30
- ]
@@ -1,2 +0,0 @@
1
- [console_scripts]
2
- proofbundle = proofbundle.cli:main
File without changes
File without changes