proofbundle 0.7.1__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {proofbundle-0.7.1/src/proofbundle.egg-info → proofbundle-0.8.1}/PKG-INFO +40 -9
- {proofbundle-0.7.1 → proofbundle-0.8.1}/README.md +39 -8
- {proofbundle-0.7.1 → proofbundle-0.8.1}/pyproject.toml +1 -1
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/__init__.py +1 -1
- {proofbundle-0.7.1 → proofbundle-0.8.1/src/proofbundle.egg-info}/PKG-INFO +40 -9
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle.egg-info/SOURCES.txt +1 -0
- proofbundle-0.8.1/tests/test_examples.py +28 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/LICENSE +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/setup.cfg +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/adapters/__init__.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/adapters/inspect_ai.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/adapters/lm_eval.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/bundle.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/cli.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/emit.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/errors.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/evalclaim.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/intoto.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/merkle.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/py.typed +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/sdjwt.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/sdjwt_issue.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle/signature.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle.egg-info/dependency_links.txt +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle.egg-info/entry_points.txt +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle.egg-info/requires.txt +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/src/proofbundle.egg-info/top_level.txt +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_adapters.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_bundle.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_bundle_robustness.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_cli.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_cli_eval.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_emit.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_eval_claim_schema.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_evalclaim.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_intoto.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_merkle.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_merkle_property.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_rekor_interop.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_rfc6962_external_vectors.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_schema.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_sdjwt_issue.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_sdjwt_reference.py +0 -0
- {proofbundle-0.7.1 → proofbundle-0.8.1}/tests/test_signature.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: proofbundle
|
|
3
|
-
Version: 0.7.1
|
|
3
|
+
Version: 0.8.1
|
|
4
4
|
Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
|
|
5
5
|
Author: Konrad Gruszka
|
|
6
6
|
License: MIT
|
|
@@ -70,7 +70,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
|
|
|
70
70
|
|
|
71
71
|
**At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
|
|
72
72
|
verify` checks one self-contained `bundle.json` with three offline cryptographic
|
|
73
|
-
checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
|
|
73
|
+
checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 74 tests.
|
|
74
74
|
|
|
75
75
|
## Contents
|
|
76
76
|
|
|
@@ -79,6 +79,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
|
|
|
79
79
|
- [How it fits together](#how-it-fits-together)
|
|
80
80
|
- [Install](#install)
|
|
81
81
|
- [Quickstart](#quickstart)
|
|
82
|
+
- [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
|
|
82
83
|
- [Interoperability](#interoperability)
|
|
83
84
|
- [Bundle format](#bundle-format-proofbundlev01)
|
|
84
85
|
- [Eval receipts](#eval-receipts)
|
|
@@ -209,6 +210,21 @@ from proofbundle import verify_consistency
|
|
|
209
210
|
verify_consistency(first_size, second_size, proof, first_root, second_root) # -> bool
|
|
210
211
|
```
|
|
211
212
|
|
|
213
|
+
## Demo — a real eval log to a verified receipt, offline
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
pip install "proofbundle[eval,inspect]"
|
|
217
|
+
make demo # or: bash scripts/demo.sh
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
`make demo` runs end-to-end with **no network, no API key, no GPU**: it takes genuine eval logs — an
|
|
221
|
+
inspect_ai `mockllm/model` `.eval` log and an lm-evaluation-harness `--model dummy` `results.json`
|
|
222
|
+
(committed under `tests/fixtures/`, generated offline) — turns each into a signed, Merkle-anchored
|
|
223
|
+
proofbundle receipt, and verifies it to `=> OK`. The scores are random (a dummy model); the point is
|
|
224
|
+
that the *artifact* is signed and offline-verifiable, with model and dataset kept as salted commitments.
|
|
225
|
+
See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
|
|
226
|
+
[`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
|
|
227
|
+
|
|
212
228
|
## Interoperability
|
|
213
229
|
|
|
214
230
|
proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
|
|
@@ -285,11 +301,24 @@ proofbundle show-eval receipt.json # verify + print the claim (issuer-boun
|
|
|
285
301
|
```
|
|
286
302
|
|
|
287
303
|
The claim format is specified in [EVAL_CLAIM.md](EVAL_CLAIM.md); the emit path uses
|
|
288
|
-
RFC 8785 JCS canonicalization, the verify path stays dependency-free.
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
304
|
+
RFC 8785 JCS canonicalization, the verify path stays dependency-free.
|
|
305
|
+
|
|
306
|
+
**Honesty guardrail (the exact scope).** A receipt attests the **authenticity and integrity** of a
|
|
307
|
+
*claimed* result and its context — these exact bytes, signed by this key, anchored under this root, with
|
|
308
|
+
model/dataset kept as salted commitments. It does **not** attest the **correctness of the computation**,
|
|
309
|
+
and it cannot detect **cherry-picking** of the eval. Whether the eval was well designed, whether the
|
|
310
|
+
suite measures what it claims, and whether the number was computed honestly are separate questions.
|
|
311
|
+
Trusted-execution approaches such as [Attestable Audits](https://arxiv.org/abs/2506.23706) target
|
|
312
|
+
computation-correctness with a different (hardware) trust model; proofbundle is the lightweight,
|
|
313
|
+
hardware-free path to a portable, tamper-evident, selectively disclosable *result artifact*.
|
|
314
|
+
|
|
315
|
+
**How this differs from a bare hash or a TEE.** A plain SHA-256 of a log commits to bytes but carries no
|
|
316
|
+
signature, no tamper-evident anchor, and no selective disclosure (an attestation-exporter idea along
|
|
317
|
+
those lines,
|
|
318
|
+
[inspect_evals PR #1610](https://github.com/UKGovernmentBEIS/inspect_evals/pull/1610), was closed as
|
|
319
|
+
belonging *a layer above* the framework — which is exactly where proofbundle sits). A TEE proves the
|
|
320
|
+
computation ran untampered but needs specific hardware. proofbundle adds Ed25519 + RFC 6962 Merkle +
|
|
321
|
+
SD-JWT selective disclosure over one portable file, offline.
|
|
293
322
|
|
|
294
323
|
### A verification layer for trustworthy eval logs
|
|
295
324
|
|
|
@@ -328,8 +357,10 @@ attestation — see [SECURITY.md](SECURITY.md).
|
|
|
328
357
|
- **v0.5** — inspect_ai adapter (stable API), in-toto Statement v1 view, SD-JWT **issuance** (RFC 9901).
|
|
329
358
|
- **v0.6** — a second eval adapter (lm-evaluation-harness, real format + provenance), INTEROP.md,
|
|
330
359
|
CITATION.cff, PEP 740 attestations documented.
|
|
331
|
-
- **v0.7
|
|
332
|
-
|
|
360
|
+
- **v0.7** — citability polish (ORCID, Zenodo DOI placeholder, in-toto proposal draft); v0.7.1 hardened
|
|
361
|
+
verifier robustness + CI on Python 3.9 after a holistic review.
|
|
362
|
+
- **v0.8 (current release)** — an offline `make demo` (real eval log -> signed receipt -> verified),
|
|
363
|
+
a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
|
|
333
364
|
- **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
|
|
334
365
|
Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
|
|
335
366
|
|
|
@@ -27,7 +27,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
|
|
|
27
27
|
|
|
28
28
|
**At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
|
|
29
29
|
verify` checks one self-contained `bundle.json` with three offline cryptographic
|
|
30
|
-
checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
|
|
30
|
+
checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 74 tests.
|
|
31
31
|
|
|
32
32
|
## Contents
|
|
33
33
|
|
|
@@ -36,6 +36,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
|
|
|
36
36
|
- [How it fits together](#how-it-fits-together)
|
|
37
37
|
- [Install](#install)
|
|
38
38
|
- [Quickstart](#quickstart)
|
|
39
|
+
- [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
|
|
39
40
|
- [Interoperability](#interoperability)
|
|
40
41
|
- [Bundle format](#bundle-format-proofbundlev01)
|
|
41
42
|
- [Eval receipts](#eval-receipts)
|
|
@@ -166,6 +167,21 @@ from proofbundle import verify_consistency
|
|
|
166
167
|
verify_consistency(first_size, second_size, proof, first_root, second_root) # -> bool
|
|
167
168
|
```
|
|
168
169
|
|
|
170
|
+
## Demo — a real eval log to a verified receipt, offline
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
pip install "proofbundle[eval,inspect]"
|
|
174
|
+
make demo # or: bash scripts/demo.sh
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
`make demo` runs end-to-end with **no network, no API key, no GPU**: it takes genuine eval logs — an
|
|
178
|
+
inspect_ai `mockllm/model` `.eval` log and an lm-evaluation-harness `--model dummy` `results.json`
|
|
179
|
+
(committed under `tests/fixtures/`, generated offline) — turns each into a signed, Merkle-anchored
|
|
180
|
+
proofbundle receipt, and verifies it to `=> OK`. The scores are random (a dummy model); the point is
|
|
181
|
+
that the *artifact* is signed and offline-verifiable, with model and dataset kept as salted commitments.
|
|
182
|
+
See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
|
|
183
|
+
[`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
|
|
184
|
+
|
|
169
185
|
## Interoperability
|
|
170
186
|
|
|
171
187
|
proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
|
|
@@ -242,11 +258,24 @@ proofbundle show-eval receipt.json # verify + print the claim (issuer-boun
|
|
|
242
258
|
```
|
|
243
259
|
|
|
244
260
|
The claim format is specified in [EVAL_CLAIM.md](EVAL_CLAIM.md); the emit path uses
|
|
245
|
-
RFC 8785 JCS canonicalization, the verify path stays dependency-free.
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
261
|
+
RFC 8785 JCS canonicalization, the verify path stays dependency-free.
|
|
262
|
+
|
|
263
|
+
**Honesty guardrail (the exact scope).** A receipt attests the **authenticity and integrity** of a
|
|
264
|
+
*claimed* result and its context — these exact bytes, signed by this key, anchored under this root, with
|
|
265
|
+
model/dataset kept as salted commitments. It does **not** attest the **correctness of the computation**,
|
|
266
|
+
and it cannot detect **cherry-picking** of the eval. Whether the eval was well designed, whether the
|
|
267
|
+
suite measures what it claims, and whether the number was computed honestly are separate questions.
|
|
268
|
+
Trusted-execution approaches such as [Attestable Audits](https://arxiv.org/abs/2506.23706) target
|
|
269
|
+
computation-correctness with a different (hardware) trust model; proofbundle is the lightweight,
|
|
270
|
+
hardware-free path to a portable, tamper-evident, selectively disclosable *result artifact*.
|
|
271
|
+
|
|
272
|
+
**How this differs from a bare hash or a TEE.** A plain SHA-256 of a log commits to bytes but carries no
|
|
273
|
+
signature, no tamper-evident anchor, and no selective disclosure (an attestation-exporter idea along
|
|
274
|
+
those lines,
|
|
275
|
+
[inspect_evals PR #1610](https://github.com/UKGovernmentBEIS/inspect_evals/pull/1610), was closed as
|
|
276
|
+
belonging *a layer above* the framework — which is exactly where proofbundle sits). A TEE proves the
|
|
277
|
+
computation ran untampered but needs specific hardware. proofbundle adds Ed25519 + RFC 6962 Merkle +
|
|
278
|
+
SD-JWT selective disclosure over one portable file, offline.
|
|
250
279
|
|
|
251
280
|
### A verification layer for trustworthy eval logs
|
|
252
281
|
|
|
@@ -285,8 +314,10 @@ attestation — see [SECURITY.md](SECURITY.md).
|
|
|
285
314
|
- **v0.5** — inspect_ai adapter (stable API), in-toto Statement v1 view, SD-JWT **issuance** (RFC 9901).
|
|
286
315
|
- **v0.6** — a second eval adapter (lm-evaluation-harness, real format + provenance), INTEROP.md,
|
|
287
316
|
CITATION.cff, PEP 740 attestations documented.
|
|
288
|
-
- **v0.7
|
|
289
|
-
|
|
317
|
+
- **v0.7** — citability polish (ORCID, Zenodo DOI placeholder, in-toto proposal draft); v0.7.1 hardened
|
|
318
|
+
verifier robustness + CI on Python 3.9 after a holistic review.
|
|
319
|
+
- **v0.8 (current release)** — an offline `make demo` (real eval log -> signed receipt -> verified),
|
|
320
|
+
a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
|
|
290
321
|
- **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
|
|
291
322
|
Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
|
|
292
323
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "proofbundle"
|
|
7
|
-
version = "0.7.1"
|
|
7
|
+
version = "0.8.1"
|
|
8
8
|
description = "Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -13,7 +13,7 @@ from .emit import emit_bundle, generate_signer
|
|
|
13
13
|
from .errors import Check, ProofBundleError, VerificationResult
|
|
14
14
|
from .merkle import verify_consistency, verify_inclusion
|
|
15
15
|
|
|
16
|
-
__version__ = "0.7.1"
|
|
16
|
+
__version__ = "0.8.1"
|
|
17
17
|
|
|
18
18
|
__all__ = [
|
|
19
19
|
"__version__",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: proofbundle
|
|
3
|
-
Version: 0.7.1
|
|
3
|
+
Version: 0.8.1
|
|
4
4
|
Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
|
|
5
5
|
Author: Konrad Gruszka
|
|
6
6
|
License: MIT
|
|
@@ -70,7 +70,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
|
|
|
70
70
|
|
|
71
71
|
**At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
|
|
72
72
|
verify` checks one self-contained `bundle.json` with three offline cryptographic
|
|
73
|
-
checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
|
|
73
|
+
checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 74 tests.
|
|
74
74
|
|
|
75
75
|
## Contents
|
|
76
76
|
|
|
@@ -79,6 +79,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
|
|
|
79
79
|
- [How it fits together](#how-it-fits-together)
|
|
80
80
|
- [Install](#install)
|
|
81
81
|
- [Quickstart](#quickstart)
|
|
82
|
+
- [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
|
|
82
83
|
- [Interoperability](#interoperability)
|
|
83
84
|
- [Bundle format](#bundle-format-proofbundlev01)
|
|
84
85
|
- [Eval receipts](#eval-receipts)
|
|
@@ -209,6 +210,21 @@ from proofbundle import verify_consistency
|
|
|
209
210
|
verify_consistency(first_size, second_size, proof, first_root, second_root) # -> bool
|
|
210
211
|
```
|
|
211
212
|
|
|
213
|
+
## Demo — a real eval log to a verified receipt, offline
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
pip install "proofbundle[eval,inspect]"
|
|
217
|
+
make demo # or: bash scripts/demo.sh
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
`make demo` runs end-to-end with **no network, no API key, no GPU**: it takes genuine eval logs — an
|
|
221
|
+
inspect_ai `mockllm/model` `.eval` log and an lm-evaluation-harness `--model dummy` `results.json`
|
|
222
|
+
(committed under `tests/fixtures/`, generated offline) — turns each into a signed, Merkle-anchored
|
|
223
|
+
proofbundle receipt, and verifies it to `=> OK`. The scores are random (a dummy model); the point is
|
|
224
|
+
that the *artifact* is signed and offline-verifiable, with model and dataset kept as salted commitments.
|
|
225
|
+
See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
|
|
226
|
+
[`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
|
|
227
|
+
|
|
212
228
|
## Interoperability
|
|
213
229
|
|
|
214
230
|
proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
|
|
@@ -285,11 +301,24 @@ proofbundle show-eval receipt.json # verify + print the claim (issuer-boun
|
|
|
285
301
|
```
|
|
286
302
|
|
|
287
303
|
The claim format is specified in [EVAL_CLAIM.md](EVAL_CLAIM.md); the emit path uses
|
|
288
|
-
RFC 8785 JCS canonicalization, the verify path stays dependency-free.
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
304
|
+
RFC 8785 JCS canonicalization, the verify path stays dependency-free.
|
|
305
|
+
|
|
306
|
+
**Honesty guardrail (the exact scope).** A receipt attests the **authenticity and integrity** of a
|
|
307
|
+
*claimed* result and its context — these exact bytes, signed by this key, anchored under this root, with
|
|
308
|
+
model/dataset kept as salted commitments. It does **not** attest the **correctness of the computation**,
|
|
309
|
+
and it cannot detect **cherry-picking** of the eval. Whether the eval was well designed, whether the
|
|
310
|
+
suite measures what it claims, and whether the number was computed honestly are separate questions.
|
|
311
|
+
Trusted-execution approaches such as [Attestable Audits](https://arxiv.org/abs/2506.23706) target
|
|
312
|
+
computation-correctness with a different (hardware) trust model; proofbundle is the lightweight,
|
|
313
|
+
hardware-free path to a portable, tamper-evident, selectively disclosable *result artifact*.
|
|
314
|
+
|
|
315
|
+
**How this differs from a bare hash or a TEE.** A plain SHA-256 of a log commits to bytes but carries no
|
|
316
|
+
signature, no tamper-evident anchor, and no selective disclosure (an attestation-exporter idea along
|
|
317
|
+
those lines,
|
|
318
|
+
[inspect_evals PR #1610](https://github.com/UKGovernmentBEIS/inspect_evals/pull/1610), was closed as
|
|
319
|
+
belonging *a layer above* the framework — which is exactly where proofbundle sits). A TEE proves the
|
|
320
|
+
computation ran untampered but needs specific hardware. proofbundle adds Ed25519 + RFC 6962 Merkle +
|
|
321
|
+
SD-JWT selective disclosure over one portable file, offline.
|
|
293
322
|
|
|
294
323
|
### A verification layer for trustworthy eval logs
|
|
295
324
|
|
|
@@ -328,8 +357,10 @@ attestation — see [SECURITY.md](SECURITY.md).
|
|
|
328
357
|
- **v0.5** — inspect_ai adapter (stable API), in-toto Statement v1 view, SD-JWT **issuance** (RFC 9901).
|
|
329
358
|
- **v0.6** — a second eval adapter (lm-evaluation-harness, real format + provenance), INTEROP.md,
|
|
330
359
|
CITATION.cff, PEP 740 attestations documented.
|
|
331
|
-
- **v0.7
|
|
332
|
-
|
|
360
|
+
- **v0.7** — citability polish (ORCID, Zenodo DOI placeholder, in-toto proposal draft); v0.7.1 hardened
|
|
361
|
+
verifier robustness + CI on Python 3.9 after a holistic review.
|
|
362
|
+
- **v0.8 (current release)** — an offline `make demo` (real eval log -> signed receipt -> verified),
|
|
363
|
+
a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
|
|
333
364
|
- **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
|
|
334
365
|
Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
|
|
335
366
|
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""The demo examples run end-to-end (real fixtures -> receipt -> verify). Covers `make demo` (Phase B)."""
|
|
2
|
+
import importlib.util
|
|
3
|
+
import sys
|
|
4
|
+
import unittest
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
REPO = Path(__file__).resolve().parents[1]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _run_example(name):
|
|
11
|
+
try:
|
|
12
|
+
import inspect_ai.log # noqa: F401 (inspect example needs it)
|
|
13
|
+
except ImportError:
|
|
14
|
+
if name == "inspect_receipt":
|
|
15
|
+
raise unittest.SkipTest("inspect_ai not installed")
|
|
16
|
+
spec = importlib.util.spec_from_file_location(name, REPO / "examples" / f"{name}.py")
|
|
17
|
+
m = importlib.util.module_from_spec(spec)
|
|
18
|
+
sys.modules[name] = m
|
|
19
|
+
spec.loader.exec_module(m)
|
|
20
|
+
return m.main()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestExamples(unittest.TestCase):
|
|
24
|
+
def test_lm_eval_receipt_example(self):
|
|
25
|
+
self.assertEqual(_run_example("lm_eval_receipt"), 0)
|
|
26
|
+
|
|
27
|
+
def test_inspect_receipt_example(self):
|
|
28
|
+
self.assertEqual(_run_example("inspect_receipt"), 0)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|