proofbundle 0.7.1__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {proofbundle-0.7.1/src/proofbundle.egg-info → proofbundle-0.8.0}/PKG-INFO +40 -9
  2. {proofbundle-0.7.1 → proofbundle-0.8.0}/README.md +39 -8
  3. {proofbundle-0.7.1 → proofbundle-0.8.0}/pyproject.toml +1 -1
  4. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/__init__.py +1 -1
  5. {proofbundle-0.7.1 → proofbundle-0.8.0/src/proofbundle.egg-info}/PKG-INFO +40 -9
  6. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle.egg-info/SOURCES.txt +1 -0
  7. proofbundle-0.8.0/tests/test_examples.py +28 -0
  8. {proofbundle-0.7.1 → proofbundle-0.8.0}/LICENSE +0 -0
  9. {proofbundle-0.7.1 → proofbundle-0.8.0}/setup.cfg +0 -0
  10. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/adapters/__init__.py +0 -0
  11. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/adapters/inspect_ai.py +0 -0
  12. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/adapters/lm_eval.py +0 -0
  13. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/bundle.py +0 -0
  14. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/cli.py +0 -0
  15. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/emit.py +0 -0
  16. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/errors.py +0 -0
  17. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/evalclaim.py +0 -0
  18. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/intoto.py +0 -0
  19. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/merkle.py +0 -0
  20. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/py.typed +0 -0
  21. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/sdjwt.py +0 -0
  22. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/sdjwt_issue.py +0 -0
  23. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle/signature.py +0 -0
  24. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle.egg-info/dependency_links.txt +0 -0
  25. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle.egg-info/entry_points.txt +0 -0
  26. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle.egg-info/requires.txt +0 -0
  27. {proofbundle-0.7.1 → proofbundle-0.8.0}/src/proofbundle.egg-info/top_level.txt +0 -0
  28. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_adapters.py +0 -0
  29. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_bundle.py +0 -0
  30. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_bundle_robustness.py +0 -0
  31. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_cli.py +0 -0
  32. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_cli_eval.py +0 -0
  33. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_emit.py +0 -0
  34. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_eval_claim_schema.py +0 -0
  35. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_evalclaim.py +0 -0
  36. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_intoto.py +0 -0
  37. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_merkle.py +0 -0
  38. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_merkle_property.py +0 -0
  39. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_rekor_interop.py +0 -0
  40. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_rfc6962_external_vectors.py +0 -0
  41. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_schema.py +0 -0
  42. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_sdjwt_issue.py +0 -0
  43. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_sdjwt_reference.py +0 -0
  44. {proofbundle-0.7.1 → proofbundle-0.8.0}/tests/test_signature.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: proofbundle
3
- Version: 0.7.1
3
+ Version: 0.8.0
4
4
  Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
5
5
  Author: Konrad Gruszka
6
6
  License: MIT
@@ -70,7 +70,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
70
70
 
71
71
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
72
72
  verify` checks one self-contained `bundle.json` with three offline cryptographic
73
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
73
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 74 tests.
74
74
 
75
75
  ## Contents
76
76
 
@@ -79,6 +79,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
79
79
  - [How it fits together](#how-it-fits-together)
80
80
  - [Install](#install)
81
81
  - [Quickstart](#quickstart)
82
+ - [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
82
83
  - [Interoperability](#interoperability)
83
84
  - [Bundle format](#bundle-format-proofbundlev01)
84
85
  - [Eval receipts](#eval-receipts)
@@ -209,6 +210,21 @@ from proofbundle import verify_consistency
209
210
  verify_consistency(first_size, second_size, proof, first_root, second_root) # -> bool
210
211
  ```
211
212
 
213
+ ## Demo — a real eval log to a verified receipt, offline
214
+
215
+ ```bash
216
+ pip install "proofbundle[eval,inspect]"
217
+ make demo # or: bash scripts/demo.sh
218
+ ```
219
+
220
+ `make demo` runs end-to-end with **no network, no API key, no GPU**: it takes genuine eval logs — an
221
+ inspect_ai `mockllm/model` `.eval` log and an lm-evaluation-harness `--model dummy` `results.json`
222
+ (committed under `tests/fixtures/`, generated offline) — turns each into a signed, Merkle-anchored
223
+ proofbundle receipt, and verifies it to `=> OK`. The scores are random (a dummy model); the point is
224
+ that the *artifact* is signed and offline-verifiable, with model and dataset kept as salted commitments.
225
+ See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
226
+ [`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
227
+
212
228
  ## Interoperability
213
229
 
214
230
  proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
@@ -285,11 +301,24 @@ proofbundle show-eval receipt.json # verify + print the claim (issuer-boun
285
301
  ```
286
302
 
287
303
  The claim format is specified in [EVAL_CLAIM.md](EVAL_CLAIM.md); the emit path uses
288
- RFC 8785 JCS canonicalization, the verify path stays dependency-free. **Honest scope:**
289
- a receipt proves `passed` against `threshold` and hides the model/dataset via salted
290
- commitments — it does **not** prove the evaluation was well designed or that the score
291
- itself is correct. Those are human judgements; what it removes is the need to simply
292
- trust the number.
304
+ RFC 8785 JCS canonicalization, the verify path stays dependency-free.
305
+
306
+ **Honesty guardrail (the exact scope).** A receipt attests the **authenticity and integrity** of a
307
+ *claimed* result and its context — these exact bytes, signed by this key, anchored under this root, with
308
+ model/dataset kept as salted commitments. It does **not** attest the **correctness of the computation**,
309
+ and it cannot detect **cherry-picking** of the eval. Whether the eval was well designed, whether the
310
+ suite measures what it claims, and whether the number was computed honestly are separate questions.
311
+ Trusted-execution approaches such as [Attestable Audits](https://arxiv.org/abs/2506.23706) target
312
+ computation-correctness with a different (hardware) trust model; proofbundle is the lightweight,
313
+ hardware-free path to a portable, tamper-evident, selectively disclosable *result artifact*.
314
+
315
+ **How this differs from a bare hash or a TEE.** A plain SHA-256 of a log commits to bytes but carries no
316
+ signature, no tamper-evident anchor, and no selective disclosure (an attestation-exporter idea along
317
+ those lines,
318
+ [inspect_evals PR #1610](https://github.com/UKGovernmentBEIS/inspect_evals/pull/1610), was closed as
319
+ belonging *a layer above* the framework — which is exactly where proofbundle sits). A TEE proves the
320
+ computation ran untampered but needs specific hardware. proofbundle adds Ed25519 + RFC 6962 Merkle +
321
+ SD-JWT selective disclosure over one portable file, offline.
293
322
 
294
323
  ### A verification layer for trustworthy eval logs
295
324
 
@@ -328,8 +357,10 @@ attestation — see [SECURITY.md](SECURITY.md).
328
357
  - **v0.5** — inspect_ai adapter (stable API), in-toto Statement v1 view, SD-JWT **issuance** (RFC 9901).
329
358
  - **v0.6** — a second eval adapter (lm-evaluation-harness, real format + provenance), INTEROP.md,
330
359
  CITATION.cff, PEP 740 attestations documented.
331
- - **v0.7 (current release)** — citability polish: ORCID in CITATION.cff, a Zenodo DOI placeholder
332
- (assigned on release), and a draft in-toto ML-eval predicate proposal.
360
+ - **v0.7** — citability polish (ORCID, Zenodo DOI placeholder, in-toto proposal draft); v0.7.1 hardened
361
+ verifier robustness + CI on Python 3.9 after a holistic review.
362
+ - **v0.8 (current release)** — an offline `make demo` (real eval log -> signed receipt -> verified),
363
+ a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
333
364
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
334
365
  Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
335
366
 
@@ -27,7 +27,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
27
27
 
28
28
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
29
29
  verify` checks one self-contained `bundle.json` with three offline cryptographic
30
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
30
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 74 tests.
31
31
 
32
32
  ## Contents
33
33
 
@@ -36,6 +36,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
36
36
  - [How it fits together](#how-it-fits-together)
37
37
  - [Install](#install)
38
38
  - [Quickstart](#quickstart)
39
+ - [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
39
40
  - [Interoperability](#interoperability)
40
41
  - [Bundle format](#bundle-format-proofbundlev01)
41
42
  - [Eval receipts](#eval-receipts)
@@ -166,6 +167,21 @@ from proofbundle import verify_consistency
166
167
  verify_consistency(first_size, second_size, proof, first_root, second_root) # -> bool
167
168
  ```
168
169
 
170
+ ## Demo — a real eval log to a verified receipt, offline
171
+
172
+ ```bash
173
+ pip install "proofbundle[eval,inspect]"
174
+ make demo # or: bash scripts/demo.sh
175
+ ```
176
+
177
+ `make demo` runs end-to-end with **no network, no API key, no GPU**: it takes genuine eval logs — an
178
+ inspect_ai `mockllm/model` `.eval` log and an lm-evaluation-harness `--model dummy` `results.json`
179
+ (committed under `tests/fixtures/`, generated offline) — turns each into a signed, Merkle-anchored
180
+ proofbundle receipt, and verifies it to `=> OK`. The scores are random (a dummy model); the point is
181
+ that the *artifact* is signed and offline-verifiable, with model and dataset kept as salted commitments.
182
+ See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
183
+ [`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
184
+
169
185
  ## Interoperability
170
186
 
171
187
  proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
@@ -242,11 +258,24 @@ proofbundle show-eval receipt.json # verify + print the claim (issuer-boun
242
258
  ```
243
259
 
244
260
  The claim format is specified in [EVAL_CLAIM.md](EVAL_CLAIM.md); the emit path uses
245
- RFC 8785 JCS canonicalization, the verify path stays dependency-free. **Honest scope:**
246
- a receipt proves `passed` against `threshold` and hides the model/dataset via salted
247
- commitments — it does **not** prove the evaluation was well designed or that the score
248
- itself is correct. Those are human judgements; what it removes is the need to simply
249
- trust the number.
261
+ RFC 8785 JCS canonicalization, the verify path stays dependency-free.
262
+
263
+ **Honesty guardrail (the exact scope).** A receipt attests the **authenticity and integrity** of a
264
+ *claimed* result and its context — these exact bytes, signed by this key, anchored under this root, with
265
+ model/dataset kept as salted commitments. It does **not** attest the **correctness of the computation**,
266
+ and it cannot detect **cherry-picking** of the eval. Whether the eval was well designed, whether the
267
+ suite measures what it claims, and whether the number was computed honestly are separate questions.
268
+ Trusted-execution approaches such as [Attestable Audits](https://arxiv.org/abs/2506.23706) target
269
+ computation-correctness with a different (hardware) trust model; proofbundle is the lightweight,
270
+ hardware-free path to a portable, tamper-evident, selectively disclosable *result artifact*.
271
+
272
+ **How this differs from a bare hash or a TEE.** A plain SHA-256 of a log commits to bytes but carries no
273
+ signature, no tamper-evident anchor, and no selective disclosure (an attestation-exporter idea along
274
+ those lines,
275
+ [inspect_evals PR #1610](https://github.com/UKGovernmentBEIS/inspect_evals/pull/1610), was closed as
276
+ belonging *a layer above* the framework — which is exactly where proofbundle sits). A TEE proves the
277
+ computation ran untampered but needs specific hardware. proofbundle adds Ed25519 + RFC 6962 Merkle +
278
+ SD-JWT selective disclosure over one portable file, offline.
250
279
 
251
280
  ### A verification layer for trustworthy eval logs
252
281
 
@@ -285,8 +314,10 @@ attestation — see [SECURITY.md](SECURITY.md).
285
314
  - **v0.5** — inspect_ai adapter (stable API), in-toto Statement v1 view, SD-JWT **issuance** (RFC 9901).
286
315
  - **v0.6** — a second eval adapter (lm-evaluation-harness, real format + provenance), INTEROP.md,
287
316
  CITATION.cff, PEP 740 attestations documented.
288
- - **v0.7 (current release)** — citability polish: ORCID in CITATION.cff, a Zenodo DOI placeholder
289
- (assigned on release), and a draft in-toto ML-eval predicate proposal.
317
+ - **v0.7** — citability polish (ORCID, Zenodo DOI placeholder, in-toto proposal draft); v0.7.1 hardened
318
+ verifier robustness + CI on Python 3.9 after a holistic review.
319
+ - **v0.8 (current release)** — an offline `make demo` (real eval log -> signed receipt -> verified),
320
+ a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
290
321
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
291
322
  Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
292
323
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "proofbundle"
7
- version = "0.7.1"
7
+ version = "0.8.0"
8
8
  description = "Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -13,7 +13,7 @@ from .emit import emit_bundle, generate_signer
13
13
  from .errors import Check, ProofBundleError, VerificationResult
14
14
  from .merkle import verify_consistency, verify_inclusion
15
15
 
16
- __version__ = "0.7.1"
16
+ __version__ = "0.8.0"
17
17
 
18
18
  __all__ = [
19
19
  "__version__",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: proofbundle
3
- Version: 0.7.1
3
+ Version: 0.8.0
4
4
  Summary: Emit and verify portable cryptographic evidence bundles, offline: Ed25519 + RFC 6962 Merkle + optional SD-JWT.
5
5
  Author: Konrad Gruszka
6
6
  License: MIT
@@ -70,7 +70,7 @@ selectively disclosable credential. Pure Python, no server, no daemon, one JSON
70
70
 
71
71
  **At a glance:** `proofbundle emit` signs and anchors a payload; `proofbundle
72
72
  verify` checks one self-contained `bundle.json` with three offline cryptographic
73
- checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
73
+ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 74 tests.
74
74
 
75
75
  ## Contents
76
76
 
@@ -79,6 +79,7 @@ checks → `OK` or `FAILED`. No network, no daemon, no own crypto. 72 tests.
79
79
  - [How it fits together](#how-it-fits-together)
80
80
  - [Install](#install)
81
81
  - [Quickstart](#quickstart)
82
+ - [Demo](#demo--a-real-eval-log-to-a-verified-receipt-offline)
82
83
  - [Interoperability](#interoperability)
83
84
  - [Bundle format](#bundle-format-proofbundlev01)
84
85
  - [Eval receipts](#eval-receipts)
@@ -209,6 +210,21 @@ from proofbundle import verify_consistency
209
210
  verify_consistency(first_size, second_size, proof, first_root, second_root) # -> bool
210
211
  ```
211
212
 
213
+ ## Demo — a real eval log to a verified receipt, offline
214
+
215
+ ```bash
216
+ pip install "proofbundle[eval,inspect]"
217
+ make demo # or: bash scripts/demo.sh
218
+ ```
219
+
220
+ `make demo` runs end-to-end with **no network, no API key, no GPU**: it takes genuine eval logs — an
221
+ inspect_ai `mockllm/model` `.eval` log and an lm-evaluation-harness `--model dummy` `results.json`
222
+ (committed under `tests/fixtures/`, generated offline) — turns each into a signed, Merkle-anchored
223
+ proofbundle receipt, and verifies it to `=> OK`. The scores are random (a dummy model); the point is
224
+ that the *artifact* is signed and offline-verifiable, with model and dataset kept as salted commitments.
225
+ See [`examples/inspect_receipt.py`](examples/inspect_receipt.py) and
226
+ [`examples/lm_eval_receipt.py`](examples/lm_eval_receipt.py).
227
+
212
228
  ## Interoperability
213
229
 
214
230
  proofbundle uses the same RFC 6962 / RFC 9162 Merkle primitive as
@@ -285,11 +301,24 @@ proofbundle show-eval receipt.json # verify + print the claim (issuer-boun
285
301
  ```
286
302
 
287
303
  The claim format is specified in [EVAL_CLAIM.md](EVAL_CLAIM.md); the emit path uses
288
- RFC 8785 JCS canonicalization, the verify path stays dependency-free. **Honest scope:**
289
- a receipt proves `passed` against `threshold` and hides the model/dataset via salted
290
- commitments it does **not** prove the evaluation was well designed or that the score
291
- itself is correct. Those are human judgements; what it removes is the need to simply
292
- trust the number.
304
+ RFC 8785 JCS canonicalization, the verify path stays dependency-free.
305
+
306
+ **Honesty guardrail (the exact scope).** A receipt attests the **authenticity and integrity** of a
307
+ *claimed* result and its context — these exact bytes, signed by this key, anchored under this root, with
308
+ model/dataset kept as salted commitments. It does **not** attest the **correctness of the computation**,
309
+ and it cannot detect **cherry-picking** of the eval. Whether the eval was well designed, whether the
310
+ suite measures what it claims, and whether the number was computed honestly are separate questions.
311
+ Trusted-execution approaches such as [Attestable Audits](https://arxiv.org/abs/2506.23706) target
312
+ computation-correctness with a different (hardware) trust model; proofbundle is the lightweight,
313
+ hardware-free path to a portable, tamper-evident, selectively disclosable *result artifact*.
314
+
315
+ **How this differs from a bare hash or a TEE.** A plain SHA-256 of a log commits to bytes but carries no
316
+ signature, no tamper-evident anchor, and no selective disclosure (an attestation-exporter idea along
317
+ those lines,
318
+ [inspect_evals PR #1610](https://github.com/UKGovernmentBEIS/inspect_evals/pull/1610), was closed as
319
+ belonging *a layer above* the framework — which is exactly where proofbundle sits). A TEE proves the
320
+ computation ran untampered but needs specific hardware. proofbundle adds Ed25519 + RFC 6962 Merkle +
321
+ SD-JWT selective disclosure over one portable file, offline.
293
322
 
294
323
  ### A verification layer for trustworthy eval logs
295
324
 
@@ -328,8 +357,10 @@ attestation — see [SECURITY.md](SECURITY.md).
328
357
  - **v0.5** — inspect_ai adapter (stable API), in-toto Statement v1 view, SD-JWT **issuance** (RFC 9901).
329
358
  - **v0.6** — a second eval adapter (lm-evaluation-harness, real format + provenance), INTEROP.md,
330
359
  CITATION.cff, PEP 740 attestations documented.
331
- - **v0.7 (current release)** — citability polish: ORCID in CITATION.cff, a Zenodo DOI placeholder
332
- (assigned on release), and a draft in-toto ML-eval predicate proposal.
360
+ - **v0.7** — citability polish (ORCID, Zenodo DOI placeholder, in-toto proposal draft); v0.7.1 hardened
361
+ verifier robustness + CI on Python 3.9 after a holistic review.
362
+ - **v0.8 (current release)** — an offline `make demo` (real eval log -> signed receipt -> verified),
363
+ a sharpened honesty guardrail (authenticity/integrity, not computation-correctness), and outreach drafts.
333
364
  - **Deferred** (explicitly not yet built) — SD-JWT VC conformance + `vct` metadata,
334
365
  Key-Binding JWT, status lists / revocation, an official in-toto PR, DSSE / a full in-toto client.
335
366
 
@@ -30,6 +30,7 @@ tests/test_cli_eval.py
30
30
  tests/test_emit.py
31
31
  tests/test_eval_claim_schema.py
32
32
  tests/test_evalclaim.py
33
+ tests/test_examples.py
33
34
  tests/test_intoto.py
34
35
  tests/test_merkle.py
35
36
  tests/test_merkle_property.py
@@ -0,0 +1,28 @@
1
+ """The demo examples run end-to-end (real fixtures -> receipt -> verify). Covers `make demo` (Phase B)."""
2
+ import importlib.util
3
+ import sys
4
+ import unittest
5
+ from pathlib import Path
6
+
7
+ REPO = Path(__file__).resolve().parents[1]
8
+
9
+
10
+ def _run_example(name):
11
+ try:
12
+ import inspect_ai.log # noqa: F401 (inspect example needs it)
13
+ except ImportError:
14
+ if name == "inspect_receipt":
15
+ raise unittest.SkipTest("inspect_ai not installed")
16
+ spec = importlib.util.spec_from_file_location(name, REPO / "examples" / f"{name}.py")
17
+ m = importlib.util.module_from_spec(spec)
18
+ sys.modules[name] = m
19
+ spec.loader.exec_module(m)
20
+ return m.main()
21
+
22
+
23
+ class TestExamples(unittest.TestCase):
24
+ def test_lm_eval_receipt_example(self):
25
+ self.assertEqual(_run_example("lm_eval_receipt"), 0)
26
+
27
+ def test_inspect_receipt_example(self):
28
+ self.assertEqual(_run_example("inspect_receipt"), 0)
File without changes
File without changes