cloakllm-verifier 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloakllm_verifier-0.12.0/LICENSE +21 -0
- cloakllm_verifier-0.12.0/PKG-INFO +82 -0
- cloakllm_verifier-0.12.0/README.md +61 -0
- cloakllm_verifier-0.12.0/cloakllm_verifier/__init__.py +14 -0
- cloakllm_verifier-0.12.0/cloakllm_verifier/cli.py +138 -0
- cloakllm_verifier-0.12.0/cloakllm_verifier/verify.py +286 -0
- cloakllm_verifier-0.12.0/cloakllm_verifier.egg-info/PKG-INFO +82 -0
- cloakllm_verifier-0.12.0/cloakllm_verifier.egg-info/SOURCES.txt +13 -0
- cloakllm_verifier-0.12.0/cloakllm_verifier.egg-info/dependency_links.txt +1 -0
- cloakllm_verifier-0.12.0/cloakllm_verifier.egg-info/entry_points.txt +2 -0
- cloakllm_verifier-0.12.0/cloakllm_verifier.egg-info/requires.txt +4 -0
- cloakllm_verifier-0.12.0/cloakllm_verifier.egg-info/top_level.txt +1 -0
- cloakllm_verifier-0.12.0/pyproject.toml +38 -0
- cloakllm_verifier-0.12.0/setup.cfg +4 -0
- cloakllm_verifier-0.12.0/tests/test_verifier.py +330 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 The CloakLLM Authors (cloakllm@gmail.com)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cloakllm-verifier
|
|
3
|
+
Version: 0.12.0
|
|
4
|
+
Summary: Standalone, dependency-light verifier for CloakLLM audit artifacts -- verify hash chains, Ed25519 signatures, key provenance, RFC 3161 timestamps, and compliance reports WITHOUT the full SDK or trusting CloakLLM's code.
|
|
5
|
+
Author-email: The CloakLLM Authors <cloakllm@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://cloakllm.dev
|
|
8
|
+
Project-URL: Repository, https://github.com/cloakllm/cloakllm-verifier
|
|
9
|
+
Keywords: audit,verification,eu-ai-act,compliance,attestation,rfc3161,hash-chain
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Security :: Cryptography
|
|
13
|
+
Classifier: Intended Audience :: Legal Industry
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: cloakllm[attestation,timestamping]<0.13.0,>=0.12.0
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# cloakllm-verifier
|
|
23
|
+
|
|
24
|
+
**Independently verify CloakLLM audit artifacts — without the PII-detection stack, and without trusting CloakLLM's code.**
|
|
25
|
+
|
|
26
|
+
CloakLLM's whole pitch is *compliance you can verify, not compliance you're asked to believe.* This is the tool that makes that literal: an auditor, regulator, or CI pipeline installs `cloakllm-verifier` and checks the artifacts themselves.
|
|
27
|
+
|
|
28
|
+
It **reuses CloakLLM's own verification code** (single source of truth — no reimplementation, no drift) but pulls **only the cryptography extras** — no spaCy, no NLP models. A lean install for people who need to *check*, not *produce*.
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install cloakllm-verifier # Python; crypto deps only, no spaCy
|
|
32
|
+
npm install cloakllm-verifier # JavaScript; zero deps beyond cloakllm (see js/README.md)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
This repo ships **two packages** from one source of truth: the Python package at the root and the JavaScript package under [`js/`](js/). Both expose the same CLI (`cloakllm-verify`) and the same checks, with byte-comparable `--json` output.
|
|
36
|
+
|
|
37
|
+
## CLI
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
cloakllm-verify audit ./cloakllm_audit # hash-chain integrity
|
|
41
|
+
cloakllm-verify timestamp ./cloakllm_audit # offline RFC 3161 checkpoint tokens
|
|
42
|
+
cloakllm-verify keys cert.json --manifest m.json # KeyManifest provenance + revocation
|
|
43
|
+
cloakllm-verify report report.json ./cloakllm_audit # re-validate a compliance report
|
|
44
|
+
cloakllm-verify all ./cloakllm_audit # everything, one exit code
|
|
45
|
+
cloakllm-verify audit ./cloakllm_audit --json # machine-readable (CI)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Exit code `0` = verified, `1` = failed/invalid. Output is ASCII-only.
|
|
49
|
+
|
|
50
|
+
## Python API
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from cloakllm_verifier import verify_audit, verify_timestamps, verify_all
|
|
54
|
+
|
|
55
|
+
r = verify_all("./cloakllm_audit")
|
|
56
|
+
assert r["ok"], r # {ok, audit: {...}, timestamps: {...}}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## What it checks
|
|
60
|
+
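- **Hash-chain integrity** — recomputes every SHA-256 link from the canonical JSON; any tampered, reordered, or relinked entry fails, as does an entry deleted from anywhere except the tail (tail truncation is only detectable with a timestamp anchor — see the completeness note below).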
|
|
61
|
+
- **RFC 3161 trusted timestamps** — offline-verifies every `chain_checkpoint` token (proving the chain existed no later than the TSA's time); reports the earliest provable time.
|
|
62
|
+
- **KeyManifest provenance + revocation** — verifies a signed certificate against its published KeyManifest (signature, key-id binding, validity window, manifest-hash integrity, offline-root signature when claimed) and checks it against a root-signed RevocationList.
|
|
63
|
+
- **Compliance-report re-validation** — independently re-verifies the audit chain a report describes and rejects any report that claims a *verified* chain or a *COMPLIANT* verdict over a log that does not actually verify. It does not trust the report's own claims.
|
|
64
|
+
|
|
65
|
+
## What a passing result does — and does NOT — prove
|
|
66
|
+
|
|
67
|
+
Be precise about the guarantees (an auditor needs to know the edges):
|
|
68
|
+
|
|
69
|
+
- **`audit` proves integrity + internal consistency of the entries present** — every SHA-256 link recomputes and chains cleanly. It does **not** prove:
|
|
70
|
+
- **Completeness.** A hash chain is anchored at its genesis, not its head, so removing entries from the **end** (tail truncation) leaves a still-valid prefix. Detecting truncation needs an external head anchor — an **RFC 3161 checkpoint over the final `entry_hash`** (`timestamp`), which binds "the chain was at least this long at time T".
|
|
71
|
+
- **Authenticity.** The chain is a keyless SHA-256 construction: anyone who can write the log can recompute a self-consistent one. Authenticity comes from the **Ed25519 attestation** (`keys`) — a signed certificate whose key provenance you verify against a published KeyManifest.
|
|
72
|
+
- **`keys` without `--manifest`** only checks the certificate's signature against the key embedded in the certificate — it does **not** establish who owns that key. Pass `--manifest` for real provenance; the CLI marks signature-only results `UNVERIFIED`, not "verified".
|
|
73
|
+
- **`report`** re-verifies the chain and checks the report's claims for internal consistency against it; it is not a cryptographic binding of that exact report to that exact log beyond an entry-count sanity check.
|
|
74
|
+
|
|
75
|
+
### Known limitation: cross-SDK whole-number floats
|
|
76
|
+
|
|
77
|
+
A hashed floating-point field that happens to be a whole number (e.g. a timing metric of exactly `0.0` or `5.0`) serializes as `5.0` in Python but `5` in JavaScript, so a chain written by one SDK can be reported as *tampered* by the other SDK's verifier. This is a **false-negative that fails safe** (a genuine chain is flagged for investigation; a forged chain never passes), it is intermittent, and it predates this package. A proper fix is an RFC 8785-style number-canonicalization migration (a hash-semantics change, tracked for a future release). **Workaround today: verify a chain with the same-language verifier that produced it.**
|
|
78
|
+
|
|
79
|
+
## Why a separate package
|
|
80
|
+
So an auditor's install is minimal and its purpose is unambiguous — it's a *verifier*, not the PII middleware. The verification logic lives in `cloakllm` (reused here), so the two can never drift.
|
|
81
|
+
|
|
82
|
+
MIT · part of [CloakLLM](https://cloakllm.dev)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# cloakllm-verifier
|
|
2
|
+
|
|
3
|
+
**Independently verify CloakLLM audit artifacts — without the PII-detection stack, and without trusting CloakLLM's code.**
|
|
4
|
+
|
|
5
|
+
CloakLLM's whole pitch is *compliance you can verify, not compliance you're asked to believe.* This is the tool that makes that literal: an auditor, regulator, or CI pipeline installs `cloakllm-verifier` and checks the artifacts themselves.
|
|
6
|
+
|
|
7
|
+
It **reuses CloakLLM's own verification code** (single source of truth — no reimplementation, no drift) but pulls **only the cryptography extras** — no spaCy, no NLP models. A lean install for people who need to *check*, not *produce*.
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install cloakllm-verifier # Python; crypto deps only, no spaCy
|
|
11
|
+
npm install cloakllm-verifier # JavaScript; zero deps beyond cloakllm (see js/README.md)
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
This repo ships **two packages** from one source of truth: the Python package at the root and the JavaScript package under [`js/`](js/). Both expose the same CLI (`cloakllm-verify`) and the same checks, with byte-comparable `--json` output.
|
|
15
|
+
|
|
16
|
+
## CLI
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
cloakllm-verify audit ./cloakllm_audit # hash-chain integrity
|
|
20
|
+
cloakllm-verify timestamp ./cloakllm_audit # offline RFC 3161 checkpoint tokens
|
|
21
|
+
cloakllm-verify keys cert.json --manifest m.json # KeyManifest provenance + revocation
|
|
22
|
+
cloakllm-verify report report.json ./cloakllm_audit # re-validate a compliance report
|
|
23
|
+
cloakllm-verify all ./cloakllm_audit # everything, one exit code
|
|
24
|
+
cloakllm-verify audit ./cloakllm_audit --json # machine-readable (CI)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Exit code `0` = verified, `1` = failed/invalid. Output is ASCII-only.
|
|
28
|
+
|
|
29
|
+
## Python API
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from cloakllm_verifier import verify_audit, verify_timestamps, verify_all
|
|
33
|
+
|
|
34
|
+
r = verify_all("./cloakllm_audit")
|
|
35
|
+
assert r["ok"], r # {ok, audit: {...}, timestamps: {...}}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## What it checks
|
|
39
|
+
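- **Hash-chain integrity** — recomputes every SHA-256 link from the canonical JSON; any tampered, reordered, or relinked entry fails, as does an entry deleted from anywhere except the tail (tail truncation is only detectable with a timestamp anchor — see the completeness note below).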
|
|
40
|
+
- **RFC 3161 trusted timestamps** — offline-verifies every `chain_checkpoint` token (proving the chain existed no later than the TSA's time); reports the earliest provable time.
|
|
41
|
+
- **KeyManifest provenance + revocation** — verifies a signed certificate against its published KeyManifest (signature, key-id binding, validity window, manifest-hash integrity, offline-root signature when claimed) and checks it against a root-signed RevocationList.
|
|
42
|
+
- **Compliance-report re-validation** — independently re-verifies the audit chain a report describes and rejects any report that claims a *verified* chain or a *COMPLIANT* verdict over a log that does not actually verify. It does not trust the report's own claims.
|
|
43
|
+
|
|
44
|
+
## What a passing result does — and does NOT — prove
|
|
45
|
+
|
|
46
|
+
Be precise about the guarantees (an auditor needs to know the edges):
|
|
47
|
+
|
|
48
|
+
- **`audit` proves integrity + internal consistency of the entries present** — every SHA-256 link recomputes and chains cleanly. It does **not** prove:
|
|
49
|
+
- **Completeness.** A hash chain is anchored at its genesis, not its head, so removing entries from the **end** (tail truncation) leaves a still-valid prefix. Detecting truncation needs an external head anchor — an **RFC 3161 checkpoint over the final `entry_hash`** (`timestamp`), which binds "the chain was at least this long at time T".
|
|
50
|
+
- **Authenticity.** The chain is a keyless SHA-256 construction: anyone who can write the log can recompute a self-consistent one. Authenticity comes from the **Ed25519 attestation** (`keys`) — a signed certificate whose key provenance you verify against a published KeyManifest.
|
|
51
|
+
- **`keys` without `--manifest`** only checks the certificate's signature against the key embedded in the certificate — it does **not** establish who owns that key. Pass `--manifest` for real provenance; the CLI marks signature-only results `UNVERIFIED`, not "verified".
|
|
52
|
+
- **`report`** re-verifies the chain and checks the report's claims for internal consistency against it; it is not a cryptographic binding of that exact report to that exact log beyond an entry-count sanity check.
|
|
53
|
+
|
|
54
|
+
### Known limitation: cross-SDK whole-number floats
|
|
55
|
+
|
|
56
|
+
A hashed floating-point field that happens to be a whole number (e.g. a timing metric of exactly `0.0` or `5.0`) serializes as `5.0` in Python but `5` in JavaScript, so a chain written by one SDK can be reported as *tampered* by the other SDK's verifier. This is a **false-negative that fails safe** (a genuine chain is flagged for investigation; a forged chain never passes), it is intermittent, and it predates this package. A proper fix is an RFC 8785-style number-canonicalization migration (a hash-semantics change, tracked for a future release). **Workaround today: verify a chain with the same-language verifier that produced it.**
|
|
57
|
+
|
|
58
|
+
## Why a separate package
|
|
59
|
+
So an auditor's install is minimal and its purpose is unambiguous — it's a *verifier*, not the PII middleware. The verification logic lives in `cloakllm` (reused here), so the two can never drift.
|
|
60
|
+
|
|
61
|
+
MIT · part of [CloakLLM](https://cloakllm.dev)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""cloakllm-verifier — standalone, dependency-light verification of CloakLLM
|
|
2
|
+
audit artifacts. Reuses CloakLLM's own verify code (no drift), pulls only the
|
|
3
|
+
crypto extras (no spaCy / detection). For auditors and CI.
|
|
4
|
+
|
|
5
|
+
from cloakllm_verifier import verify_audit, verify_timestamps, verify_all
|
|
6
|
+
result = verify_all("./cloakllm_audit") # {ok: bool, audit: {...}, timestamps: {...}}
|
|
7
|
+
"""
|
|
8
|
+
# Package version string; the CLI module imports it for its --version output.
__version__ = "0.12.0"

# Re-export the verification entry points so callers can import them from the
# package root instead of reaching into cloakllm_verifier.verify.
from cloakllm_verifier.verify import (
    verify_audit, verify_timestamps, verify_keys, verify_report, verify_all)

# Explicit public API of the package.
__all__ = ["verify_audit", "verify_timestamps", "verify_keys", "verify_report",
           "verify_all", "__version__"]
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""`cloakllm-verify` -- auditor-facing CLI. ASCII-only output; exit 0 = verified,
|
|
2
|
+
1 = failed/invalid. Add --json for machine-readable output (CI / auditor tools)."""
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
from cloakllm_verifier import __version__
|
|
10
|
+
from cloakllm_verifier.verify import (
|
|
11
|
+
verify_audit, verify_timestamps, verify_keys, verify_report, verify_all)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _emit(result: dict, as_json: bool, lines) -> int:
    """Print a verification result and translate it into a process exit code.

    Args:
        result: The result dict of a check; only its ``ok`` key decides the
            exit code.
        as_json: When true, print ``result`` as indented, key-sorted JSON and
            ignore ``lines``.
        lines: Iterable of human-readable lines, printed one per line when
            ``as_json`` is false.

    Returns:
        0 if ``result["ok"]`` is present and truthy, otherwise 1.
    """
    if not as_json:
        # Human-readable mode: one print per prepared line.
        for text in lines:
            print(text)
    else:
        # Machine-readable mode: sorted keys keep the output stable for diffs.
        rendered = json.dumps(result, indent=2, sort_keys=True)
        print(rendered)

    if result.get("ok"):
        return 0
    return 1
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def main(argv=None) -> int:
    """Parse the command line, run the selected check and print its result.

    Args:
        argv: Argument list handed to ``ArgumentParser.parse_args``; ``None``
            lets argparse fall back to the process arguments.

    Returns:
        The exit code: for a recognised subcommand, whatever ``_emit`` returns
        (0 when the result's ``ok`` is truthy, 1 otherwise); 2 after printing
        help when no subcommand was given.
    """
    p = argparse.ArgumentParser(
        prog="cloakllm-verify",
        description="Independently verify CloakLLM audit artifacts (hash chain + RFC 3161 "
                    "timestamps) without the full SDK or trusting CloakLLM's code.")
    p.add_argument("--version", action="version", version=f"cloakllm-verify {__version__}")
    # dest="cmd" stores the chosen subcommand name; it stays None when omitted.
    sub = p.add_subparsers(dest="cmd")

    # --- audit: hash-chain integrity ---
    pa = sub.add_parser("audit", help="Verify hash-chain integrity of an audit directory")
    pa.add_argument("log_dir")
    pa.add_argument("--json", action="store_true")

    # --- timestamp: RFC 3161 checkpoint tokens ---
    pt = sub.add_parser("timestamp", help="Offline-verify RFC 3161 checkpoint tokens in an audit dir")
    pt.add_argument("log_dir")
    pt.add_argument("--tsa-cert", help="PEM of trusted TSA cert(s) to also check the signer chain")
    pt.add_argument("--json", action="store_true")

    # --- keys: certificate provenance + revocation ---
    pk = sub.add_parser("keys", help="Verify a certificate's KeyManifest provenance + revocation")
    pk.add_argument("cert", help="Path to a signed SanitizationCertificate JSON")
    pk.add_argument("--manifest", help="Path to the KeyManifest JSON (omit for signature-only)")
    pk.add_argument("--root-key", help="Path to the offline root public key (raw 32B or base64)")
    pk.add_argument("--revocation-list", help="Path to a signed RevocationList JSON")
    pk.add_argument("--json", action="store_true")

    # --- report: compliance report vs. its audit directory ---
    pr = sub.add_parser("report", help="Re-validate a compliance report against its audit dir")
    pr.add_argument("report", help="Path to a compliance-report JSON")
    pr.add_argument("log_dir", help="The audit directory the report describes")
    pr.add_argument("--json", action="store_true")

    # --- all: combined run over one audit directory ---
    pall = sub.add_parser("all", help="Run every available check over an audit dir")
    pall.add_argument("log_dir")
    pall.add_argument("--tsa-cert")
    pall.add_argument("--json", action="store_true")

    args = p.parse_args(argv)
    if not args.cmd:
        # No subcommand: show usage and signal a usage error rather than a
        # verification failure.
        p.print_help()
        return 2

    if args.cmd == "audit":
        r = verify_audit(args.log_dir)
        # The last list element is a single conditional expression: the OK line
        # when the chain verified, otherwise a FAIL header followed by the
        # reported errors (or "unknown" when none were given).
        lines = [f"CloakLLM verify -- hash chain ({args.log_dir})",
                 f" entries: {r['entries']}",
                 f" final seq: {r['final_seq']}",
                 " [OK] chain intact: the present entries are internally consistent."
                 if r["ok"]
                 else " [FAIL] chain invalid:\n " + "\n ".join(r["errors"] or ["unknown"])]
        if r["ok"]:
            # Spell out the limits of a passing chain check so "OK" is not over-read.
            lines.append(" note: proves integrity of the entries present, NOT completeness "
                         "(a hash chain cannot detect removal of the LAST entries) or "
                         "authenticity (run `keys` for signatures; `timestamp` for an "
                         "existence anchor).")
        return _emit(r, args.json, lines)

    if args.cmd == "timestamp":
        r = verify_timestamps(args.log_dir, args.tsa_cert)
        # On failure, show the result's "reason" when it has one, else a generic message.
        lines = [f"CloakLLM verify -- RFC 3161 checkpoints ({args.log_dir})",
                 f" found: {r['found']}",
                 f" verified: {r['verified']}",
                 f" earliest provable time: {r.get('earliest_provable_time')}",
                 " [OK] all checkpoints verified." if r["ok"]
                 else f" [FAIL] {r.get('reason', 'not all checkpoints verified')}"]
        return _emit(r, args.json, lines)

    if args.cmd == "keys":
        r = verify_keys(args.cert, args.manifest, args.root_key, args.revocation_list)
        c = r["checks"]  # per-check results, printed individually below
        lines = [f"CloakLLM verify -- key provenance ({args.cert})",
                 f" provenance: {r['provenance_status']}",
                 f" revocation: {r['revocation_status']}",
                 f" signature: {c['signature_valid']}",
                 f" key id: {c['key_id_matches']}",
                 f" validity: {c['within_validity_window']}",
                 f" root sig: {c['root_signature_status']}",
                 f" manifest: {c['manifest_hash_consistent']}"]
        # F8: signature-only mode (no manifest) validates the cert's signature
        # against the key embedded IN the cert -- it does NOT prove which deployer
        # owns the key. Do not print "verified" for it.
        if r["ok"] and r["provenance_status"] == "VERIFIED":
            lines.append(" [OK] key provenance verified (signature + manifest binding).")
        elif r["ok"] and r["provenance_status"] == "UNVERIFIED":
            lines.append(" [WARN] signature valid, but provenance UNVERIFIED -- no manifest "
                         "supplied, so the key's owner/validity is not established. "
                         "Pass --manifest to verify provenance.")
        else:
            # Anything else (not ok, or an unexpected status) is reported as a failure.
            lines.append(" [FAIL] provenance invalid:\n " + "\n ".join(r["notes"] or ["see status"]))
        return _emit(r, args.json, lines)

    if args.cmd == "report":
        r = verify_report(args.report, args.log_dir)
        # The final element is one conditional expression; on failure the
        # mismatches are listed, with a fallback message when the list is empty.
        lines = [f"CloakLLM verify -- compliance report ({args.report})",
                 f" report verdict: {r['report_verdict']}",
                 f" actual chain valid: {r['actual_chain_valid']}",
                 " [OK] report claims match the audit log." if r["ok"]
                 else " [FAIL] report does NOT match the audit log:\n "
                 + "\n ".join(r["mismatches"] or ["chain does not verify"])]
        return _emit(r, args.json, lines)

    if args.cmd == "all":
        r = verify_all(args.log_dir, args.tsa_cert)
        a, t = r["audit"], r["timestamps"]  # sub-results of the combined run
        # The timestamps line shows OK/FAIL with counts only when
        # r["timestamps_applicable"] is truthy; otherwise it prints "n/a".
        lines = [f"CloakLLM verify -- full ({args.log_dir})",
                 f" hash chain: {'OK' if a['ok'] else 'FAIL'} ({a['entries']} entries)",
                 f" timestamps: " + (
                     f"{'OK' if t['ok'] else 'FAIL'} ({t['verified']}/{t['found']} verified, "
                     f"earliest {t.get('earliest_provable_time')})"
                     if r["timestamps_applicable"] else "n/a (no checkpoints)"),
                 " [OK] verified." if r["ok"] else " [FAIL] verification failed."]
        return _emit(r, args.json, lines)

    # Fallback for a subcommand name none of the branches above handled.
    p.print_help()
    return 2
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Script entry point: run the CLI and use main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
"""Standalone verification of CloakLLM audit artifacts.
|
|
2
|
+
|
|
3
|
+
Every check here REUSES CloakLLM's own verify code (single source of truth, no
|
|
4
|
+
drift): the hash-chain walk from `cloakllm.audit.AuditLogger.verify_chain`, the
|
|
5
|
+
RFC 3161 offline verifier from `cloakllm.timestamping.verify_timestamp_token`.
|
|
6
|
+
None of it imports the PII-detection stack (spaCy) -- that is the whole point:
|
|
7
|
+
an auditor verifies without installing, or trusting, the detector.
|
|
8
|
+
|
|
9
|
+
Each function returns a plain dict {ok, ...} so callers (CLI / CI / other tools)
|
|
10
|
+
never depend on CloakLLM internals.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import re
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _read_chain_entries(d: Path):
    """Read every entry of the ``audit_*.jsonl`` files in *d*, as parsed dicts.

    Files are visited in sorted-name order; blank lines are skipped.

    Args:
        d: Audit directory to scan (non-recursive).

    Returns:
        A list of dicts, one per non-blank line, in file/line order.

    Raises:
        UnicodeDecodeError: A file is not valid UTF-8 (decoding is strict).
        ValueError: A non-blank line is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or it is valid
            JSON but not an object.

    Both visible callers catch ``(UnicodeDecodeError, ValueError)`` and turn it
    into a clean failure, so a malformed log never escapes as a crash.
    """
    entries = []
    for jf in sorted(d.glob("audit_*.jsonl")):
        # utf-8 strict: a verifier must not silently lossy-decode an audit log.
        text = jf.read_text(encoding="utf-8")
        for lineno, line in enumerate(text.splitlines(), start=1):
            line = line.strip()
            if not line:
                continue
            entry = json.loads(line)
            # json.loads happily returns lists, numbers, strings or None; an
            # audit entry must be an object, and callers use dict methods on it.
            if not isinstance(entry, dict):
                raise ValueError(
                    f"{jf.name}:{lineno}: expected a JSON object, "
                    f"got {type(entry).__name__}")
            entries.append(entry)
    return entries
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def verify_audit(log_dir: str) -> dict:
    """Check the hash chain stored in an audit directory.

    The chain walk itself is delegated to ``AuditLogger.verify_chain``; this
    wrapper adds the "is there anything to verify at all" guards and converts
    every failure into a plain result dict.

    Args:
        log_dir: Path of the audit directory.

    Returns:
        A dict with keys ``ok``, ``valid``, ``final_seq``, ``errors`` and
        ``entries``. ``ok`` is true only when the chain verifies and at least
        one entry was read. On any early failure ``final_seq`` is -1 and
        ``errors`` holds a single message.

    What a pass means: the entries that are present link together without a
    break. It does not show completeness (entries removed from the END leave a
    prefix that still links cleanly) and it does not show authenticity (the
    chain is keyless SHA-256, so whoever can write the log can rebuild a
    consistent one). Completeness needs an external anchor such as an RFC 3161
    checkpoint over the final entry_hash; authenticity needs the Ed25519
    attestation (``verify_keys``).
    """
    from cloakllm.audit import AuditLogger
    from cloakllm.config import ShieldConfig

    def _failed(message: str, entry_count: int = 0) -> dict:
        # Common shape of every failure result.
        return {"ok": False, "valid": False, "errors": [message],
                "final_seq": -1, "entries": entry_count}

    root = Path(log_dir)
    if not root.exists():
        return _failed(f"log dir not found: {log_dir}")

    # F2: verify_chain only reads audit_*.jsonl and reports success when none
    # exist, so an empty dir, a mistyped path, or differently named logs would
    # otherwise look "verified". Nothing to verify is a failure, and any
    # .jsonl files that would be ignored are named in the message.
    chain_files = sorted(root.glob("audit_*.jsonl"))
    every_jsonl = sorted(root.glob("*.jsonl"))
    if not chain_files:
        if not every_jsonl:
            return _failed(f"no audit log files (audit_*.jsonl) found in {log_dir}")
        skipped = ", ".join(f.name for f in every_jsonl)
        return _failed(f"no audit_*.jsonl files found in {log_dir}; the verifier only "
                       f"reads audit_-prefixed logs. Ignored: {skipped}")

    # F7: undecodable / hostile bytes must become a clean failure, never a crash.
    try:
        entry_count = len(_read_chain_entries(root))
    except (UnicodeDecodeError, ValueError) as exc:
        return _failed(f"audit log is not valid UTF-8 JSONL: {type(exc).__name__}: {exc}")

    if entry_count == 0:
        return _failed(f"audit log files present but contain no entries: {log_dir}")

    chain_logger = AuditLogger(ShieldConfig(log_dir=str(root), audit_enabled=True))
    try:
        valid, errors, final_seq = chain_logger.verify_chain()
    except Exception as exc:  # noqa: BLE001 - a verifier never raises out
        return _failed(f"chain verification raised {type(exc).__name__}: {exc}", entry_count)

    chain_ok = bool(valid)
    return {"ok": chain_ok and entry_count > 0, "valid": chain_ok, "final_seq": final_seq,
            "errors": list(errors), "entries": entry_count}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _iter_checkpoints(log_dir: Path):
    """Yield each ``chain_checkpoint`` entry found in the ``*.jsonl`` files of *log_dir*.

    Files are visited in sorted-name order. Blank lines, lines that are not
    valid JSON, and lines whose JSON value is not an object are skipped.

    Args:
        log_dir: Directory to scan (non-recursive).

    Yields:
        Parsed dicts whose ``event_type`` equals ``"chain_checkpoint"``.

    Raises:
        UnicodeDecodeError: A file is not valid UTF-8 (decoding is strict).
    """
    for jf in sorted(log_dir.glob("*.jsonl")):
        for line in jf.read_text(encoding="utf-8").splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                e = json.loads(line)
            except (ValueError, TypeError):
                continue
            # Valid JSON need not be an object: a list, number, string or null
            # has no .get(), so such lines are skipped like other non-checkpoints.
            if isinstance(e, dict) and e.get("event_type") == "chain_checkpoint":
                yield e
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def verify_timestamps(log_dir: str, tsa_cert_path: Optional[str] = None) -> dict:
    """Offline-verify every RFC 3161 chain_checkpoint token in an audit dir.

    A checkpoint counts as verified only when its token verifies AND the hash
    it stamps is an ``entry_hash`` present in the ``audit_*.jsonl`` chain.

    Args:
        log_dir: Path of the audit directory.
        tsa_cert_path: Optional PEM file of trusted TSA certificate(s). Each
            ``BEGIN/END CERTIFICATE`` block is passed to the token verifier;
            if no block is found, the whole file text is passed as one item.

    Returns:
        A dict with keys ``ok``, ``found``, ``verified``,
        ``earliest_provable_time`` and ``checkpoints``. ``ok`` is true iff at
        least one checkpoint exists and all found checkpoints verify, so a log
        with no checkpoints yields ``ok`` False. The two early exits (missing
        directory, timestamping backend not installed) additionally carry a
        ``reason`` string. Each ``checkpoints`` item has ``seq``, ``valid``,
        ``bound_to_chain`` and ``reason``; when the token verifier returned a
        result it also has ``token_valid``, ``gen_time`` and ``chain_valid``.

    A malformed checkpoint (non-object ``checkpoint_context``, non-string
    ``stamped_entry_hash``) is counted as found and recorded as invalid, never
    raised.
    """
    from cloakllm.timestamping import _ts_backend_available, verify_timestamp_token

    d = Path(log_dir)
    if not d.exists():
        return {"ok": False, "found": 0, "verified": 0, "reason": f"log dir not found: {log_dir}",
                "earliest_provable_time": None, "checkpoints": []}
    if not _ts_backend_available():
        return {"ok": False, "found": 0, "verified": 0,
                "reason": "timestamping backend not installed (pip install cloakllm[timestamping])",
                "earliest_provable_time": None, "checkpoints": []}

    trusted = None
    if tsa_cert_path:
        pem = Path(tsa_cert_path).read_text(encoding="utf-8")
        trusted = re.findall(
            r"-----BEGIN CERTIFICATE-----.*?-----END CERTIFICATE-----", pem, re.DOTALL) or [pem]

    # F5: a valid token only proves "SOME hash existed at time T". To credit it
    # as anchoring THIS log, the stamped hash must actually be an entry_hash in
    # the chain. Collect the chain's real hashes so we can bind each checkpoint.
    try:
        # Non-object entries are ignored here rather than crashing on .get().
        chain_hashes = {ev["entry_hash"] for ev in _read_chain_entries(d)
                        if isinstance(ev, dict) and isinstance(ev.get("entry_hash"), str)}
    except (UnicodeDecodeError, ValueError):
        # Unreadable chain: no hash can be bound, so no checkpoint will verify.
        chain_hashes = set()

    found = verified = 0
    earliest = None
    checkpoints = []
    for e in _iter_checkpoints(d):
        found += 1
        cc = e.get("checkpoint_context")
        if not isinstance(cc, dict):
            # Missing or malformed context: treat as empty so the checkpoint is
            # reported as invalid instead of raising on cc.get().
            cc = {}
        stamped = cc.get("stamped_entry_hash", "")
        # Bind to the chain FIRST -- a token over a hash that is in no entry is
        # not evidence about this log, no matter how cryptographically valid.
        # The isinstance guard keeps an unhashable value (list/dict) from
        # raising TypeError in the set-membership test.
        bound = isinstance(stamped, str) and stamped in chain_hashes
        try:
            r = verify_timestamp_token(cc.get("tst_token_b64", ""), bytes.fromhex(stamped), trusted)
        except Exception as ex:  # noqa: BLE001 - never raise out of a verifier
            checkpoints.append({"seq": e.get("seq"), "valid": False, "bound_to_chain": bound,
                                "reason": type(ex).__name__})
            continue
        ok_cp = bool(r.valid) and bound
        reason = r.reason
        if r.valid and not bound:
            reason = "token valid but stamped_entry_hash is not an entry_hash in this chain"
        checkpoints.append({"seq": e.get("seq"), "valid": ok_cp, "token_valid": bool(r.valid),
                            "bound_to_chain": bound, "gen_time": r.gen_time,
                            "chain_valid": r.chain_valid, "reason": reason})
        if ok_cp:
            verified += 1
            # Track the smallest gen_time among verified checkpoints.
            if r.gen_time and (earliest is None or r.gen_time < earliest):
                earliest = r.gen_time
    return {"ok": found > 0 and verified == found, "found": found, "verified": verified,
            "earliest_provable_time": earliest, "checkpoints": checkpoints}
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _load_root_public_key(path: Optional[str]) -> Optional[bytes]:
    """Load a 32-byte Ed25519 root public key from a file.

    The file may hold either the raw 32 key bytes or a base64 text encoding
    of them (surrounding whitespace is ignored).

    Args:
        path: Location of the key file. A falsy value means "no root key".

    Returns:
        The 32 key bytes, or None when no path was given.

    Raises:
        OSError: the file cannot be read.
        ValueError: the content is neither 32 raw bytes nor base64 text that
            decodes to exactly 32 bytes (``binascii.Error``, raised for bad
            base64 padding, is a ``ValueError`` subclass).
    """
    if not path:
        return None
    raw = Path(path).read_bytes()
    if len(raw) == 32:
        return raw
    import base64
    key = base64.b64decode(raw.decode("utf-8", "ignore").strip())
    # A root key of the wrong size can never be a valid Ed25519 key; fail here
    # with a clear message rather than handing garbage to the verifier.
    if len(key) != 32:
        raise ValueError(
            f"root public key must be 32 bytes (raw or base64); "
            f"got {len(key)} bytes after base64 decoding")
    return key
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def verify_keys(cert_path: str, manifest_path: Optional[str] = None,
                root_public_key_path: Optional[str] = None,
                revocation_list_path: Optional[str] = None) -> dict:
    """Check a signed SanitizationCertificate's key provenance.

    The certificate is verified against an optional KeyManifest, an optional
    root public key and an optional RevocationList by delegating to
    `verify_key_provenance`.

    Returns {ok, provenance_status, revocation_status, checks, notes}. `ok` is
    the report's overall_valid (signature + key-id + validity window + manifest
    hash + root signature when claimed + not-revoked). With no manifest the
    check is signature-only (UNVERIFIED). A revoked key or a tampered
    revocation list makes ok False.
    """
    from cloakllm.attestation import (
        SanitizationCertificate, KeyManifest, RevocationList, verify_key_provenance)

    def _read_json(location: str):
        # All three artifacts are UTF-8 JSON documents on disk.
        return json.loads(Path(location).read_text(encoding="utf-8"))

    certificate = SanitizationCertificate.from_dict(_read_json(cert_path))
    key_manifest = KeyManifest.from_dict(_read_json(manifest_path)) if manifest_path else None
    revocations = (RevocationList.from_dict(_read_json(revocation_list_path))
                   if revocation_list_path else None)
    root_key = _load_root_public_key(root_public_key_path)

    provenance = verify_key_provenance(
        certificate, key_manifest, root_public_key=root_key, revocation_list=revocations)
    fields = provenance.to_dict()

    check_names = (
        "signature_valid", "key_id_matches", "within_validity_window",
        "root_signature_status", "manifest_hash_consistent")
    return {
        "ok": bool(fields["overall_valid"]),
        "provenance_status": fields["provenance_status"],
        "revocation_status": fields["revocation_status"],
        "checks": {name: fields[name] for name in check_names},
        "notes": fields["notes"],
    }
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def verify_report(report_path: str, log_dir: str) -> dict:
    """Independently re-validate a compliance report against the audit dir it
    describes. Does NOT trust the report's own claims -- re-verifies the chain
    from scratch and cross-checks.

    The load-bearing guard (the v0.10.3 CRITICAL-1 class): a report that claims
    `chain_integrity.verdict == "verified"` or a `COMPLIANT` verdict CANNOT stand
    over an audit log that does not actually verify. Any mismatch -> ok False.

    Binding scope (be honest about it): this re-verifies the chain and checks the
    report's claims for INTERNAL CONSISTENCY against it. It does not
    cryptographically bind the report to this specific log beyond the entry-count
    sanity check (a scoped report legitimately covers fewer entries; it can never
    honestly cover more). A report is only as trustworthy as the log you verify
    it against.

    Args:
        report_path: Path to the report JSON file (UTF-8).
        log_dir: Audit directory handed to `verify_audit`.

    Returns:
        {ok, report_verdict, actual_chain_valid, claimed_chain_verified,
        mismatches, audit}. A structurally malformed report (top level or
        `chain_integrity` not a JSON object) is reported as a mismatch rather
        than raising.

    Raises:
        OSError / ValueError: the report file cannot be read or is not JSON.
    """
    report = json.loads(Path(report_path).read_text(encoding="utf-8"))
    audit = verify_audit(log_dir)
    actual_valid = audit["ok"]

    mismatches: list[str] = []
    # 0. Structural sanity: a report that is not a JSON object (or whose
    #    chain_integrity is not one) is malformed. Record that as a mismatch
    #    instead of crashing on `.get` below.
    if not isinstance(report, dict):
        mismatches.append(
            f"report is not a JSON object ({type(report).__name__})")
        report = {}
    ci = report.get("chain_integrity") or {}
    if not isinstance(ci, dict):
        mismatches.append(
            f"report chain_integrity is not a JSON object ({type(ci).__name__})")
        ci = {}

    claimed_verified = ci.get("verdict") == "verified"
    claimed_entries = ci.get("total_entries")
    report_verdict = report.get("verdict")

    # 1. The report cannot claim a verified chain the log doesn't actually verify.
    if claimed_verified and not actual_valid:
        mismatches.append(
            "report claims chain_integrity=verified but the audit log does NOT verify")
    # 2. A COMPLIANT verdict over a broken chain is the exact CRITICAL-1 failure.
    if report_verdict == "COMPLIANT" and not actual_valid:
        mismatches.append("report verdict is COMPLIANT but the audit chain is broken")
    # 3. Entry-count check is ONE-DIRECTIONAL: reports are routinely period- or
    #    article-scoped (build_report filters), so a report legitimately covers
    #    FEWER entries than the chain holds. It can never honestly cover MORE --
    #    an inflated count means the report describes entries the log can't back.
    #    A present-but-non-integer total_entries is itself malformed -> reject
    #    (F4: do NOT silently skip the one cross-check because the type is wrong).
    #    bool is excluded explicitly because it is a subclass of int.
    if isinstance(claimed_entries, bool) or not isinstance(claimed_entries, int):
        if claimed_entries is not None:
            mismatches.append(
                f"report chain_integrity.total_entries is not an integer "
                f"({type(claimed_entries).__name__}): {claimed_entries!r}")
    elif claimed_entries > audit["entries"]:
        mismatches.append(
            f"report total_entries={claimed_entries} exceeds the {audit['entries']} "
            f"entries actually in the audit log")

    return {"ok": not mismatches and actual_valid, "report_verdict": report_verdict,
            "actual_chain_valid": actual_valid, "claimed_chain_verified": claimed_verified,
            "mismatches": mismatches, "audit": audit}
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def verify_all(log_dir: str, tsa_cert_path: Optional[str] = None) -> dict:
    """Run the chain check and the timestamp check over one audit directory.

    Timestamp results only count towards the overall verdict when the chain
    actually contains checkpoints; an intact chain without any checkpoint is
    still 'ok' for integrity.

    Returns {ok, audit, timestamps, timestamps_applicable}.
    """
    chain_result = verify_audit(log_dir)
    stamp_result = verify_timestamps(log_dir, tsa_cert_path)
    has_checkpoints = stamp_result["found"] > 0

    chain_ok = chain_result["ok"]
    if not chain_ok:
        overall = chain_ok
    elif has_checkpoints:
        overall = stamp_result["ok"]
    else:
        overall = True

    return {"ok": overall, "audit": chain_result, "timestamps": stamp_result,
            "timestamps_applicable": has_checkpoints}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cloakllm-verifier
|
|
3
|
+
Version: 0.12.0
|
|
4
|
+
Summary: Standalone, dependency-light verifier for CloakLLM audit artifacts -- verify hash chains, Ed25519 signatures, key provenance, RFC 3161 timestamps, and compliance reports WITHOUT the full SDK or trusting CloakLLM's code.
|
|
5
|
+
Author-email: The CloakLLM Authors <cloakllm@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://cloakllm.dev
|
|
8
|
+
Project-URL: Repository, https://github.com/cloakllm/cloakllm-verifier
|
|
9
|
+
Keywords: audit,verification,eu-ai-act,compliance,attestation,rfc3161,hash-chain
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Security :: Cryptography
|
|
13
|
+
Classifier: Intended Audience :: Legal Industry
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: cloakllm[attestation,timestamping]<0.13.0,>=0.12.0
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# cloakllm-verifier
|
|
23
|
+
|
|
24
|
+
**Independently verify CloakLLM audit artifacts — without the PII-detection stack, and without trusting CloakLLM's code.**
|
|
25
|
+
|
|
26
|
+
CloakLLM's whole pitch is *compliance you can verify, not compliance you're asked to believe.* This is the tool that makes that literal: an auditor, regulator, or CI pipeline installs `cloakllm-verifier` and checks the artifacts themselves.
|
|
27
|
+
|
|
28
|
+
It **reuses CloakLLM's own verification code** (single source of truth — no reimplementation, no drift) but pulls **only the cryptography extras** — no spaCy, no NLP models. A lean install for people who need to *check*, not *produce*.
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install cloakllm-verifier # Python; crypto deps only, no spaCy
|
|
32
|
+
npm install cloakllm-verifier # JavaScript; zero deps beyond cloakllm (see js/README.md)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
This repo ships **two packages** from one source of truth: the Python package at the root and the JavaScript package under [`js/`](js/). Both expose the same CLI (`cloakllm-verify`) and the same checks, with byte-comparable `--json` output.
|
|
36
|
+
|
|
37
|
+
## CLI
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
cloakllm-verify audit ./cloakllm_audit # hash-chain integrity
|
|
41
|
+
cloakllm-verify timestamp ./cloakllm_audit # offline RFC 3161 checkpoint tokens
|
|
42
|
+
cloakllm-verify keys cert.json --manifest m.json # KeyManifest provenance + revocation
|
|
43
|
+
cloakllm-verify report report.json ./cloakllm_audit # re-validate a compliance report
|
|
44
|
+
cloakllm-verify all ./cloakllm_audit # everything, one exit code
|
|
45
|
+
cloakllm-verify audit ./cloakllm_audit --json # machine-readable (CI)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Exit code `0` = verified, `1` = failed/invalid. Output is ASCII-only.
|
|
49
|
+
|
|
50
|
+
## Python API
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from cloakllm_verifier import verify_audit, verify_timestamps, verify_all
|
|
54
|
+
|
|
55
|
+
r = verify_all("./cloakllm_audit")
|
|
56
|
+
assert r["ok"], r # {ok, audit: {...}, timestamps: {...}}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## What it checks
|
|
60
|
+
- **Hash-chain integrity** — recomputes every SHA-256 link from the canonical JSON; any tampered, reordered, deleted, or relinked entry fails.
|
|
61
|
+
- **RFC 3161 trusted timestamps** — offline-verifies every `chain_checkpoint` token (proving the chain existed no later than the TSA's time); reports the earliest provable time.
|
|
62
|
+
- **KeyManifest provenance + revocation** — verifies a signed certificate against its published KeyManifest (signature, key-id binding, validity window, manifest-hash integrity, offline-root signature when claimed) and checks it against a root-signed RevocationList.
|
|
63
|
+
- **Compliance-report re-validation** — independently re-verifies the audit chain a report describes and rejects any report that claims a *verified* chain or a *COMPLIANT* verdict over a log that does not actually verify. It does not trust the report's own claims.
|
|
64
|
+
|
|
65
|
+
## What a passing result does — and does NOT — prove
|
|
66
|
+
|
|
67
|
+
Be precise about the guarantees (an auditor needs to know the edges):
|
|
68
|
+
|
|
69
|
+
- **`audit` proves integrity + internal consistency of the entries present** — every SHA-256 link recomputes and chains cleanly. It does **not** prove:
|
|
70
|
+
- **Completeness.** A hash chain is anchored at its genesis, not its head, so removing entries from the **end** (tail truncation) leaves a still-valid prefix. Detecting truncation needs an external head anchor — an **RFC 3161 checkpoint over the final `entry_hash`** (`timestamp`), which binds "the chain was at least this long at time T".
|
|
71
|
+
- **Authenticity.** The chain is a keyless SHA-256 construction: anyone who can write the log can recompute a self-consistent one. Authenticity comes from the **Ed25519 attestation** (`keys`) — a signed certificate whose key provenance you verify against a published KeyManifest.
|
|
72
|
+
- **`keys` without `--manifest`** only checks the certificate's signature against the key embedded in the certificate — it does **not** establish who owns that key. Pass `--manifest` for real provenance; the CLI marks signature-only results `UNVERIFIED`, not "verified".
|
|
73
|
+
- **`report`** re-verifies the chain and checks the report's claims for internal consistency against it; it is not a cryptographic binding of that exact report to that exact log beyond an entry-count sanity check.
|
|
74
|
+
|
|
75
|
+
### Known limitation: cross-SDK whole-number floats
|
|
76
|
+
|
|
77
|
+
A hashed floating-point field that happens to be a whole number (e.g. a timing metric of exactly `0.0` or `5.0`) serializes as `5.0` in Python but `5` in JavaScript, so a chain written by one SDK can be reported as *tampered* by the other SDK's verifier. This is a **false rejection that fails safe** (a genuine chain is flagged for investigation; a forged chain never passes), it is intermittent, and it predates this package. A proper fix is an RFC 8785-style number-canonicalization migration (a hash-semantics change, tracked for a future release). **Workaround today: verify a chain with the same-language verifier that produced it.**
|
|
78
|
+
|
|
79
|
+
## Why a separate package
|
|
80
|
+
So an auditor's install is minimal and its purpose is unambiguous — it's a *verifier*, not the PII middleware. The verification logic lives in `cloakllm` (reused here), so the two can never drift.
|
|
81
|
+
|
|
82
|
+
MIT · part of [CloakLLM](https://cloakllm.dev)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
cloakllm_verifier/__init__.py
|
|
5
|
+
cloakllm_verifier/cli.py
|
|
6
|
+
cloakllm_verifier/verify.py
|
|
7
|
+
cloakllm_verifier.egg-info/PKG-INFO
|
|
8
|
+
cloakllm_verifier.egg-info/SOURCES.txt
|
|
9
|
+
cloakllm_verifier.egg-info/dependency_links.txt
|
|
10
|
+
cloakllm_verifier.egg-info/entry_points.txt
|
|
11
|
+
cloakllm_verifier.egg-info/requires.txt
|
|
12
|
+
cloakllm_verifier.egg-info/top_level.txt
|
|
13
|
+
tests/test_verifier.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cloakllm_verifier
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cloakllm-verifier"
|
|
7
|
+
version = "0.12.0"
|
|
8
|
+
description = "Standalone, dependency-light verifier for CloakLLM audit artifacts -- verify hash chains, Ed25519 signatures, key provenance, RFC 3161 timestamps, and compliance reports WITHOUT the full SDK or trusting CloakLLM's code."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [{name = "The CloakLLM Authors", email = "cloakllm@gmail.com"}]
|
|
13
|
+
keywords = ["audit", "verification", "eu-ai-act", "compliance", "attestation", "rfc3161", "hash-chain"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Topic :: Security :: Cryptography",
|
|
18
|
+
"Intended Audience :: Legal Industry",
|
|
19
|
+
]
|
|
20
|
+
# Reuses CloakLLM's OWN verify code (single source of truth, no drift) but pulls
|
|
21
|
+
# only the crypto extras -- NO spaCy / detection. An auditor installs this to
|
|
22
|
+
# check artifacts; it never needs the PII-detection stack.
|
|
23
|
+
dependencies = [
|
|
24
|
+
"cloakllm[attestation,timestamping]>=0.12.0,<0.13.0",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
dev = ["pytest>=7.0"]
|
|
29
|
+
|
|
30
|
+
[project.scripts]
|
|
31
|
+
cloakllm-verify = "cloakllm_verifier.cli:main"
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://cloakllm.dev"
|
|
35
|
+
Repository = "https://github.com/cloakllm/cloakllm-verifier"
|
|
36
|
+
|
|
37
|
+
[tool.setuptools]
|
|
38
|
+
packages = ["cloakllm_verifier"]
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""Adversarial tests for cloakllm-verifier.
|
|
2
|
+
|
|
3
|
+
The property that matters for a verifier is NOT "passes on clean input" — it's
|
|
4
|
+
"FAILS on tampered / reordered / relinked / deleted input." These tests attack
|
|
5
|
+
a real audit chain every way and assert the verifier rejects each, plus the
|
|
6
|
+
verify-don't-assert guard (verify_all must be False if any required sub-check
|
|
7
|
+
fails) and graceful handling of missing/empty inputs.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
import warnings
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
import pytest
|
|
17
|
+
|
|
18
|
+
from cloakllm import Shield, ShieldConfig
|
|
19
|
+
from cloakllm_verifier import (
|
|
20
|
+
verify_audit, verify_timestamps, verify_keys, verify_report, verify_all)
|
|
21
|
+
|
|
22
|
+
class _LazyFixture(dict):
    """Dict that reads its JSON fixture file on first key access.

    Reading at import time makes the whole module fail at collection when the
    fixture file is absent (e.g. a distribution that ships the tests without
    the fixtures directory). Loading lazily confines the damage: only tests
    that actually index the fixture are skipped.
    """

    def __init__(self, path):
        super().__init__()
        self._path = path
        self._loaded = False

    def __missing__(self, key):
        # Called by dict.__getitem__ only when `key` is absent.
        if not self._loaded:
            if not self._path.is_file():
                pytest.skip(f"fixture not available: {self._path.name}")
            self.update(json.loads(self._path.read_text(encoding="utf-8")))
            self._loaded = True
            if key in self:
                return dict.__getitem__(self, key)
        raise KeyError(key)


_FIX = _LazyFixture(
    Path(__file__).resolve().parent / "fixtures" / "timestamp_token.json")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _make_chain(tmp_path, n=4) -> str:
    """Create an audit directory under *tmp_path* by running `Shield.sanitize`
    *n* times, and return the directory path as a string.

    The working directory is switched to *tmp_path* for the duration and is
    always restored afterwards.
    """
    audit_dir = str(tmp_path / "audit")
    original_cwd = os.getcwd()
    os.chdir(tmp_path)
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            config = ShieldConfig(audit_enabled=True, log_dir=audit_dir)
            shield = Shield(config)
            for i in range(n):
                shield.sanitize(f"msg {i}: email user{i}@example.com ssn 123-45-6789")
    finally:
        os.chdir(original_cwd)
    return audit_dir
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _jsonl(adir):
    """Return the first `*.jsonl` file in *adir*, in sorted path order."""
    candidates = list(Path(adir).glob("*.jsonl"))
    candidates.sort()
    return candidates[0]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _lines(adir):
    """Return the chain file's text split into lines (terminators removed)."""
    log_text = _jsonl(adir).read_text(encoding="utf-8")
    return log_text.splitlines()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _write(adir, lines):
    """Overwrite the chain file with *lines*, one per line, newline-terminated."""
    target = _jsonl(adir)
    payload = "\n".join(lines) + "\n"
    target.write_text(payload, encoding="utf-8")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# --- the chain must VERIFY when intact, and FAIL under every attack ---
|
|
55
|
+
|
|
56
|
+
def test_intact_chain_verifies(tmp_path):
    """An untouched chain built with the default 4 entries must verify."""
    result = verify_audit(_make_chain(tmp_path))
    assert result["ok"]
    assert result["valid"]
    assert result["entries"] == 4
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_mutated_field_is_rejected(tmp_path):
    """Changing a field of the second entry must fail verification with errors."""
    adir = _make_chain(tmp_path)
    lines = _lines(adir)
    entry = json.loads(lines[1])
    entry["entity_count"] = 9999
    lines[1] = json.dumps(entry)
    _write(adir, lines)
    result = verify_audit(adir)
    assert not result["ok"]
    assert result["errors"]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_deleted_entry_is_rejected(tmp_path):
    """Removing the third entry from the chain must fail verification."""
    adir = _make_chain(tmp_path)
    remaining = _lines(adir)
    remaining.pop(2)
    _write(adir, remaining)
    outcome = verify_audit(adir)
    assert not outcome["ok"]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_reordered_entries_are_rejected(tmp_path):
    """Swapping the second and third entries must fail verification."""
    adir = _make_chain(tmp_path)
    lines = _lines(adir)
    second, third = lines[1], lines[2]
    lines[1] = third
    lines[2] = second
    _write(adir, lines)
    outcome = verify_audit(adir)
    assert not outcome["ok"]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_relinked_prev_hash_is_rejected(tmp_path):
    """Pointing the third entry's prev_hash at an all-zero hash must fail."""
    adir = _make_chain(tmp_path)
    lines = _lines(adir)
    entry = json.loads(lines[2])
    entry["prev_hash"] = "0" * 64
    lines[2] = json.dumps(entry)
    _write(adir, lines)
    outcome = verify_audit(adir)
    assert not outcome["ok"]
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# --- graceful handling of degenerate inputs (never raise) ---
|
|
97
|
+
|
|
98
|
+
def test_missing_dir_is_not_ok_and_does_not_raise(tmp_path):
    """A non-existent directory yields a failed result with errors, not an exception."""
    missing = tmp_path / "nope"
    result = verify_audit(str(missing))
    assert not result["ok"]
    assert result["errors"]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_empty_dir_does_not_raise(tmp_path):
    """Verifying an empty directory must return a result rather than raise."""
    empty = tmp_path / "empty"
    empty.mkdir()
    result = verify_audit(str(empty))
    # whatever the policy, no exception escaped
    assert result["ok"] in (True, False)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# --- RFC 3161 checkpoint verification (fixture-driven, offline) ---
|
|
110
|
+
|
|
111
|
+
def _checkpoint_line(seq, token_b64, digest_hex):
    """Serialize a minimal `chain_checkpoint` event as one JSON line.

    The checkpoint stamps *digest_hex* with the base64 token *token_b64*.
    """
    context = {
        "stamped_entry_hash": digest_hex,
        "tst_token_b64": token_b64,
        "tsa_url": "https://freetsa.org/tsr",
        "hash_algorithm": "sha256",
        "stamped_seq": 0,
    }
    event = {
        "seq": seq,
        "event_type": "chain_checkpoint",
        "entry_hash": "x",
        "checkpoint_context": context,
    }
    return json.dumps(event)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _entry_line(seq, entry_hash):
    """Serialize a minimal `sanitize` entry carrying *entry_hash* as one JSON
    line, so F5 chain-binding has a hash to find."""
    record = {"seq": seq, "entry_hash": entry_hash, "event_type": "sanitize"}
    return json.dumps(record)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _skip_if_no_ts(r):
    """Skip the current test when result *r* reports a missing timestamping backend."""
    reason = r.get("reason", "")
    if not reason.startswith("timestamping backend"):
        return
    pytest.skip("timestamping backend not installed")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def test_valid_checkpoint_bound_to_chain_verifies(tmp_path):
    """F5: the token is valid AND the stamped hash is a real entry_hash in the log."""
    log_dir = tmp_path / "ts"
    log_dir.mkdir()
    stamped = _FIX["stamped_entry_hash"]
    records = [
        _entry_line(0, stamped),
        _checkpoint_line(1, _FIX["tst_token_b64"], stamped),
    ]
    (log_dir / "audit_x.jsonl").write_text(
        "".join(record + "\n" for record in records), encoding="utf-8")
    r = verify_timestamps(str(log_dir))
    _skip_if_no_ts(r)
    assert r["ok"]
    assert r["found"] == 1
    assert r["verified"] == 1
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def test_valid_token_unbound_to_chain_is_rejected(tmp_path):
    """F5 core: a cryptographically VALID token whose stamped hash matches NO
    entry in the log proves nothing about this log -- it must not verify."""
    log_dir = tmp_path / "unbound"
    log_dir.mkdir()
    unrelated_entry = _entry_line(0, "a" * 64)  # some other entry hash
    checkpoint = _checkpoint_line(1, _FIX["tst_token_b64"], _FIX["stamped_entry_hash"])
    (log_dir / "audit_x.jsonl").write_text(
        unrelated_entry + "\n" + checkpoint + "\n", encoding="utf-8")
    r = verify_timestamps(str(log_dir))
    _skip_if_no_ts(r)
    assert not r["ok"]
    assert r["verified"] == 0
    first = r["checkpoints"][0]
    assert first["token_valid"]
    assert not first["bound_to_chain"]
    # must NOT credit an unbound time
    assert r["earliest_provable_time"] is None
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def test_tampered_checkpoint_token_is_rejected(tmp_path):
    """Flipping one byte of the token must make the checkpoint fail to verify."""
    import base64
    token = bytearray(base64.b64decode(_FIX["tst_token_b64"]))
    token[-5] ^= 0xFF
    bad = base64.b64encode(bytes(token)).decode()
    log_dir = tmp_path / "ts2"
    log_dir.mkdir()
    entry = _entry_line(0, _FIX["stamped_entry_hash"])
    checkpoint = _checkpoint_line(1, bad, _FIX["stamped_entry_hash"])
    (log_dir / "audit_x.jsonl").write_text(
        entry + "\n" + checkpoint + "\n", encoding="utf-8")
    r = verify_timestamps(str(log_dir))
    _skip_if_no_ts(r)
    assert not r["ok"]
    assert r["verified"] == 0
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def test_no_checkpoints_is_not_ok(tmp_path):
    """A real chain with no chain_checkpoint events has nothing to prove."""
    adir = _make_chain(tmp_path)
    result = verify_timestamps(adir)
    assert not result["ok"]
    assert result["found"] == 0
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# --- F2: empty / mis-named / entry-less dirs must NOT read as "verified" ---
|
|
177
|
+
|
|
178
|
+
def test_empty_dir_is_not_verified(tmp_path):
    """An empty directory must fail with zero entries and at least one error."""
    empty = tmp_path / "empty"
    empty.mkdir()
    result = verify_audit(str(empty))
    assert not result["ok"]
    assert result["entries"] == 0
    assert result["errors"]
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def test_non_audit_prefixed_files_are_not_silently_passed(tmp_path):
    """A dir whose only log is NOT audit_*.jsonl (and is garbage) must fail, not
    be green-lit because verify_chain ignored the file it was pointed at."""
    log_dir = tmp_path / "misnamed"
    log_dir.mkdir()
    (log_dir / "chain.jsonl").write_text('{"prev_hash":"junk","seq":0}\n', encoding="utf-8")
    result = verify_audit(str(log_dir))
    assert not result["ok"]
    assert any("audit_" in message for message in result["errors"])
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# --- F7: hostile bytes -> clean failure, never a crash ---
|
|
194
|
+
|
|
195
|
+
def test_binary_garbage_does_not_crash(tmp_path):
    """A log file of arbitrary bytes must produce a failed result, not a crash."""
    log_dir = tmp_path / "bin"
    log_dir.mkdir()
    garbage = bytes(range(256)) * 8
    (log_dir / "audit_x.jsonl").write_bytes(garbage)
    result = verify_audit(str(log_dir))  # must not raise
    assert not result["ok"]
    assert result["errors"]
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def test_invalid_utf8_does_not_crash(tmp_path):
    """A log file that is not valid UTF-8 must produce a failed result, not a crash."""
    log_dir = tmp_path / "u8"
    log_dir.mkdir()
    (log_dir / "audit_x.jsonl").write_bytes(b"\xff\xfe\x00 not utf8\n")
    result = verify_audit(str(log_dir))
    assert not result["ok"]
    assert result["errors"]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# --- F4: verify_report rejects a malformed (non-int) total_entries ---
|
|
210
|
+
|
|
211
|
+
def test_report_non_integer_total_entries_is_rejected(tmp_path):
    """F4: a report whose total_entries is a string must be flagged as malformed."""
    adir = _make_chain(tmp_path)
    malformed = {"verdict": "COMPLIANT",
                 "chain_integrity": {"verdict": "verified", "total_entries": "4"}}
    report_file = tmp_path / "r.json"
    report_file.write_text(json.dumps(malformed), encoding="utf-8")
    result = verify_report(str(report_file), adir)
    assert not result["ok"]
    assert any("not an integer" in message for message in result["mismatches"])
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# --- KeyManifest provenance verify (verify_keys) ---
|
|
222
|
+
|
|
223
|
+
def _cert_and_manifest(tmp_path, deployer="acme"):
    """Generate a key pair, derive its manifest, sign a certificate with it and
    write both as JSON under *tmp_path*.

    Returns (keypair, cert_path, manifest_path) with both paths as strings.
    """
    from cloakllm.attestation import DeploymentKeyPair, derive_key_manifest, SanitizationCertificate

    keypair = DeploymentKeyPair.generate()
    manifest = derive_key_manifest(
        keypair, deployer_id=deployer,
        valid_from="2026-01-01T00:00:00+00:00",
        valid_until="2027-01-01T00:00:00+00:00")
    cert = SanitizationCertificate.create(
        original_text="x", sanitized_text="y", entity_count=0, categories={},
        detection_passes=["regex"], mode="tokenize", keypair=keypair)

    cert_file = tmp_path / "cert.json"
    cert_file.write_text(json.dumps(cert.to_dict()), encoding="utf-8")
    manifest_file = tmp_path / "manifest.json"
    manifest_file.write_text(json.dumps(manifest.to_dict()), encoding="utf-8")
    return keypair, str(cert_file), str(manifest_file)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def test_valid_provenance_verifies(tmp_path):
    """A certificate checked against its own manifest must come back VERIFIED."""
    _, cert_path, manifest_path = _cert_and_manifest(tmp_path)
    result = verify_keys(cert_path, manifest_path)
    assert result["ok"]
    assert result["provenance_status"] == "VERIFIED"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def test_tampered_manifest_key_id_is_rejected(tmp_path):
    """Overwriting the manifest's key_id must make provenance verification fail."""
    _, cert_path, manifest_path = _cert_and_manifest(tmp_path)
    manifest_file = Path(manifest_path)
    manifest = json.loads(manifest_file.read_text(encoding="utf-8"))
    manifest["key_id"] = "bogus"
    manifest_file.write_text(json.dumps(manifest), encoding="utf-8")
    result = verify_keys(cert_path, manifest_path)
    # key_id mismatch AND manifest_hash inconsistency
    assert not result["ok"]
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def test_cert_signed_by_other_key_is_rejected(tmp_path):
    """A certificate signed by key B must not verify against key A's manifest."""
    from cloakllm.attestation import DeploymentKeyPair, SanitizationCertificate

    _, _, manifest_path = _cert_and_manifest(tmp_path)  # manifest for key A
    other_keypair = DeploymentKeyPair.generate()        # cert signed by key B
    foreign_cert = SanitizationCertificate.create(
        original_text="x", sanitized_text="y", entity_count=0, categories={},
        detection_passes=["regex"], mode="tokenize", keypair=other_keypair)
    cert_file = tmp_path / "cert_b.json"
    cert_file.write_text(json.dumps(foreign_cert.to_dict()), encoding="utf-8")
    result = verify_keys(str(cert_file), manifest_path)
    assert not result["ok"]
    assert not result["checks"]["signature_valid"]
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def test_signature_only_mode_is_unverified_but_ok(tmp_path):
    """Without a manifest the result is ok but its provenance is UNVERIFIED."""
    _, cert_path, _ = _cert_and_manifest(tmp_path)
    result = verify_keys(cert_path)  # no manifest
    assert result["ok"]
    assert result["provenance_status"] == "UNVERIFIED"
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# --- standalone compliance-report re-validation (the v0.10.3 CRITICAL-1 guard) ---
|
|
270
|
+
|
|
271
|
+
def _write_report(tmp_path, *, verdict, chain_verdict, total_entries):
    """Write a minimal compliance report to `report.json` under *tmp_path* and
    return its path as a string."""
    report = {
        "verdict": verdict,
        "chain_integrity": {"verdict": chain_verdict, "total_entries": total_entries},
    }
    report_file = tmp_path / "report.json"
    report_file.write_text(json.dumps(report), encoding="utf-8")
    return str(report_file)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def test_report_matching_intact_chain_ok(tmp_path):
    """A report whose claims match an intact 4-entry chain must be accepted."""
    adir = _make_chain(tmp_path)
    report_path = _write_report(
        tmp_path, verdict="COMPLIANT", chain_verdict="verified", total_entries=4)
    result = verify_report(report_path, adir)
    assert result["ok"]
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def test_compliant_report_over_broken_chain_is_rejected(tmp_path):
    """The load-bearing guard: a COMPLIANT + verified report CANNOT stand over a
    tampered log. This is the exact v0.10.3 CRITICAL-1 failure the verifier exists
    to catch independently."""
    adir = _make_chain(tmp_path)
    lines = _lines(adir)
    entry = json.loads(lines[1])
    entry["entity_count"] = 9999
    lines[1] = json.dumps(entry)
    _write(adir, lines)
    report_path = _write_report(
        tmp_path, verdict="COMPLIANT", chain_verdict="verified", total_entries=4)
    result = verify_report(report_path, adir)
    assert not result["ok"]
    assert not result["actual_chain_valid"]
    assert result["mismatches"]
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def test_report_inflated_entry_count_is_rejected(tmp_path):
    """A report claiming MORE entries than the log holds describes evidence the
    log cannot back -- always a mismatch."""
    adir = _make_chain(tmp_path)
    report_path = _write_report(
        tmp_path, verdict="COMPLIANT", chain_verdict="verified", total_entries=99)
    result = verify_report(report_path, adir)
    assert not result["ok"]
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def test_scoped_report_with_fewer_entries_is_ok(tmp_path):
    """Reports are routinely period-/article-scoped, covering FEWER entries than
    the full chain. That must NOT be flagged (the count check is one-directional)."""
    adir = _make_chain(tmp_path)  # 4 entries
    report_path = _write_report(
        tmp_path, verdict="COMPLIANT", chain_verdict="verified", total_entries=1)
    result = verify_report(report_path, adir)
    assert result["ok"]
    assert not result["mismatches"]
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
# --- verify_all: the verify-don't-assert guard ---
|
|
317
|
+
|
|
318
|
+
def test_verify_all_ok_when_chain_intact_no_checkpoints(tmp_path):
    """An intact chain without checkpoints passes, with timestamps marked not applicable."""
    result = verify_all(_make_chain(tmp_path))
    assert result["ok"]
    assert not result["timestamps_applicable"]
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def test_verify_all_false_when_chain_tampered(tmp_path):
    """A failed required sub-check MUST sink the whole verdict."""
    adir = _make_chain(tmp_path)
    lines = _lines(adir)
    entry = json.loads(lines[0])
    entry["entity_count"] = -1
    lines[0] = json.dumps(entry)
    _write(adir, lines)
    result = verify_all(adir)
    assert not result["ok"]
|