apex-audit-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apex_audit_sdk-0.1.0/.gitignore +17 -0
- apex_audit_sdk-0.1.0/PKG-INFO +146 -0
- apex_audit_sdk-0.1.0/README.md +133 -0
- apex_audit_sdk-0.1.0/pyproject.toml +27 -0
- apex_audit_sdk-0.1.0/scripts/bootstrap_env.py +39 -0
- apex_audit_sdk-0.1.0/src/apex_audit/__init__.py +44 -0
- apex_audit_sdk-0.1.0/src/apex_audit/_canonical.py +39 -0
- apex_audit_sdk-0.1.0/src/apex_audit/_validation.py +84 -0
- apex_audit_sdk-0.1.0/src/apex_audit/client.py +290 -0
- apex_audit_sdk-0.1.0/src/apex_audit/errors.py +73 -0
- apex_audit_sdk-0.1.0/src/apex_audit/hashing.py +96 -0
- apex_audit_sdk-0.1.0/tests/test_client.py +415 -0
- apex_audit_sdk-0.1.0/tests/test_merkle.py +105 -0
- apex_audit_sdk-0.1.0/tests/test_payload_validation.py +143 -0
- apex_audit_sdk-0.1.0/tests/test_vectors.py +111 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: apex-audit-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python capture SDK for the Apex Audit service: hash-at-source, tamper-evident audit trails for AI agent decisions.
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Keywords: ai-governance,audit,eu-ai-act,hash-chain,merkle
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: httpx>=0.27
|
|
9
|
+
Requires-Dist: rfc8785>=0.1.2
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# apex-audit-sdk (Python)
|
|
15
|
+
|
|
16
|
+
Capture SDK for the **Apex Audit** service: tamper-evident, hash-at-source audit
|
|
17
|
+
trails for AI agent decisions, sealed to a public ledger.
|
|
18
|
+
|
|
19
|
+
Three lines is all an agent loop needs:
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
txn = client.open_transaction(name="benefits eligibility review")
|
|
23
|
+
txn.log("reasoning", {"rationale": "Income below threshold; criterion B met."})
|
|
24
|
+
txn.log("output", {"recommendation": "approve", "confidence": 0.94})
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Every entry is hashed **on your side** before it leaves the process
|
|
28
|
+
(`sha256` over the RFC 8785 canonical form of
|
|
29
|
+
`{transaction_id, seq, type, payload, prev_hash}`), chained to the previous
|
|
30
|
+
entry, and re-verified by the server on receipt. Nobody — including the
|
|
31
|
+
service — can silently rewrite what your agent did.
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install apex-audit-sdk # package name
|
|
37
|
+
python -c "import apex_audit" # import name
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
From this repo (development):
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
cd packages/sdk-python
|
|
44
|
+
python3 -m venv .venv
|
|
45
|
+
.venv/bin/python -m pip install -e ".[dev]"
|
|
46
|
+
.venv/bin/python -m pytest
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Requires Python >= 3.10. Runtime dependencies: `httpx`, `rfc8785`.
|
|
50
|
+
|
|
51
|
+
## Quickstart
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from apex_audit import AuditClient
|
|
55
|
+
|
|
56
|
+
client = AuditClient(
|
|
57
|
+
api_url="http://localhost:4000",
|
|
58
|
+
api_key="...", # sent as Authorization: Bearer <key>
|
|
59
|
+
timeout=10.0,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
txn = client.open_transaction(name="benefits eligibility review")
|
|
63
|
+
txn.log("initiation", {"task": "benefits-eligibility-review", "case_ref": "EU-2026-00441"})
|
|
64
|
+
txn.log("action", {"tool": "case_management.lookup", "applicant_ref": "APP-99182"})
|
|
65
|
+
txn.log("reasoning", {"rationale": "Income below threshold; criterion B met."})
|
|
66
|
+
txn.log("output", {"recommendation": "approve", "confidence": 0.94})
|
|
67
|
+
|
|
68
|
+
txn.request_review() # status -> in_review
|
|
69
|
+
txn.record_oversight(decision="approved", reviewer="j.okafor",
|
|
70
|
+
note="Consistent with policy 2026/14.")
|
|
71
|
+
txn.complete() # status -> complete
|
|
72
|
+
sealed = txn.seal() # Merkle root anchored on-ledger
|
|
73
|
+
print(sealed["merkle_root"], sealed["anchor_topic_id"])
|
|
74
|
+
|
|
75
|
+
client.close() # or use: with AuditClient(...) as client: ...
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Context-manager style (opens the transaction on enter; nothing automatic on
|
|
79
|
+
exit — review, completion and sealing stay explicit):
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
with client.transaction(name="loan pre-screen") as txn:
|
|
83
|
+
txn.log("initiation", {...})
|
|
84
|
+
...
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Entry types are restricted to `initiation`, `action`, `reasoning`,
|
|
88
|
+
`reference`, `output`, `oversight` — anything else raises `ValueError`
|
|
89
|
+
before any network call.
|
|
90
|
+
|
|
91
|
+
## Error handling
|
|
92
|
+
|
|
93
|
+
The service reports errors as `{"error": {"code", "message", "details"}}`,
|
|
94
|
+
mapped to exceptions:
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from apex_audit import ApexAPIError, ChainConflictError, EntryHashRejectedError
|
|
98
|
+
|
|
99
|
+
try:
|
|
100
|
+
txn.log("action", {"tool": "lookup"})
|
|
101
|
+
except ChainConflictError as e: # 409 CHAIN_CONFLICT
|
|
102
|
+
# another writer extended the chain, or a previous attempt landed.
|
|
103
|
+
print(e.details["expected_seq"], e.details["expected_prev_hash"])
|
|
104
|
+
except EntryHashRejectedError as e: # 422 ENTRY_HASH_INVALID
|
|
105
|
+
# client/server canonicalisation disagreement — report it, don't retry.
|
|
106
|
+
print(e.details["computed_by_server"], e.details["submitted"])
|
|
107
|
+
except ApexAPIError as e: # everything else
|
|
108
|
+
print(e.code, e.status, e.message, e.details)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**The SDK never auto-retries POSTs.** Appending an entry is not idempotent:
|
|
112
|
+
if a request times out *after* the server stored it, a blind retry would be
|
|
113
|
+
rejected as `CHAIN_CONFLICT` — and retry loops can mask genuine chain breaks.
|
|
114
|
+
On any failure, local `seq`/`prev_hash` state is left unchanged so you can
|
|
115
|
+
inspect, resynchronise, and retry deliberately.
|
|
116
|
+
|
|
117
|
+
## Payload rules (the number caveat)
|
|
118
|
+
|
|
119
|
+
Payloads must be plain JSON: `dict` (string keys) / `list` / `str` / `int` /
|
|
120
|
+
`float` / `bool` / `None`. `NaN`/`Infinity`, sets, datetimes, bytes, Decimals
|
|
121
|
+
etc. raise `ValueError` with the offending path (e.g. `payload.scores[2]`).
|
|
122
|
+
|
|
123
|
+
**Integers beyond 2^53 − 1 lose precision** once they hit IEEE-754 doubles
|
|
124
|
+
(RFC 8785) and Postgres `jsonb`. The SDK warns (`PayloadPrecisionWarning`)
|
|
125
|
+
and recommends sending such values as strings:
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
txn.log("action", {"ledger_ref": str(9007199254740993)}) # exact
|
|
129
|
+
txn.log("action", {"ledger_ref": 9007199254740993}) # warns; value will round
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Verifying later
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
data = client.get_transaction(txn_id) # transaction + ordered entries
|
|
136
|
+
result = client.verify(txn_id) # recompute + compare with ledger anchor
|
|
137
|
+
# (404 until the service ships verify, M4)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Hash utilities are exported for independent verification:
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
from apex_audit import compute_entry_hash, compute_merkle_root, canonicalize, GENESIS_HASH
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Full reference: [`docs/sdk/python.md`](../../docs/sdk/python.md).
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# apex-audit-sdk (Python)
|
|
2
|
+
|
|
3
|
+
Capture SDK for the **Apex Audit** service: tamper-evident, hash-at-source audit
|
|
4
|
+
trails for AI agent decisions, sealed to a public ledger.
|
|
5
|
+
|
|
6
|
+
Three lines is all an agent loop needs:
|
|
7
|
+
|
|
8
|
+
```python
|
|
9
|
+
txn = client.open_transaction(name="benefits eligibility review")
|
|
10
|
+
txn.log("reasoning", {"rationale": "Income below threshold; criterion B met."})
|
|
11
|
+
txn.log("output", {"recommendation": "approve", "confidence": 0.94})
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Every entry is hashed **on your side** before it leaves the process
|
|
15
|
+
(`sha256` over the RFC 8785 canonical form of
|
|
16
|
+
`{transaction_id, seq, type, payload, prev_hash}`), chained to the previous
|
|
17
|
+
entry, and re-verified by the server on receipt. Nobody — including the
|
|
18
|
+
service — can silently rewrite what your agent did.
|
|
19
|
+
|
|
20
|
+
## Install
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install apex-audit-sdk # package name
|
|
24
|
+
python -c "import apex_audit" # import name
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
From this repo (development):
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
cd packages/sdk-python
|
|
31
|
+
python3 -m venv .venv
|
|
32
|
+
.venv/bin/python -m pip install -e ".[dev]"
|
|
33
|
+
.venv/bin/python -m pytest
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Requires Python >= 3.10. Runtime dependencies: `httpx`, `rfc8785`.
|
|
37
|
+
|
|
38
|
+
## Quickstart
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from apex_audit import AuditClient
|
|
42
|
+
|
|
43
|
+
client = AuditClient(
|
|
44
|
+
api_url="http://localhost:4000",
|
|
45
|
+
api_key="...", # sent as Authorization: Bearer <key>
|
|
46
|
+
timeout=10.0,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
txn = client.open_transaction(name="benefits eligibility review")
|
|
50
|
+
txn.log("initiation", {"task": "benefits-eligibility-review", "case_ref": "EU-2026-00441"})
|
|
51
|
+
txn.log("action", {"tool": "case_management.lookup", "applicant_ref": "APP-99182"})
|
|
52
|
+
txn.log("reasoning", {"rationale": "Income below threshold; criterion B met."})
|
|
53
|
+
txn.log("output", {"recommendation": "approve", "confidence": 0.94})
|
|
54
|
+
|
|
55
|
+
txn.request_review() # status -> in_review
|
|
56
|
+
txn.record_oversight(decision="approved", reviewer="j.okafor",
|
|
57
|
+
note="Consistent with policy 2026/14.")
|
|
58
|
+
txn.complete() # status -> complete
|
|
59
|
+
sealed = txn.seal() # Merkle root anchored on-ledger
|
|
60
|
+
print(sealed["merkle_root"], sealed["anchor_topic_id"])
|
|
61
|
+
|
|
62
|
+
client.close() # or use: with AuditClient(...) as client: ...
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Context-manager style (opens the transaction on enter; nothing automatic on
|
|
66
|
+
exit — review, completion and sealing stay explicit):
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
with client.transaction(name="loan pre-screen") as txn:
|
|
70
|
+
txn.log("initiation", {...})
|
|
71
|
+
...
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Entry types are restricted to `initiation`, `action`, `reasoning`,
|
|
75
|
+
`reference`, `output`, `oversight` — anything else raises `ValueError`
|
|
76
|
+
before any network call.
|
|
77
|
+
|
|
78
|
+
## Error handling
|
|
79
|
+
|
|
80
|
+
The service reports errors as `{"error": {"code", "message", "details"}}`,
|
|
81
|
+
mapped to exceptions:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from apex_audit import ApexAPIError, ChainConflictError, EntryHashRejectedError
|
|
85
|
+
|
|
86
|
+
try:
|
|
87
|
+
txn.log("action", {"tool": "lookup"})
|
|
88
|
+
except ChainConflictError as e: # 409 CHAIN_CONFLICT
|
|
89
|
+
# another writer extended the chain, or a previous attempt landed.
|
|
90
|
+
print(e.details["expected_seq"], e.details["expected_prev_hash"])
|
|
91
|
+
except EntryHashRejectedError as e: # 422 ENTRY_HASH_INVALID
|
|
92
|
+
# client/server canonicalisation disagreement — report it, don't retry.
|
|
93
|
+
print(e.details["computed_by_server"], e.details["submitted"])
|
|
94
|
+
except ApexAPIError as e: # everything else
|
|
95
|
+
print(e.code, e.status, e.message, e.details)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**The SDK never auto-retries POSTs.** Appending an entry is not idempotent:
|
|
99
|
+
if a request times out *after* the server stored it, a blind retry would be
|
|
100
|
+
rejected as `CHAIN_CONFLICT` — and retry loops can mask genuine chain breaks.
|
|
101
|
+
On any failure, local `seq`/`prev_hash` state is left unchanged so you can
|
|
102
|
+
inspect, resynchronise, and retry deliberately.
|
|
103
|
+
|
|
104
|
+
## Payload rules (the number caveat)
|
|
105
|
+
|
|
106
|
+
Payloads must be plain JSON: `dict` (string keys) / `list` / `str` / `int` /
|
|
107
|
+
`float` / `bool` / `None`. `NaN`/`Infinity`, sets, datetimes, bytes, Decimals
|
|
108
|
+
etc. raise `ValueError` with the offending path (e.g. `payload.scores[2]`).
|
|
109
|
+
|
|
110
|
+
**Integers beyond 2^53 − 1 lose precision** once they hit IEEE-754 doubles
|
|
111
|
+
(RFC 8785) and Postgres `jsonb`. The SDK warns (`PayloadPrecisionWarning`)
|
|
112
|
+
and recommends sending such values as strings:
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
txn.log("action", {"ledger_ref": str(9007199254740993)}) # exact
|
|
116
|
+
txn.log("action", {"ledger_ref": 9007199254740993}) # warns; value will round
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Verifying later
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
data = client.get_transaction(txn_id) # transaction + ordered entries
|
|
123
|
+
result = client.verify(txn_id) # recompute + compare with ledger anchor
|
|
124
|
+
# (404 until the service ships verify, M4)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Hash utilities are exported for independent verification:
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from apex_audit import compute_entry_hash, compute_merkle_root, canonicalize, GENESIS_HASH
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Full reference: [`docs/sdk/python.md`](../../docs/sdk/python.md).
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "apex-audit-sdk"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Python capture SDK for the Apex Audit service: hash-at-source, tamper-evident audit trails for AI agent decisions."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
keywords = ["audit", "ai-governance", "hash-chain", "merkle", "eu-ai-act"]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"httpx>=0.27",
|
|
15
|
+
"rfc8785>=0.1.2",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
dev = [
|
|
20
|
+
"pytest>=8.0",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[tool.hatch.build.targets.wheel]
|
|
24
|
+
packages = ["src/apex_audit"]
|
|
25
|
+
|
|
26
|
+
[tool.pytest.ini_options]
|
|
27
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Dev bootstrap: create .venv for the SDK and install it editable with dev extras.
|
|
2
|
+
|
|
3
|
+
Usage: python3 scripts/bootstrap_env.py
|
|
4
|
+
Equivalent to:
|
|
5
|
+
python3 -m venv .venv && .venv/bin/python -m pip install -e ".[dev]"
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
import venv
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
PKG_DIR = Path(__file__).resolve().parents[1]
|
|
16
|
+
VENV_DIR = PKG_DIR / ".venv"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def main() -> int:
    """Create the SDK's ``.venv`` if needed and install the package editable.

    The interpreter inside the venv is located according to the platform's
    venv layout (``Scripts\\python.exe`` on Windows, ``bin/python``
    elsewhere), so the script no longer assumes a POSIX layout.

    Returns:
        The exit status of the ``pip install -e "<pkg>[dev]"`` subprocess,
        suitable for passing to ``SystemExit``.
    """
    # venv places the interpreter under Scripts\ on Windows and bin/ elsewhere.
    if sys.platform == "win32":
        venv_python = VENV_DIR / "Scripts" / "python.exe"
    else:
        venv_python = VENV_DIR / "bin" / "python"

    if not venv_python.exists():
        print(f"creating venv at {VENV_DIR}", flush=True)
        venv.create(VENV_DIR, with_pip=True)
    else:
        print(f"venv already exists at {VENV_DIR}", flush=True)

    # Argument list (no shell), so paths containing spaces are passed intact.
    cmd = [
        str(venv_python),
        "-m",
        "pip",
        "install",
        "-e",
        f"{PKG_DIR}[dev]",
    ]
    print("running:", " ".join(cmd), flush=True)
    return subprocess.call(cmd)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
if __name__ == "__main__":
|
|
39
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Apex Audit capture SDK for Python.
|
|
2
|
+
|
|
3
|
+
Hash-at-source, tamper-evident audit trails for AI agent decisions::
|
|
4
|
+
|
|
5
|
+
from apex_audit import AuditClient
|
|
6
|
+
|
|
7
|
+
client = AuditClient(api_url="http://localhost:4000", api_key="...")
|
|
8
|
+
txn = client.open_transaction(name="benefits eligibility review")
|
|
9
|
+
txn.log("initiation", {"task": "benefits-eligibility-review"})
|
|
10
|
+
|
|
11
|
+
See ``docs/sdk/python.md`` for the full reference.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from ._canonical import canonicalize
|
|
17
|
+
from ._validation import MAX_SAFE_INTEGER, validate_payload
|
|
18
|
+
from .client import AuditClient, AuditTransaction
|
|
19
|
+
from .errors import (
|
|
20
|
+
ApexAPIError,
|
|
21
|
+
ChainConflictError,
|
|
22
|
+
EntryHashRejectedError,
|
|
23
|
+
PayloadPrecisionWarning,
|
|
24
|
+
)
|
|
25
|
+
from .hashing import ENTRY_TYPES, GENESIS_HASH, compute_entry_hash, compute_merkle_root
|
|
26
|
+
|
|
27
|
+
__version__ = "0.1.0"
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"ApexAPIError",
|
|
31
|
+
"AuditClient",
|
|
32
|
+
"AuditTransaction",
|
|
33
|
+
"ChainConflictError",
|
|
34
|
+
"ENTRY_TYPES",
|
|
35
|
+
"EntryHashRejectedError",
|
|
36
|
+
"GENESIS_HASH",
|
|
37
|
+
"MAX_SAFE_INTEGER",
|
|
38
|
+
"PayloadPrecisionWarning",
|
|
39
|
+
"__version__",
|
|
40
|
+
"canonicalize",
|
|
41
|
+
"compute_entry_hash",
|
|
42
|
+
"compute_merkle_root",
|
|
43
|
+
"validate_payload",
|
|
44
|
+
]
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""RFC 8785 (JCS) canonicalisation.
|
|
2
|
+
|
|
3
|
+
The Apex hash spec (decision 0007) defines the entry hash over the JCS
|
|
4
|
+
canonical form of the entry record. This module is the single place the SDK
|
|
5
|
+
turns a Python value into canonical bytes; everything hash-related goes
|
|
6
|
+
through :func:`canonicalize`.
|
|
7
|
+
|
|
8
|
+
The implementation delegates to the Trail of Bits ``rfc8785`` package, which
|
|
9
|
+
is proven byte-for-byte against the cross-language vectors in
|
|
10
|
+
``packages/core/test/fixtures/vectors.json`` (see ``tests/test_vectors.py``).
|
|
11
|
+
If that package ever becomes unusable, replace the import below with a local
|
|
12
|
+
``_jcs`` module — the vectors, not the library, are the source of truth.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
import rfc8785
|
|
20
|
+
|
|
21
|
+
__all__ = ["canonicalize"]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def canonicalize(value: Any) -> bytes:
    """Encode ``value`` as RFC 8785 (JCS) canonical UTF-8 bytes.

    ``value`` is expected to be plain JSON data: a dict with string keys, a
    list, or a str/int/float/bool/None scalar. Run
    :func:`apex_audit._validation.validate_payload` beforehand to get
    path-annotated error messages; this function is the last line of defence.

    Raises:
        ValueError: if the underlying canonicaliser rejects ``value`` (for
            example an integer outside the IEEE-754 safe range). A
            ``ValueError`` raised by the canonicaliser propagates unchanged;
            any other exception type is wrapped in a ``ValueError`` with the
            original attached as ``__cause__``.
    """
    try:
        encoded = rfc8785.dumps(value)
        return bytes(encoded)
    except Exception as exc:
        if isinstance(exc, ValueError):
            # Already the documented exception type: let it through untouched.
            raise
        # Normalise everything else (e.g. TypeError) to ValueError.
        raise ValueError(f"value is not canonicalisable per RFC 8785: {exc}") from exc
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Payload validation, run before any payload is hashed or transmitted.
|
|
2
|
+
|
|
3
|
+
Rules (decisions 0007/0008):
|
|
4
|
+
|
|
5
|
+
- Only plain JSON values are allowed anywhere in the tree: ``dict`` (string
|
|
6
|
+
keys), ``list``, ``str``, ``int``, ``float``, ``bool``, ``None``.
|
|
7
|
+
Anything else (sets, tuples, datetimes, bytes, Decimals, custom objects)
|
|
8
|
+
raises :class:`ValueError` naming the offending path.
|
|
9
|
+
- ``NaN``, ``Infinity`` and ``-Infinity`` floats raise :class:`ValueError`
|
|
10
|
+
(they are not JSON and not canonicalisable).
|
|
11
|
+
- Integers with absolute value above ``2**53 - 1`` trigger a
|
|
12
|
+
:class:`~apex_audit.errors.PayloadPrecisionWarning`: Postgres ``jsonb``
|
|
13
|
+
and IEEE-754 doubles cannot represent them exactly, so the recommendation
|
|
14
|
+
is to send them as strings.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import math
|
|
20
|
+
import warnings
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from .errors import PayloadPrecisionWarning
|
|
24
|
+
|
|
25
|
+
__all__ = ["MAX_SAFE_INTEGER", "validate_payload"]
|
|
26
|
+
|
|
27
|
+
MAX_SAFE_INTEGER = 2**53 - 1
|
|
28
|
+
"""Largest integer exactly representable as an IEEE-754 double (9007199254740991)."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def validate_payload(payload: Any, *, root: str = "payload") -> None:
    """Validate that ``payload`` is plain, canonicalisable JSON.

    Args:
        payload: The value to check; walked recursively through dicts and
            lists.
        root: Label used as the first segment of the path reported in error
            and warning messages.

    Raises:
        ValueError: for NaN/Infinity floats, non-string object keys, any
            non-JSON type, or a dict/list that (directly or indirectly)
            contains itself, with the path of the offending value
            (e.g. ``payload.applicant.scores[2]``).

    Warns:
        PayloadPrecisionWarning: for integers ``|n| > 2**53 - 1`` anywhere in
            the tree — they lose precision in jsonb / IEEE-754 doubles
            (decision 0008); send such values as strings.
    """
    _validate(payload, root)


def _validate(
    value: Any,
    path: str,
    _depth: int = 0,
    _ancestors: frozenset[int] = frozenset(),
) -> None:
    """Recursively validate ``value``, reporting problems at ``path``.

    ``_depth`` counts how many container levels lie between the root value
    and ``value``; ``_ancestors`` holds the ``id()`` of every container on
    the current path. Both are internal bookkeeping and default to the
    root-level state, so ``_validate(value, path)`` keeps working.
    """
    # bool must be checked before int (bool is an int subclass).
    if value is None or isinstance(value, (bool, str)):
        return
    if isinstance(value, int):
        if abs(value) > MAX_SAFE_INTEGER:
            warnings.warn(
                f"{path}: integer {value} exceeds 2**53 - 1 and will lose "
                "precision in JSON number representations (IEEE-754 / "
                "Postgres jsonb). Send it as a string to preserve it exactly.",
                PayloadPrecisionWarning,
                # 4 is the offset chosen for a root-level value; every nested
                # container adds one more _validate frame to skip, otherwise
                # the warning is attributed to a different frame per depth.
                stacklevel=4 + _depth,
            )
        return
    if isinstance(value, float):
        if math.isnan(value) or math.isinf(value):
            raise ValueError(
                f"{path}: {value!r} is not valid JSON — NaN and Infinity "
                "cannot be canonicalised or stored"
            )
        return
    if isinstance(value, (dict, list)):
        # A container reachable from itself would otherwise recurse until
        # RecursionError; report it as the documented ValueError instead.
        if id(value) in _ancestors:
            raise ValueError(
                f"{path}: circular reference — a container that contains "
                "itself is not valid JSON"
            )
        # Only containers on the *current* path count, so the same object may
        # still appear in sibling positions.
        ancestors = _ancestors | {id(value)}
        if isinstance(value, dict):
            for key, item in value.items():
                if not isinstance(key, str):
                    raise ValueError(
                        f"{path}: object key {key!r} ({type(key).__name__}) is "
                        "not a string — JSON object keys must be strings"
                    )
                _validate(item, f"{path}.{key}", _depth + 1, ancestors)
        else:
            for index, item in enumerate(value):
                _validate(item, f"{path}[{index}]", _depth + 1, ancestors)
        return
    raise ValueError(
        f"{path}: value of type {type(value).__name__} is not "
        "JSON-serialisable — use dict/list/str/int/float/bool/None"
    )
|