actproof 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- actproof/__init__.py +234 -0
- actproof/anchor.py +586 -0
- actproof/canonical.py +369 -0
- actproof/catalogue.py +1031 -0
- actproof/cli.py +593 -0
- actproof/manifest.py +728 -0
- actproof/receipt.py +678 -0
- actproof/signers/__init__.py +89 -0
- actproof/signers/google_kms.py +392 -0
- actproof/signers/interface.py +298 -0
- actproof/signers/mnemonic.py +153 -0
- actproof/timestamp.py +527 -0
- actproof/verify.py +683 -0
- actproof-0.2.0.dist-info/METADATA +295 -0
- actproof-0.2.0.dist-info/RECORD +18 -0
- actproof-0.2.0.dist-info/WHEEL +4 -0
- actproof-0.2.0.dist-info/entry_points.txt +2 -0
- actproof-0.2.0.dist-info/licenses/LICENSE +21 -0
actproof/canonical.py
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Deyan Paroushev
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
"""
|
|
4
|
+
RFC 8785 JSON Canonicalization Scheme (JCS) with optional compliance discipline.
|
|
5
|
+
|
|
6
|
+
This module is the foundation of every other operation in actproof. A canonical
|
|
7
|
+
manifest is what gets hashed; the hash is what gets anchored on the public ledger
|
|
8
|
+
and timestamped by the QTSP; the receipt that travels outside this library is
|
|
9
|
+
re-verifiable by anyone who recomputes the canonical bytes from the same input
|
|
10
|
+
and gets the same hash.
|
|
11
|
+
|
|
12
|
+
The wrapper pattern
|
|
13
|
+
-------------------
|
|
14
|
+
|
|
15
|
+
We do not implement RFC 8785 from scratch. We wrap the ``rfc8785`` package
|
|
16
|
+
maintained by Trail of Bits, which is audited and broadly used. Our wrapper
|
|
17
|
+
adds three things on top:
|
|
18
|
+
|
|
19
|
+
1. **Strict mode (default).** Reject inputs that would produce ambiguous,
|
|
20
|
+
non-reproducible, or non-I-JSON canonical bytes. The restrictions are
|
|
21
|
+
carried forward from the production canonicaliser in the Quoruna reference
|
|
22
|
+
implementation (Quoruna-JCS-v1).
|
|
23
|
+
|
|
24
|
+
2. **Duplicate-key detection on JSON parse.** Python dicts silently swallow
|
|
25
|
+
duplicates. When input arrives as JSON text from an external party, the
|
|
26
|
+
``canonicalize_from_json`` entry point uses ``object_pairs_hook`` to raise
|
|
27
|
+
on duplicate keys before the dict is constructed.
|
|
28
|
+
|
|
29
|
+
3. **JSON-Path-style error locations.** Validation errors report where in the
|
|
30
|
+
input the problem occurred (``$.evidence[2].sha256``), which matters when
|
|
31
|
+
manifests have many fields.
|
|
32
|
+
|
|
33
|
+
Strict mode restrictions
|
|
34
|
+
------------------------
|
|
35
|
+
|
|
36
|
+
When ``strict=True`` (the default), the canonicaliser rejects:
|
|
37
|
+
|
|
38
|
+
* **Floating-point numbers.** Floats have representation-dependent canonical
|
|
39
|
+
forms across platforms. Use scaled integers instead (``*_basis_points``
|
|
40
|
+
for percentages, ``*_minor_units`` for currency, ``*_ppm`` for parts per
|
|
41
|
+
million).
|
|
42
|
+
* **NaN and Infinity.** Not representable in JSON; would produce a
|
|
43
|
+
canonical form that no other implementation could agree on.
|
|
44
|
+
* **Integers outside the I-JSON safe range** ([-(2^53 - 1), 2^53 - 1]).
|
|
45
|
+
RFC 7493 (I-JSON) limits integers to this range because larger values are
|
|
46
|
+
not reliably preserved across JSON implementations. If a larger value
|
|
47
|
+
must be carried, encode it as a string.
|
|
48
|
+
* **Strings that cannot encode to UTF-8** (lone surrogate code points).
|
|
49
|
+
|
|
50
|
+
When ``strict=False``, the canonicaliser delegates directly to ``rfc8785.dumps``
|
|
51
|
+
without pre-validation. This mode produces pure RFC 8785 output and accepts
|
|
52
|
+
anything the underlying library accepts. Use this for general-purpose JCS work
|
|
53
|
+
where the strict restrictions are not appropriate.
|
|
54
|
+
|
|
55
|
+
Quick reference
|
|
56
|
+
---------------
|
|
57
|
+
|
|
58
|
+
::
|
|
59
|
+
|
|
60
|
+
from actproof.canonical import canonicalize, hash_canonical_hex
|
|
61
|
+
|
|
62
|
+
manifest = {
|
|
63
|
+
"act_type_id": "op:eu.nis2.art20.management_body_approval.v1",
|
|
64
|
+
"decision_date": "2026-05-14",
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
canonical_bytes = canonicalize(manifest)
|
|
68
|
+
# b'{"act_type_id":"op:eu.nis2.art20.management_body_approval.v1",
|
|
69
|
+
# "decision_date":"2026-05-14"}'
|
|
70
|
+
|
|
71
|
+
manifest_hash = hash_canonical_hex(manifest)
|
|
72
|
+
# "a3f2c1..."  (64 lowercase hex characters)
|
|
73
|
+
|
|
74
|
+
API
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
``canonicalize(obj, *, strict=True) -> bytes``
|
|
78
|
+
Primary entry point. Returns UTF-8 encoded bytes of the canonical
|
|
79
|
+
representation.
|
|
80
|
+
|
|
81
|
+
``canonicalize_str(obj, *, strict=True) -> str``
|
|
82
|
+
Same as ``canonicalize`` but returns a Python ``str`` instead of bytes.
|
|
83
|
+
|
|
84
|
+
``canonicalize_from_json(json_str, *, strict=True) -> bytes``
|
|
85
|
+
Parse JSON text with duplicate-key detection, then canonicalise. Use this
|
|
86
|
+
when input arrives as JSON from an external party.
|
|
87
|
+
|
|
88
|
+
``hash_canonical(obj, *, strict=True) -> bytes``
|
|
89
|
+
Canonicalize and return the SHA-256 raw digest (32 bytes).
|
|
90
|
+
|
|
91
|
+
``hash_canonical_hex(obj, *, strict=True) -> str``
|
|
92
|
+
Canonicalize and return the SHA-256 hex digest (64 lowercase hex chars).
|
|
93
|
+
|
|
94
|
+
References
|
|
95
|
+
----------
|
|
96
|
+
|
|
97
|
+
* RFC 8785: https://datatracker.ietf.org/doc/html/rfc8785
|
|
98
|
+
* RFC 7493 (I-JSON): https://datatracker.ietf.org/doc/html/rfc7493
|
|
99
|
+
* rfc8785 library: https://github.com/trailofbits/rfc8785.py
|
|
100
|
+
"""
|
|
101
|
+
|
|
102
|
+
from __future__ import annotations
|
|
103
|
+
|
|
104
|
+
import hashlib
|
|
105
|
+
import json
|
|
106
|
+
import math
|
|
107
|
+
from typing import Any
|
|
108
|
+
|
|
109
|
+
try:
|
|
110
|
+
import rfc8785
|
|
111
|
+
except ImportError as exc: # pragma: no cover
|
|
112
|
+
raise ImportError(
|
|
113
|
+
"rfc8785 is required. Install with: pip install 'rfc8785>=0.1.4'"
|
|
114
|
+
) from exc
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Names exported by ``from actproof.canonical import *``.
__all__ = [
    # Canonicalisation entry points.
    "canonicalize",
    "canonicalize_str",
    "canonicalize_from_json",
    # SHA-256 helpers computed over the canonical bytes.
    "hash_canonical",
    "hash_canonical_hex",
    # Bounds of the I-JSON safe integer range.
    "IJSON_MAX_SAFE_INT",
    "IJSON_MIN_SAFE_INT",
    # Error raised on strict-mode violations and duplicate keys.
    "CanonicalizationError",
]
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ─────────────────────────────────────────────────────────────────
|
|
130
|
+
# CONSTANTS
|
|
131
|
+
# ─────────────────────────────────────────────────────────────────
|
|
132
|
+
|
|
133
|
+
# Bounds of the I-JSON interoperable integer range (RFC 7493 section 2.2):
# [-(2**53 - 1), 2**53 - 1]. A whole number outside these bounds is not
# reliably preserved by every JSON implementation, so a value that has to
# survive a round trip must be carried as a string instead.
IJSON_MAX_SAFE_INT: int = (1 << 53) - 1
IJSON_MIN_SAFE_INT: int = -IJSON_MAX_SAFE_INT
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# ─────────────────────────────────────────────────────────────────
|
|
141
|
+
# EXCEPTIONS
|
|
142
|
+
# ─────────────────────────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
class CanonicalizationError(ValueError):
    """Signal that an input cannot be canonicalised under actproof's rules.

    The class derives from ``ValueError``. Code that treats every
    input-validation failure alike can therefore keep catching
    ``ValueError``, while code that must tell canonicalisation failures
    apart from other value errors can catch ``CanonicalizationError``.

    Messages raised by the strict-mode validator carry a JSON-Path-style
    location (for example ``$.foo.bar[2]``) naming the offending node.
    """

    pass
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# ─────────────────────────────────────────────────────────────────
|
|
158
|
+
# PUBLIC API
|
|
159
|
+
# ─────────────────────────────────────────────────────────────────
|
|
160
|
+
|
|
161
|
+
def canonicalize(obj: Any, *, strict: bool = True) -> bytes:
    """Produce the RFC 8785 canonical byte encoding of ``obj``.

    Args:
        obj: The value to canonicalise. In strict mode it must be built
            only from ``dict`` (with ``str`` keys), ``list``, ``str``,
            ``int``, ``bool`` and ``None``; floats are rejected.
        strict: When ``True`` (the default) the input is validated first:
            no floats, no NaN/Infinity, integers within the I-JSON safe
            range, and strings and keys that encode to UTF-8. When
            ``False`` the input is passed to ``rfc8785.dumps`` without
            any pre-validation.

    Returns:
        The canonical JSON representation as UTF-8 encoded bytes.

    Raises:
        CanonicalizationError: If ``strict=True`` and the input breaks a
            strict-mode restriction, including a value of an unsupported
            type.

        Any exception raised by ``rfc8785.dumps`` propagates unchanged
        (for example its ``IntegerDomainError`` for an out-of-range
        integer when ``strict=False``).
    """
    if not strict:
        # Pure RFC 8785: hand the input straight to the wrapped library.
        return rfc8785.dumps(obj)

    # Fail early, with a JSON-Path location, before any bytes are produced.
    _validate_strict(obj, "$")
    return rfc8785.dumps(obj)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def canonicalize_str(obj: Any, *, strict: bool = True) -> str:
    """Return the RFC 8785 canonical form of ``obj`` as text.

    This is ``canonicalize`` followed by a UTF-8 decode of its result.

    Args:
        obj: The value to canonicalise; see ``canonicalize``.
        strict: Whether strict-mode validation is applied. Default ``True``.

    Returns:
        The canonical JSON representation as a ``str``.
    """
    canonical_bytes = canonicalize(obj, strict=strict)
    return str(canonical_bytes, "utf-8")
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def canonicalize_from_json(json_str: str, *, strict: bool = True) -> bytes:
    """Canonicalise JSON text, refusing objects that repeat a key.

    Intended for JSON received from an external party. A plain
    ``json.loads`` keeps only the last value when a key repeats; here the
    parse runs with ``_detect_duplicate_keys`` as ``object_pairs_hook``,
    so a repeated key raises before the corresponding dict exists. The
    hook is installed whatever the value of ``strict``.

    Args:
        json_str: The JSON text to parse and canonicalise.
        strict: Whether strict-mode validation is applied to the parsed
            value. Default ``True``.

    Returns:
        The canonical JSON representation as UTF-8 encoded bytes.

    Raises:
        CanonicalizationError: If an object in the text repeats a key, or
            if the parsed value breaks a strict-mode restriction.
        json.JSONDecodeError: If ``json_str`` is not valid JSON.
    """
    parsed = json.loads(json_str, object_pairs_hook=_detect_duplicate_keys)
    return canonicalize(parsed, strict=strict)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def hash_canonical(obj: Any, *, strict: bool = True) -> bytes:
    """Return the raw SHA-256 digest of the canonical form of ``obj``.

    Shortcut for the most common operation: canonicalise a manifest and
    compute the hash that is anchored on the public ledger.

    Args:
        obj: The value to canonicalise and hash.
        strict: Whether strict-mode validation is applied. Default ``True``.

    Returns:
        The 32-byte SHA-256 digest of the canonical bytes.
    """
    hasher = hashlib.sha256()
    hasher.update(canonicalize(obj, strict=strict))
    return hasher.digest()
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def hash_canonical_hex(obj: Any, *, strict: bool = True) -> str:
    """Return the SHA-256 digest of the canonical form of ``obj`` as hex.

    Args:
        obj: The value to canonicalise and hash.
        strict: Whether strict-mode validation is applied. Default ``True``.

    Returns:
        The digest as 64 lowercase hexadecimal characters.
    """
    raw_digest = hash_canonical(obj, strict=strict)
    return raw_digest.hex()
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
# ─────────────────────────────────────────────────────────────────
|
|
254
|
+
# INTERNAL: STRICT-MODE VALIDATION
|
|
255
|
+
# ─────────────────────────────────────────────────────────────────
|
|
256
|
+
|
|
257
|
+
def _validate_strict(node: Any, path: str) -> None:
    """Check ``node`` and everything beneath it against the strict-mode rules.

    The walk is depth-first and stops at the first violation.

    Args:
        node: The value to check.
        path: JSON-Path-style location of ``node`` (``$`` for the root),
            used in error messages.

    Raises:
        CanonicalizationError: If ``node`` or a descendant is a float
            (including NaN/Infinity), an integer outside the I-JSON safe
            range, a string or key that cannot encode to UTF-8, a
            non-string dict key, or a value of an unsupported type.
    """
    # ``None`` and booleans are always acceptable. The bool test must come
    # before the int test below because ``bool`` is a subclass of ``int``.
    if node is None or isinstance(node, bool):
        return

    if isinstance(node, str):
        try:
            node.encode("utf-8", errors="strict")
        except UnicodeEncodeError as exc:
            raise CanonicalizationError(
                f"String at {path} contains code points that cannot encode to "
                f"UTF-8 (typically lone surrogates): {exc}. Strict mode requires "
                "all strings to be valid UTF-8."
            ) from exc
        return

    if isinstance(node, int):
        if IJSON_MIN_SAFE_INT <= node <= IJSON_MAX_SAFE_INT:
            return
        raise CanonicalizationError(
            f"Integer {node} at {path} exceeds I-JSON safe range "
            f"[{IJSON_MIN_SAFE_INT}, {IJSON_MAX_SAFE_INT}] "
            "(RFC 7493 section 2.2). Integers outside this range are not "
            "reliably preserved across JSON implementations. Encode as a "
            "string if a larger value must be carried, or pass strict=False."
        )

    if isinstance(node, float):
        # Every float is rejected; non-finite values get their own wording.
        if math.isnan(node):
            non_finite = "NaN"
        elif math.isinf(node):
            non_finite = "Infinity"
        else:
            non_finite = None
        if non_finite is not None:
            raise CanonicalizationError(
                f"{non_finite} at {path}: not representable in canonical JSON. "
                "Strict mode forbids NaN and Infinity."
            )
        raise CanonicalizationError(
            f"Floating-point number {node} at {path}: not allowed in strict "
            "mode. Floats have representation-dependent canonical forms across "
            "platforms. Use scaled integers (e.g. *_basis_points for "
            "percentages, *_minor_units for currency, *_ppm for parts per "
            "million), or pass strict=False if you do not need cross-platform "
            "reproducibility."
        )

    if isinstance(node, list):
        for index, element in enumerate(node):
            _validate_strict(element, f"{path}[{index}]")
        return

    if isinstance(node, dict):
        for key, value in node.items():
            # Each key is checked before descending into its value.
            if not isinstance(key, str):
                raise CanonicalizationError(
                    f"Non-string key at {path}: {type(key).__name__} {key!r}. "
                    "JSON requires string keys."
                )
            try:
                key.encode("utf-8", errors="strict")
            except UnicodeEncodeError as exc:
                raise CanonicalizationError(
                    f"Key {key!r} at {path} contains code points that cannot "
                    f"encode to UTF-8: {exc}."
                ) from exc
            _validate_strict(value, f"{path}.{key}")
        return

    raise CanonicalizationError(
        f"Unsupported type at {path}: {type(node).__name__}. "
        "actproof canonical accepts dict, list, str, int, bool, None."
    )
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
# ─────────────────────────────────────────────────────────────────
|
|
341
|
+
# INTERNAL: DUPLICATE KEY DETECTION
|
|
342
|
+
# ─────────────────────────────────────────────────────────────────
|
|
343
|
+
|
|
344
|
+
def _detect_duplicate_keys(pairs: list[tuple[str, Any]]) -> dict[str, Any]:
    """``object_pairs_hook`` for ``json.loads`` that rejects repeated keys.

    Python's default ``json.loads`` silently keeps the last value when a
    key repeats. If such input were accepted, the canonical bytes would
    depend on which occurrence a parser happened to keep, so a repeated
    key is treated as an error instead.

    Args:
        pairs: The ``(key, value)`` pairs of one JSON object, in document
            order, as supplied by ``json.loads``.

    Returns:
        A dict built from ``pairs`` when every key is unique.

    Raises:
        CanonicalizationError: If any key occurs more than once. The
            message lists each repeated key once, in sorted order.
    """
    result = dict(pairs)
    if len(result) == len(pairs):
        # Building the dict dropped nothing, so every key was unique.
        return result

    # Collect the repeated keys in one pass. Sets keep this linear even
    # when externally supplied text repeats a very large number of keys.
    seen: set[str] = set()
    repeated: set[str] = set()
    for key, _ in pairs:
        if key in seen:
            repeated.add(key)
        else:
            seen.add(key)

    raise CanonicalizationError(
        "Duplicate keys forbidden by RFC 8785. "
        f"Duplicates found: {sorted(repeated)}"
    )
|