proxilion 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proxilion/__init__.py +136 -0
- proxilion/audit/__init__.py +133 -0
- proxilion/audit/base_exporters.py +527 -0
- proxilion/audit/compliance/__init__.py +130 -0
- proxilion/audit/compliance/base.py +457 -0
- proxilion/audit/compliance/eu_ai_act.py +603 -0
- proxilion/audit/compliance/iso27001.py +544 -0
- proxilion/audit/compliance/soc2.py +491 -0
- proxilion/audit/events.py +493 -0
- proxilion/audit/explainability.py +1173 -0
- proxilion/audit/exporters/__init__.py +58 -0
- proxilion/audit/exporters/aws_s3.py +636 -0
- proxilion/audit/exporters/azure_storage.py +608 -0
- proxilion/audit/exporters/cloud_base.py +468 -0
- proxilion/audit/exporters/gcp_storage.py +570 -0
- proxilion/audit/exporters/multi_exporter.py +498 -0
- proxilion/audit/hash_chain.py +652 -0
- proxilion/audit/logger.py +543 -0
- proxilion/caching/__init__.py +49 -0
- proxilion/caching/tool_cache.py +633 -0
- proxilion/context/__init__.py +73 -0
- proxilion/context/context_window.py +556 -0
- proxilion/context/message_history.py +505 -0
- proxilion/context/session.py +735 -0
- proxilion/contrib/__init__.py +51 -0
- proxilion/contrib/anthropic.py +609 -0
- proxilion/contrib/google.py +1012 -0
- proxilion/contrib/langchain.py +641 -0
- proxilion/contrib/mcp.py +893 -0
- proxilion/contrib/openai.py +646 -0
- proxilion/core.py +3058 -0
- proxilion/decorators.py +966 -0
- proxilion/engines/__init__.py +287 -0
- proxilion/engines/base.py +266 -0
- proxilion/engines/casbin_engine.py +412 -0
- proxilion/engines/opa_engine.py +493 -0
- proxilion/engines/simple.py +437 -0
- proxilion/exceptions.py +887 -0
- proxilion/guards/__init__.py +54 -0
- proxilion/guards/input_guard.py +522 -0
- proxilion/guards/output_guard.py +634 -0
- proxilion/observability/__init__.py +198 -0
- proxilion/observability/cost_tracker.py +866 -0
- proxilion/observability/hooks.py +683 -0
- proxilion/observability/metrics.py +798 -0
- proxilion/observability/session_cost_tracker.py +1063 -0
- proxilion/policies/__init__.py +67 -0
- proxilion/policies/base.py +304 -0
- proxilion/policies/builtin.py +486 -0
- proxilion/policies/registry.py +376 -0
- proxilion/providers/__init__.py +201 -0
- proxilion/providers/adapter.py +468 -0
- proxilion/providers/anthropic_adapter.py +330 -0
- proxilion/providers/gemini_adapter.py +391 -0
- proxilion/providers/openai_adapter.py +294 -0
- proxilion/py.typed +0 -0
- proxilion/resilience/__init__.py +81 -0
- proxilion/resilience/degradation.py +615 -0
- proxilion/resilience/fallback.py +555 -0
- proxilion/resilience/retry.py +554 -0
- proxilion/scheduling/__init__.py +57 -0
- proxilion/scheduling/priority_queue.py +419 -0
- proxilion/scheduling/scheduler.py +459 -0
- proxilion/security/__init__.py +244 -0
- proxilion/security/agent_trust.py +968 -0
- proxilion/security/behavioral_drift.py +794 -0
- proxilion/security/cascade_protection.py +869 -0
- proxilion/security/circuit_breaker.py +428 -0
- proxilion/security/cost_limiter.py +690 -0
- proxilion/security/idor_protection.py +460 -0
- proxilion/security/intent_capsule.py +849 -0
- proxilion/security/intent_validator.py +495 -0
- proxilion/security/memory_integrity.py +767 -0
- proxilion/security/rate_limiter.py +509 -0
- proxilion/security/scope_enforcer.py +680 -0
- proxilion/security/sequence_validator.py +636 -0
- proxilion/security/trust_boundaries.py +784 -0
- proxilion/streaming/__init__.py +70 -0
- proxilion/streaming/detector.py +761 -0
- proxilion/streaming/transformer.py +674 -0
- proxilion/timeouts/__init__.py +55 -0
- proxilion/timeouts/decorators.py +477 -0
- proxilion/timeouts/manager.py +545 -0
- proxilion/tools/__init__.py +69 -0
- proxilion/tools/decorators.py +493 -0
- proxilion/tools/registry.py +732 -0
- proxilion/types.py +339 -0
- proxilion/validation/__init__.py +93 -0
- proxilion/validation/pydantic_schema.py +351 -0
- proxilion/validation/schema.py +651 -0
- proxilion-0.0.1.dist-info/METADATA +872 -0
- proxilion-0.0.1.dist-info/RECORD +94 -0
- proxilion-0.0.1.dist-info/WHEEL +4 -0
- proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,652 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hash chain implementation for tamper-evident audit logging.
|
|
3
|
+
|
|
4
|
+
This module provides cryptographic data structures for ensuring
|
|
5
|
+
the integrity of audit logs:
|
|
6
|
+
|
|
7
|
+
- HashChain: Linear chain where each event links to the previous
|
|
8
|
+
- MerkleTree: Binary tree for batch aggregation and efficient proofs
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import hashlib
|
|
14
|
+
import json
|
|
15
|
+
import threading
|
|
16
|
+
from collections.abc import Iterator
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from proxilion.audit.events import AuditEventV2
|
|
21
|
+
|
|
22
|
+
# Genesis hash is the starting point of every new chain
|
|
23
|
+
GENESIS_HASH = "sha256:0000000000000000000000000000000000000000000000000000000000000000"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class ChainVerificationResult:
    """Result of verifying a hash chain."""
    # True when every event's linkage and stored hash checked out.
    valid: bool
    # Human-readable description of the first failure; None when valid.
    error_message: str | None = None
    # Index of the event at which verification failed; None when valid.
    error_index: int | None = None
    # Number of events that verified successfully before stopping.
    verified_count: int = 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class HashChain:
    """
    Tamper-evident, append-only chain of audit events.

    Every appended event records the hash of its predecessor, anchoring the
    whole history to the genesis hash. Altering any historical event breaks
    the linkage of everything that follows it, which verify() detects.

    Thread Safety:
        A reentrant lock guards every read and write.

    Example:
        >>> chain = HashChain()
        >>> event1 = AuditEventV2(data=..., previous_hash=GENESIS_HASH)
        >>> event1 = chain.append(event1)
        >>> print(event1.event_hash)  # Hash is now computed
        >>>
        >>> event2 = AuditEventV2(data=..., previous_hash=event1.event_hash)
        >>> event2 = chain.append(event2)
        >>>
        >>> result = chain.verify()
        >>> print(result.valid)  # True if chain is intact
    """

    def __init__(self) -> None:
        """Create a chain containing no events."""
        self._events: list[AuditEventV2] = []
        self._hashes: dict[str, int] = {}  # event hash -> position in chain
        self._lock = threading.RLock()
        self._last_hash = GENESIS_HASH

    @property
    def genesis_hash(self) -> str:
        """The constant hash every new chain starts from."""
        return GENESIS_HASH

    @property
    def last_hash(self) -> str:
        """Hash of the most recently appended event (genesis when empty)."""
        with self._lock:
            return self._last_hash

    @property
    def length(self) -> int:
        """Number of events currently in the chain."""
        with self._lock:
            return len(self._events)

    def append(self, event: AuditEventV2) -> AuditEventV2:
        """
        Append an event whose previous_hash already points at the head.

        Computes and stores the event's own hash as a side effect, then
        advances the chain head to it.

        Args:
            event: The event to append.

        Returns:
            The same event, now carrying its computed hash.

        Raises:
            ValueError: If event.previous_hash is not the current head.
        """
        with self._lock:
            # Reject events that do not extend the current head.
            if event.previous_hash != self._last_hash:
                raise ValueError(
                    f"Event previous_hash ({event.previous_hash}) doesn't match "
                    f"chain head ({self._last_hash})"
                )

            event.compute_hash()

            # Record position under the new hash, then advance the head.
            self._hashes[event.event_hash] = len(self._events)
            self._events.append(event)
            self._last_hash = event.event_hash
            return event

    def create_and_append(self, event: AuditEventV2) -> AuditEventV2:
        """
        Point the event at the current head, then append it.

        Convenience wrapper around append(): overwrites the event's
        previous_hash with the chain head so callers need not track it.
        (AuditEventV2 is a non-frozen dataclass, so in-place mutation works.)

        Args:
            event: The event to modify and append.

        Returns:
            The event with computed hash.
        """
        with self._lock:
            event.previous_hash = self._last_hash
            return self.append(event)

    def verify(self) -> ChainVerificationResult:
        """
        Walk the whole chain and check its integrity.

        Checks that:
        1. The first event links to the genesis hash
        2. Each event's previous_hash matches the prior event's hash
        3. Each event's stored hash matches its recomputed hash

        Returns:
            ChainVerificationResult with validity status and details.
        """
        with self._lock:
            if not self._events:
                return ChainVerificationResult(valid=True, verified_count=0)

            # The very first link must anchor to genesis.
            if self._events[0].previous_hash != GENESIS_HASH:
                return ChainVerificationResult(
                    valid=False,
                    error_message="First event doesn't link to genesis hash",
                    error_index=0,
                    verified_count=0,
                )

            expected_previous = GENESIS_HASH
            for position, entry in enumerate(self._events):
                # Linkage check: each event must name its predecessor's hash.
                if entry.previous_hash != expected_previous:
                    return ChainVerificationResult(
                        valid=False,
                        error_message=(
                            f"Chain broken at index {position}: expected previous_hash "
                            f"{expected_previous}, got {entry.previous_hash}"
                        ),
                        error_index=position,
                        verified_count=position,
                    )

                # Content check: the stored hash must match a recomputation.
                if not entry.verify_hash():
                    return ChainVerificationResult(
                        valid=False,
                        error_message=f"Invalid hash at index {position}: event tampered",
                        error_index=position,
                        verified_count=position,
                    )

                expected_previous = entry.event_hash

            return ChainVerificationResult(
                valid=True,
                verified_count=len(self._events),
            )

    def get_event(self, index: int) -> AuditEventV2 | None:
        """
        Get an event by index.

        Args:
            index: Zero-based index of the event.

        Returns:
            The event at that index, or None if out of bounds.
        """
        with self._lock:
            if index < 0 or index >= len(self._events):
                return None
            return self._events[index]

    def get_event_by_hash(self, event_hash: str) -> AuditEventV2 | None:
        """
        Get an event by its hash.

        Args:
            event_hash: The event's hash.

        Returns:
            The event with that hash, or None if not found.
        """
        with self._lock:
            position = self._hashes.get(event_hash)
            return None if position is None else self._events[position]

    def get_proof(self, event_id: str) -> list[str]:
        """
        Get the hash path from an event to the chain head.

        The path starts with the event's own hash and runs through every
        subsequent event's hash, letting a holder of the chain head confirm
        membership without the full event bodies.

        Args:
            event_id: The event's ID.

        Returns:
            List of hashes from the event to the chain head; empty when
            no event with that ID exists.
        """
        with self._lock:
            start = next(
                (i for i, entry in enumerate(self._events) if entry.event_id == event_id),
                None,
            )
            if start is None:
                return []
            return [entry.event_hash for entry in self._events[start:]]

    def __iter__(self) -> Iterator[AuditEventV2]:
        """Iterate over a snapshot of the events in append order."""
        with self._lock:
            return iter(list(self._events))

    def __len__(self) -> int:
        """Number of events (same as the length property)."""
        return self.length

    def to_list(self) -> list[dict[str, Any]]:
        """Serialize every event to a dictionary, in chain order."""
        with self._lock:
            return [entry.to_dict() for entry in self._events]
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class MerkleTree:
    """
    A Merkle tree for batch aggregation of audit events.

    Merkle trees allow efficient verification of event inclusion
    without needing the entire dataset. Events are grouped into
    batches, and each batch is summarized by a Merkle root hash.

    This is useful for:
    - Periodic external anchoring (e.g., to a blockchain)
    - Efficient inclusion proofs for specific events
    - Batch verification of audit logs

    Thread Safety:
        All operations are thread-safe using internal locking.

    Example:
        >>> tree = MerkleTree()
        >>> tree.add_leaf("sha256:abc...")
        >>> tree.add_leaf("sha256:def...")
        >>> root = tree.compute_root()
        >>> proof = tree.get_proof(0)  # Proof for first leaf
    """

    def __init__(self) -> None:
        """Initialize an empty Merkle tree."""
        self._leaves: list[str] = []
        self._root: str | None = None  # cached root; None until computed
        self._lock = threading.RLock()

    @property
    def leaf_count(self) -> int:
        """Get the number of leaves."""
        with self._lock:
            return len(self._leaves)

    @property
    def root(self) -> str | None:
        """Get the Merkle root (None if tree is empty or not computed)."""
        with self._lock:
            return self._root

    def add_leaf(self, hash_value: str) -> int:
        """
        Add a leaf (hash) to the tree.

        Args:
            hash_value: The hash to add as a leaf.

        Returns:
            The index of the added leaf.
        """
        with self._lock:
            index = len(self._leaves)
            self._leaves.append(hash_value)
            self._root = None  # Invalidate cached root
            return index

    def add_event(self, event: AuditEventV2) -> int:
        """
        Add an event's hash as a leaf.

        Computes the event's hash first if it has none.

        Args:
            event: The audit event.

        Returns:
            The index of the added leaf.
        """
        if not event.event_hash:
            event.compute_hash()
        return self.add_leaf(event.event_hash)

    def compute_root(self) -> str:
        """
        Compute the Merkle root hash.

        Levels with an odd node count pair their last node with a copy of
        itself. The result is cached until the next add_leaf()/clear().

        Returns:
            The Merkle root hash.

        Raises:
            ValueError: If the tree is empty.
        """
        with self._lock:
            if not self._leaves:
                raise ValueError("Cannot compute root of empty tree")

            # Use cached root if available
            if self._root is not None:
                return self._root

            # Collapse level by level until a single hash remains.
            current_level = self._leaves.copy()

            while len(current_level) > 1:
                next_level = []
                for i in range(0, len(current_level), 2):
                    left = current_level[i]
                    # If odd number of nodes, duplicate the last one
                    right = current_level[i + 1] if i + 1 < len(current_level) else left
                    combined = self._hash_pair(left, right)
                    next_level.append(combined)
                current_level = next_level

            self._root = current_level[0]
            return self._root

    def get_proof(self, leaf_index: int) -> list[tuple[str, str]]:
        """
        Get the Merkle proof for a leaf.

        The proof is a list of (hash, side) tuples that can be used
        to verify the leaf is part of the tree with the computed root.

        Args:
            leaf_index: Index of the leaf.

        Returns:
            List of (hash, side) tuples where side is 'L' or 'R'.

        Raises:
            IndexError: If leaf_index is out of bounds.
        """
        with self._lock:
            if not 0 <= leaf_index < len(self._leaves):
                raise IndexError(f"Leaf index {leaf_index} out of bounds")

            # A single-leaf tree's root is the leaf itself: empty proof.
            if len(self._leaves) == 1:
                return []

            proof = []
            current_level = self._leaves.copy()
            index = leaf_index

            while len(current_level) > 1:
                next_level = []
                for i in range(0, len(current_level), 2):
                    left = current_level[i]
                    # Odd node count: the last node pairs with itself,
                    # mirroring compute_root().
                    right = current_level[i + 1] if i + 1 < len(current_level) else left

                    # If this pair contains our target
                    if i == (index // 2) * 2:
                        if index % 2 == 0:
                            # Target is on left, sibling is on right.
                            # BUG FIX: the sibling must be recorded even when
                            # it is the duplicated node itself, otherwise
                            # verify_proof() cannot recompute hash(node, node)
                            # and proofs for the last leaf of an odd-width
                            # level fail verification.
                            proof.append((right, 'R'))
                        else:
                            # Target is on right, sibling is on left
                            proof.append((left, 'L'))

                    next_level.append(self._hash_pair(left, right))

                current_level = next_level
                index = index // 2

            return proof

    def verify_proof(
        self,
        leaf_hash: str,
        proof: list[tuple[str, str]],
        expected_root: str,
    ) -> bool:
        """
        Verify a Merkle proof.

        Folds the leaf hash with each proof sibling (on the recorded side)
        and compares the result against the expected root.

        Args:
            leaf_hash: The hash of the leaf being verified.
            proof: The Merkle proof from get_proof().
            expected_root: The expected Merkle root.

        Returns:
            True if the proof is valid.
        """
        current = leaf_hash

        for sibling_hash, side in proof:
            if side == 'L':
                current = self._hash_pair(sibling_hash, current)
            else:
                current = self._hash_pair(current, sibling_hash)

        return current == expected_root

    def _hash_pair(self, left: str, right: str) -> str:
        """Hash two values together."""
        # Remove 'sha256:' prefix for consistent hashing. The payload is
        # hex, which cannot contain the prefix, so replace() is safe here.
        left_clean = left.replace("sha256:", "")
        right_clean = right.replace("sha256:", "")
        combined = f"{left_clean}{right_clean}"
        hash_value = hashlib.sha256(combined.encode()).hexdigest()
        return f"sha256:{hash_value}"

    def clear(self) -> None:
        """Clear all leaves from the tree."""
        with self._lock:
            self._leaves.clear()
            self._root = None

    def to_dict(self) -> dict[str, Any]:
        """Export tree state as a dictionary."""
        with self._lock:
            return {
                "leaves": self._leaves.copy(),
                "root": self._root,
                "leaf_count": len(self._leaves),
            }
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
@dataclass
class MerkleBatch:
    """
    A batch of audit events with Merkle tree aggregation.

    Batches are created periodically (e.g., every N events or every hour)
    and can be externally anchored for additional tamper evidence.

    Attributes:
        batch_id: Unique identifier for this batch.
        start_sequence: First sequence number in batch.
        end_sequence: Last sequence number in batch.
        event_count: Number of events in batch.
        merkle_root: The Merkle root hash.
        created_at: When the batch was finalized.
        previous_batch_root: Merkle root of previous batch (for chaining).
    """
    batch_id: str
    start_sequence: int
    end_sequence: int
    event_count: int
    merkle_root: str
    created_at: str
    previous_batch_root: str | None = None

    # Field names exported by to_dict(), in declaration order.
    _EXPORT_FIELDS = (
        "batch_id",
        "start_sequence",
        "end_sequence",
        "event_count",
        "merkle_root",
        "created_at",
        "previous_batch_root",
    )

    def to_dict(self) -> dict[str, Any]:
        """Return the batch metadata as a plain dictionary."""
        return {name: getattr(self, name) for name in self._EXPORT_FIELDS}

    def to_json(self) -> str:
        """Return the batch metadata as deterministic (key-sorted) JSON."""
        return json.dumps(self.to_dict(), sort_keys=True)
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
class BatchedHashChain:
    """
    Hash chain with periodic Merkle tree batching.

    Combines linear hash chaining with batch aggregation
    for efficient verification and external anchoring.

    Thread Safety:
        All operations are guarded by an internal reentrant lock.

    Example:
        >>> chain = BatchedHashChain(batch_size=100)
        >>> for event in events:
        ...     chain.append(event)
        >>> batch = chain.finalize_batch()  # Creates Merkle root
    """

    def __init__(self, batch_size: int = 100) -> None:
        """
        Initialize the batched hash chain.

        Args:
            batch_size: Number of events per batch; a batch is finalized
                automatically once this many events have accumulated.
        """
        self._chain = HashChain()
        self._current_tree = MerkleTree()  # tree for the in-progress batch
        self._batch_size = batch_size
        self._batches: list[MerkleBatch] = []
        self._lock = threading.RLock()
        self._batch_counter = 0  # monotonically increasing batch-id suffix

    @property
    def chain(self) -> HashChain:
        """Get the underlying hash chain."""
        return self._chain

    @property
    def batches(self) -> list[MerkleBatch]:
        """Get a snapshot of all finalized batches."""
        with self._lock:
            return list(self._batches)

    def append(self, event: AuditEventV2) -> AuditEventV2:
        """
        Append an event to the chain.

        Links the event to the current chain head, records its hash as a
        leaf of the in-progress Merkle tree, and automatically finalizes
        the batch when batch_size is reached.

        Args:
            event: The event to append.

        Returns:
            The event with computed hash and merkle_index set.
        """
        with self._lock:
            # Link to chain and compute hash
            event.previous_hash = self._chain.last_hash
            event = self._chain.append(event)

            # Add to current Merkle tree
            event.merkle_index = self._current_tree.add_leaf(event.event_hash)

            # Auto-finalize batch if full
            if self._current_tree.leaf_count >= self._batch_size:
                self._finalize_current_batch()

            return event

    def finalize_batch(self) -> MerkleBatch | None:
        """
        Finalize the current batch and start a new one.

        Returns:
            The finalized batch, or None if no events in current batch.
        """
        with self._lock:
            return self._finalize_current_batch()

    def _finalize_current_batch(self) -> MerkleBatch | None:
        """Build a MerkleBatch from the in-progress tree (caller holds lock)."""
        if self._current_tree.leaf_count == 0:
            return None

        # Local import keeps the module's top-level import block unchanged.
        from datetime import datetime, timezone

        self._batch_counter += 1
        merkle_root = self._current_tree.compute_root()

        # Sequence range: the in-progress tree covers the tail of the chain,
        # and sequences are treated as 1-based chain positions here.
        chain_length = self._chain.length
        start_seq = chain_length - self._current_tree.leaf_count + 1
        end_seq = chain_length

        batch = MerkleBatch(
            batch_id=f"batch_{self._batch_counter}",
            start_sequence=start_seq,
            end_sequence=end_seq,
            event_count=self._current_tree.leaf_count,
            merkle_root=merkle_root,
            created_at=datetime.now(timezone.utc).isoformat(),
            # Chain batches together via the previous batch's root.
            previous_batch_root=self._batches[-1].merkle_root if self._batches else None,
        )

        self._batches.append(batch)
        self._current_tree = MerkleTree()  # Start new tree
        return batch

    def get_inclusion_proof(self, event: AuditEventV2) -> dict[str, Any] | None:
        """
        Get proof that an event is included in a batch.

        Args:
            event: The event to prove inclusion for.

        Returns:
            Proof data including batch info, or None when the event was
            never appended (no merkle_index) or no finalized batch covers
            its sequence number.
        """
        if event.merkle_index is None:
            return None

        # FIX: _batches is shared mutable state; hold the lock like every
        # other accessor so a concurrent finalize_batch() cannot race us.
        with self._lock:
            # NOTE(review): assumes event.sequence_number matches the event's
            # 1-based position in the chain — confirm against AuditEventV2.
            event_seq = event.sequence_number

            # Find which batch contains this event
            for batch in self._batches:
                if batch.start_sequence <= event_seq <= batch.end_sequence:
                    # Reconstruct the tree for this batch to get proof
                    # (In production, you'd cache the tree structure)
                    return {
                        "event_hash": event.event_hash,
                        "batch_id": batch.batch_id,
                        "merkle_root": batch.merkle_root,
                        "merkle_index": event.merkle_index,
                        # Full proof would require storing tree structure
                    }

            return None