shiro-memory 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- antaris_memory/__init__.py +136 -0
- antaris_memory/audit.py +484 -0
- antaris_memory/compression.py +65 -0
- antaris_memory/confidence.py +45 -0
- antaris_memory/consolidation.py +120 -0
- antaris_memory/context_packet.py +485 -0
- antaris_memory/core.py +391 -0
- antaris_memory/core_v4.py +1867 -0
- antaris_memory/decay.py +89 -0
- antaris_memory/entry.py +147 -0
- antaris_memory/forgetting.py +55 -0
- antaris_memory/gating.py +180 -0
- antaris_memory/indexing.py +524 -0
- antaris_memory/instrumentation.py +236 -0
- antaris_memory/locking.py +260 -0
- antaris_memory/memory_types.py +97 -0
- antaris_memory/migration.py +379 -0
- antaris_memory/namespace.py +392 -0
- antaris_memory/performance.py +303 -0
- antaris_memory/recovery.py +252 -0
- antaris_memory/search.py +1129 -0
- antaris_memory/semantic.py +719 -0
- antaris_memory/sentiment.py +59 -0
- antaris_memory/shared.py +570 -0
- antaris_memory/synthesis.py +294 -0
- antaris_memory/temporal.py +56 -0
- antaris_memory/utils.py +89 -0
- antaris_memory/versioning.py +182 -0
- shiro_memory-2.0.0.dist-info/METADATA +726 -0
- shiro_memory-2.0.0.dist-info/RECORD +33 -0
- shiro_memory-2.0.0.dist-info/WHEEL +5 -0
- shiro_memory-2.0.0.dist-info/licenses/LICENSE +202 -0
- shiro_memory-2.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shiro Memory v2.0 — Enhanced persistent memory for OpenClaw agents.
|
|
3
|
+
|
|
4
|
+
v2.0 Features:
|
|
5
|
+
- 10-layer search engine (replaces v1.0 BM25)
|
|
6
|
+
- LLM enrichment with search keywords and summaries
|
|
7
|
+
- Enhanced entry fields: agent_id, content_hash, enriched_summary, search_keywords
|
|
8
|
+
- Pipeline bridge integration for seamless enrichment workflow
|
|
9
|
+
- Preserved all of Shiro's original namespace/session logic
|
|
10
|
+
|
|
11
|
+
Forked from Shiro's personal memory system with selective upgrades from antaris-suite.
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
from antaris_memory import MemorySystem
|
|
15
|
+
|
|
16
|
+
mem = MemorySystem("./workspace")
|
|
17
|
+
mem.load()
|
|
18
|
+
mem.ingest("Key decision made", source="meeting", category="strategic")
|
|
19
|
+
results = mem.search("decision")
|
|
20
|
+
mem.save()
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
__version__ = "2.0.0"
|
|
24
|
+
|
|
25
|
+
# Core
|
|
26
|
+
from antaris_memory.core_v4 import MemorySystemV4 as MemorySystem
|
|
27
|
+
from antaris_memory.entry import MemoryEntry
|
|
28
|
+
from antaris_memory.decay import DecayEngine
|
|
29
|
+
from antaris_memory.sentiment import SentimentTagger
|
|
30
|
+
from antaris_memory.temporal import TemporalEngine
|
|
31
|
+
from antaris_memory.confidence import ConfidenceEngine
|
|
32
|
+
from antaris_memory.compression import CompressionEngine
|
|
33
|
+
from antaris_memory.forgetting import ForgettingEngine
|
|
34
|
+
from antaris_memory.consolidation import ConsolidationEngine
|
|
35
|
+
from antaris_memory.gating import InputGate
|
|
36
|
+
from antaris_memory.synthesis import KnowledgeSynthesizer
|
|
37
|
+
|
|
38
|
+
# Multi-agent
|
|
39
|
+
from antaris_memory.shared import SharedMemoryPool, AgentPermission
|
|
40
|
+
|
|
41
|
+
# Storage
|
|
42
|
+
from antaris_memory.migration import MigrationManager, Migration
|
|
43
|
+
from antaris_memory.indexing import IndexManager, SearchIndex, TagIndex, DateIndex
|
|
44
|
+
|
|
45
|
+
# Concurrency
|
|
46
|
+
from antaris_memory.locking import FileLock, LockTimeout
|
|
47
|
+
from antaris_memory.versioning import VersionTracker, ConflictError
|
|
48
|
+
|
|
49
|
+
# Search
|
|
50
|
+
from antaris_memory.search import SearchEngine, SearchResult
|
|
51
|
+
|
|
52
|
+
# Context Packets (v1.1)
|
|
53
|
+
from antaris_memory.context_packet import ContextPacket, ContextPacketBuilder
|
|
54
|
+
|
|
55
|
+
# Recovery (v3.3.1)
|
|
56
|
+
from antaris_memory.recovery import RecoveryConfig, RecoveryManager
|
|
57
|
+
|
|
58
|
+
# Memory Types + Namespace Isolation (Sprint 2 + Sprint 8)
|
|
59
|
+
from antaris_memory.memory_types import MEMORY_TYPE_CONFIGS, get_type_config
|
|
60
|
+
from antaris_memory.namespace import NamespacedMemory, NamespaceManager
|
|
61
|
+
|
|
62
|
+
# Sprint 3 — Semantic utilities
|
|
63
|
+
from antaris_memory.utils import cosine_similarity
|
|
64
|
+
|
|
65
|
+
# v3.2 — Instrumentation
|
|
66
|
+
from antaris_memory.instrumentation import (
|
|
67
|
+
SearchContext,
|
|
68
|
+
extract_memory_references,
|
|
69
|
+
anonymize_query,
|
|
70
|
+
build_usage_signal,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# Backward compatibility - import legacy core if needed
|
|
74
|
+
try:
|
|
75
|
+
from antaris_memory.core import MemorySystem as LegacyMemorySystem
|
|
76
|
+
except ImportError:
|
|
77
|
+
LegacyMemorySystem = None
|
|
78
|
+
|
|
79
|
+
# Public API of the package: every name listed here must be imported above.
# ``LegacyMemorySystem`` is intentionally left out because it may be ``None``
# when the legacy core module is unavailable.
__all__ = [
    "MemorySystem",
    "MemoryEntry",

    # Core engines
    "DecayEngine",
    "SentimentTagger",
    "TemporalEngine",
    "ConfidenceEngine",
    "CompressionEngine",
    "ForgettingEngine",
    "ConsolidationEngine",
    "InputGate",
    "KnowledgeSynthesizer",

    # Multi-agent (v0.3)
    "SharedMemoryPool",
    "AgentPermission",

    # Production features (v0.4)
    "MigrationManager",
    "Migration",
    "IndexManager",
    "SearchIndex",
    "TagIndex",
    "DateIndex",

    # Concurrency (v0.5)
    "FileLock",
    "LockTimeout",
    "VersionTracker",
    "ConflictError",

    # Search (v1.0)
    "SearchEngine",
    "SearchResult",

    # Context Packets (v1.1)
    "ContextPacket",
    "ContextPacketBuilder",

    # Recovery (v3.3.1) — imported above but previously missing from __all__,
    # so ``from antaris_memory import *`` silently dropped them.
    "RecoveryConfig",
    "RecoveryManager",

    # Sprint 2 — Memory Types
    "MEMORY_TYPE_CONFIGS",
    "get_type_config",

    # Sprint 8 — Namespace Isolation
    "NamespacedMemory",
    "NamespaceManager",

    # Sprint 3 — Hybrid Semantic Search
    "cosine_similarity",

    # v3.2 — Instrumentation
    "SearchContext",
    "extract_memory_references",
    "anonymize_query",
    "build_usage_signal",
]
|
antaris_memory/audit.py
ADDED
|
@@ -0,0 +1,484 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Structured audit logging for antaris-memory v3.9.0.
|
|
3
|
+
|
|
4
|
+
Append-only JSON Lines (.jsonl) format audit trail for all memory operations.
|
|
5
|
+
Thread-safe via file locking. Auto-rotation when files exceed size limit.
|
|
6
|
+
PII anonymization via HMAC-SHA256 with a per-instance or persistent salt.
|
|
7
|
+
|
|
8
|
+
Example usage:
|
|
9
|
+
audit = AuditLogger("/path/to/workspace")
|
|
10
|
+
|
|
11
|
+
# Log memory operations
|
|
12
|
+
audit.log("ingest", {"id": "mem123", "source": "chat", "category": "technical"})
|
|
13
|
+
audit.log("search", {"query": "Python tips", "results_count": 5, "duration_ms": 23})
|
|
14
|
+
audit.log("recall", {"memory_id": "mem123", "context_id": "session_456"})
|
|
15
|
+
|
|
16
|
+
# Query audit trail
|
|
17
|
+
recent_recalls = audit.query(event_type="recall", since=time.time() - 3600)
|
|
18
|
+
stats = audit.stats()
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import hashlib
|
|
22
|
+
import hmac
|
|
23
|
+
import json
|
|
24
|
+
import os
|
|
25
|
+
import re
|
|
26
|
+
import stat
|
|
27
|
+
import time
|
|
28
|
+
import threading
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Dict, List, Optional
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
import fcntl
|
|
34
|
+
FCNTL_AVAILABLE = True
|
|
35
|
+
except ImportError:
|
|
36
|
+
FCNTL_AVAILABLE = False
|
|
37
|
+
|
|
38
|
+
try:
|
|
39
|
+
import msvcrt
|
|
40
|
+
MSVCRT_AVAILABLE = True
|
|
41
|
+
except ImportError:
|
|
42
|
+
MSVCRT_AVAILABLE = False
|
|
43
|
+
|
|
44
|
+
# Salt must be exactly 32 lowercase hex chars (128 bits from os.urandom(16).hex())
|
|
45
|
+
_SALT_RE = re.compile(r"^[0-9a-f]{32}$")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _safe_chmod_600(path: Path) -> None:
    """Restrict *path* to owner read/write (mode 0o600) on a best-effort basis.

    Any failure is swallowed on purpose: on Windows and on restricted
    filesystems the permission change may not be possible, and the caller
    must keep working regardless.

    Args:
        path: File whose permission bits should be tightened.
    """
    owner_read_write = stat.S_IRUSR | stat.S_IWUSR
    try:
        os.chmod(path, owner_read_write)
    except Exception:
        # Windows / restricted FS: best-effort only, nothing to report.
        return
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _read_or_create_salt(workspace: Path, initial_salt: str) -> str:
    """Load the persistent salt from ``.audit.salt``, creating it when needed.

    The stored value is accepted only if it matches ``_SALT_RE`` (exactly 32
    lowercase hex chars). When the file is missing, unreadable, or holds
    anything else, ``initial_salt`` is written in its place and returned.

    The file is created with mode 0o600 from the start (instead of being
    written with default permissions and restricted afterwards), so the salt
    is never briefly readable by other users. ``_safe_chmod_600`` is still
    applied afterwards to tighten a pre-existing file with wider permissions.

    Args:
        workspace: Directory containing the ``.audit.salt`` file; created if
            it does not exist.
        initial_salt: Fresh salt to use if the file is absent or corrupt.
            It is returned as given, without validation.

    Returns:
        The validated stored salt, or ``initial_salt`` when a new salt had to
        be written (or could not be persisted at all).
    """
    workspace.mkdir(parents=True, exist_ok=True)
    salt_file = workspace / ".audit.salt"

    if salt_file.exists():
        try:
            stored = salt_file.read_text(encoding="utf-8").strip()
        except Exception:
            stored = ""  # unreadable file is treated like a corrupt one
        if _SALT_RE.match(stored):
            return stored
        # Corrupt or invalid salt — fall through and regenerate.

    # First run, or regeneration after corruption.
    try:
        fd = os.open(salt_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(initial_salt)
    except Exception:
        # Cannot persist: keep working with the in-memory salt only.
        return initial_salt

    _safe_chmod_600(salt_file)
    return initial_salt
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class AuditLogger:
    """Append-only structured audit log for memory operations.

    Writes one JSON object per line to ``<workspace>/.audit.jsonl``. Calls
    within one process are serialized by an internal ``threading.Lock``;
    appends additionally take an OS-level file lock when ``fcntl`` or
    ``msvcrt`` is available. The current file is rotated once it grows past
    ``max_size_mb``.

    PII anonymization (see ``anonymize_entry``) uses HMAC-SHA256 keyed with a
    per-instance salt (128 bits from ``os.urandom(16)``). In persistent mode
    the salt is stored in ``.audit.salt`` and validated on load. Note that
    ``log`` writes ``data`` as given; anonymization is a separate, explicit
    step.

    Events logged (by convention of the callers):
        - ingest: memory stored (id, source, category, timestamp)
        - search: query executed (query, results_count, duration_ms)
        - recall: memory retrieved and used (memory_id, context_id)
        - decay: memory score decayed (memory_id, old_score, new_score)
        - delete: memory removed (memory_id, reason)
        - reinforce: memory reinforced (memory_id, boost)
        - share: memory shared to pool (memory_id, pool_id)
    """

    def __init__(
        self,
        workspace: str,
        max_size_mb: float = 10.0,
        max_files: int = 5,
        salt_mode: str = "ephemeral",
    ):
        """Initialize audit logger.

        Args:
            workspace: Directory path where audit logs will be stored
                (created if missing).
            max_size_mb: Maximum size of log file before rotation (default 10MB).
            max_files: Maximum number of rotated files to keep (default 5).
            salt_mode: Salt persistence mode — "ephemeral" (default, new salt
                each run) or "persistent" (salt stored in .audit.salt and
                reused across runs).
        """
        self.workspace = Path(workspace)
        self.workspace.mkdir(parents=True, exist_ok=True)

        self.audit_file = self.workspace / ".audit.jsonl"
        self.max_size_bytes = int(max_size_mb * 1024 * 1024)
        self.max_files = max_files
        self._salt_mode = salt_mode

        # Serializes log/query/stats/rotate calls made through this instance.
        self._lock = threading.Lock()

        # Per-instance salt for anonymization (prevents rainbow-table attacks).
        # os.urandom(16) = 128 bits of entropy.
        self._salt = os.urandom(16).hex()
        if salt_mode == "persistent":
            # Reuse the stored salt when valid; otherwise persist the fresh one.
            self._salt = _read_or_create_salt(self.workspace, self._salt)

        # Keys whose string values are hashed by anonymize_entry.
        self._pii_fields = {"content", "query", "text", "source"}

    def rotate_salt(self) -> None:
        """Generate a new salt, invalidating previous hashes.

        In persistent mode the new salt overwrites ``.audit.salt``. The file
        is written directly rather than through ``_read_or_create_salt``:
        that helper returns an existing valid salt unchanged, which made
        rotation a no-op whenever a salt file was already present. If the
        file cannot be written, the new salt is kept in memory only.
        """
        new_salt = os.urandom(16).hex()
        if self._salt_mode == "persistent":
            salt_file = self.workspace / ".audit.salt"
            try:
                # Create with 0o600 so a new file is never world-readable.
                fd = os.open(
                    salt_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600
                )
                with os.fdopen(fd, "w", encoding="utf-8") as handle:
                    handle.write(new_salt)
            except OSError:
                pass  # best-effort persistence; in-memory salt still rotates
            else:
                _safe_chmod_600(salt_file)
        self._salt = new_salt

    def log(self, event_type: str, data: dict) -> None:
        """Append a single audit event. Thread-safe.

        The entry is written as ``{"ts": <time.time()>, "event": event_type,
        "data": data}``. ``data`` is stored as given (not anonymized).

        Args:
            event_type: Type of event (ingest, search, recall, etc.)
            data: Event-specific data dictionary (must be JSON-serializable).
        """
        entry = {
            "ts": time.time(),
            "event": event_type,
            "data": data,
        }

        with self._lock:
            # Rotate first so the new entry lands in the fresh file.
            if (
                self.audit_file.exists()
                and self.audit_file.stat().st_size > self.max_size_bytes
            ):
                self._rotate_logs()

            self._write_entry(entry)

    def _tail_lines(self, n: int) -> List[str]:
        """Return the last ``n`` non-blank lines of the audit file.

        Reads backwards in fixed-size chunks. Each line is decoded on its own
        (strict UTF-8 first, ``errors='replace'`` as a per-line fallback) so a
        multibyte sequence split across chunk boundaries is never corrupted.

        The first fragment of the buffer is discarded whenever the read
        stopped before the start of the file, because it is the tail of a
        line whose beginning was not read. Reading therefore continues until
        the buffer holds more than ``n`` newlines, which guarantees ``n``
        complete lines once that fragment (and the empty piece after the
        final newline) are set aside.
        """
        if n <= 0 or not self.audit_file.exists():
            return []

        chunk_size = 8192
        buf = b""

        with open(self.audit_file, "rb") as f:
            f.seek(0, 2)
            remaining = f.tell()
            while remaining > 0 and buf.count(b"\n") <= n:
                read_size = min(chunk_size, remaining)
                remaining -= read_size
                f.seek(remaining)
                buf = f.read(read_size) + buf

        pieces = buf.split(b"\n")
        if remaining > 0:
            # Start of the file was not reached: pieces[0] is a partial line.
            pieces = pieces[1:]

        out: List[str] = []
        for raw in pieces:
            if not raw.strip():
                continue
            try:
                out.append(raw.decode("utf-8"))  # strict first
            except UnicodeDecodeError:
                out.append(raw.decode("utf-8", errors="replace"))
        return out[-n:]

    def _iter_entries(self):
        """Yield every parsable JSON-object line of the current audit file.

        Lines are read oldest first. Blank lines, lines that are not valid
        JSON, and JSON values that are not objects are skipped. Undecodable
        bytes are replaced rather than raising. I/O errors propagate to the
        caller.
        """
        with open(self.audit_file, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if isinstance(entry, dict):
                    yield entry

    def query(
        self,
        event_type: Optional[str] = None,
        since: Optional[float] = None,
        limit: int = 100,
    ) -> List[dict]:
        """Query recent audit events. Reads from the current file only.

        Fast path (``since=None``): parses the last ``limit * 3`` lines. If
        ``event_type`` is given and that window yields fewer than ``limit``
        matches, falls back to a full forward scan so rare event types older
        than the tail window are still found.

        Slow path (``since=<timestamp>``): full forward scan with time filter.

        Both paths filter on the parsed ``"event"`` field. The slow path used
        to rely on a substring test of the raw line alone, which returned
        entries that merely contained ``"event": "<type>"`` inside their data.

        Args:
            event_type: Filter by event type (optional).
            since: Unix timestamp — only return events with ``ts >= since``
                (optional). Entries with a non-numeric ``ts`` are skipped.
            limit: Maximum number of events to return (default 100). Values
                ``<= 0`` return an empty list.

        Returns:
            List of matching audit entries, newest first.
        """
        if limit <= 0 or not self.audit_file.exists():
            return []

        from collections import deque

        with self._lock:
            if since is None:
                # Fast path: newest-first walk over the tail window.
                results: List[dict] = []
                for line in reversed(self._tail_lines(limit * 3)):
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(entry, dict):
                        continue
                    if event_type and entry.get("event") != event_type:
                        continue
                    results.append(entry)
                    if len(results) >= limit:
                        break

                if not event_type or len(results) >= limit:
                    return results

                # Fallback for rare event types older than the tail window.
                ring = deque(maxlen=limit)
                try:
                    for entry in self._iter_entries():
                        if entry.get("event") == event_type:
                            ring.append(entry)
                except IOError:
                    return results  # fast-path partial results on I/O error
                return list(reversed(ring))

            # Slow path: full forward scan; the ring keeps the newest `limit`.
            ring = deque(maxlen=limit)
            try:
                for entry in self._iter_entries():
                    if event_type and entry.get("event") != event_type:
                        continue
                    ts = entry.get("ts", 0)
                    if not isinstance(ts, (int, float)) or ts < since:
                        continue
                    ring.append(entry)
            except IOError:
                return []
            return list(reversed(ring))

    def rotate(self) -> None:
        """Force a log rotation now, regardless of the current file size.

        Renames the current file to .audit.1.jsonl after shifting the older
        rotated files up by one. A new current file is created by the next
        ``log`` call.
        """
        with self._lock:
            self._rotate_logs()

    def stats(self) -> dict:
        """Return audit statistics for the current file.

        Returns:
            Dictionary with ``total_entries``, ``file_size_bytes``,
            ``event_counts`` (per event type; entries without an ``event``
            key count as "unknown"), ``oldest_entry_ts`` and
            ``newest_entry_ts`` (``None`` when no numeric timestamp was seen).
            On an I/O error ``file_size_bytes`` is 0 and the counters hold
            whatever was read before the failure.
        """
        event_counts: Dict[str, int] = {}
        total_entries = 0
        oldest_ts = None
        newest_ts = None
        file_size = 0

        if self.audit_file.exists():
            with self._lock:
                try:
                    file_size = self.audit_file.stat().st_size
                    for entry in self._iter_entries():
                        total_entries += 1

                        kind = entry.get("event", "unknown")
                        event_counts[kind] = event_counts.get(kind, 0) + 1

                        ts = entry.get("ts")
                        if isinstance(ts, (int, float)):
                            if oldest_ts is None or ts < oldest_ts:
                                oldest_ts = ts
                            if newest_ts is None or ts > newest_ts:
                                newest_ts = ts
                except IOError:
                    file_size = 0

        return {
            "total_entries": total_entries,
            "file_size_bytes": file_size,
            "event_counts": event_counts,
            "oldest_entry_ts": oldest_ts,
            "newest_entry_ts": newest_ts,
        }

    def anonymize_entry(self, entry: dict) -> dict:
        """Strip PII from an audit entry for safe export.

        Args:
            entry: Original audit entry dictionary (left unmodified; it must
                be JSON-serializable because the copy is made via JSON).

        Returns:
            New entry in which string values stored under a PII key
            (``content``, ``query``, ``text``, ``source``; case-insensitive)
            are replaced by ``"hmac:<32 hex chars>"`` (HMAC-SHA256 truncated
            to 128 bits). Structure is preserved.
        """
        # JSON round-trip acts as a deep copy so the original is not modified.
        anonymized = json.loads(json.dumps(entry))
        self._anonymize_dict(anonymized)
        return anonymized

    def _anonymize_dict(self, obj: dict) -> None:
        """Recursively hash PII fields of ``obj`` in place.

        Nested dictionaries are always descended into. Lists are walked too:
        dictionaries inside a list are anonymized, and strings inside a list
        stored under a PII key are hashed. Previously lists were skipped,
        which left PII in list-valued fields in clear text.
        """
        for key, value in obj.items():
            is_pii = isinstance(key, str) and key.lower() in self._pii_fields
            obj[key] = self._anonymize_value(value, is_pii)

    def _anonymize_value(self, value, is_pii: bool):
        """Return the anonymized form of ``value`` (containers are edited in place)."""
        if isinstance(value, dict):
            self._anonymize_dict(value)
            return value
        if isinstance(value, list):
            for index, item in enumerate(value):
                value[index] = self._anonymize_value(item, is_pii)
            return value
        if is_pii and isinstance(value, str):
            # HMAC-SHA256 with the salt as key — standard keyed-hash construction.
            digest = hmac.new(
                key=bytes.fromhex(self._salt),
                msg=value.encode("utf-8"),
                digestmod=hashlib.sha256,
            ).hexdigest()
            return f"hmac:{digest[:32]}"  # 32 hex = 128 bits
        return value

    def _write_entry(self, entry: dict) -> None:
        """Append ``entry`` as one JSON line, under an OS file lock if available."""
        json_line = json.dumps(entry, ensure_ascii=False) + "\n"

        with open(self.audit_file, "a", encoding="utf-8") as f:
            if FCNTL_AVAILABLE:
                # Unix/Linux advisory lock on the whole file.
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    f.write(json_line)
                    f.flush()
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            elif MSVCRT_AVAILABLE:
                # msvcrt.locking works on the region starting at the current
                # file position, so seek to offset 0 before both lock and
                # unlock; otherwise the unlock targets a different region
                # once the write has moved the position. Appends still go to
                # the end of the file because it is opened in append mode.
                # The lock is taken outside the try so a failed lock is not
                # followed by an unlock attempt.
                f.seek(0)
                msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1)
                try:
                    f.write(json_line)
                    f.flush()
                finally:
                    f.seek(0)
                    msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1)
            else:
                # No OS-level locking available: other processes may interleave.
                f.write(json_line)
                f.flush()

    def _rotate_logs(self) -> None:
        """Shift rotated files up by one and move the current file to ``.1``.

        ``OSError`` is swallowed deliberately: on Windows, antivirus/indexers
        may hold files open and block renames. A failed rotation is skipped
        and retried on the next append that finds the file over the limit.
        Callers are expected to hold ``self._lock``.
        """
        if not self.audit_file.exists():
            return

        try:
            # Highest index first so no rename overwrites a file still needed.
            for i in range(self.max_files - 1, 0, -1):
                old_file = self.workspace / f".audit.{i}.jsonl"
                new_file = self.workspace / f".audit.{i + 1}.jsonl"

                if old_file.exists():
                    if new_file.exists():
                        new_file.unlink()  # drop the oldest file
                    old_file.rename(new_file)

            rotated_file = self.workspace / ".audit.1.jsonl"
            if rotated_file.exists():
                rotated_file.unlink()
            self.audit_file.rename(rotated_file)
        except OSError:
            # Rotation failed (e.g. file locked by another process on Windows).
            pass

        # A new current file is created lazily by the next write.
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Claim 20: Memory compression & summarization."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from datetime import datetime, timedelta
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, List
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CompressionEngine:
    """Compress old memories into condensed summaries."""

    @staticmethod
    def compress_file(file_path: str) -> Dict:
        """Read a memory file and produce a compressed summary.

        The file is read as UTF-8 explicitly (the marker set contains emoji,
        so relying on the platform default encoding breaks on non-UTF-8
        locales). Any read failure — missing file, directory, permission
        problem, undecodable bytes — is reported as an error dict instead of
        raising, so one bad file cannot abort a batch.

        Args:
            file_path: Path of the text/markdown file to summarize.

        Returns:
            ``{"error": "Cannot read: <path>"}`` if the file cannot be read,
            otherwise a dict with ``source``, ``original_lines``,
            ``compressed_lines``, ``compression_ratio``, ``headers``,
            ``key_points`` (at most 30) and ``compressed_at`` (ISO timestamp).
        """
        try:
            content = Path(file_path).read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return {"error": f"Cannot read: {file_path}"}

        stripped = content.strip()
        # "".split("\n") yields [""], which would report one line for an
        # empty file; treat empty content as zero lines.
        lines = stripped.split("\n") if stripped else []

        markers = ("✅", "🎯", "💰", "🚀", "Decision:", "Key:", "Result:")
        headers = [line for line in lines if line.startswith("#")]
        bullets = [line.strip() for line in lines if line.strip().startswith("- ")]
        key_lines = [
            line.strip() for line in lines
            if any(marker in line for marker in markers)
        ]

        # De-duplicate case-insensitively, ignoring the leading "- ", and
        # drop very short fragments (10 chars or fewer after normalization).
        seen = set()
        unique = []
        for candidate in bullets + key_lines:
            norm = candidate.lower().strip("- ").strip()
            if len(norm) > 10 and norm not in seen:
                seen.add(norm)
                unique.append(candidate)

        compressed_count = len(headers) + len(unique)
        return {
            "source": file_path,
            "original_lines": len(lines),
            "compressed_lines": compressed_count,
            "compression_ratio": round(
                1 - compressed_count / max(len(lines), 1), 2
            ),
            "headers": headers,
            "key_points": unique[:30],
            "compressed_at": datetime.now().isoformat(),
        }

    @staticmethod
    def compress_old_files(memory_dir: str, days_old: int = 7) -> List[Dict]:
        """Compress daily memory files older than *days_old*.

        Only ``*.md`` files directly inside ``memory_dir`` whose name contains
        a ``YYYY-MM-DD`` date are considered. Names that look like a date but
        are not a valid calendar date (e.g. ``2024-13-45.md``) are skipped
        rather than raising.

        Args:
            memory_dir: Directory holding the daily memory files.
            days_old: Minimum age in days (relative to now) for a file to be
                compressed.

        Returns:
            One ``compress_file`` result per qualifying file, in sorted
            filename order; an empty list if ``memory_dir`` is not a directory.
        """
        path = Path(memory_dir)
        if not path.is_dir():
            return []

        cutoff = datetime.now() - timedelta(days=days_old)
        results = []
        for f in sorted(path.glob("*.md")):
            match = re.search(r"(\d{4}-\d{2}-\d{2})", f.name)
            if not match:
                continue
            try:
                file_date = datetime.strptime(match.group(1), "%Y-%m-%d")
            except ValueError:
                continue  # digits matched the pattern but are not a real date
            if file_date < cutoff:
                results.append(CompressionEngine.compress_file(str(f)))
        return results
|