epi-recorder 2.1.3__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
epi_core/container.py CHANGED
@@ -11,6 +11,7 @@ Implements the EPI file format specification:
11
11
  import hashlib
12
12
  import json
13
13
  import tempfile
14
+ import threading
14
15
  import zipfile
15
16
  from pathlib import Path
16
17
  from typing import Optional
@@ -21,6 +22,9 @@ from epi_core.schemas import ManifestModel
21
22
  # EPI mimetype constant (vendor-specific MIME type per RFC 6838)
22
23
  EPI_MIMETYPE = "application/vnd.epi+zip"
23
24
 
25
+ # Thread-safe lock for ZIP packing operations (prevents concurrent corruption)
26
+ _zip_pack_lock = threading.Lock()
27
+
24
28
 
25
29
  class EPIContainer:
26
30
  """
@@ -157,6 +161,8 @@ class EPIContainer:
157
161
  """
158
162
  Create a .epi file from a source directory.
159
163
 
164
+ Thread-safe: Uses a module-level lock to prevent concurrent ZIP corruption.
165
+
160
166
  The packing process:
161
167
  1. Write mimetype first (uncompressed) per ZIP spec
162
168
  2. Hash all files in source_dir
@@ -173,64 +179,67 @@ class EPIContainer:
173
179
  FileNotFoundError: If source_dir doesn't exist
174
180
  ValueError: If source_dir is not a directory
175
181
  """
176
- if not source_dir.exists():
177
- raise FileNotFoundError(f"Source directory not found: {source_dir}")
178
-
179
- if not source_dir.is_dir():
180
- raise ValueError(f"Source must be a directory: {source_dir}")
181
-
182
- # Ensure output directory exists
183
- output_path.parent.mkdir(parents=True, exist_ok=True)
184
-
185
- # Collect all files and compute hashes
186
- file_manifest = {}
187
- files_to_pack = []
188
-
189
- for file_path in source_dir.rglob("*"):
190
- if file_path.is_file():
191
- # Get relative path for archive
192
- rel_path = file_path.relative_to(source_dir)
193
- arc_name = str(rel_path).replace("\\", "/") # Use forward slashes in ZIP
194
-
195
- # Compute hash
196
- file_hash = EPIContainer._compute_file_hash(file_path)
197
- file_manifest[arc_name] = file_hash
198
-
199
- files_to_pack.append((file_path, arc_name))
200
-
201
- # Update manifest with file hashes
202
- manifest.file_manifest = file_manifest
203
-
204
- # Create embedded viewer with data injection
205
- viewer_html = EPIContainer._create_embedded_viewer(source_dir, manifest)
206
-
207
- # Create ZIP file
208
- with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
209
- # 1. Write mimetype FIRST and UNCOMPRESSED (per EPI spec)
210
- zf.writestr(
211
- "mimetype",
212
- EPI_MIMETYPE,
213
- compress_type=zipfile.ZIP_STORED # No compression
214
- )
182
+ # CRITICAL: Acquire lock to prevent concurrent ZIP corruption
183
+ # Multiple threads writing to ZIP simultaneously causes file header mismatches
184
+ with _zip_pack_lock:
185
+ if not source_dir.exists():
186
+ raise FileNotFoundError(f"Source directory not found: {source_dir}")
187
+
188
+ if not source_dir.is_dir():
189
+ raise ValueError(f"Source must be a directory: {source_dir}")
190
+
191
+ # Ensure output directory exists
192
+ output_path.parent.mkdir(parents=True, exist_ok=True)
193
+
194
+ # Collect all files and compute hashes
195
+ file_manifest = {}
196
+ files_to_pack = []
197
+
198
+ for file_path in source_dir.rglob("*"):
199
+ if file_path.is_file():
200
+ # Get relative path for archive
201
+ rel_path = file_path.relative_to(source_dir)
202
+ arc_name = str(rel_path).replace("\\", "/") # Use forward slashes in ZIP
203
+
204
+ # Compute hash
205
+ file_hash = EPIContainer._compute_file_hash(file_path)
206
+ file_manifest[arc_name] = file_hash
207
+
208
+ files_to_pack.append((file_path, arc_name))
215
209
 
216
- # 2. Write all other files
217
- for file_path, arc_name in files_to_pack:
218
- zf.write(file_path, arc_name, compress_type=zipfile.ZIP_DEFLATED)
210
+ # Update manifest with file hashes
211
+ manifest.file_manifest = file_manifest
219
212
 
220
- # 3. Write embedded viewer
221
- zf.writestr(
222
- "viewer.html",
223
- viewer_html,
224
- compress_type=zipfile.ZIP_DEFLATED
225
- )
213
+ # Create embedded viewer with data injection
214
+ viewer_html = EPIContainer._create_embedded_viewer(source_dir, manifest)
226
215
 
227
- # 4. Write manifest.json LAST (after all files are hashed)
228
- manifest_json = manifest.model_dump_json(indent=2)
229
- zf.writestr(
230
- "manifest.json",
231
- manifest_json,
232
- compress_type=zipfile.ZIP_DEFLATED
233
- )
216
+ # Create ZIP file
217
+ with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
218
+ # 1. Write mimetype FIRST and UNCOMPRESSED (per EPI spec)
219
+ zf.writestr(
220
+ "mimetype",
221
+ EPI_MIMETYPE,
222
+ compress_type=zipfile.ZIP_STORED # No compression
223
+ )
224
+
225
+ # 2. Write all other files
226
+ for file_path, arc_name in files_to_pack:
227
+ zf.write(file_path, arc_name, compress_type=zipfile.ZIP_DEFLATED)
228
+
229
+ # 3. Write embedded viewer
230
+ zf.writestr(
231
+ "viewer.html",
232
+ viewer_html,
233
+ compress_type=zipfile.ZIP_DEFLATED
234
+ )
235
+
236
+ # 4. Write manifest.json LAST (after all files are hashed)
237
+ manifest_json = manifest.model_dump_json(indent=2)
238
+ zf.writestr(
239
+ "manifest.json",
240
+ manifest_json,
241
+ compress_type=zipfile.ZIP_DEFLATED
242
+ )
234
243
 
235
244
  @staticmethod
236
245
  def unpack(epi_path: Path, dest_dir: Optional[Path] = None) -> Path:
@@ -350,3 +359,7 @@ class EPIContainer:
350
359
  mismatches[filename] = f"Hash mismatch: expected {expected_hash}, got {actual_hash}"
351
360
 
352
361
  return (len(mismatches) == 0, mismatches)
362
+
363
+
364
+
365
+
epi_core/redactor.py CHANGED
@@ -277,3 +277,7 @@ def get_default_redactor() -> Redactor:
277
277
  pass # Fail silently, use defaults
278
278
 
279
279
  return Redactor(config_path=config_path if config_path.exists() else None)
280
+
281
+
282
+
283
+
epi_core/schemas.py CHANGED
@@ -18,7 +18,7 @@ class ManifestModel(BaseModel):
18
18
  """
19
19
 
20
20
  spec_version: str = Field(
21
- default="1.1-json",
21
+ default="2.2.0",
22
22
  description="EPI specification version"
23
23
  )
24
24
 
@@ -145,4 +145,8 @@ class StepModel(BaseModel):
145
145
  }
146
146
  }
147
147
  }
148
- )
148
+ )
149
+
150
+
151
+
152
+
epi_core/serialize.py CHANGED
@@ -158,3 +158,7 @@ def verify_hash(model: BaseModel, expected_hash: str, exclude_fields: set[str] |
158
158
  """
159
159
  actual_hash = get_canonical_hash(model, exclude_fields)
160
160
  return actual_hash == expected_hash
161
+
162
+
163
+
164
+
epi_core/storage.py ADDED
@@ -0,0 +1,186 @@
1
+ """
2
+ SQLite-based storage for EPI recordings.
3
+
4
+ Provides atomic, crash-safe storage replacing JSONL files.
5
+ SQLite transactions ensure no data corruption on crashes.
6
+ """
7
+
8
+ import sqlite3
9
+ import json
10
+ import time
11
+ from pathlib import Path
12
+ from typing import List, Dict, Any, Optional
13
+ from datetime import datetime
14
+
15
+ from .schemas import StepModel
16
+
17
+
18
class EpiStorage:
    """
    SQLite-based atomic storage for agent execution steps.

    Replaces JSONL persistence: every insert is committed in its own
    transaction, so a process crash leaves the database at the last
    committed step instead of a truncated half-written line.
    """

    def __init__(self, session_id: str, output_dir: Path):
        """
        Initialize SQLite storage.

        Args:
            session_id: Unique session identifier.
            output_dir: Directory for the database file (created if missing).
        """
        self.session_id = session_id
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        self.db_path = self.output_dir / f"{session_id}_temp.db"
        # check_same_thread=False: writes may come from a worker thread
        # (e.g. AsyncRecorder's executor); a single connection is still
        # expected to be used from one thread at a time.
        self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
        self._init_tables()

    def _init_tables(self) -> None:
        """Initialize the database schema (idempotent)."""
        self.conn.execute('''
            CREATE TABLE IF NOT EXISTS steps (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                step_index INTEGER NOT NULL,
                timestamp TEXT NOT NULL,
                kind TEXT NOT NULL,
                content TEXT NOT NULL,
                created_at REAL NOT NULL
            )
        ''')

        self.conn.execute('''
            CREATE TABLE IF NOT EXISTS metadata (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL
            )
        ''')

        self.conn.execute('''
            CREATE INDEX IF NOT EXISTS idx_steps_index
            ON steps(step_index)
        ''')

        self.conn.commit()

    def add_step(self, step: "StepModel") -> None:
        """
        Atomically insert one execution step.

        The commit makes the step durable before this method returns,
        so it survives a subsequent process crash.

        Args:
            step: StepModel to persist.
        """
        self.conn.execute(
            '''INSERT INTO steps
               (step_index, timestamp, kind, content, created_at)
               VALUES (?, ?, ?, ?, ?)''',
            (
                step.index,
                step.timestamp.isoformat(),
                step.kind,
                step.model_dump_json(),
                time.time()
            )
        )
        self.conn.commit()

    def get_steps(self) -> List["StepModel"]:
        """
        Retrieve all steps ordered by step_index.

        Returns:
            List of StepModel instances (empty if nothing was recorded).
        """
        cursor = self.conn.execute(
            'SELECT content FROM steps ORDER BY step_index'
        )
        # Stored content is the step's own JSON dump; rebuild models from it.
        return [StepModel(**json.loads(row[0])) for row in cursor.fetchall()]

    def set_metadata(self, key: str, value: str) -> None:
        """
        Set (or overwrite) a metadata key-value pair.

        Args:
            key: Metadata key.
            value: Metadata value.
        """
        self.conn.execute(
            'INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)',
            (key, value)
        )
        self.conn.commit()

    def get_metadata(self, key: str) -> Optional[str]:
        """
        Get a metadata value.

        Args:
            key: Metadata key.

        Returns:
            Metadata value, or None if the key is absent.
        """
        cursor = self.conn.execute(
            'SELECT value FROM metadata WHERE key = ?',
            (key,)
        )
        row = cursor.fetchone()
        return row[0] if row else None

    def close(self) -> None:
        """Close the database connection."""
        if self.conn:
            self.conn.close()

    def export_to_jsonl(self, output_path: Path) -> None:
        """
        Export steps to a JSONL file for backwards compatibility.

        Args:
            output_path: Path to the JSONL file to write.
        """
        steps = self.get_steps()
        with open(output_path, 'w', encoding='utf-8') as f:
            for step in steps:
                f.write(step.model_dump_json() + '\n')

    def finalize(self) -> Path:
        """
        Finalize the recording: stamp metadata, export the legacy JSONL
        file, then close and remove the temporary database.

        NOTE: the JSONL export is a plain file write, not an atomic
        rename; durability during recording comes from SQLite's
        per-step commits, not from this export.

        Returns:
            Path to the exported steps.jsonl file.
        """
        # Record finalization metadata while the connection is still open.
        self.set_metadata('finalized_at', datetime.utcnow().isoformat())
        self.set_metadata('session_id', self.session_id)

        # Export BEFORE closing -- the original close/reopen round-trip
        # was unnecessary.
        final_path = self.output_dir / "steps.jsonl"
        self.export_to_jsonl(final_path)

        # Close connection and clean up the temporary database.
        self.close()
        self.db_path.unlink(missing_ok=True)

        return final_path
183
+
184
+
185
+
186
+
epi_core/trust.py CHANGED
@@ -244,3 +244,7 @@ def create_verification_report(
244
244
  report["trust_message"] = "Integrity compromised - do not trust"
245
245
 
246
246
  return report
247
+
248
+
249
+
250
+
epi_recorder/__init__.py CHANGED
@@ -4,7 +4,7 @@ EPI Recorder - Runtime interception and workflow capture.
4
4
  Python API for recording AI workflows with cryptographic verification.
5
5
  """
6
6
 
7
- __version__ = "2.1.3"
7
+ __version__ = "2.2.0"
8
8
 
9
9
  # Export Python API
10
10
  from epi_recorder.api import (
@@ -19,3 +19,7 @@ __all__ = [
19
19
  "get_current_session",
20
20
  "__version__"
21
21
  ]
22
+
23
+
24
+
25
+
epi_recorder/api.py CHANGED
@@ -176,6 +176,11 @@ class EpiRecorderSession:
176
176
  output_path=self.output_path
177
177
  )
178
178
 
179
+ # CRITICAL: Windows file system flush
180
+ # Allow OS to finalize file before signing
181
+ import time
182
+ time.sleep(0.1)
183
+
179
184
  # Sign if requested
180
185
  if self.auto_sign:
181
186
  self._sign_epi_file()
@@ -355,7 +360,24 @@ class EpiRecorderSession:
355
360
  encoding="utf-8"
356
361
  )
357
362
 
358
- # Repack the ZIP with signed manifest
363
+ # Regenerate viewer.html with signed manifest
364
+ steps = []
365
+ steps_file = tmp_path / "steps.jsonl"
366
+ if steps_file.exists():
367
+ for line in steps_file.read_text(encoding="utf-8").strip().split("\n"):
368
+ if line:
369
+ try:
370
+ steps.append(json.loads(line))
371
+ except json.JSONDecodeError:
372
+ pass
373
+
374
+ # Regenerate viewer with signed manifest
375
+ from epi_core.container import EPIContainer
376
+ viewer_html = EPIContainer._create_embedded_viewer(tmp_path, signed_manifest)
377
+ viewer_path = tmp_path / "viewer.html"
378
+ viewer_path.write_text(viewer_html, encoding="utf-8")
379
+
380
+ # Repack the ZIP with signed manifest and updated viewer
359
381
  # CRITICAL: Write to temp file first to prevent data loss
360
382
  temp_output = self.output_path.with_suffix('.epi.tmp')
361
383
 
@@ -590,4 +612,8 @@ def get_current_session() -> Optional[EpiRecorderSession]:
590
612
  Returns:
591
613
  EpiRecorderSession or None
592
614
  """
593
- return getattr(_thread_local, 'active_session', None)
615
+ return getattr(_thread_local, 'active_session', None)
616
+
617
+
618
+
619
+
@@ -0,0 +1,151 @@
1
+ import asyncio
2
+ import threading
3
+ import time
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ from contextlib import asynccontextmanager
6
+ from typing import Optional, Dict, Any
7
+ from datetime import datetime
8
+
9
+ from epi_core.storage import EpiStorage
10
+ from epi_core.schemas import StepModel
11
+
12
class AsyncRecorder:
    """
    Async-native recorder that keeps SQLite writes off the event loop.

    Steps are buffered in an asyncio.Queue and drained by a background
    task that performs the blocking SQLite writes inside a
    single-thread executor.
    """

    def __init__(self, session_name: str, output_dir: str = "."):
        self.session_name = session_name
        self.output_dir = output_dir

        # In-memory buffer between callers and the writer task.
        self._queue = asyncio.Queue()

        # One worker thread is enough: a single SQLite connection should
        # only be written from one thread at a time.
        self._executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="epi_writer")

        # Storage instance (created lazily in start()).
        self._storage: Optional[EpiStorage] = None
        self._writer_task: Optional[asyncio.Task] = None

        # State tracking.
        self._step_count = 0
        self._done = asyncio.Event()  # reserved; not currently signalled
        self._error: Optional[Exception] = None

    async def start(self):
        """Initialize storage in the worker thread and start the writer task."""
        # SQLite initialization is blocking, so run it in the executor.
        loop = asyncio.get_running_loop()
        self._storage = await loop.run_in_executor(
            self._executor,
            lambda: EpiStorage(self.session_name, self.output_dir)
        )

        self._writer_task = asyncio.create_task(self._writer_loop())

    async def record_step(self, step_type: str, content: dict):
        """
        Non-blocking step recording.

        Raises:
            Exception: the writer task's failure, if it has already died.
        """
        if self._error:
            raise self._error

        self._step_count += 1

        # Enqueue only; the writer task persists it later.
        await self._queue.put({
            'index': self._step_count,
            'type': step_type,
            'content': content,
            'timestamp': datetime.utcnow()  # StepModel expects datetime
        })

    async def _writer_loop(self):
        """Background task: drain the queue into SQLite."""
        try:
            while True:
                # The timeout lets the loop notice cancellation promptly.
                try:
                    step_data = await asyncio.wait_for(self._queue.get(), timeout=0.5)
                except asyncio.TimeoutError:
                    continue

                if step_data is None:  # Shutdown sentinel
                    self._queue.task_done()
                    break

                # Blocking SQLite write runs in the worker thread so the
                # event loop is never blocked.
                loop = asyncio.get_running_loop()
                await loop.run_in_executor(
                    self._executor,
                    self._write_to_storage,
                    step_data
                )

                self._queue.task_done()

        except asyncio.CancelledError:
            # Graceful shutdown
            pass
        except Exception as e:
            # Remember the failure so record_step()/stop() can surface it.
            self._error = e

    def _write_to_storage(self, step_data: dict):
        """Synchronous SQLite write (runs in the worker thread)."""
        if self._storage:
            # Construct StepModel from the queued payload.
            step = StepModel(
                index=step_data['index'],
                timestamp=step_data['timestamp'],
                kind=step_data['type'],
                content=step_data['content']
            )
            self._storage.add_step(step)

    async def stop(self):
        """Drain the queue, stop the writer task, and finalize storage."""
        if not self._writer_task:
            return

        # Send the shutdown sentinel, then await the task itself rather
        # than queue.join(): if the writer already died with an error the
        # sentinel is never task_done()'d and join() would hang forever.
        await self._queue.put(None)
        try:
            await self._writer_task
        except asyncio.CancelledError:
            pass

        # Finalize storage in the worker thread (blocking I/O).
        if self._storage:
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                self._executor,
                self._storage.finalize
            )

        # Shutdown executor
        self._executor.shutdown(wait=True)
132
+
133
@asynccontextmanager
async def record_async(session_name: str, output_dir: str = "."):
    """
    Record an agent run inside an ``async with`` block.

    Example:
        async with record_async("my_agent") as rec:
            await agent.arun("task")  # event loop stays responsive
    """
    rec = AsyncRecorder(session_name, output_dir)
    await rec.start()
    try:
        yield rec
    finally:
        await rec.stop()
148
+
149
+
150
+
151
+
epi_recorder/bootstrap.py CHANGED
@@ -56,3 +56,7 @@ def initialize_recording():
56
56
  # Auto-initialize if EPI_RECORD is set
57
57
  if os.environ.get("EPI_RECORD") == "1":
58
58
  initialize_recording()
59
+
60
+
61
+
62
+
@@ -235,3 +235,7 @@ def capture_environment(
235
235
  include_all_env_vars=include_all_env_vars,
236
236
  redact_env_vars=redact_env_vars
237
237
  )
238
+
239
+
240
+
241
+