epi-recorder 2.1.3__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epi_analyzer/__init__.py +9 -0
- epi_analyzer/detector.py +337 -0
- epi_cli/__init__.py +4 -0
- epi_cli/__main__.py +4 -0
- epi_cli/chat.py +21 -3
- epi_cli/debug.py +107 -0
- epi_cli/keys.py +4 -0
- epi_cli/ls.py +5 -1
- epi_cli/main.py +8 -0
- epi_cli/record.py +4 -0
- epi_cli/run.py +12 -4
- epi_cli/verify.py +4 -0
- epi_cli/view.py +4 -0
- epi_core/__init__.py +5 -1
- epi_core/container.py +68 -55
- epi_core/redactor.py +4 -0
- epi_core/schemas.py +6 -2
- epi_core/serialize.py +4 -0
- epi_core/storage.py +186 -0
- epi_core/trust.py +4 -0
- epi_recorder/__init__.py +13 -1
- epi_recorder/api.py +211 -5
- epi_recorder/async_api.py +151 -0
- epi_recorder/bootstrap.py +4 -0
- epi_recorder/environment.py +4 -0
- epi_recorder/patcher.py +79 -19
- epi_recorder/test_import.py +2 -0
- epi_recorder/test_script.py +2 -0
- epi_recorder/wrappers/__init__.py +16 -0
- epi_recorder/wrappers/base.py +79 -0
- epi_recorder/wrappers/openai.py +178 -0
- epi_recorder-2.3.0.dist-info/METADATA +269 -0
- epi_recorder-2.3.0.dist-info/RECORD +41 -0
- {epi_recorder-2.1.3.dist-info → epi_recorder-2.3.0.dist-info}/WHEEL +1 -1
- epi_recorder-2.3.0.dist-info/licenses/LICENSE +21 -0
- {epi_recorder-2.1.3.dist-info → epi_recorder-2.3.0.dist-info}/top_level.txt +1 -0
- epi_viewer_static/app.js +113 -7
- epi_viewer_static/crypto.js +3 -0
- epi_viewer_static/index.html +4 -2
- epi_viewer_static/viewer_lite.css +3 -1
- epi_postinstall.py +0 -197
- epi_recorder-2.1.3.dist-info/METADATA +0 -577
- epi_recorder-2.1.3.dist-info/RECORD +0 -34
- epi_recorder-2.1.3.dist-info/licenses/LICENSE +0 -201
- {epi_recorder-2.1.3.dist-info → epi_recorder-2.3.0.dist-info}/entry_points.txt +0 -0
epi_core/container.py
CHANGED
```diff
@@ -11,6 +11,7 @@ Implements the EPI file format specification:
 import hashlib
 import json
 import tempfile
+import threading
 import zipfile
 from pathlib import Path
 from typing import Optional
@@ -21,6 +22,9 @@ from epi_core.schemas import ManifestModel
 # EPI mimetype constant (vendor-specific MIME type per RFC 6838)
 EPI_MIMETYPE = "application/vnd.epi+zip"
 
+# Thread-safe lock for ZIP packing operations (prevents concurrent corruption)
+_zip_pack_lock = threading.Lock()
+
 
 class EPIContainer:
     """
@@ -157,6 +161,8 @@ class EPIContainer:
         """
         Create a .epi file from a source directory.
 
+        Thread-safe: Uses a module-level lock to prevent concurrent ZIP corruption.
+
         The packing process:
         1. Write mimetype first (uncompressed) per ZIP spec
         2. Hash all files in source_dir
@@ -173,64 +179,67 @@
            FileNotFoundError: If source_dir doesn't exist
            ValueError: If source_dir is not a directory
        """
-        [28 lines truncated in the source diff view]
-        # Create embedded viewer with data injection
-        viewer_html = EPIContainer._create_embedded_viewer(source_dir, manifest)
-
-        # Create ZIP file
-        with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
-            # 1. Write mimetype FIRST and UNCOMPRESSED (per EPI spec)
-            zf.writestr(
-                "mimetype",
-                EPI_MIMETYPE,
-                compress_type=zipfile.ZIP_STORED  # No compression
-            )
+        # CRITICAL: Acquire lock to prevent concurrent ZIP corruption
+        # Multiple threads writing to ZIP simultaneously causes file header mismatches
+        with _zip_pack_lock:
+            if not source_dir.exists():
+                raise FileNotFoundError(f"Source directory not found: {source_dir}")
+
+            if not source_dir.is_dir():
+                raise ValueError(f"Source must be a directory: {source_dir}")
+
+            # Ensure output directory exists
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+
+            # Collect all files and compute hashes
+            file_manifest = {}
+            files_to_pack = []
+
+            for file_path in source_dir.rglob("*"):
+                if file_path.is_file():
+                    # Get relative path for archive
+                    rel_path = file_path.relative_to(source_dir)
+                    arc_name = str(rel_path).replace("\\", "/")  # Use forward slashes in ZIP
+
+                    # Compute hash
+                    file_hash = EPIContainer._compute_file_hash(file_path)
+                    file_manifest[arc_name] = file_hash
+
+                    files_to_pack.append((file_path, arc_name))
 
-            [2 lines truncated in the source diff view]
-            zf.write(file_path, arc_name, compress_type=zipfile.ZIP_DEFLATED)
+            # Update manifest with file hashes
+            manifest.file_manifest = file_manifest
 
-            [2 lines truncated in the source diff view]
-                "viewer.html",
-                viewer_html,
-                compress_type=zipfile.ZIP_DEFLATED
-            )
+            # Create embedded viewer with data injection
+            viewer_html = EPIContainer._create_embedded_viewer(source_dir, manifest)
 
-            [7 lines truncated in the source diff view]
+            # Create ZIP file
+            with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
+                # 1. Write mimetype FIRST and UNCOMPRESSED (per EPI spec)
+                zf.writestr(
+                    "mimetype",
+                    EPI_MIMETYPE,
+                    compress_type=zipfile.ZIP_STORED  # No compression
+                )
+
+                # 2. Write all other files
+                for file_path, arc_name in files_to_pack:
+                    zf.write(file_path, arc_name, compress_type=zipfile.ZIP_DEFLATED)
+
+                # 3. Write embedded viewer
+                zf.writestr(
+                    "viewer.html",
+                    viewer_html,
+                    compress_type=zipfile.ZIP_DEFLATED
+                )
+
+                # 4. Write manifest.json LAST (after all files are hashed)
+                manifest_json = manifest.model_dump_json(indent=2)
+                zf.writestr(
+                    "manifest.json",
+                    manifest_json,
+                    compress_type=zipfile.ZIP_DEFLATED
+                )
 
     @staticmethod
     def unpack(epi_path: Path, dest_dir: Optional[Path] = None) -> Path:
@@ -350,3 +359,7 @@ class EPIContainer:
                 mismatches[filename] = f"Hash mismatch: expected {expected_hash}, got {actual_hash}"
 
         return (len(mismatches) == 0, mismatches)
+
+
+
+
```
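The notable change is the module-level `_zip_pack_lock`: `pack()` now serializes all ZIP writes within the process, so concurrent packs can no longer interleave writes and corrupt the archive. A minimal sketch of what this buys callers, assuming `pack(source_dir, output_path, manifest)` is the staticmethod signature implied by the hunk above (the `ManifestModel()` defaults are illustrative):

```python
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

from epi_core.container import EPIContainer
from epi_core.schemas import ManifestModel

def pack_one(i: int) -> Path:
    out = Path(f"out/session_{i}.epi")
    # Each call acquires _zip_pack_lock internally, so overlapping packs
    # run one at a time instead of producing mismatched ZIP headers.
    EPIContainer.pack(Path(f"sessions/session_{i}"), out, ManifestModel())
    return out

with ThreadPoolExecutor(max_workers=4) as pool:
    packed = list(pool.map(pack_one, range(4)))
```

Because the lock is global to the process, packs to different output files also serialize; that trades throughput for the simplest possible correctness guarantee.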
epi_core/redactor.py
CHANGED
epi_core/schemas.py
CHANGED
```diff
@@ -18,7 +18,7 @@ class ManifestModel(BaseModel):
     """
 
     spec_version: str = Field(
-        default="…
+        default="2.3.0",
         description="EPI specification version"
     )
 
@@ -145,4 +145,8 @@ class StepModel(BaseModel):
             }
         }
     }
-    )
+    )
+
+
+
+
```
epi_core/serialize.py
CHANGED
epi_core/storage.py
ADDED
```diff
@@ -0,0 +1,186 @@
+"""
+SQLite-based storage for EPI recordings.
+
+Provides atomic, crash-safe storage replacing JSONL files.
+SQLite transactions ensure no data corruption on crashes.
+"""
+
+import sqlite3
+import json
+import time
+from pathlib import Path
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+
+from .schemas import StepModel
+
+
+class EpiStorage:
+    """
+    SQLite-based atomic storage for agent execution.
+    Replaces JSONL (which corrupts on crashes).
+    """
+
+    def __init__(self, session_id: str, output_dir: Path):
+        """
+        Initialize SQLite storage.
+
+        Args:
+            session_id: Unique session identifier
+            output_dir: Directory for database file
+        """
+        self.session_id = session_id
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        self.db_path = self.output_dir / f"{session_id}_temp.db"
+        self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
+        self._init_tables()
+
+    def _init_tables(self):
+        """Initialize database schema"""
+        self.conn.execute('''
+            CREATE TABLE IF NOT EXISTS steps (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                step_index INTEGER NOT NULL,
+                timestamp TEXT NOT NULL,
+                kind TEXT NOT NULL,
+                content TEXT NOT NULL,
+                created_at REAL NOT NULL
+            )
+        ''')
+
+        self.conn.execute('''
+            CREATE TABLE IF NOT EXISTS metadata (
+                key TEXT PRIMARY KEY,
+                value TEXT NOT NULL
+            )
+        ''')
+
+        self.conn.execute('''
+            CREATE INDEX IF NOT EXISTS idx_steps_index
+            ON steps(step_index)
+        ''')
+
+        self.conn.commit()
+
+    def add_step(self, step: StepModel) -> None:
+        """
+        Atomic insert of execution step.
+        Survives process crashes.
+
+        Args:
+            step: StepModel to persist
+        """
+        self.conn.execute(
+            '''INSERT INTO steps
+               (step_index, timestamp, kind, content, created_at)
+               VALUES (?, ?, ?, ?, ?)''',
+            (
+                step.index,
+                step.timestamp.isoformat(),
+                step.kind,
+                step.model_dump_json(),
+                time.time()
+            )
+        )
+        self.conn.commit()
+
+    def get_steps(self) -> List[StepModel]:
+        """
+        Retrieve all steps in order.
+
+        Returns:
+            List of StepModel instances
+        """
+        cursor = self.conn.execute(
+            'SELECT content FROM steps ORDER BY step_index'
+        )
+        rows = cursor.fetchall()
+
+        steps = []
+        for row in rows:
+            step_data = json.loads(row[0])
+            steps.append(StepModel(**step_data))
+
+        return steps
+
+    def set_metadata(self, key: str, value: str) -> None:
+        """
+        Set metadata key-value pair.
+
+        Args:
+            key: Metadata key
+            value: Metadata value
+        """
+        self.conn.execute(
+            'INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)',
+            (key, value)
+        )
+        self.conn.commit()
+
+    def get_metadata(self, key: str) -> Optional[str]:
+        """
+        Get metadata value.
+
+        Args:
+            key: Metadata key
+
+        Returns:
+            Metadata value or None
+        """
+        cursor = self.conn.execute(
+            'SELECT value FROM metadata WHERE key = ?',
+            (key,)
+        )
+        row = cursor.fetchone()
+        return row[0] if row else None
+
+    def close(self) -> None:
+        """Close database connection."""
+        if self.conn:
+            self.conn.close()
+
+    def export_to_jsonl(self, output_path: Path) -> None:
+        """
+        Export steps to JSONL file for backwards compatibility.
+
+        Args:
+            output_path: Path to JSONL file
+        """
+        steps = self.get_steps()
+        with open(output_path, 'w', encoding='utf-8') as f:
+            for step in steps:
+                f.write(step.model_dump_json() + '\n')
+
+    def finalize(self) -> Path:
+        """
+        Finalize recording and rename to final path.
+        This ensures we never have half-written files.
+
+        Returns:
+            Path to finalized database file
+        """
+        # Add finalization metadata
+        self.set_metadata('finalized_at', datetime.utcnow().isoformat())
+        self.set_metadata('session_id', self.session_id)
+
+        # Close connection
+        self.close()
+
+        # Atomic rename (SQLite transaction guarantees consistency)
+        final_path = self.output_dir / "steps.jsonl"
+
+        # Export to JSONL for backwards compatibility
+        self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
+        self.export_to_jsonl(final_path)
+        self.close()
+
+        # Clean up temp DB
+        self.db_path.unlink(missing_ok=True)
+
+        return final_path
+
+
+
+
```
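Every `add_step()` and `set_metadata()` call commits its own SQLite transaction, which is the crash-safety claim: a process dying mid-recording leaves all previously committed rows readable in `{session_id}_temp.db`. A minimal lifecycle sketch using only the methods defined above:

```python
from pathlib import Path

from epi_core.storage import EpiStorage

storage = EpiStorage(session_id="demo", output_dir=Path("recordings"))

# Committed immediately; a crash after this line loses nothing already written.
storage.set_metadata("workflow", "nightly-eval")
assert storage.get_metadata("workflow") == "nightly-eval"

# finalize() stamps finalized_at/session_id, reopens the database to export
# every step to recordings/steps.jsonl, then deletes the temporary .db file.
final_path = storage.finalize()
print(final_path)  # recordings/steps.jsonl
```

One quirk worth noting: the export path is the fixed `steps.jsonl` inside `output_dir`, so two sessions finalizing into the same directory would overwrite each other's export.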
epi_core/trust.py
CHANGED
epi_recorder/__init__.py
CHANGED
```diff
@@ -4,7 +4,7 @@ EPI Recorder - Runtime interception and workflow capture.
 Python API for recording AI workflows with cryptographic verification.
 """
 
-__version__ = "2.1.3"
+__version__ = "2.3.0"
 
 # Export Python API
 from epi_recorder.api import (
@@ -13,9 +13,21 @@ from epi_recorder.api import (
     get_current_session
 )
 
+# Export wrapper clients (new in v2.3.0)
+from epi_recorder.wrappers import (
+    wrap_openai,
+    TracedOpenAI,
+)
+
 __all__ = [
     "EpiRecorderSession",
     "record",
     "get_current_session",
+    "wrap_openai",
+    "TracedOpenAI",
     "__version__"
 ]
+
+
+
+
```
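With the version bump comes a wider top-level surface. A short sketch of the new imports; only the names in `__all__` are guaranteed here, and the call shape of `wrap_openai` (e.g. whether it wraps an existing client instance) is an assumption, since `epi_recorder/wrappers/openai.py` is not expanded in this view:

```python
from epi_recorder import __version__, record, wrap_openai

assert __version__ == "2.3.0"

with record("demo.epi") as epi:
    # client = wrap_openai(OpenAI())  # assumed wrapper-client call shape
    epi.log_chat(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello"}],
        response_content="Hi there!",
    )
```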
epi_recorder/api.py
CHANGED
```diff
@@ -54,6 +54,8 @@ class EpiRecorderSession:
         metrics: Optional[Dict[str, Union[float, str]]] = None,
         approved_by: Optional[str] = None,
         metadata_tags: Optional[List[str]] = None,  # Renamed to avoid conflict with tags parameter
+        # Legacy mode (deprecated)
+        legacy_patching: bool = False,
     ):
         """
         Initialize EPI recording session.
@@ -70,6 +72,7 @@ class EpiRecorderSession:
             metrics: Key-value metrics for this workflow (accuracy, latency, etc.)
             approved_by: Person or entity who approved this workflow execution
             metadata_tags: Tags for categorizing this workflow (renamed from tags to avoid conflict)
+            legacy_patching: Enable deprecated monkey patching mode (default: False)
         """
         self.output_path = Path(output_path)
         self.workflow_name = workflow_name or "untitled"
@@ -85,6 +88,9 @@ class EpiRecorderSession:
         self.approved_by = approved_by
         self.metadata_tags = metadata_tags
 
+        # Legacy mode flag (deprecated)
+        self.legacy_patching = legacy_patching
+
         # Runtime state
         self.temp_dir: Optional[Path] = None
         self.recording_context: Optional[RecordingContext] = None
@@ -117,9 +123,17 @@ class EpiRecorderSession:
         set_recording_context(self.recording_context)
         _thread_local.active_session = self
 
-        [3 lines truncated in the source diff view]
+        # Only patch LLM libraries if legacy mode is enabled (deprecated)
+        if self.legacy_patching:
+            import warnings
+            warnings.warn(
+                "legacy_patching is deprecated and will be removed in v3.0.0. "
+                "Use epi.log_llm_call() or wrapper clients (wrap_openai) instead.",
+                DeprecationWarning,
+                stacklevel=2
+            )
+            from epi_recorder.patcher import patch_all
+            patch_all()
 
         # Log session start
         self.log_step("session.start", {
@@ -176,6 +190,11 @@ class EpiRecorderSession:
             output_path=self.output_path
         )
 
+        # CRITICAL: Windows file system flush
+        # Allow OS to finalize file before signing
+        import time
+        time.sleep(0.1)
+
         # Sign if requested
         if self.auto_sign:
             self._sign_epi_file()
```
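Monkey patching is now opt-in and slated for removal. A sketch of re-enabling it during a migration window; this assumes `EpiRecorderSession` is itself the context manager that `record()` returns, and leaves every other constructor argument at its default:

```python
from epi_recorder import EpiRecorderSession

session = EpiRecorderSession(
    output_path="legacy.epi",
    legacy_patching=True,  # __enter__ emits DeprecationWarning; removal in v3.0.0
)
with session as epi:
    ...  # patch_all() has monkey-patched the supported LLM client libraries
```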
```diff
@@ -245,6 +264,172 @@ class EpiRecorderSession:
             **response_payload
         })
 
+    def log_llm_call(
+        self,
+        response: Any,
+        messages: Optional[List[Dict[str, str]]] = None,
+        provider: str = "auto"
+    ) -> None:
+        """
+        Log a complete LLM call (request + response) from any provider.
+
+        Auto-detects OpenAI, Anthropic, and Gemini response objects.
+        This is the RECOMMENDED way to log LLM calls without monkey patching.
+
+        Args:
+            response: The LLM response object (OpenAI, Anthropic, Gemini, etc.)
+            messages: Optional original messages (for request logging)
+            provider: Provider name ("auto" to detect, or "openai", "anthropic", etc.)
+
+        Example:
+            with record("my_agent.epi") as epi:
+                response = client.chat.completions.create(
+                    model="gpt-4",
+                    messages=[{"role": "user", "content": "Hello"}]
+                )
+                epi.log_llm_call(response, messages=[{"role": "user", "content": "Hello"}])
+        """
+        if not self._entered:
+            raise RuntimeError("Cannot log LLM call outside of context manager")
+
+        # Auto-detect provider and extract data
+        model = "unknown"
+        content = ""
+        usage = None
+        choices = []
+
+        # Try OpenAI format
+        if hasattr(response, "choices") and hasattr(response, "model"):
+            provider = "openai" if provider == "auto" else provider
+            model = getattr(response, "model", "unknown")
+
+            for choice in response.choices:
+                msg = choice.message
+                choices.append({
+                    "message": {
+                        "role": getattr(msg, "role", "assistant"),
+                        "content": getattr(msg, "content", ""),
+                    },
+                    "finish_reason": getattr(choice, "finish_reason", None),
+                })
+                if not content:
+                    content = getattr(msg, "content", "")
+
+            if hasattr(response, "usage") and response.usage:
+                usage = {
+                    "prompt_tokens": getattr(response.usage, "prompt_tokens", 0),
+                    "completion_tokens": getattr(response.usage, "completion_tokens", 0),
+                    "total_tokens": getattr(response.usage, "total_tokens", 0),
+                }
+
+        # Try Anthropic format
+        elif hasattr(response, "content") and hasattr(response, "model"):
+            provider = "anthropic" if provider == "auto" else provider
+            model = getattr(response, "model", "unknown")
+
+            # Anthropic returns content as a list of content blocks
+            content_blocks = getattr(response, "content", [])
+            if content_blocks and hasattr(content_blocks[0], "text"):
+                content = content_blocks[0].text
+            choices = [{"message": {"role": "assistant", "content": content}}]
+
+            if hasattr(response, "usage"):
+                usage = {
+                    "input_tokens": getattr(response.usage, "input_tokens", 0),
+                    "output_tokens": getattr(response.usage, "output_tokens", 0),
+                }
+
+        # Try Gemini format
+        elif hasattr(response, "text") and hasattr(response, "candidates"):
+            provider = "gemini" if provider == "auto" else provider
+            model = "gemini"
+            content = getattr(response, "text", "")
+            choices = [{"message": {"role": "assistant", "content": content}}]
+
+        # Fallback: try to extract as dict or string
+        else:
+            provider = provider if provider != "auto" else "unknown"
+            if isinstance(response, dict):
+                content = str(response.get("content", response))
+            else:
+                content = str(response)
+            choices = [{"message": {"role": "assistant", "content": content}}]
+
+        # Log request if messages provided
+        if messages:
+            self.log_step("llm.request", {
+                "provider": provider,
+                "model": model,
+                "messages": messages,
+                "timestamp": datetime.utcnow().isoformat(),
+            })
+
+        # Log response
+        response_data = {
+            "provider": provider,
+            "model": model,
+            "choices": choices,
+            "timestamp": datetime.utcnow().isoformat(),
+        }
+        if usage:
+            response_data["usage"] = usage
+
+        self.log_step("llm.response", response_data)
+
+    def log_chat(
+        self,
+        model: str,
+        messages: List[Dict[str, str]],
+        response_content: str,
+        provider: str = "custom",
+        usage: Optional[Dict[str, int]] = None,
+        **metadata
+    ) -> None:
+        """
+        Simplified logging for chat completions.
+
+        Use this when you have the raw data instead of response objects.
+
+        Args:
+            model: Model name (e.g., "gpt-4", "claude-3")
+            messages: The messages sent to the model
+            response_content: The assistant's response text
+            provider: Provider name (default: "custom")
+            usage: Optional token usage dict
+            **metadata: Additional metadata to include
+
+        Example:
+            epi.log_chat(
+                model="gpt-4",
+                messages=[{"role": "user", "content": "Hello"}],
+                response_content="Hi there!",
+                tokens=150
+            )
+        """
+        if not self._entered:
+            raise RuntimeError("Cannot log chat outside of context manager")
+
+        # Log request
+        self.log_step("llm.request", {
+            "provider": provider,
+            "model": model,
+            "messages": messages,
+            "timestamp": datetime.utcnow().isoformat(),
+            **metadata
+        })
+
+        # Log response
+        response_data = {
+            "provider": provider,
+            "model": model,
+            "choices": [{"message": {"role": "assistant", "content": response_content}}],
+            "timestamp": datetime.utcnow().isoformat(),
+        }
+        if usage:
+            response_data["usage"] = usage
+
+        self.log_step("llm.response", response_data)
+
     def log_artifact(
         self,
         file_path: Path,
```
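Because the detection above keys purely on attribute shape, both new methods can be exercised without any provider SDK installed. A runnable sketch hitting the dict fallback branch and the raw-data path:

```python
from epi_recorder import record

with record("demo.epi") as epi:
    # Fallback branch: a plain dict is stringified and logged with provider="unknown"
    epi.log_llm_call(
        {"content": "Hi there!"},
        messages=[{"role": "user", "content": "Hello"}],
    )

    # log_chat for raw data captured outside a response object
    epi.log_chat(
        model="my-local-model",
        messages=[{"role": "user", "content": "Hello"}],
        response_content="Hi there!",
        provider="custom",
        usage={"prompt_tokens": 3, "completion_tokens": 3},
    )
```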
```diff
@@ -355,7 +540,24 @@ class EpiRecorderSession:
             encoding="utf-8"
         )
 
-        [1 line truncated in the source diff view]
+        # Regenerate viewer.html with signed manifest
+        steps = []
+        steps_file = tmp_path / "steps.jsonl"
+        if steps_file.exists():
+            for line in steps_file.read_text(encoding="utf-8").strip().split("\n"):
+                if line:
+                    try:
+                        steps.append(json.loads(line))
+                    except json.JSONDecodeError:
+                        pass
+
+        # Regenerate viewer with signed manifest
+        from epi_core.container import EPIContainer
+        viewer_html = EPIContainer._create_embedded_viewer(tmp_path, signed_manifest)
+        viewer_path = tmp_path / "viewer.html"
+        viewer_path.write_text(viewer_html, encoding="utf-8")
+
+        # Repack the ZIP with signed manifest and updated viewer
         # CRITICAL: Write to temp file first to prevent data loss
         temp_output = self.output_path.with_suffix('.epi.tmp')
 
@@ -590,4 +792,8 @@ def get_current_session() -> Optional[EpiRecorderSession]:
     Returns:
         EpiRecorderSession or None
     """
-    return getattr(_thread_local, 'active_session', None)
+    return getattr(_thread_local, 'active_session', None)
+
+
+
+
```