epi-recorder 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,216 @@
1
+ """
2
+ EPI Recorder Environment - Capture execution environment details.
3
+
4
+ Records OS, Python version, dependencies, and environment variables
5
+ for reproducibility verification.
6
+ """
7
+
8
+ import os
9
+ import platform
10
+ import sys
11
+ import json
12
+ from pathlib import Path
13
+ from typing import Dict, Any, List, Optional
14
+ import importlib.metadata
15
+
16
+
17
def capture_os_info() -> Dict[str, str]:
    """Return a snapshot of the host operating system.

    Returns:
        dict: OS details (system, release, version, machine,
            processor, platform).
    """
    info = {
        "system": platform.system(),
        "release": platform.release(),
        "version": platform.version(),
        "machine": platform.machine(),
        "platform": platform.platform(),
    }
    # platform.processor() can return an empty string on some systems;
    # substitute a sentinel so the value is never falsy.
    info["processor"] = platform.processor() or "Unknown"
    return info
32
+
33
+
34
def capture_python_info() -> Dict[str, str]:
    """Describe the running Python interpreter.

    Returns:
        dict: Interpreter version, implementation, compiler, and the
            path of the executable.
    """
    return dict(
        version=platform.python_version(),
        implementation=platform.python_implementation(),
        compiler=platform.python_compiler(),
        executable=sys.executable,
    )
47
+
48
+
49
def capture_installed_packages() -> Dict[str, str]:
    """Capture installed Python packages and their versions.

    Primary source is ``importlib.metadata``; if that fails for any
    reason, falls back to ``pip list --format=json``. Errors in the
    fallback are swallowed deliberately: an incomplete package list is
    preferable to aborting the environment snapshot.

    Returns:
        dict: Package name -> version string.
    """
    packages: Dict[str, str] = {}

    try:
        for dist in importlib.metadata.distributions():
            # Fixed: ``dist.name`` only exists on Python >= 3.10 and can be
            # None for distributions with broken metadata; read the
            # metadata field directly and skip nameless entries.
            name = dist.metadata["Name"]
            if name:
                packages[name] = dist.version
    except Exception:
        # Fallback: shell out to pip (argument list, no shell).
        try:
            import subprocess
            result = subprocess.run(
                [sys.executable, "-m", "pip", "list", "--format=json"],
                capture_output=True,
                text=True,
                timeout=10,
            )
            if result.returncode == 0:
                for pkg in json.loads(result.stdout):
                    packages[pkg["name"]] = pkg["version"]
        except Exception:
            pass  # Best-effort: fail silently rather than abort the snapshot.

    return packages
80
+
81
+
82
def capture_environment_variables(
    include_all: bool = False,
    redact: bool = True
) -> Dict[str, str]:
    """Capture environment variables.

    Args:
        include_all: Include every env var (default: False, only a
            known-safe allowlist).
        redact: Replace values of sensitive-looking variables with
            ``***REDACTED***`` (default: True).

    Returns:
        dict: Environment variable name -> value.
    """
    # Allowlist captured when include_all is False.
    SAFE_ENV_VARS = {
        "PATH",
        "PYTHONPATH",
        "HOME",
        "USER",
        "USERNAME",
        "SHELL",
        "LANG",
        "LC_ALL",
        "TERM",
        "PWD",
        "VIRTUAL_ENV",
        "CONDA_DEFAULT_ENV",
    }

    # Substrings that mark a variable name as sensitive.
    SENSITIVE_PATTERNS = {
        "KEY", "SECRET", "TOKEN", "PASSWORD", "PASS",
        "API", "AUTH", "CREDENTIAL", "ACCESS"
    }

    def _looks_sensitive(name: str) -> bool:
        """True when the (upper-cased) name contains any sensitive marker."""
        upper = name.upper()
        return any(marker in upper for marker in SENSITIVE_PATTERNS)

    captured: Dict[str, str] = {}
    for name, value in os.environ.items():
        # Policy gate: outside include_all mode, only allowlisted names pass.
        if not include_all and name not in SAFE_ENV_VARS:
            continue
        captured[name] = (
            "***REDACTED***" if redact and _looks_sensitive(name) else value
        )

    return captured
132
+
133
+
134
def capture_working_directory() -> Dict[str, Any]:
    """Capture current working directory information.

    Returns:
        dict: ``path`` and ``absolute`` as strings, plus an ``exists``
            bool. ``Path.cwd()`` is already absolute, so the two path
            entries normally coincide; both keys are kept for backward
            compatibility.
    """
    cwd = Path.cwd()
    return {
        "path": str(cwd),
        "absolute": str(cwd.absolute()),
        # Fixed: the return annotation previously claimed Dict[str, str],
        # but this entry is (and remains) a bool.
        "exists": cwd.exists(),
    }
147
+
148
+
149
def capture_full_environment(
    include_all_env_vars: bool = False,
    redact_env_vars: bool = True
) -> Dict[str, Any]:
    """Capture complete environment snapshot.

    Combines OS, interpreter, package, env-var, and working-directory
    captures into a single dict.

    Args:
        include_all_env_vars: Include every environment variable.
        redact_env_vars: Redact sensitive-looking env vars.

    Returns:
        dict: Complete environment snapshot.
    """
    env_vars = capture_environment_variables(
        include_all=include_all_env_vars,
        redact=redact_env_vars,
    )
    snapshot: Dict[str, Any] = {
        "os": capture_os_info(),
        "python": capture_python_info(),
        "packages": capture_installed_packages(),
        "environment_variables": env_vars,
        "working_directory": capture_working_directory(),
    }
    return snapshot
173
+
174
+
175
def save_environment_snapshot(
    output_path: Path,
    include_all_env_vars: bool = False,
    redact_env_vars: bool = True
) -> None:
    """Save environment snapshot to JSON file.

    Args:
        output_path: Path (or path string) where env.json should be saved.
        include_all_env_vars: Whether to include all environment variables.
        redact_env_vars: Whether to redact sensitive env vars.
    """
    # Backward-compatible generalization: accept plain strings as well as
    # Path objects (Path(Path(...)) is a no-op).
    output_path = Path(output_path)

    environment = capture_full_environment(
        include_all_env_vars=include_all_env_vars,
        redact_env_vars=redact_env_vars
    )

    # Ensure the output directory exists before writing.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Sorted keys keep snapshots diffable across runs.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(environment, f, indent=2, sort_keys=True)
199
+
200
+
201
def get_environment_summary() -> str:
    """Render a short human-readable summary of the captured environment.

    Returns:
        str: Four lines covering OS, Python, package count, and cwd.
    """
    env = capture_full_environment()
    return "\n".join([
        f"OS: {env['os']['system']} {env['os']['release']}",
        f"Python: {env['python']['version']} ({env['python']['implementation']})",
        f"Packages: {len(env['packages'])} installed",
        f"Working Directory: {env['working_directory']['path']}",
    ])
@@ -0,0 +1,356 @@
1
+ """
2
+ EPI Recorder Patcher - Runtime interception of LLM API calls.
3
+
4
+ Provides transparent monkey-patching for OpenAI and other LLM providers
5
+ to capture requests and responses for workflow recording.
6
+ """
7
+
8
+ import json
9
+ import time
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from typing import Any, Callable, Dict, List, Optional
13
+ from functools import wraps
14
+
15
+ from epi_core.schemas import StepModel
16
+ from epi_core.redactor import get_default_redactor
17
+
18
+
19
class RecordingContext:
    """Recording context for capturing LLM calls.

    Collects ``StepModel`` entries during a recording session, appending
    each one to ``steps.jsonl`` in ``output_dir`` as it arrives and
    mirroring them in memory.

    NOTE(review): access is NOT synchronized — concurrent callers would
    interleave step indices. The original docstring claimed thread-safe
    access; confirm before relying on it.
    """

    def __init__(self, output_dir: Path, enable_redaction: bool = True):
        """Initialize recording context.

        Args:
            output_dir: Directory where steps.jsonl will be written.
            enable_redaction: Whether to redact secrets (default: True).
        """
        self.output_dir = output_dir
        self.steps: List[StepModel] = []
        self.step_index = 0
        self.enable_redaction = enable_redaction
        self.redactor = get_default_redactor() if enable_redaction else None

        # Make sure the directory and an (initially empty) steps file
        # exist before the first step arrives.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.steps_file = self.output_dir / "steps.jsonl"
        self.steps_file.touch()

    def add_step(self, kind: str, content: Dict[str, Any]) -> None:
        """Record one step, redacting secrets first when enabled.

        Args:
            kind: Step type (e.g., "llm.request", "llm.response").
            content: Step content data.
        """
        if self.redactor:
            redacted_content, redaction_count = self.redactor.redact(content)

            # Emit a marker step whenever secrets were scrubbed so the
            # recording documents that redaction occurred.
            if redaction_count > 0:
                self._record(StepModel(
                    index=self.step_index,
                    # NOTE(review): utcnow() is deprecated in Python 3.12;
                    # switching to datetime.now(timezone.utc) would make
                    # timestamps tz-aware and change serialized output —
                    # confirm against StepModel consumers before changing.
                    timestamp=datetime.utcnow(),
                    kind="security.redaction",
                    content={
                        "count": redaction_count,
                        "target_step": kind
                    }
                ))

            content = redacted_content

        self._record(StepModel(
            index=self.step_index,
            timestamp=datetime.utcnow(),
            kind=kind,
            content=content
        ))

    def _record(self, step: StepModel) -> None:
        """Persist a step, mirror it in memory, and advance the index.

        Fixes an inconsistency in the original code: redaction steps were
        written to steps.jsonl but never appended to ``self.steps``, so
        the in-memory list disagreed with the file.
        """
        self._write_step(step)
        self.steps.append(step)
        self.step_index += 1

    def _write_step(self, step: StepModel) -> None:
        """Append one step as a JSON line to steps.jsonl."""
        with open(self.steps_file, 'a', encoding='utf-8') as f:
            f.write(step.model_dump_json() + '\n')
94
+
95
+
96
+ # Global recording context (set by epi record command)
97
+ _recording_context: Optional[RecordingContext] = None
98
+
99
+
100
def set_recording_context(context: RecordingContext) -> None:
    """Install *context* as the process-wide recording context.

    Subsequent calls to ``is_recording()`` return True and patched LLM
    calls will log steps into this context.
    """
    global _recording_context
    _recording_context = context
104
+
105
+
106
def get_recording_context() -> Optional[RecordingContext]:
    """Return the active recording context, or None when not recording."""
    return _recording_context
109
+
110
+
111
def is_recording() -> bool:
    """True when a global recording context has been installed."""
    return _recording_context is not None
114
+
115
+
116
+ # ==================== OpenAI Patcher ====================
117
+
118
def patch_openai() -> bool:
    """Patch the installed OpenAI library to intercept API calls.

    Dispatches to the v1+ (client-based) or legacy (<1.0) patcher based
    on the installed library's major version.

    Returns:
        bool: True if patching succeeded, False otherwise (including
            when the openai package is not installed).
    """
    try:
        import openai

        # Dispatch on major version: the API surface changed completely
        # between 0.x and 1.x. (Removed an unused `from openai import
        # OpenAI` present in the original.)
        major_version = int(openai.__version__.split('.')[0])

        if major_version >= 1:
            # OpenAI >= 1.0 (new client-based API)
            return _patch_openai_v1()
        # OpenAI < 1.0 (legacy API)
        return _patch_openai_legacy()

    except ImportError:
        # OpenAI not installed: nothing to patch.
        return False
    except Exception as e:
        # e.g. an unparseable __version__ string; degrade gracefully.
        print(f"Warning: Failed to patch OpenAI: {e}")
        return False
146
+
147
+
148
+ def _patch_openai_v1() -> bool:
149
+ """
150
+ Patch OpenAI v1+ (client-based API).
151
+
152
+ Patches the chat.completions.create method.
153
+ """
154
+ try:
155
+ from openai import OpenAI
156
+ from openai.resources.chat import completions
157
+
158
+ # Store original method
159
+ original_create = completions.Completions.create
160
+
161
+ @wraps(original_create)
162
+ def wrapped_create(self, *args, **kwargs):
163
+ """Wrapped OpenAI chat completion with recording."""
164
+
165
+ # Only record if context is active
166
+ if not is_recording():
167
+ return original_create(self, *args, **kwargs)
168
+
169
+ context = get_recording_context()
170
+ start_time = time.time()
171
+
172
+ # Capture request
173
+ request_data = {
174
+ "provider": "openai",
175
+ "method": "chat.completions.create",
176
+ "model": kwargs.get("model", args[0] if args else None),
177
+ "messages": kwargs.get("messages", args[1] if len(args) > 1 else None),
178
+ "temperature": kwargs.get("temperature"),
179
+ "max_tokens": kwargs.get("max_tokens"),
180
+ "top_p": kwargs.get("top_p"),
181
+ "frequency_penalty": kwargs.get("frequency_penalty"),
182
+ "presence_penalty": kwargs.get("presence_penalty"),
183
+ }
184
+
185
+ # Remove None values
186
+ request_data = {k: v for k, v in request_data.items() if v is not None}
187
+
188
+ # Log request step
189
+ context.add_step("llm.request", request_data)
190
+
191
+ # Execute original call
192
+ try:
193
+ response = original_create(self, *args, **kwargs)
194
+ elapsed = time.time() - start_time
195
+
196
+ # Capture response
197
+ response_data = {
198
+ "provider": "openai",
199
+ "model": response.model,
200
+ "choices": [
201
+ {
202
+ "message": {
203
+ "role": choice.message.role,
204
+ "content": choice.message.content
205
+ },
206
+ "finish_reason": choice.finish_reason
207
+ }
208
+ for choice in response.choices
209
+ ],
210
+ "usage": {
211
+ "prompt_tokens": response.usage.prompt_tokens,
212
+ "completion_tokens": response.usage.completion_tokens,
213
+ "total_tokens": response.usage.total_tokens
214
+ } if response.usage else None,
215
+ "latency_seconds": round(elapsed, 3)
216
+ }
217
+
218
+ # Log response step
219
+ context.add_step("llm.response", response_data)
220
+
221
+ return response
222
+
223
+ except Exception as e:
224
+ # Log error step
225
+ context.add_step("llm.error", {
226
+ "provider": "openai",
227
+ "error": str(e),
228
+ "error_type": type(e).__name__
229
+ })
230
+ raise
231
+
232
+ # Apply patch
233
+ completions.Completions.create = wrapped_create
234
+
235
+ return True
236
+
237
+ except Exception as e:
238
+ print(f"Warning: Failed to patch OpenAI v1: {e}")
239
+ return False
240
+
241
+
242
+ def _patch_openai_legacy() -> bool:
243
+ """
244
+ Patch OpenAI < 1.0 (legacy API).
245
+
246
+ Patches openai.ChatCompletion.create method.
247
+ """
248
+ try:
249
+ import openai
250
+
251
+ # Store original method
252
+ original_create = openai.ChatCompletion.create
253
+
254
+ @wraps(original_create)
255
+ def wrapped_create(*args, **kwargs):
256
+ """Wrapped OpenAI chat completion (legacy) with recording."""
257
+
258
+ # Only record if context is active
259
+ if not is_recording():
260
+ return original_create(*args, **kwargs)
261
+
262
+ context = get_recording_context()
263
+ start_time = time.time()
264
+
265
+ # Capture request
266
+ request_data = {
267
+ "provider": "openai",
268
+ "method": "ChatCompletion.create",
269
+ "model": kwargs.get("model"),
270
+ "messages": kwargs.get("messages"),
271
+ "temperature": kwargs.get("temperature"),
272
+ "max_tokens": kwargs.get("max_tokens"),
273
+ }
274
+
275
+ # Remove None values
276
+ request_data = {k: v for k, v in request_data.items() if v is not None}
277
+
278
+ # Log request step
279
+ context.add_step("llm.request", request_data)
280
+
281
+ # Execute original call
282
+ try:
283
+ response = original_create(*args, **kwargs)
284
+ elapsed = time.time() - start_time
285
+
286
+ # Capture response
287
+ response_data = {
288
+ "provider": "openai",
289
+ "model": response.model,
290
+ "choices": [
291
+ {
292
+ "message": {
293
+ "role": choice.message.role,
294
+ "content": choice.message.content
295
+ },
296
+ "finish_reason": choice.finish_reason
297
+ }
298
+ for choice in response.choices
299
+ ],
300
+ "usage": dict(response.usage) if hasattr(response, 'usage') else None,
301
+ "latency_seconds": round(elapsed, 3)
302
+ }
303
+
304
+ # Log response step
305
+ context.add_step("llm.response", response_data)
306
+
307
+ return response
308
+
309
+ except Exception as e:
310
+ # Log error step
311
+ context.add_step("llm.error", {
312
+ "provider": "openai",
313
+ "error": str(e),
314
+ "error_type": type(e).__name__
315
+ })
316
+ raise
317
+
318
+ # Apply patch
319
+ openai.ChatCompletion.create = wrapped_create
320
+
321
+ return True
322
+
323
+ except Exception as e:
324
+ print(f"Warning: Failed to patch OpenAI legacy: {e}")
325
+ return False
326
+
327
+
328
def patch_all() -> Dict[str, bool]:
    """Apply patches for every supported LLM provider.

    Returns:
        dict: Provider name -> whether patching succeeded.
    """
    # Future: Anthropic, Gemini, etc. will be added here, e.g.
    #   results["anthropic"] = patch_anthropic()
    #   results["gemini"] = patch_gemini()
    results: Dict[str, bool] = {"openai": patch_openai()}
    return results
345
+
346
+
347
def unpatch_all() -> None:
    """Restore original (unpatched) provider methods.

    Placeholder: the MVP never unpatches. A full implementation would
    keep references to the original methods at patch time and reassign
    them here.
    """
    return None