agentstracer 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """AgentsTrace — Export and manage coding agent conversation data."""
2
+
3
+ __version__ = "1.0.0"
@@ -0,0 +1,112 @@
1
+ """Anonymize PII in Claude Code log data."""
2
+
3
+ import hashlib
4
+ import os
5
+ import re
6
+
7
+
8
def _hash_username(username: str) -> str:
    """Return a stable pseudonym for *username*.

    The pseudonym is ``user_`` followed by the first eight hex characters of
    the SHA-256 digest of the encoded username, so the same input always maps
    to the same output.
    """
    digest = hashlib.sha256(username.encode()).hexdigest()
    return f"user_{digest[:8]}"
10
+
11
+
12
def _detect_home_dir() -> tuple[str, str]:
    """Return ``(home, username)`` for the current user.

    ``home`` is the expansion of ``~``; ``username`` is the last path
    component of that directory.
    """
    home_dir = os.path.expanduser("~")
    return home_dir, os.path.basename(home_dir)
16
+
17
+
18
+ def anonymize_path(path: str, username: str, username_hash: str, home: str | None = None) -> str:
19
+ """Strip a path to project-relative and hash the username."""
20
+ if not path:
21
+ return path
22
+
23
+ if home is None:
24
+ home = os.path.expanduser("~")
25
+ prefixes = set()
26
+ for base in (f"/Users/{username}", f"/home/{username}", home):
27
+ for subdir in ("Documents", "Downloads", "Desktop"):
28
+ prefixes.add(f"{base}/{subdir}/")
29
+ prefixes.add(f"{base}/")
30
+
31
+ # Try longest prefixes first (subdirectory matches before bare home)
32
+ home_patterns = sorted(prefixes, key=len, reverse=True)
33
+
34
+ for prefix in home_patterns:
35
+ if path.startswith(prefix):
36
+ rest = path[len(prefix):]
37
+ if "/Documents/" in prefix or "/Downloads/" in prefix or "/Desktop/" in prefix:
38
+ return rest
39
+ return f"{username_hash}/{rest}"
40
+
41
+ path = path.replace(f"/Users/{username}/", f"/{username_hash}/")
42
+ path = path.replace(f"/home/{username}/", f"/{username_hash}/")
43
+
44
+ return path
45
+
46
+
47
def anonymize_text(text: str, username: str, username_hash: str) -> str:
    """Replace occurrences of *username* in free-form text with *username_hash*.

    Args:
        text: The text to scrub. Returned unchanged when empty.
        username: The account name to remove. When empty, ``text`` is
            returned unchanged.
        username_hash: The pseudonym substituted for ``username``.

    Returns:
        The text with these forms rewritten, in order:

        1. temp paths ``claude-<digits>/-Users-<username>`` (the digits are
           also normalized to ``XXX``);
        2. ``/Users/<username>`` and ``/home/<username>``;
        3. hyphen-encoded paths ``-Users-<username>`` / ``-home-<username>``,
           including the variant where ``_`` in the username appears as ``-``;
        4. any remaining whole-word occurrence of the username, but only for
           usernames of at least four characters.
    """
    if not text or not username:
        return text

    escaped = re.escape(username)

    # Hyphen-encoded paths may spell "_" as "-" (kaid_aiagent -> kaid-aiagent),
    # so those rules are applied to both spellings.
    encoded_names = [escaped]
    if "_" in username:
        encoded_names.append(re.escape(username.replace("_", "-")))

    # Temp paths like /private/tmp/claude-501/-Users-alice/ must be handled
    # BEFORE the generic "-Users-<name>" rule: once that rule has rewritten
    # the name, this pattern can no longer match and the numeric id would
    # survive. The lookahead prevents a partial match inside a longer name.
    for name in encoded_names:
        text = re.sub(
            rf"claude-\d+/-Users-{name}(?![A-Za-z0-9_])",
            f"claude-XXX/-Users-{username_hash}",
            text,
        )

    # Replace /Users/<username> and /home/<username>
    text = re.sub(rf"/Users/{escaped}(?=/|[^a-zA-Z0-9_-]|$)", f"/{username_hash}", text)
    text = re.sub(rf"/home/{escaped}(?=/|[^a-zA-Z0-9_-]|$)", f"/{username_hash}", text)

    # Catch hyphen-encoded paths: -Users-alice- or -Users-alice/
    for name in encoded_names:
        text = re.sub(rf"-Users-{name}(?=-|/|$)", f"-Users-{username_hash}", text)
        text = re.sub(rf"-home-{name}(?=-|/|$)", f"-home-{username_hash}", text)

    # Final pass: replace bare username in remaining contexts (ls output, prose, etc.)
    # Only if username is >= 4 chars to avoid false positives
    if len(username) >= 4:
        text = re.sub(rf"\b{escaped}\b", username_hash, text)

    return text
77
+
78
+
79
class Anonymizer:
    """Stateful anonymizer that consistently hashes usernames.

    On construction it detects the current user's home directory and
    username, and records any additional names (GitHub handles, Discord
    names, etc.) that should be scrubbed as well.
    """

    def __init__(self, extra_usernames: list[str] | None = None):
        """Detect the local user and prepare the extra-username table.

        Args:
            extra_usernames: Additional names to anonymize. Entries are
                stripped; blank entries, duplicates, and entries equal to the
                detected username are dropped.
        """
        self.home, self.username = _detect_home_dir()
        self.username_hash = _hash_username(self.username)

        # Additional usernames to anonymize (GitHub handles, Discord names, etc.)
        stripped = (name.strip() for name in (extra_usernames or []))
        # dict.fromkeys de-duplicates while preserving the caller's order.
        unique = list(dict.fromkeys(
            name for name in stripped if name and name != self.username
        ))
        # Extra names are replaced by plain substring match, so a short name
        # that is contained in a longer one ("bob" in "bobby") must run AFTER
        # the longer one; otherwise the longer name is corrupted and its
        # suffix leaks. The sort is stable, so equal-length names keep the
        # caller's order.
        unique.sort(key=len, reverse=True)
        self._extra: list[tuple[str, str]] = [
            (name, _hash_username(name)) for name in unique
        ]

    def _apply_extra(self, value: str) -> str:
        """Replace every configured extra username in *value*."""
        for name, hashed in self._extra:
            value = _replace_username(value, name, hashed)
        return value

    def path(self, file_path: str) -> str:
        """Anonymize a file path, then scrub any usernames left in the result."""
        result = anonymize_path(file_path, self.username, self.username_hash, self.home)
        result = anonymize_text(result, self.username, self.username_hash)
        return self._apply_extra(result)

    def text(self, content: str) -> str:
        """Anonymize free-form text for the detected user and all extra names."""
        result = anonymize_text(content, self.username, self.username_hash)
        return self._apply_extra(result)
105
+
106
+
107
def _replace_username(text: str, username: str, username_hash: str) -> str:
    """Replace every case-insensitive occurrence of *username* in *text*.

    The text is returned unchanged when it is empty, or when the username is
    empty or shorter than three characters.
    """
    if not text or not username:
        return text
    if len(username) < 3:
        return text
    pattern = re.compile(re.escape(username), re.IGNORECASE)
    return pattern.sub(username_hash, text)
@@ -0,0 +1,211 @@
1
+ """Shared backend detection and resolution for coding-agent CLIs.
2
+
3
+ Used by both the scoring pipeline and PII review to auto-detect whether
4
+ agentstrace is running under Claude Code, Codex, or OpenClaw and dispatch
5
+ to the corresponding automation CLI.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import shutil
12
+ import subprocess
13
+ from pathlib import Path
14
+
15
+
16
# Names of the coding-agent backends this module can resolve.
SUPPORTED_BACKENDS = ("claude", "codex", "openclaw")

# Accepted values for a backend selector: "auto" plus every supported backend.
BACKEND_CHOICES = ("auto",) + SUPPORTED_BACKENDS

# Backend name -> CLI executable. Each backend currently uses its own name.
BACKEND_COMMANDS: dict[str, str] = {name: name for name in SUPPORTED_BACKENDS}

# Backend name -> environment variables whose presence marks that backend.
BACKEND_ENV_MARKERS: dict[str, tuple[str, ...]] = dict(
    claude=("CLAUDECODE", "CLAUDE_CODE", "CLAUDECODE_SESSION_ID", "CLAUDE_PROJECT_DIR"),
    codex=("CODEX_THREAD_ID", "CODEX_SANDBOX", "CODEX_CI"),
    openclaw=("OPENCLAW_HOME", "OPENCLAW_STATE_DIR", "OPENCLAW_CONFIG_PATH"),
)

# Backend name -> process-name aliases looked for in the process tree.
BACKEND_COMMAND_ALIASES: dict[str, tuple[str, ...]] = {
    name: (name,) for name in SUPPORTED_BACKENDS
}
33
+
34
+
35
def _detect_current_agent_from_env(env: dict[str, str] | None = None) -> str | None:
    """Infer the current agent from environment variables.

    Returns the first backend in ``BACKEND_ENV_MARKERS`` that has at least one
    marker variable set to a non-empty value, or ``None`` when no marker is
    set. ``os.environ`` is consulted when *env* is ``None``.
    """
    environment = os.environ if env is None else env
    return next(
        (
            backend
            for backend, markers in BACKEND_ENV_MARKERS.items()
            if any(environment.get(marker) for marker in markers)
        ),
        None,
    )
43
+
44
+
45
def _get_process_field(pid: int, field: str) -> str:
    """Read one ``ps`` output field for *pid*.

    Runs ``ps -o<field>= -p <pid>`` with a two-second timeout and returns the
    stripped standard output. An empty string is returned when ``ps`` cannot
    be run, times out, or exits with a non-zero status.
    """
    argv = ["ps", f"-o{field}=", "-p", str(pid)]
    try:
        result = subprocess.run(argv, capture_output=True, text=True, timeout=2)
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        return ""
    return result.stdout.strip() if result.returncode == 0 else ""
59
+
60
+
61
def _classify_process_command(comm: str, command: str) -> str | None:
    """Map a process's name and command line to a supported backend.

    *comm* and *command* are joined and lower-cased. A backend matches when
    one of its aliases equals the base name of *comm*, or appears in the
    joined text immediately after a space, the start of the text, or a ``/``.
    Returns ``None`` when both inputs are empty or nothing matches.
    """
    haystack = " ".join(filter(None, (comm, command))).lower()
    if not haystack:
        return None

    executable = Path(comm).name.lower() if comm else ""
    # Leading space lets an alias at the very start match the " alias" test.
    padded = f" {haystack}"

    def _mentions(alias: str) -> bool:
        return (
            executable == alias
            or f" {alias}" in padded
            or f"/{alias}" in haystack
        )

    for backend, aliases in BACKEND_COMMAND_ALIASES.items():
        if any(_mentions(alias) for alias in aliases):
            return backend
    return None
72
+
73
+
74
def _detect_current_agent_from_process_tree(pid: int | None = None, *, max_depth: int = 6) -> str | None:
    """Walk up the process tree looking for a known coding-agent CLI.

    Starts at *pid* (the parent of the current process when ``None``) and
    inspects at most *max_depth* processes. The walk stops when it reaches
    pid 1 or below, revisits a pid, or cannot parse a parent pid. Returns the
    first backend recognized, or ``None``.
    """
    current = os.getppid() if pid is None else pid
    visited: set[int] = set()

    for _ in range(max_depth):
        if current <= 1 or current in visited:
            break
        visited.add(current)

        comm = _get_process_field(current, "comm")
        command = _get_process_field(current, "command")
        backend = _classify_process_command(comm, command)
        if backend:
            return backend

        parent_text = _get_process_field(current, "ppid")
        try:
            current = int(parent_text)
        except ValueError:
            # Empty or unparsable parent pid: nothing further to follow.
            break

    return None
94
+
95
+
96
def detect_current_agent(env: dict[str, str] | None = None) -> str | None:
    """Detect the current coding agent.

    Environment markers are checked first; the process tree is walked only
    when they yield nothing.
    """
    from_env = _detect_current_agent_from_env(env)
    if from_env:
        return from_env
    return _detect_current_agent_from_process_tree()
99
+
100
+
101
def resolve_backend(backend: str = "auto", env: dict[str, str] | None = None) -> str:
    """Resolve a backend selector to a concrete backend name.

    Priority: explicit value > AGENTSTRACE_SCORER_BACKEND env > auto-detect.

    Raises:
        RuntimeError: If the explicit value or the environment override is
            not a supported backend, or if auto-detection finds nothing.
    """
    env = os.environ if env is None else env
    requested = (backend or "auto").strip().lower()

    # 1. An explicit, non-"auto" request wins outright.
    if requested != "auto":
        if requested in SUPPORTED_BACKENDS:
            return requested
        raise RuntimeError(f"Unsupported backend: {backend}")

    # 2. Environment override.
    override = (env.get("AGENTSTRACE_SCORER_BACKEND") or "").strip().lower()
    if override in SUPPORTED_BACKENDS:
        return override
    if override:
        raise RuntimeError(
            f"Unsupported AGENTSTRACE_SCORER_BACKEND value: {override}. "
            f"Use one of: {', '.join(SUPPORTED_BACKENDS)}."
        )

    # 3. Auto-detection.
    detected = detect_current_agent(env)
    if not detected:
        raise RuntimeError(
            "Could not detect the current agent. "
            "Run agentstrace from a supported agent CLI, set AGENTSTRACE_SCORER_BACKEND, "
            "or pass --backend explicitly."
        )
    return detected
131
+
132
+
133
def require_backend_command(backend: str) -> str:
    """Return the CLI command for *backend*, ensuring it is on ``PATH``.

    Raises:
        KeyError: If *backend* has no entry in ``BACKEND_COMMANDS``.
        RuntimeError: If the command cannot be found by ``shutil.which``.
    """
    executable = BACKEND_COMMANDS[backend]
    if shutil.which(executable) is not None:
        return executable
    raise RuntimeError(f"{backend} CLI not found. Install it or choose a different --backend.")
139
+
140
+
141
def check_backend_runtime(backend: str, env: dict[str, str] | None = None) -> None:
    """Backend-specific runtime preflight hook.

    Currently a no-op: both arguments are accepted and ignored.
    """
    del backend, env  # intentionally unused
    return None
144
+
145
+
146
def summarize_process_error(stderr: str, stdout: str = "") -> str:
    """Return the most actionable error line from subprocess output.

    Lines from *stderr* then *stdout* are stripped; blank lines and a few
    known noise lines (PATH-update warning, Rust backtrace hint, ``thread '``
    panic headers) are discarded. The last remaining line that looks like an
    error is returned; failing that, the last remaining line; failing that,
    an empty string.
    """
    noise_prefixes = (
        "WARNING: proceeding, even though we could not update PATH",
        "note: run with `RUST_BACKTRACE=1`",
        "thread '",
    )
    stripped = (raw.strip() for raw in f"{stderr}\n{stdout}".splitlines())
    candidates = [
        line for line in stripped
        if line and not line.startswith(noise_prefixes)
    ]
    if not candidates:
        return ""

    def _looks_like_error(line: str) -> bool:
        lowered = line.lower()
        return (
            lowered.startswith("error:")
            or " error " in lowered
            or "failed" in lowered
            or "unauthorized" in lowered
        )

    # Scan from the end so the most recent error-looking line wins.
    return next(
        (line for line in reversed(candidates) if _looks_like_error(line)),
        candidates[-1],
    )
175
+
176
+
177
def format_codex_runtime_error(returncode: int, stderr: str, stdout: str = "") -> str:
    """Normalize common Codex exec failures into actionable guidance.

    The combined, lower-cased output is checked first for network-resolution
    failures, then for authentication failures; each has a fixed guidance
    message. Otherwise the result is ``codex exited <returncode>``, followed
    by the summarized error line when one is available.
    """
    pieces = [chunk.strip() for chunk in (stderr, stdout) if chunk and chunk.strip()]
    haystack = "\n".join(pieces).lower()

    network_markers = (
        "failed to lookup address information",
        "temporary failure in name resolution",
        "name or service not known",
        "network is unreachable",
        "could not resolve host",
    )
    if any(marker in haystack for marker in network_markers):
        return (
            "Codex runs through `codex exec` in non-interactive mode. "
            "This process could not reach the Codex backend from the current environment. "
            "If you launched agentstrace inside a network-disabled Codex sandbox, "
            "rerun it from your host shell or with network access."
        )

    auth_markers = ("401", "unauthorized", "not signed in", "authentication required")
    if any(marker in haystack for marker in auth_markers):
        return (
            "Codex runs through `codex exec` in non-interactive mode. "
            "`codex exec` reuses saved CLI authentication by default; for automation, "
            "run `codex login` or set `CODEX_API_KEY` before running agentstrace."
        )

    detail = summarize_process_error(stderr, stdout)
    prefix = f"codex exited {returncode}"
    return f"{prefix}: {detail}" if detail else prefix