agentstracer 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentstrace/__init__.py +3 -0
- agentstrace/anonymizer.py +112 -0
- agentstrace/backends.py +211 -0
- agentstrace/badges.py +678 -0
- agentstrace/card.py +217 -0
- agentstrace/cli.py +3849 -0
- agentstrace/config.py +69 -0
- agentstrace/convert_to_training_data.py +330 -0
- agentstrace/daemon.py +969 -0
- agentstrace/depth.py +437 -0
- agentstrace/index.py +1159 -0
- agentstrace/parser.py +2045 -0
- agentstrace/pii.py +729 -0
- agentstrace/scoring.py +885 -0
- agentstrace/secrets.py +551 -0
- agentstrace/segmenter.py +580 -0
- agentstrace/web/frontend/node_modules/flatted/python/flatted.py +144 -0
- agentstracer-1.0.0.dist-info/METADATA +170 -0
- agentstracer-1.0.0.dist-info/RECORD +23 -0
- agentstracer-1.0.0.dist-info/WHEEL +5 -0
- agentstracer-1.0.0.dist-info/entry_points.txt +2 -0
- agentstracer-1.0.0.dist-info/licenses/LICENSE +22 -0
- agentstracer-1.0.0.dist-info/top_level.txt +1 -0
agentstrace/__init__.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Anonymize PII in Claude Code log data."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _hash_username(username: str) -> str:
    """Return a stable pseudonym for *username*.

    The pseudonym is ``"user_"`` followed by the first eight hex digits of the
    SHA-256 digest of the encoded name, so equal names always map to the same
    token.
    """
    digest = hashlib.sha256(username.encode()).hexdigest()
    return f"user_{digest[:8]}"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _detect_home_dir() -> tuple[str, str]:
    """Return ``(home, username)`` for the current user.

    ``home`` is the expansion of ``~`` and ``username`` is simply the final
    path component of that directory.
    """
    home_dir = os.path.expanduser("~")
    return home_dir, os.path.basename(home_dir)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def anonymize_path(path: str, username: str, username_hash: str, home: str | None = None) -> str:
    """Strip a path to project-relative form and hash the username.

    Args:
        path: File path to anonymize. Falsy values are returned unchanged.
        username: Account name that must not appear in the result.
        username_hash: Replacement token for ``username``.
        home: Home directory to treat as a prefix in addition to
            ``/Users/<username>`` and ``/home/<username>``. Defaults to the
            expansion of ``~``.

    Returns:
        * the remainder of the path when it lies under a home
          ``Documents/``, ``Downloads/`` or ``Desktop/`` directory;
        * ``"<username_hash>/<remainder>"`` when it lies elsewhere under a
          home directory;
        * ``username_hash`` when the path *is* a home directory;
        * otherwise the path with any embedded ``/Users/<username>/`` or
          ``/home/<username>/`` segment rewritten to ``/<username_hash>/``.
    """
    if not path:
        return path

    if home is None:
        home = os.path.expanduser("~")

    bases = (f"/Users/{username}", f"/home/{username}", home)

    # A path that is exactly a home directory has no trailing slash, so none
    # of the slash-terminated prefixes below can match it and the username
    # would otherwise be returned verbatim.
    home_dirs = {home}
    if username:
        home_dirs.update(bases[:2])
    home_dirs -= {"", "/"}
    if path in home_dirs:
        return username_hash

    # Map every candidate prefix to "is this a well-known subdirectory?".
    # Recording the flag explicitly (instead of searching the prefix text for
    # "/Documents/" etc.) keeps a home directory that merely *contains* such a
    # segment from being mistaken for a subdirectory match.
    prefixes: dict[str, bool] = {}
    for base in bases:
        for subdir in ("Documents", "Downloads", "Desktop"):
            prefixes[f"{base}/{subdir}/"] = True
    for base in bases:
        prefixes.setdefault(f"{base}/", False)

    # Longest prefix first so a subdirectory match wins over the bare home.
    for prefix in sorted(prefixes, key=len, reverse=True):
        if path.startswith(prefix):
            rest = path[len(prefix):]
            return rest if prefixes[prefix] else f"{username_hash}/{rest}"

    # Not rooted in a home directory: still scrub home paths embedded deeper
    # inside the string.
    for root in ("/Users", "/home"):
        path = path.replace(f"{root}/{username}/", f"/{username_hash}/")
    return path
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def anonymize_text(text: str, username: str, username_hash: str) -> str:
    """Replace occurrences of *username* in free-form text with *username_hash*.

    The passes run in this order:

    1. ``/Users/<username>`` and ``/home/<username>`` become
       ``/<username_hash>``.
    2. Temp paths such as ``claude-501/-Users-<username>`` become
       ``claude-XXX/-Users-<username_hash>``.
    3. Hyphen-encoded paths (``-Users-<username>``, ``-home-<username>``),
       including the variant where underscores in the name were encoded as
       hyphens, keep their prefix and get the hashed name.
    4. For usernames of at least four characters, any remaining
       whole-word occurrence is replaced.

    Args:
        text: Text to scrub. Falsy values are returned unchanged.
        username: Account name to hide. If empty, ``text`` is returned as-is.
        username_hash: Replacement token; inserted verbatim.

    Returns:
        The scrubbed text.
    """
    if not text or not username:
        return text

    def replace(pattern: str, replacement: str, subject: str) -> str:
        # A callable replacement is inserted literally, so a hash containing a
        # backslash or group reference can never be misread as a template.
        return re.sub(pattern, lambda _match: replacement, subject)

    escaped = re.escape(username)

    # Replace /Users/<username> and /home/<username>
    path_boundary = r"(?=/|[^a-zA-Z0-9_-]|$)"
    for root in ("Users", "home"):
        text = replace(rf"/{root}/{escaped}{path_boundary}", f"/{username_hash}", text)

    # Catch temp paths like /private/tmp/claude-501/-Users-peteromalley/.
    # This must run before the generic hyphen-encoded pass: that pass rewrites
    # "-Users-<username>" and would leave nothing for this pattern to match,
    # so the numeric suffix after "claude-" would never be masked. The
    # negative lookahead keeps a longer name that merely starts with
    # <username> from being half-replaced.
    text = replace(
        rf"claude-\d+/-Users-{escaped}(?![A-Za-z0-9_])",
        f"claude-XXX/-Users-{username_hash}",
        text,
    )

    # Catch hyphen-encoded paths: -Users-peteromalley- or -Users-peteromalley/
    # Also handle underscore-to-hyphen encoding: kaid_aiagent -> kaid-aiagent
    encoded_names = [escaped]
    if "_" in username:
        encoded_names.append(re.escape(username.replace("_", "-")))
    for name in encoded_names:
        for root in ("Users", "home"):
            text = replace(rf"-{root}-{name}(?=-|/|$)", f"-{root}-{username_hash}", text)

    # Final pass: replace bare username in remaining contexts (ls output,
    # prose, etc.). Only if username is >= 4 chars to avoid false positives.
    if len(username) >= 4:
        text = replace(rf"\b{escaped}\b", username_hash, text)

    return text
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class Anonymizer:
    """Stateful anonymizer that consistently hashes usernames."""

    def __init__(self, extra_usernames: list[str] | None = None):
        """Detect the local user and register any additional names to hide.

        Args:
            extra_usernames: Additional usernames to anonymize (GitHub
                handles, Discord names, etc.). Entries are stripped; blank
                entries and entries equal to the detected username are
                ignored.
        """
        self.home, self.username = _detect_home_dir()
        self.username_hash = _hash_username(self.username)

        stripped_names = (raw.strip() for raw in (extra_usernames or []))
        self._extra: list[tuple[str, str]] = [
            (candidate, _hash_username(candidate))
            for candidate in stripped_names
            if candidate and candidate != self.username
        ]

    def _apply_extra(self, value: str) -> str:
        """Run every registered extra username replacement over *value*."""
        for extra_name, extra_hash in self._extra:
            value = _replace_username(value, extra_name, extra_hash)
        return value

    def path(self, file_path: str) -> str:
        """Anonymize a file path, then scrub any usernames left in it."""
        shortened = anonymize_path(file_path, self.username, self.username_hash, self.home)
        scrubbed = anonymize_text(shortened, self.username, self.username_hash)
        return self._apply_extra(scrubbed)

    def text(self, content: str) -> str:
        """Anonymize free-form text for the detected and extra usernames."""
        scrubbed = anonymize_text(content, self.username, self.username_hash)
        return self._apply_extra(scrubbed)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _replace_username(text: str, username: str, username_hash: str) -> str:
    """Replace every case-insensitive occurrence of *username* in *text*.

    Matching is plain substring matching with no word boundaries. The text is
    returned untouched when it is empty, or when the username is empty or
    shorter than three characters.
    """
    if not (text and username) or len(username) < 3:
        return text
    matcher = re.compile(re.escape(username), flags=re.IGNORECASE)
    return matcher.sub(username_hash, text)
|
agentstrace/backends.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""Shared backend detection and resolution for coding-agent CLIs.
|
|
2
|
+
|
|
3
|
+
Used by both the scoring pipeline and PII review to auto-detect whether
|
|
4
|
+
agentstrace is running under Claude Code, Codex, or OpenClaw and dispatch
|
|
5
|
+
to the corresponding automation CLI.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import shutil
|
|
12
|
+
import subprocess
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Concrete backend names understood by this module.
SUPPORTED_BACKENDS = ("claude", "codex", "openclaw")

# Selectable values: the literal "auto" followed by every supported backend.
BACKEND_CHOICES = ("auto",) + SUPPORTED_BACKENDS

# Executable name per backend (currently identical to the backend name).
BACKEND_COMMANDS: dict[str, str] = {name: name for name in SUPPORTED_BACKENDS}

# Environment variables checked, per backend, when inferring the current agent.
BACKEND_ENV_MARKERS: dict[str, tuple[str, ...]] = dict(
    claude=("CLAUDECODE", "CLAUDE_CODE", "CLAUDECODE_SESSION_ID", "CLAUDE_PROJECT_DIR"),
    codex=("CODEX_THREAD_ID", "CODEX_SANDBOX", "CODEX_CI"),
    openclaw=("OPENCLAW_HOME", "OPENCLAW_STATE_DIR", "OPENCLAW_CONFIG_PATH"),
)

# Process-name aliases per backend, used when classifying process commands.
BACKEND_COMMAND_ALIASES: dict[str, tuple[str, ...]] = {
    name: (name,) for name in SUPPORTED_BACKENDS
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _detect_current_agent_from_env(env: dict[str, str] | None = None) -> str | None:
    """Infer the current agent from the process environment.

    Args:
        env: Mapping to inspect; ``os.environ`` is used when ``None``.

    Returns:
        The first backend in ``BACKEND_ENV_MARKERS`` (in table order) that has
        at least one marker variable set to a non-empty value, or ``None``
        when no marker is set.
    """
    environ = os.environ if env is None else env
    return next(
        (
            backend
            for backend, markers in BACKEND_ENV_MARKERS.items()
            if any(environ.get(marker) for marker in markers)
        ),
        None,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _get_process_field(pid: int, field: str) -> str:
    """Read a single process field from ps, returning an empty string on failure.

    Runs ``ps -o<field>= -p <pid>`` with a two second timeout. A missing
    ``ps`` binary, a timeout, any other ``OSError`` or a non-zero exit status
    all yield ``""``; otherwise the stripped standard output is returned.
    """
    argv = ["ps", f"-o{field}=", "-p", str(pid)]
    try:
        completed = subprocess.run(argv, capture_output=True, text=True, timeout=2)
    except (subprocess.TimeoutExpired, OSError):
        # OSError also covers FileNotFoundError (no ``ps`` on PATH).
        return ""
    return completed.stdout.strip() if completed.returncode == 0 else ""
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _classify_process_command(comm: str, command: str) -> str | None:
    """Map a process command to a supported backend.

    Args:
        comm: Executable name or path of the process (may be empty).
        command: Full command line of the process (may be empty).

    Returns:
        The first backend in ``BACKEND_COMMAND_ALIASES`` with an alias that
        equals the lower-cased basename of ``comm``, or that appears in the
        lower-cased ``"<comm> <command>"`` text at its start, right after a
        space, or right after a ``/``. ``None`` when both inputs are empty or
        nothing matches.
    """
    haystack = " ".join(filter(None, (comm, command))).lower()
    if not haystack:
        return None

    executable = Path(comm).name.lower() if comm else ""
    # Leading space lets an alias at the very start match the " alias" probe.
    padded = f" {haystack}"

    def matches(alias: str) -> bool:
        return executable == alias or f" {alias}" in padded or f"/{alias}" in haystack

    for backend, aliases in BACKEND_COMMAND_ALIASES.items():
        if any(matches(alias) for alias in aliases):
            return backend
    return None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _detect_current_agent_from_process_tree(pid: int | None = None, *, max_depth: int = 6) -> str | None:
    """Walk parent processes to find a known coding-agent CLI.

    Args:
        pid: Process to start from; the parent of the current process is used
            when ``None``.
        max_depth: Maximum number of processes to inspect.

    Returns:
        The backend of the first process along the parent chain that
        classifies as a known backend, or ``None``. The walk stops at a PID of
        1 or below, on a PID that was already visited, when the parent PID
        cannot be parsed, or after ``max_depth`` processes.
    """
    cursor = os.getppid() if pid is None else pid
    visited: set[int] = set()
    hops = 0

    while hops < max_depth and cursor > 1 and cursor not in visited:
        visited.add(cursor)
        backend = _classify_process_command(
            _get_process_field(cursor, "comm"),
            _get_process_field(cursor, "command"),
        )
        if backend:
            return backend
        try:
            cursor = int(_get_process_field(cursor, "ppid"))
        except ValueError:
            # Empty or non-numeric ppid: the chain cannot be followed further.
            break
        hops += 1
    return None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def detect_current_agent(env: dict[str, str] | None = None) -> str | None:
    """Detect the current coding agent from env vars or process tree.

    The environment is consulted first; the process tree is only walked when
    the environment yields nothing.
    """
    from_env = _detect_current_agent_from_env(env)
    if from_env:
        return from_env
    return _detect_current_agent_from_process_tree()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def resolve_backend(backend: str = "auto", env: dict[str, str] | None = None) -> str:
    """Resolve 'auto' backend selection to a concrete backend name.

    Priority: explicit value > AGENTSTRACE_SCORER_BACKEND env > auto-detect.

    Args:
        backend: Requested backend; compared after stripping and lower-casing.
            A falsy value is treated as ``"auto"``.
        env: Environment mapping to consult; ``os.environ`` when ``None``.

    Returns:
        A member of ``SUPPORTED_BACKENDS``.

    Raises:
        RuntimeError: If the explicit value or the environment override is not
            a supported backend, or if nothing could be auto-detected.
    """
    if env is None:
        env = os.environ

    choice = (backend or "auto").strip().lower()
    if choice != "auto":
        if choice in SUPPORTED_BACKENDS:
            return choice
        raise RuntimeError(f"Unsupported backend: {backend}")

    forced = (env.get("AGENTSTRACE_SCORER_BACKEND") or "").strip().lower()
    if forced:
        if forced in SUPPORTED_BACKENDS:
            return forced
        raise RuntimeError(
            f"Unsupported AGENTSTRACE_SCORER_BACKEND value: {forced}. "
            f"Use one of: {', '.join(SUPPORTED_BACKENDS)}."
        )

    detected = detect_current_agent(env)
    if not detected:
        raise RuntimeError(
            "Could not detect the current agent. "
            "Run agentstrace from a supported agent CLI, set AGENTSTRACE_SCORER_BACKEND, "
            "or pass --backend explicitly."
        )
    return detected
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def require_backend_command(backend: str) -> str:
    """Return the CLI command for a backend, ensuring it is installed.

    Raises:
        KeyError: If ``backend`` has no entry in ``BACKEND_COMMANDS``.
        RuntimeError: If the command cannot be found on ``PATH``.
    """
    executable = BACKEND_COMMANDS[backend]
    if shutil.which(executable) is not None:
        return executable
    raise RuntimeError(f"{backend} CLI not found. Install it or choose a different --backend.")
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def check_backend_runtime(backend: str, env: dict[str, str] | None = None) -> None:
    """Backend-specific runtime preflight hook (extensible, currently a no-op)."""
    del backend, env  # accepted for interface stability; deliberately unused
    return None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def summarize_process_error(stderr: str, stdout: str = "") -> str:
    """Return the most actionable error line from subprocess output.

    Lines from ``stderr`` followed by ``stdout`` are stripped; blank lines and
    a few known noise lines (PATH-update warning, Rust backtrace hint,
    ``thread '...`` panic headers) are dropped. Of the remaining lines, the
    last one that looks like an error is returned; if none does, the last
    remaining line is returned. An empty string is returned when nothing
    remains.
    """
    noise_prefixes = (
        "WARNING: proceeding, even though we could not update PATH",
        "note: run with `RUST_BACKTRACE=1`",
        "thread '",
    )
    stripped_lines = (raw.strip() for raw in f"{stderr}\n{stdout}".splitlines())
    candidates = [
        entry
        for entry in stripped_lines
        if entry and not entry.startswith(noise_prefixes)
    ]
    if not candidates:
        return ""

    def looks_like_error(entry: str) -> bool:
        lowered = entry.lower()
        return (
            lowered.startswith("error:")
            or " error " in lowered
            or "failed" in lowered
            or "unauthorized" in lowered
        )

    # Scan from the end so the most recent error-looking line wins.
    return next(
        (entry for entry in reversed(candidates) if looks_like_error(entry)),
        candidates[-1],
    )
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def format_codex_runtime_error(returncode: int, stderr: str, stdout: str = "") -> str:
    """Normalize common Codex exec failures into actionable guidance.

    The combined, lower-cased output is searched for known network-failure
    phrases first and authentication phrases second; each yields a fixed
    guidance message. Anything else falls back to
    ``"codex exited <returncode>"``, followed by a summary line when one is
    available.
    """
    network_markers = (
        "failed to lookup address information",
        "temporary failure in name resolution",
        "name or service not known",
        "network is unreachable",
        "could not resolve host",
    )
    auth_markers = (
        "401",
        "unauthorized",
        "not signed in",
        "authentication required",
    )

    pieces = [chunk.strip() for chunk in (stderr, stdout) if chunk and chunk.strip()]
    haystack = "\n".join(pieces).lower()

    if any(marker in haystack for marker in network_markers):
        return (
            "Codex runs through `codex exec` in non-interactive mode. "
            "This process could not reach the Codex backend from the current environment. "
            "If you launched agentstrace inside a network-disabled Codex sandbox, "
            "rerun it from your host shell or with network access."
        )

    if any(marker in haystack for marker in auth_markers):
        return (
            "Codex runs through `codex exec` in non-interactive mode. "
            "`codex exec` reuses saved CLI authentication by default; for automation, "
            "run `codex login` or set `CODEX_API_KEY` before running agentstrace."
        )

    detail = summarize_process_error(stderr, stdout)
    prefix = f"codex exited {returncode}"
    return f"{prefix}: {detail}" if detail else prefix
|