agentstracer 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentstracer-1.0.0/LICENSE +22 -0
- agentstracer-1.0.0/MANIFEST.in +1 -0
- agentstracer-1.0.0/PKG-INFO +170 -0
- agentstracer-1.0.0/README.md +146 -0
- agentstracer-1.0.0/agentstrace/__init__.py +3 -0
- agentstracer-1.0.0/agentstrace/anonymizer.py +112 -0
- agentstracer-1.0.0/agentstrace/backends.py +211 -0
- agentstracer-1.0.0/agentstrace/badges.py +678 -0
- agentstracer-1.0.0/agentstrace/card.py +217 -0
- agentstracer-1.0.0/agentstrace/cli.py +3849 -0
- agentstracer-1.0.0/agentstrace/config.py +69 -0
- agentstracer-1.0.0/agentstrace/convert_to_training_data.py +330 -0
- agentstracer-1.0.0/agentstrace/daemon.py +969 -0
- agentstracer-1.0.0/agentstrace/depth.py +437 -0
- agentstracer-1.0.0/agentstrace/index.py +1159 -0
- agentstracer-1.0.0/agentstrace/parser.py +2045 -0
- agentstracer-1.0.0/agentstrace/pii.py +729 -0
- agentstracer-1.0.0/agentstrace/scoring.py +885 -0
- agentstracer-1.0.0/agentstrace/secrets.py +551 -0
- agentstracer-1.0.0/agentstrace/segmenter.py +580 -0
- agentstracer-1.0.0/agentstrace/web/frontend/node_modules/flatted/python/flatted.py +144 -0
- agentstracer-1.0.0/agentstracer.egg-info/PKG-INFO +170 -0
- agentstracer-1.0.0/agentstracer.egg-info/SOURCES.txt +42 -0
- agentstracer-1.0.0/agentstracer.egg-info/dependency_links.txt +1 -0
- agentstracer-1.0.0/agentstracer.egg-info/entry_points.txt +2 -0
- agentstracer-1.0.0/agentstracer.egg-info/requires.txt +3 -0
- agentstracer-1.0.0/agentstracer.egg-info/top_level.txt +1 -0
- agentstracer-1.0.0/pyproject.toml +42 -0
- agentstracer-1.0.0/setup.cfg +4 -0
- agentstracer-1.0.0/tests/test_anonymizer.py +223 -0
- agentstracer-1.0.0/tests/test_backends.py +175 -0
- agentstracer-1.0.0/tests/test_badges.py +304 -0
- agentstracer-1.0.0/tests/test_card.py +228 -0
- agentstracer-1.0.0/tests/test_cli.py +1249 -0
- agentstracer-1.0.0/tests/test_config.py +71 -0
- agentstracer-1.0.0/tests/test_daemon.py +449 -0
- agentstracer-1.0.0/tests/test_depth.py +338 -0
- agentstracer-1.0.0/tests/test_index.py +244 -0
- agentstracer-1.0.0/tests/test_parser.py +1845 -0
- agentstracer-1.0.0/tests/test_pii.py +462 -0
- agentstracer-1.0.0/tests/test_scoring.py +446 -0
- agentstracer-1.0.0/tests/test_secrets.py +628 -0
- agentstracer-1.0.0/tests/test_segmenter.py +589 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 kaiaiagent (Original Author)
|
|
4
|
+
Copyright (c) 2024 cyijun (Modified Version)
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
recursive-include agentstrace/web/frontend/dist *
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agentstracer
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Review, score, and curate your coding agent conversation traces locally
|
|
5
|
+
Author: kaiaiagent
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/cyijun/agentstrace
|
|
8
|
+
Project-URL: Repository, https://github.com/cyijun/agentstrace
|
|
9
|
+
Keywords: claude-code,codex,gemini-cli,opencode,openclaw,dataset,conversations
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest; extra == "dev"
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# AgentsTrace (Local-Only Fork)
|
|
26
|
+
|
|
27
|
+
原版:[kaiaiagent/clawtrace](https://github.com/kaiaiagent/clawtrace)
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## 快速开始
|
|
32
|
+
|
|
33
|
+
### 安装
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
git clone https://github.com/cyijun/agentstrace.git
|
|
37
|
+
cd agentstrace
|
|
38
|
+
pip install -e .
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### 基本使用
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# 1. 配置导出源(claude/kimi/codex/gemini/all)
|
|
45
|
+
agentstrace config --source all
|
|
46
|
+
|
|
47
|
+
# 2. 导出对话记录
|
|
48
|
+
agentstrace export --no-push -o my_conversations.jsonl
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### 私人使用(保留 API Keys)
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
# 禁用 secrets 脱敏(仅建议本地私人使用)
|
|
55
|
+
agentstrace config --no-secrets-redaction
|
|
56
|
+
|
|
57
|
+
# 导出(包含原始 API keys)
|
|
58
|
+
agentstrace export --no-push -o my_data.jsonl
|
|
59
|
+
|
|
60
|
+
# ⚠️ 警告:此文件包含明文 API keys,请勿分享!
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### 其他命令
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# 列出发现的项目
|
|
67
|
+
agentstrace list
|
|
68
|
+
|
|
69
|
+
# 启动本地 Web UI
|
|
70
|
+
agentstrace serve
|
|
71
|
+
|
|
72
|
+
# 查看配置
|
|
73
|
+
agentstrace config
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## 支持的 AI 工具
|
|
79
|
+
|
|
80
|
+
| 工具 | 数据位置 | 状态 |
|
|
81
|
+
|------|---------|------|
|
|
82
|
+
| Claude Code | `~/.claude/projects/` | ✅ |
|
|
83
|
+
| Kimi CLI | `~/.kimi/sessions/` | ✅ |
|
|
84
|
+
| Codex CLI | `~/.codex/sessions/` | ✅ |
|
|
85
|
+
| OpenCode | `~/.local/share/opencode/` | ✅ |
|
|
86
|
+
| OpenClaw | `~/.openclaw/` | ✅ |
|
|
87
|
+
| Gemini CLI | `~/.gemini/tmp/` | ✅ |
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## 导出格式
|
|
92
|
+
|
|
93
|
+
**JSONL**(每行一个 JSON 对象):
|
|
94
|
+
|
|
95
|
+
```jsonl
|
|
96
|
+
{"session_id": "abc-123", "model": "kimi-k2", "messages": [...], ...}
|
|
97
|
+
{"session_id": "def-456", "model": "claude-3-7", "messages": [...], ...}
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 主要字段
|
|
101
|
+
|
|
102
|
+
| 字段 | 说明 |
|
|
103
|
+
|------|------|
|
|
104
|
+
| `session_id` | 会话唯一标识 |
|
|
105
|
+
| `model` | AI 模型名称 |
|
|
106
|
+
| `project` | 项目名称(已脱敏) |
|
|
107
|
+
| `source` | 来源(claude/kimi/codex等) |
|
|
108
|
+
| `start_time` / `end_time` | ISO 8601 时间 |
|
|
109
|
+
| `messages` | 对话消息列表 |
|
|
110
|
+
| `stats` | 统计信息 |
|
|
111
|
+
|
|
112
|
+
### messages 结构
|
|
113
|
+
|
|
114
|
+
```json
|
|
115
|
+
{
|
|
116
|
+
"role": "user|assistant",
|
|
117
|
+
"content": "消息内容",
|
|
118
|
+
"thinking": "思考过程(assistant)",
|
|
119
|
+
"timestamp": "2024-01-01T12:00:00Z",
|
|
120
|
+
"tool_uses": [{
|
|
121
|
+
"tool": "bash",
|
|
122
|
+
"input": {"command": "ls -la"},
|
|
123
|
+
"output": {"text": "..."},
|
|
124
|
+
"status": "success"
|
|
125
|
+
}]
|
|
126
|
+
}
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## 关于本版本
|
|
132
|
+
|
|
133
|
+
这是 [AgentsTrace](https://github.com/kaiaiagent/clawtrace) 的修改版本,专注于**本地隐私保护**和**纯离线使用**。
|
|
134
|
+
|
|
135
|
+
### 主要修改点
|
|
136
|
+
|
|
137
|
+
#### 1. 移除所有网络功能
|
|
138
|
+
- ❌ 云上传功能
|
|
139
|
+
- ❌ Skill 下载功能
|
|
140
|
+
- ❌ 浏览器自动打开
|
|
141
|
+
- ❌ 所有 urllib 网络请求
|
|
142
|
+
|
|
143
|
+
#### 2. 可选禁用 Secrets 脱敏
|
|
144
|
+
- 新增 `--no-secrets-redaction` 配置
|
|
145
|
+
- 私人使用时保留 API keys
|
|
146
|
+
- 路径/用户名脱敏始终启用
|
|
147
|
+
|
|
148
|
+
#### 3. 安全审查
|
|
149
|
+
- ✅ 无 `eval()` / `exec()` / `compile()`
|
|
150
|
+
- ✅ 无动态代码执行
|
|
151
|
+
- ✅ 无反序列化风险
|
|
152
|
+
- ✅ 路径遍历已防护
|
|
153
|
+
|
|
154
|
+
### 与原版对比
|
|
155
|
+
|
|
156
|
+
| 功能 | 原版 | 本版本 |
|
|
157
|
+
|------|------|--------|
|
|
158
|
+
| 云上传 | ✅ | ❌ 已移除 |
|
|
159
|
+
| Skill 下载 | ✅ | ❌ 已移除 |
|
|
160
|
+
| 可选禁用脱敏 | ❌ | ✅ 支持 |
|
|
161
|
+
| 适用场景 | 分享数据集 | 私人本地分析 |
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## 许可证
|
|
166
|
+
|
|
167
|
+
MIT License - 详见 [LICENSE](LICENSE)
|
|
168
|
+
|
|
169
|
+
Copyright (c) 2024 kaiaiagent (Original Author)
|
|
170
|
+
Copyright (c) 2024 cyijun (Modified Version)
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# AgentsTrace (Local-Only Fork)
|
|
2
|
+
|
|
3
|
+
原版:[kaiaiagent/clawtrace](https://github.com/kaiaiagent/clawtrace)
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 快速开始
|
|
8
|
+
|
|
9
|
+
### 安装
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
git clone https://github.com/cyijun/agentstrace.git
|
|
13
|
+
cd agentstrace
|
|
14
|
+
pip install -e .
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
### 基本使用
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# 1. 配置导出源(claude/kimi/codex/gemini/all)
|
|
21
|
+
agentstrace config --source all
|
|
22
|
+
|
|
23
|
+
# 2. 导出对话记录
|
|
24
|
+
agentstrace export --no-push -o my_conversations.jsonl
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### 私人使用(保留 API Keys)
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
# 禁用 secrets 脱敏(仅建议本地私人使用)
|
|
31
|
+
agentstrace config --no-secrets-redaction
|
|
32
|
+
|
|
33
|
+
# 导出(包含原始 API keys)
|
|
34
|
+
agentstrace export --no-push -o my_data.jsonl
|
|
35
|
+
|
|
36
|
+
# ⚠️ 警告:此文件包含明文 API keys,请勿分享!
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### 其他命令
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# 列出发现的项目
|
|
43
|
+
agentstrace list
|
|
44
|
+
|
|
45
|
+
# 启动本地 Web UI
|
|
46
|
+
agentstrace serve
|
|
47
|
+
|
|
48
|
+
# 查看配置
|
|
49
|
+
agentstrace config
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## 支持的 AI 工具
|
|
55
|
+
|
|
56
|
+
| 工具 | 数据位置 | 状态 |
|
|
57
|
+
|------|---------|------|
|
|
58
|
+
| Claude Code | `~/.claude/projects/` | ✅ |
|
|
59
|
+
| Kimi CLI | `~/.kimi/sessions/` | ✅ |
|
|
60
|
+
| Codex CLI | `~/.codex/sessions/` | ✅ |
|
|
61
|
+
| OpenCode | `~/.local/share/opencode/` | ✅ |
|
|
62
|
+
| OpenClaw | `~/.openclaw/` | ✅ |
|
|
63
|
+
| Gemini CLI | `~/.gemini/tmp/` | ✅ |
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## 导出格式
|
|
68
|
+
|
|
69
|
+
**JSONL**(每行一个 JSON 对象):
|
|
70
|
+
|
|
71
|
+
```jsonl
|
|
72
|
+
{"session_id": "abc-123", "model": "kimi-k2", "messages": [...], ...}
|
|
73
|
+
{"session_id": "def-456", "model": "claude-3-7", "messages": [...], ...}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 主要字段
|
|
77
|
+
|
|
78
|
+
| 字段 | 说明 |
|
|
79
|
+
|------|------|
|
|
80
|
+
| `session_id` | 会话唯一标识 |
|
|
81
|
+
| `model` | AI 模型名称 |
|
|
82
|
+
| `project` | 项目名称(已脱敏) |
|
|
83
|
+
| `source` | 来源(claude/kimi/codex等) |
|
|
84
|
+
| `start_time` / `end_time` | ISO 8601 时间 |
|
|
85
|
+
| `messages` | 对话消息列表 |
|
|
86
|
+
| `stats` | 统计信息 |
|
|
87
|
+
|
|
88
|
+
### messages 结构
|
|
89
|
+
|
|
90
|
+
```json
|
|
91
|
+
{
|
|
92
|
+
"role": "user|assistant",
|
|
93
|
+
"content": "消息内容",
|
|
94
|
+
"thinking": "思考过程(assistant)",
|
|
95
|
+
"timestamp": "2024-01-01T12:00:00Z",
|
|
96
|
+
"tool_uses": [{
|
|
97
|
+
"tool": "bash",
|
|
98
|
+
"input": {"command": "ls -la"},
|
|
99
|
+
"output": {"text": "..."},
|
|
100
|
+
"status": "success"
|
|
101
|
+
}]
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## 关于本版本
|
|
108
|
+
|
|
109
|
+
这是 [AgentsTrace](https://github.com/kaiaiagent/clawtrace) 的修改版本,专注于**本地隐私保护**和**纯离线使用**。
|
|
110
|
+
|
|
111
|
+
### 主要修改点
|
|
112
|
+
|
|
113
|
+
#### 1. 移除所有网络功能
|
|
114
|
+
- ❌ 云上传功能
|
|
115
|
+
- ❌ Skill 下载功能
|
|
116
|
+
- ❌ 浏览器自动打开
|
|
117
|
+
- ❌ 所有 urllib 网络请求
|
|
118
|
+
|
|
119
|
+
#### 2. 可选禁用 Secrets 脱敏
|
|
120
|
+
- 新增 `--no-secrets-redaction` 配置
|
|
121
|
+
- 私人使用时保留 API keys
|
|
122
|
+
- 路径/用户名脱敏始终启用
|
|
123
|
+
|
|
124
|
+
#### 3. 安全审查
|
|
125
|
+
- ✅ 无 `eval()` / `exec()` / `compile()`
|
|
126
|
+
- ✅ 无动态代码执行
|
|
127
|
+
- ✅ 无反序列化风险
|
|
128
|
+
- ✅ 路径遍历已防护
|
|
129
|
+
|
|
130
|
+
### 与原版对比
|
|
131
|
+
|
|
132
|
+
| 功能 | 原版 | 本版本 |
|
|
133
|
+
|------|------|--------|
|
|
134
|
+
| 云上传 | ✅ | ❌ 已移除 |
|
|
135
|
+
| Skill 下载 | ✅ | ❌ 已移除 |
|
|
136
|
+
| 可选禁用脱敏 | ❌ | ✅ 支持 |
|
|
137
|
+
| 适用场景 | 分享数据集 | 私人本地分析 |
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## 许可证
|
|
142
|
+
|
|
143
|
+
MIT License - 详见 [LICENSE](LICENSE)
|
|
144
|
+
|
|
145
|
+
Copyright (c) 2024 kaiaiagent (Original Author)
|
|
146
|
+
Copyright (c) 2024 cyijun (Modified Version)
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Anonymize PII in Claude Code log data."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _hash_username(username: str) -> str:
|
|
9
|
+
return "user_" + hashlib.sha256(username.encode()).hexdigest()[:8]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _detect_home_dir() -> tuple[str, str]:
|
|
13
|
+
home = os.path.expanduser("~")
|
|
14
|
+
username = os.path.basename(home)
|
|
15
|
+
return home, username
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def anonymize_path(path: str, username: str, username_hash: str, home: str | None = None) -> str:
    """Strip a path to project-relative and hash the username.

    Args:
        path: The file path to anonymize. Falsy values are returned unchanged.
        username: The account name to look for under ``/Users`` and ``/home``.
        username_hash: The token that replaces the username.
        home: The home directory to treat as an additional base; defaults to
            ``os.path.expanduser("~")``.

    Returns:
        * the remainder of the path when it lives under a home directory's
          ``Documents``, ``Downloads`` or ``Desktop`` folder;
        * ``"<username_hash>/<remainder>"`` when it lives elsewhere under a
          home directory;
        * otherwise the path with any embedded ``/Users/<username>/`` or
          ``/home/<username>/`` segment replaced by ``/<username_hash>/``.
    """
    if not path:
        return path

    if home is None:
        home = os.path.expanduser("~")

    # Map every candidate prefix to whether it names one of the well-known
    # subdirectories.  Recording this explicitly (rather than searching the
    # prefix text for "/Desktop/" and friends) keeps a home directory such as
    # "/srv/Desktop/alice" from being mistaken for a subdirectory prefix.
    prefixes: dict[str, bool] = {}
    for base in (f"/Users/{username}", f"/home/{username}", home):
        for subdir in ("Documents", "Downloads", "Desktop"):
            prefixes[f"{base}/{subdir}/"] = True
        # setdefault: if the same text is already a subdirectory prefix of
        # another base, the subdirectory classification wins.
        prefixes.setdefault(f"{base}/", False)

    # Try longest prefixes first (subdirectory matches before bare home).
    for prefix in sorted(prefixes, key=len, reverse=True):
        if path.startswith(prefix):
            rest = path[len(prefix):]
            return rest if prefixes[prefix] else f"{username_hash}/{rest}"

    # Not rooted in a home directory: still scrub home segments that appear
    # in the middle of the path.
    path = path.replace(f"/Users/{username}/", f"/{username_hash}/")
    path = path.replace(f"/home/{username}/", f"/{username_hash}/")

    return path
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def anonymize_text(text: str, username: str, username_hash: str) -> str:
    """Replace occurrences of *username* in free text with *username_hash*.

    The substitutions run in this order:

    1. ``claude-<digits>/-Users-<username>`` temp paths, which also have the
       numeric component masked as ``claude-XXX``.  This runs first because
       the hyphen-encoded rule below would otherwise consume the username and
       leave the number untouched.
    2. ``/Users/<username>`` and ``/home/<username>`` path prefixes.
    3. Hyphen-encoded paths (``-Users-<username>`` / ``-home-<username>``),
       including the variant where underscores in the username were turned
       into hyphens.
    4. The bare username as a whole word, only when it has at least four
       characters, to limit false positives.

    Args:
        text: The text to scrub. Falsy values are returned unchanged.
        username: The account name to remove. If falsy, *text* is returned
            unchanged.
        username_hash: The replacement token; it is inserted verbatim.

    Returns:
        The scrubbed text.
    """
    if not text or not username:
        return text

    def substitute(pattern: str, replacement: str, source: str) -> str:
        # A callable replacement is inserted literally, so backslashes or
        # group references inside the hash are never interpreted by ``re``.
        return re.sub(pattern, lambda _match: replacement, source)

    escaped = re.escape(username)

    # Catch temp paths like /private/tmp/claude-501/-Users-<username>/.
    # The negative lookahead stops a username from matching as the prefix of
    # a longer, different username.
    text = substitute(
        rf"claude-\d+/-Users-{escaped}(?![A-Za-z0-9_])",
        f"claude-XXX/-Users-{username_hash}",
        text,
    )

    # Replace /Users/<username> and /home/<username>
    text = substitute(rf"/Users/{escaped}(?=/|[^a-zA-Z0-9_-]|$)", f"/{username_hash}", text)
    text = substitute(rf"/home/{escaped}(?=/|[^a-zA-Z0-9_-]|$)", f"/{username_hash}", text)

    # Catch hyphen-encoded paths (-Users-<username>- or -Users-<username>/),
    # plus the underscore-to-hyphen encoding of the username itself.
    variants = [escaped]
    if "_" in username:
        variants.append(re.escape(username.replace("_", "-")))
    for variant in variants:
        text = substitute(rf"-Users-{variant}(?=-|/|$)", f"-Users-{username_hash}", text)
        text = substitute(rf"-home-{variant}(?=-|/|$)", f"-home-{username_hash}", text)

    # Final pass: replace bare username in remaining contexts (ls output, prose, etc.)
    # Only if username is >= 4 chars to avoid false positives
    if len(username) >= 4:
        text = substitute(rf"\b{escaped}\b", username_hash, text)

    return text
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class Anonymizer:
    """Stateful anonymizer that consistently hashes usernames.

    On construction it detects the local home directory and username, hashes
    the username, and records any extra usernames (each with its own hash)
    that should also be scrubbed.
    """

    def __init__(self, extra_usernames: list[str] | None = None):
        """Detect the local user and register additional names to scrub.

        Args:
            extra_usernames: Additional usernames to anonymize (GitHub
                handles, Discord names, etc.). Entries are stripped; blank
                entries, duplicates and the detected local username are
                skipped.
        """
        self.home, self.username = _detect_home_dir()
        self.username_hash = _hash_username(self.username)

        # dict.fromkeys dedupes while keeping first-seen order.
        cleaned = dict.fromkeys(name.strip() for name in (extra_usernames or []))
        # Longest names first: extra names are replaced as plain substrings,
        # so a short name that is contained in a longer one must not be
        # replaced before the longer one has been handled.
        ordered = sorted(
            (name for name in cleaned if name and name != self.username),
            key=len,
            reverse=True,
        )
        self._extra: list[tuple[str, str]] = [
            (name, _hash_username(name)) for name in ordered
        ]

    def _apply_extra(self, value: str) -> str:
        """Replace every registered extra username in *value*."""
        for name, hashed in self._extra:
            value = _replace_username(value, name, hashed)
        return value

    def path(self, file_path: str) -> str:
        """Anonymize a file path, then scrub any remaining usernames in it."""
        result = anonymize_path(file_path, self.username, self.username_hash, self.home)
        result = anonymize_text(result, self.username, self.username_hash)
        return self._apply_extra(result)

    def text(self, content: str) -> str:
        """Scrub the local username and all extra usernames from free text."""
        result = anonymize_text(content, self.username, self.username_hash)
        return self._apply_extra(result)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _replace_username(text: str, username: str, username_hash: str) -> str:
|
|
108
|
+
if not text or not username or len(username) < 3:
|
|
109
|
+
return text
|
|
110
|
+
escaped = re.escape(username)
|
|
111
|
+
text = re.sub(escaped, username_hash, text, flags=re.IGNORECASE)
|
|
112
|
+
return text
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""Shared backend detection and resolution for coding-agent CLIs.
|
|
2
|
+
|
|
3
|
+
Used by both the scoring pipeline and PII review to auto-detect whether
|
|
4
|
+
agentstrace is running under Claude Code, Codex, or OpenClaw and dispatch
|
|
5
|
+
to the corresponding automation CLI.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import shutil
|
|
12
|
+
import subprocess
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Names of the coding-agent backends this module knows about.
SUPPORTED_BACKENDS = ("claude", "codex", "openclaw")

# Accepted values for a backend option: "auto" plus every supported backend.
BACKEND_CHOICES = ("auto",) + SUPPORTED_BACKENDS

# Executable looked up on PATH for each backend.
BACKEND_COMMANDS: dict[str, str] = dict(
    claude="claude",
    codex="codex",
    openclaw="openclaw",
)

# Environment variables whose presence (with a non-empty value) marks a backend.
BACKEND_ENV_MARKERS: dict[str, tuple[str, ...]] = dict(
    claude=("CLAUDECODE", "CLAUDE_CODE", "CLAUDECODE_SESSION_ID", "CLAUDE_PROJECT_DIR"),
    codex=("CODEX_THREAD_ID", "CODEX_SANDBOX", "CODEX_CI"),
    openclaw=("OPENCLAW_HOME", "OPENCLAW_STATE_DIR", "OPENCLAW_CONFIG_PATH"),
)

# Process names / command fragments that identify each backend in `ps` output.
BACKEND_COMMAND_ALIASES: dict[str, tuple[str, ...]] = dict(
    claude=("claude",),
    codex=("codex",),
    openclaw=("openclaw",),
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _detect_current_agent_from_env(env: dict[str, str] | None = None) -> str | None:
    """Infer the current agent from environment variables.

    Returns the first backend in ``BACKEND_ENV_MARKERS`` for which any marker
    variable is set to a non-empty value, or ``None`` when none match.
    ``os.environ`` is consulted when *env* is ``None``.
    """
    lookup = env if env is not None else os.environ
    return next(
        (
            name
            for name, markers in BACKEND_ENV_MARKERS.items()
            if any(lookup.get(marker) for marker in markers)
        ),
        None,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _get_process_field(pid: int, field: str) -> str:
|
|
46
|
+
"""Read a single process field from ps, returning an empty string on failure."""
|
|
47
|
+
try:
|
|
48
|
+
proc = subprocess.run(
|
|
49
|
+
["ps", f"-o{field}=", "-p", str(pid)],
|
|
50
|
+
capture_output=True,
|
|
51
|
+
text=True,
|
|
52
|
+
timeout=2,
|
|
53
|
+
)
|
|
54
|
+
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
|
55
|
+
return ""
|
|
56
|
+
if proc.returncode != 0:
|
|
57
|
+
return ""
|
|
58
|
+
return proc.stdout.strip()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _classify_process_command(comm: str, command: str) -> str | None:
    """Map a process's ``comm``/``command`` strings to a supported backend.

    A backend matches when one of its aliases equals the lower-cased basename
    of *comm*, or appears in the combined lower-cased text directly after a
    space, the start of the text, or a ``/``. Returns ``None`` when both
    inputs are empty or nothing matches.
    """
    haystack = " ".join(filter(None, (comm, command))).lower()
    if not haystack:
        return None

    executable = Path(comm).name.lower() if comm else ""
    # Leading space lets an alias at the very start match the " alias" test.
    padded = f" {haystack}"
    for backend, aliases in BACKEND_COMMAND_ALIASES.items():
        if any(
            executable == alias or f" {alias}" in padded or f"/{alias}" in haystack
            for alias in aliases
        ):
            return backend
    return None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _detect_current_agent_from_process_tree(pid: int | None = None, *, max_depth: int = 6) -> str | None:
    """Walk up the parent-process chain looking for a known coding-agent CLI.

    Starts at *pid* (the parent of the current process when ``None``) and
    inspects at most *max_depth* processes. The walk stops at pid 1 or below,
    on a pid already visited, or when the parent pid cannot be parsed.
    Returns the first backend detected, otherwise ``None``.
    """
    cursor = os.getppid() if pid is None else pid
    visited: set[int] = set()
    hops = 0

    while hops < max_depth:
        if cursor <= 1 or cursor in visited:
            break
        visited.add(cursor)

        backend = _classify_process_command(
            _get_process_field(cursor, "comm"),
            _get_process_field(cursor, "command"),
        )
        if backend:
            return backend

        try:
            cursor = int(_get_process_field(cursor, "ppid"))
        except ValueError:
            # Empty or non-numeric ppid: nothing further to walk.
            break
        hops += 1

    return None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def detect_current_agent(env: dict[str, str] | None = None) -> str | None:
    """Detect the current coding agent.

    Environment markers are checked first; the parent-process tree is only
    inspected when the environment gives no answer.
    """
    from_env = _detect_current_agent_from_env(env)
    if from_env:
        return from_env
    return _detect_current_agent_from_process_tree()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def resolve_backend(backend: str = "auto", env: dict[str, str] | None = None) -> str:
    """Resolve a backend selection (possibly 'auto') to a concrete backend name.

    Priority: explicit value > AGENTSTRACE_SCORER_BACKEND env > auto-detect.

    Raises:
        RuntimeError: If the explicit value or the environment override is
            not a supported backend, or if nothing can be auto-detected.
    """
    if env is None:
        env = os.environ

    explicit = (backend or "auto").strip().lower()
    if explicit != "auto":
        if explicit in SUPPORTED_BACKENDS:
            return explicit
        raise RuntimeError(f"Unsupported backend: {backend}")

    from_env = (env.get("AGENTSTRACE_SCORER_BACKEND") or "").strip().lower()
    if from_env:
        if from_env in SUPPORTED_BACKENDS:
            return from_env
        raise RuntimeError(
            f"Unsupported AGENTSTRACE_SCORER_BACKEND value: {from_env}. "
            f"Use one of: {', '.join(SUPPORTED_BACKENDS)}."
        )

    agent = detect_current_agent(env)
    if not agent:
        raise RuntimeError(
            "Could not detect the current agent. "
            "Run agentstrace from a supported agent CLI, set AGENTSTRACE_SCORER_BACKEND, "
            "or pass --backend explicitly."
        )
    return agent
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def require_backend_command(backend: str) -> str:
    """Return the CLI command for *backend*, ensuring it is on ``PATH``.

    Raises:
        KeyError: If *backend* has no entry in ``BACKEND_COMMANDS``.
        RuntimeError: If the command cannot be found with ``shutil.which``.
    """
    executable = BACKEND_COMMANDS[backend]
    if shutil.which(executable) is not None:
        return executable
    raise RuntimeError(f"{backend} CLI not found. Install it or choose a different --backend.")
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def check_backend_runtime(backend: str, env: dict[str, str] | None = None) -> None:
    """Backend-specific runtime preflight hook.

    Currently a no-op: both arguments are accepted and ignored so callers
    have a stable place to hook per-backend checks later.
    """
    del backend, env  # intentionally unused
    return None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def summarize_process_error(stderr: str, stdout: str = "") -> str:
    """Return the most actionable error line from subprocess output.

    Lines from *stderr* followed by *stdout* are stripped; blank lines and
    known noise lines (PATH-update warning, RUST_BACKTRACE hint, ``thread '``
    lines) are dropped. The last remaining line that looks like an error is
    returned; if none does, the last remaining line is returned, and an empty
    string when nothing remains.
    """
    noise_prefixes = (
        "WARNING: proceeding, even though we could not update PATH",
        "note: run with `RUST_BACKTRACE=1`",
        "thread '",
    )
    candidates = [
        stripped
        for stripped in (raw.strip() for raw in f"{stderr}\n{stdout}".splitlines())
        if stripped and not stripped.startswith(noise_prefixes)
    ]
    if not candidates:
        return ""

    def looks_like_error(entry: str) -> bool:
        folded = entry.lower()
        return (
            folded.startswith("error:")
            or " error " in folded
            or any(word in folded for word in ("failed", "unauthorized"))
        )

    # Scan from the end so the most recent error-looking line wins.
    return next(
        (entry for entry in reversed(candidates) if looks_like_error(entry)),
        candidates[-1],
    )
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def format_codex_runtime_error(returncode: int, stderr: str, stdout: str = "") -> str:
    """Normalize common Codex exec failures into actionable guidance.

    Args:
        returncode: Exit status of the ``codex`` process.
        stderr: Captured standard error.
        stdout: Captured standard output.

    Returns:
        A network-access hint when the output shows a name-resolution or
        reachability failure; an authentication hint when it shows a 401 or
        a sign-in problem; otherwise ``"codex exited <returncode>"``,
        followed by the summarized error line when one is available.
    """
    import re  # local import: this module does not import ``re`` at top level

    combined = "\n".join(part.strip() for part in (stderr, stdout) if part and part.strip())
    lower = combined.lower()

    network_markers = (
        "failed to lookup address information",
        "temporary failure in name resolution",
        "name or service not known",
        "network is unreachable",
        "could not resolve host",
    )
    if any(marker in lower for marker in network_markers):
        return (
            "Codex runs through `codex exec` in non-interactive mode. "
            "This process could not reach the Codex backend from the current environment. "
            "If you launched agentstrace inside a network-disabled Codex sandbox, "
            "rerun it from your host shell or with network access."
        )

    auth_markers = ("unauthorized", "not signed in", "authentication required")
    # Match 401 only as a standalone number so that unrelated digit runs
    # (token counts, ids, timestamps such as "4010") are not reported as
    # authentication failures.
    saw_401 = re.search(r"(?<!\d)401(?!\d)", lower) is not None
    if saw_401 or any(marker in lower for marker in auth_markers):
        return (
            "Codex runs through `codex exec` in non-interactive mode. "
            "`codex exec` reuses saved CLI authentication by default; for automation, "
            "run `codex login` or set `CODEX_API_KEY` before running agentstrace."
        )

    summary = summarize_process_error(stderr, stdout)
    if summary:
        return f"codex exited {returncode}: {summary}"
    return f"codex exited {returncode}"
|