clawtrace 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clawtrace-0.1.0/LICENSE +21 -0
- clawtrace-0.1.0/MANIFEST.in +2 -0
- clawtrace-0.1.0/PKG-INFO +251 -0
- clawtrace-0.1.0/README.md +229 -0
- clawtrace-0.1.0/clawtrace/__init__.py +3 -0
- clawtrace-0.1.0/clawtrace/anonymizer.py +112 -0
- clawtrace-0.1.0/clawtrace/badges.py +523 -0
- clawtrace-0.1.0/clawtrace/cli.py +3299 -0
- clawtrace-0.1.0/clawtrace/config.py +65 -0
- clawtrace-0.1.0/clawtrace/daemon.py +981 -0
- clawtrace-0.1.0/clawtrace/data/SKILL.md +273 -0
- clawtrace-0.1.0/clawtrace/index.py +1008 -0
- clawtrace-0.1.0/clawtrace/parser.py +2038 -0
- clawtrace-0.1.0/clawtrace/pii.py +523 -0
- clawtrace-0.1.0/clawtrace/scoring.py +527 -0
- clawtrace-0.1.0/clawtrace/secrets.py +282 -0
- clawtrace-0.1.0/clawtrace/web/frontend/dist/assets/index-CC1ZQXpr.js +11 -0
- clawtrace-0.1.0/clawtrace/web/frontend/dist/assets/index-CbWkARoN.css +1 -0
- clawtrace-0.1.0/clawtrace/web/frontend/dist/favicon.svg +1 -0
- clawtrace-0.1.0/clawtrace/web/frontend/dist/icons.svg +24 -0
- clawtrace-0.1.0/clawtrace/web/frontend/dist/index.html +14 -0
- clawtrace-0.1.0/clawtrace/web/frontend/node_modules/flatted/python/flatted.py +144 -0
- clawtrace-0.1.0/clawtrace.egg-info/PKG-INFO +251 -0
- clawtrace-0.1.0/clawtrace.egg-info/SOURCES.txt +38 -0
- clawtrace-0.1.0/clawtrace.egg-info/dependency_links.txt +1 -0
- clawtrace-0.1.0/clawtrace.egg-info/entry_points.txt +2 -0
- clawtrace-0.1.0/clawtrace.egg-info/requires.txt +3 -0
- clawtrace-0.1.0/clawtrace.egg-info/top_level.txt +1 -0
- clawtrace-0.1.0/pyproject.toml +38 -0
- clawtrace-0.1.0/setup.cfg +4 -0
- clawtrace-0.1.0/tests/test_anonymizer.py +223 -0
- clawtrace-0.1.0/tests/test_badges.py +242 -0
- clawtrace-0.1.0/tests/test_cli.py +1160 -0
- clawtrace-0.1.0/tests/test_config.py +71 -0
- clawtrace-0.1.0/tests/test_daemon.py +447 -0
- clawtrace-0.1.0/tests/test_index.py +227 -0
- clawtrace-0.1.0/tests/test_parser.py +1780 -0
- clawtrace-0.1.0/tests/test_pii.py +304 -0
- clawtrace-0.1.0/tests/test_scoring.py +291 -0
- clawtrace-0.1.0/tests/test_secrets.py +431 -0
clawtrace-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Banodoco
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
clawtrace-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: clawtrace
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Review, score, and curate your coding agent conversation traces locally
|
|
5
|
+
Author: kaiaiagent
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: claude-code,codex,gemini-cli,opencode,openclaw,dataset,conversations
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest; extra == "dev"
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# ClawTrace
|
|
24
|
+
|
|
25
|
+
Review, score, and curate your coding agent conversation traces locally. ClawTrace indexes session logs from Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, Kimi CLI, and Cline, redacts secrets and PII, and gives you a browser workbench to triage and export the results.
|
|
26
|
+
|
|
27
|
+
Requires Python 3.10+.
|
|
28
|
+
|
|
29
|
+
## Review & score your traces
|
|
30
|
+
|
|
31
|
+
Index your local sessions, auto-score them for quality, and open a browser workbench — all in three commands:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install clawtrace
|
|
35
|
+
clawtrace scan # Index all local sessions
|
|
36
|
+
clawtrace score --batch --auto-triage # AI-score sessions, auto-approve 4-5, auto-block 1-2
|
|
37
|
+
clawtrace serve # Open workbench at localhost:8384
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
`score --batch` sends each session to Claude for evaluation (1-5 quality rating) and `--auto-triage` automatically approves high-quality sessions and blocks low-quality ones, leaving score-3 sessions for your manual review. The workbench gives you a full browser UI to read transcripts, adjust scores, search across sessions, and bundle approved traces for export.
|
|
41
|
+
|
|
42
|
+
## Give this to your agent
|
|
43
|
+
|
|
44
|
+
Paste this into Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, or any coding agent:
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
Help me review and curate my coding agent traces using ClawTrace.
|
|
48
|
+
Install it, set up the skill, then walk me through the process.
|
|
49
|
+
|
|
50
|
+
STEP 1 — INSTALL
|
|
51
|
+
pip install clawtrace
|
|
52
|
+
If that fails, ask the user where the source is.
|
|
53
|
+
|
|
54
|
+
STEP 2 — INSTALL SKILL
|
|
55
|
+
clawtrace update-skill claude
|
|
56
|
+
|
|
57
|
+
STEP 3 — SCAN & SCORE
|
|
58
|
+
clawtrace scan # Index sessions into local DB
|
|
59
|
+
clawtrace score --batch --auto-triage # AI-score + auto-approve/block
|
|
60
|
+
|
|
61
|
+
STEP 4 — REVIEW
|
|
62
|
+
clawtrace serve # Open workbench at localhost:8384
|
|
63
|
+
|
|
64
|
+
Tell the user: "Your workbench is open at localhost:8384. Everything is 100% local.
|
|
65
|
+
Use the Inbox to triage traces, Search to find sessions, and Bundles to assemble exports."
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
<details>
|
|
69
|
+
<summary><b>Terminal workflow (works on remote VMs — no browser needed)</b></summary>
|
|
70
|
+
|
|
71
|
+
The entire review-and-share workflow runs in your terminal. Your coding agent drives these commands for you.
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# 1. Scan — discover and index sessions
|
|
75
|
+
clawtrace scan
|
|
76
|
+
|
|
77
|
+
# 2. Review — browse and triage
|
|
78
|
+
clawtrace inbox --json --limit 20
|
|
79
|
+
clawtrace search "refactor auth" --json
|
|
80
|
+
|
|
81
|
+
# 3. Triage — approve or block
|
|
82
|
+
clawtrace approve <session_id> --reason "clean trace"
|
|
83
|
+
clawtrace block <session_id> --reason "proprietary code"
|
|
84
|
+
|
|
85
|
+
# 4. Score (optional) — AI-assisted quality scoring
|
|
86
|
+
clawtrace score --batch --auto-triage
|
|
87
|
+
|
|
88
|
+
# 5. Preview — review what will be shared (shows summaries + risk flags)
|
|
89
|
+
clawtrace share --status approved --preview
|
|
90
|
+
|
|
91
|
+
# 6. Share — after user confirms
|
|
92
|
+
clawtrace share --status approved --note "week 12 traces"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
For a visual review experience: `clawtrace serve` (local) or `clawtrace serve --remote` (prints SSH tunnel command for remote VMs).
|
|
96
|
+
|
|
97
|
+
</details>
|
|
98
|
+
|
|
99
|
+
<details>
|
|
100
|
+
<summary><b>Manual usage (without an agent)</b></summary>
|
|
101
|
+
|
|
102
|
+
### Quick start
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
pip install clawtrace
|
|
106
|
+
|
|
107
|
+
# Scan and score
|
|
108
|
+
clawtrace scan
|
|
109
|
+
clawtrace score --batch --auto-triage
|
|
110
|
+
|
|
111
|
+
# Open the workbench
|
|
112
|
+
clawtrace serve
|
|
113
|
+
|
|
114
|
+
# Or triage from the terminal
|
|
115
|
+
clawtrace inbox --json --limit 20
|
|
116
|
+
clawtrace approve <session-id> --reason "good trace"
|
|
117
|
+
clawtrace block <session-id> --reason "low quality"
|
|
118
|
+
|
|
119
|
+
# Configure redactions and exclusions
|
|
120
|
+
clawtrace config --exclude "personal-stuff,scratch"
|
|
121
|
+
clawtrace config --redact-usernames "my_github_handle,my_discord_name"
|
|
122
|
+
clawtrace config --redact "my-domain.com,my-secret-project"
|
|
123
|
+
|
|
124
|
+
# Export locally
|
|
125
|
+
clawtrace export --output /tmp/clawtrace_export.jsonl
|
|
126
|
+
|
|
127
|
+
# Optional: generate structured PII findings (hybrid = rules + Claude)
|
|
128
|
+
clawtrace export --output /tmp/clawtrace_export.jsonl --pii-review --pii-provider hybrid
|
|
129
|
+
|
|
130
|
+
# Optional: also produce a sanitized JSONL automatically
|
|
131
|
+
clawtrace export --output /tmp/clawtrace_export.jsonl --pii-review --pii-apply --pii-provider hybrid
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Commands
|
|
135
|
+
|
|
136
|
+
| Command | Description |
|
|
137
|
+
|---------|-------------|
|
|
138
|
+
| `clawtrace scan` | Index local sessions into workbench DB |
|
|
139
|
+
| `clawtrace score --batch --auto-triage` | AI-score all unscored sessions, auto-approve 4-5 and block 1-2 |
|
|
140
|
+
| `clawtrace score --batch --limit 20` | AI-score up to 20 sessions without triage |
|
|
141
|
+
| `clawtrace serve` | Open workbench UI at localhost:8384 |
|
|
142
|
+
| `clawtrace serve --remote` | Print SSH tunnel command for remote VM access |
|
|
143
|
+
| `clawtrace inbox --json --limit 20` | List sessions as JSON (for agent parsing) |
|
|
144
|
+
| `clawtrace search <query> --json` | Full-text search across sessions |
|
|
145
|
+
| `clawtrace approve <id> [id ...]` | Approve sessions by ID |
|
|
146
|
+
| `clawtrace block <id> [id ...]` | Block sessions by ID |
|
|
147
|
+
| `clawtrace shortlist <id> [id ...]` | Shortlist sessions for review |
|
|
148
|
+
| `clawtrace bundle-create --status approved` | Create bundle from all approved sessions |
|
|
149
|
+
| `clawtrace bundle-list` | List all bundles |
|
|
150
|
+
| `clawtrace bundle-view <bundle_id>` | View bundle details and sessions |
|
|
151
|
+
| `clawtrace bundle-export <bundle_id>` | Export bundle to disk (JSONL + manifest) |
|
|
152
|
+
| `clawtrace bundle-share <bundle_id>` | Upload bundle to ClawTrace ingest service |
|
|
153
|
+
| `clawtrace share --status approved` | One-step: bundle + export + share |
|
|
154
|
+
| `clawtrace export --pii-review --pii-apply` | Export, generate findings, and produce sanitized JSONL |
|
|
155
|
+
| `clawtrace config --source all` | Select source scope (`claude`, `codex`, `gemini`, `opencode`, `openclaw`, `kimi`, or `all`) |
|
|
156
|
+
| `clawtrace config --exclude "a,b"` | Add excluded projects (appends) |
|
|
157
|
+
| `clawtrace config --redact "str1,str2"` | Add strings to always redact (appends) |
|
|
158
|
+
| `clawtrace config --redact-usernames "u1,u2"` | Add usernames to anonymize (appends) |
|
|
159
|
+
| `clawtrace export` | Export to local JSONL |
|
|
160
|
+
| `clawtrace export --no-thinking` | Exclude extended thinking blocks |
|
|
161
|
+
| `clawtrace list` | List all projects with exclusion status |
|
|
162
|
+
| `clawtrace status` | Show current stage and next steps (JSON) |
|
|
163
|
+
| `clawtrace update-skill claude` | Install/update the clawtrace skill for Claude Code |
|
|
164
|
+
|
|
165
|
+
</details>
|
|
166
|
+
|
|
167
|
+
<details>
|
|
168
|
+
<summary><b>What gets exported</b></summary>
|
|
169
|
+
|
|
170
|
+
| Data | Included | Notes |
|
|
171
|
+
|------|----------|-------|
|
|
172
|
+
| User messages | Yes | Full text (including voice transcripts) |
|
|
173
|
+
| Assistant responses | Yes | Full text output |
|
|
174
|
+
| Extended thinking | Yes | Claude's reasoning (opt out with `--no-thinking`) |
|
|
175
|
+
| Tool calls | Yes | Tool name + inputs + outputs |
|
|
176
|
+
| Token usage | Yes | Input/output tokens per session |
|
|
177
|
+
| Model & metadata | Yes | Model name, git branch, timestamps |
|
|
178
|
+
|
|
179
|
+
### Privacy & Redaction
|
|
180
|
+
|
|
181
|
+
ClawTrace applies multiple layers of protection:
|
|
182
|
+
|
|
183
|
+
1. **Path anonymization** — File paths stripped to project-relative
|
|
184
|
+
2. **Username hashing** — Your local OS username (the name of your home directory, on macOS or Linux) + any configured usernames replaced with stable hashes
|
|
185
|
+
3. **Secret detection** — Regex patterns catch JWT tokens, API keys (Anthropic, OpenAI, GitHub, AWS, etc.), database passwords, private keys, Discord webhooks, and more
|
|
186
|
+
4. **Entropy analysis** — Long high-entropy strings in quotes are flagged as potential secrets
|
|
187
|
+
5. **Email redaction** — Personal email addresses removed
|
|
188
|
+
6. **Custom redaction** — You can configure additional strings and usernames to redact
|
|
189
|
+
7. **Tool call redaction** — Secrets in tool inputs and outputs are redacted
|
|
190
|
+
|
|
191
|
+
**This is NOT foolproof.** Always review your exported data before sharing.
|
|
192
|
+
Automated redaction cannot catch everything — especially service-specific
|
|
193
|
+
identifiers, third-party PII, or secrets in unusual formats.
|
|
194
|
+
|
|
195
|
+
To help improve redaction, report issues: https://github.com/kaiaiagent/clawtrace/issues
|
|
196
|
+
|
|
197
|
+
</details>
|
|
198
|
+
|
|
199
|
+
<details>
|
|
200
|
+
<summary><b>Data schema</b></summary>
|
|
201
|
+
|
|
202
|
+
Each line in `conversations.jsonl` is one session:
|
|
203
|
+
|
|
204
|
+
```json
|
|
205
|
+
{
|
|
206
|
+
"session_id": "abc-123",
|
|
207
|
+
"project": "my-project",
|
|
208
|
+
"model": "claude-opus-4-6",
|
|
209
|
+
"git_branch": "main",
|
|
210
|
+
"start_time": "2025-06-15T10:00:00+00:00",
|
|
211
|
+
"end_time": "2025-06-15T10:30:00+00:00",
|
|
212
|
+
"messages": [
|
|
213
|
+
{"role": "user", "content": "Fix the login bug", "timestamp": "..."},
|
|
214
|
+
{
|
|
215
|
+
"role": "assistant",
|
|
216
|
+
"content": "I'll investigate the login flow.",
|
|
217
|
+
"thinking": "The user wants me to look at...",
|
|
218
|
+
"tool_uses": [
|
|
219
|
+
{
|
|
220
|
+
"tool": "bash",
|
|
221
|
+
"input": {"command": "grep -r 'login' src/"},
|
|
222
|
+
"output": {"text": "src/auth.py:42: def login(user, password):"},
|
|
223
|
+
"status": "success"
|
|
224
|
+
}
|
|
225
|
+
],
|
|
226
|
+
"timestamp": "..."
|
|
227
|
+
}
|
|
228
|
+
],
|
|
229
|
+
"stats": {
|
|
230
|
+
"user_messages": 5, "assistant_messages": 8,
|
|
231
|
+
"tool_uses": 20, "input_tokens": 50000, "output_tokens": 3000
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
</details>
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
<details>
|
|
240
|
+
<summary><b>Gotchas</b></summary>
|
|
241
|
+
|
|
242
|
+
- **`--exclude`, `--redact`, `--redact-usernames` APPEND** — they never overwrite. Safe to call repeatedly.
|
|
243
|
+
- **Source selection is REQUIRED before export** — set `clawtrace config --source claude|codex|gemini|opencode|openclaw|kimi|all`.
|
|
244
|
+
- **PII audit is critical** — automated redaction is not foolproof.
|
|
245
|
+
- **Large exports take time** — 500+ sessions may take 1-3 minutes.
|
|
246
|
+
|
|
247
|
+
</details>
|
|
248
|
+
|
|
249
|
+
## License
|
|
250
|
+
|
|
251
|
+
MIT
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# ClawTrace
|
|
2
|
+
|
|
3
|
+
Review, score, and curate your coding agent conversation traces locally. ClawTrace indexes session logs from Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, Kimi CLI, and Cline, redacts secrets and PII, and gives you a browser workbench to triage and export the results.
|
|
4
|
+
|
|
5
|
+
Requires Python 3.10+.
|
|
6
|
+
|
|
7
|
+
## Review & score your traces
|
|
8
|
+
|
|
9
|
+
Index your local sessions, auto-score them for quality, and open a browser workbench — all in three commands:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install clawtrace
|
|
13
|
+
clawtrace scan # Index all local sessions
|
|
14
|
+
clawtrace score --batch --auto-triage # AI-score sessions, auto-approve 4-5, auto-block 1-2
|
|
15
|
+
clawtrace serve # Open workbench at localhost:8384
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
`score --batch` sends each session to Claude for evaluation (1-5 quality rating) and `--auto-triage` automatically approves high-quality sessions and blocks low-quality ones, leaving score-3 sessions for your manual review. The workbench gives you a full browser UI to read transcripts, adjust scores, search across sessions, and bundle approved traces for export.
|
|
19
|
+
|
|
20
|
+
## Give this to your agent
|
|
21
|
+
|
|
22
|
+
Paste this into Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, or any coding agent:
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
Help me review and curate my coding agent traces using ClawTrace.
|
|
26
|
+
Install it, set up the skill, then walk me through the process.
|
|
27
|
+
|
|
28
|
+
STEP 1 — INSTALL
|
|
29
|
+
pip install clawtrace
|
|
30
|
+
If that fails, ask the user where the source is.
|
|
31
|
+
|
|
32
|
+
STEP 2 — INSTALL SKILL
|
|
33
|
+
clawtrace update-skill claude
|
|
34
|
+
|
|
35
|
+
STEP 3 — SCAN & SCORE
|
|
36
|
+
clawtrace scan # Index sessions into local DB
|
|
37
|
+
clawtrace score --batch --auto-triage # AI-score + auto-approve/block
|
|
38
|
+
|
|
39
|
+
STEP 4 — REVIEW
|
|
40
|
+
clawtrace serve # Open workbench at localhost:8384
|
|
41
|
+
|
|
42
|
+
Tell the user: "Your workbench is open at localhost:8384. Everything is 100% local.
|
|
43
|
+
Use the Inbox to triage traces, Search to find sessions, and Bundles to assemble exports."
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
<details>
|
|
47
|
+
<summary><b>Terminal workflow (works on remote VMs — no browser needed)</b></summary>
|
|
48
|
+
|
|
49
|
+
The entire review-and-share workflow runs in your terminal. Your coding agent drives these commands for you.
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# 1. Scan — discover and index sessions
|
|
53
|
+
clawtrace scan
|
|
54
|
+
|
|
55
|
+
# 2. Review — browse and triage
|
|
56
|
+
clawtrace inbox --json --limit 20
|
|
57
|
+
clawtrace search "refactor auth" --json
|
|
58
|
+
|
|
59
|
+
# 3. Triage — approve or block
|
|
60
|
+
clawtrace approve <session_id> --reason "clean trace"
|
|
61
|
+
clawtrace block <session_id> --reason "proprietary code"
|
|
62
|
+
|
|
63
|
+
# 4. Score (optional) — AI-assisted quality scoring
|
|
64
|
+
clawtrace score --batch --auto-triage
|
|
65
|
+
|
|
66
|
+
# 5. Preview — review what will be shared (shows summaries + risk flags)
|
|
67
|
+
clawtrace share --status approved --preview
|
|
68
|
+
|
|
69
|
+
# 6. Share — after user confirms
|
|
70
|
+
clawtrace share --status approved --note "week 12 traces"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
For a visual review experience: `clawtrace serve` (local) or `clawtrace serve --remote` (prints SSH tunnel command for remote VMs).
|
|
74
|
+
|
|
75
|
+
</details>
|
|
76
|
+
|
|
77
|
+
<details>
|
|
78
|
+
<summary><b>Manual usage (without an agent)</b></summary>
|
|
79
|
+
|
|
80
|
+
### Quick start
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
pip install clawtrace
|
|
84
|
+
|
|
85
|
+
# Scan and score
|
|
86
|
+
clawtrace scan
|
|
87
|
+
clawtrace score --batch --auto-triage
|
|
88
|
+
|
|
89
|
+
# Open the workbench
|
|
90
|
+
clawtrace serve
|
|
91
|
+
|
|
92
|
+
# Or triage from the terminal
|
|
93
|
+
clawtrace inbox --json --limit 20
|
|
94
|
+
clawtrace approve <session-id> --reason "good trace"
|
|
95
|
+
clawtrace block <session-id> --reason "low quality"
|
|
96
|
+
|
|
97
|
+
# Configure redactions and exclusions
|
|
98
|
+
clawtrace config --exclude "personal-stuff,scratch"
|
|
99
|
+
clawtrace config --redact-usernames "my_github_handle,my_discord_name"
|
|
100
|
+
clawtrace config --redact "my-domain.com,my-secret-project"
|
|
101
|
+
|
|
102
|
+
# Export locally
|
|
103
|
+
clawtrace export --output /tmp/clawtrace_export.jsonl
|
|
104
|
+
|
|
105
|
+
# Optional: generate structured PII findings (hybrid = rules + Claude)
|
|
106
|
+
clawtrace export --output /tmp/clawtrace_export.jsonl --pii-review --pii-provider hybrid
|
|
107
|
+
|
|
108
|
+
# Optional: also produce a sanitized JSONL automatically
|
|
109
|
+
clawtrace export --output /tmp/clawtrace_export.jsonl --pii-review --pii-apply --pii-provider hybrid
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Commands
|
|
113
|
+
|
|
114
|
+
| Command | Description |
|
|
115
|
+
|---------|-------------|
|
|
116
|
+
| `clawtrace scan` | Index local sessions into workbench DB |
|
|
117
|
+
| `clawtrace score --batch --auto-triage` | AI-score all unscored sessions, auto-approve 4-5 and block 1-2 |
|
|
118
|
+
| `clawtrace score --batch --limit 20` | AI-score up to 20 sessions without triage |
|
|
119
|
+
| `clawtrace serve` | Open workbench UI at localhost:8384 |
|
|
120
|
+
| `clawtrace serve --remote` | Print SSH tunnel command for remote VM access |
|
|
121
|
+
| `clawtrace inbox --json --limit 20` | List sessions as JSON (for agent parsing) |
|
|
122
|
+
| `clawtrace search <query> --json` | Full-text search across sessions |
|
|
123
|
+
| `clawtrace approve <id> [id ...]` | Approve sessions by ID |
|
|
124
|
+
| `clawtrace block <id> [id ...]` | Block sessions by ID |
|
|
125
|
+
| `clawtrace shortlist <id> [id ...]` | Shortlist sessions for review |
|
|
126
|
+
| `clawtrace bundle-create --status approved` | Create bundle from all approved sessions |
|
|
127
|
+
| `clawtrace bundle-list` | List all bundles |
|
|
128
|
+
| `clawtrace bundle-view <bundle_id>` | View bundle details and sessions |
|
|
129
|
+
| `clawtrace bundle-export <bundle_id>` | Export bundle to disk (JSONL + manifest) |
|
|
130
|
+
| `clawtrace bundle-share <bundle_id>` | Upload bundle to ClawTrace ingest service |
|
|
131
|
+
| `clawtrace share --status approved` | One-step: bundle + export + share |
|
|
132
|
+
| `clawtrace export --pii-review --pii-apply` | Export, generate findings, and produce sanitized JSONL |
|
|
133
|
+
| `clawtrace config --source all` | Select source scope (`claude`, `codex`, `gemini`, `opencode`, `openclaw`, `kimi`, or `all`) |
|
|
134
|
+
| `clawtrace config --exclude "a,b"` | Add excluded projects (appends) |
|
|
135
|
+
| `clawtrace config --redact "str1,str2"` | Add strings to always redact (appends) |
|
|
136
|
+
| `clawtrace config --redact-usernames "u1,u2"` | Add usernames to anonymize (appends) |
|
|
137
|
+
| `clawtrace export` | Export to local JSONL |
|
|
138
|
+
| `clawtrace export --no-thinking` | Exclude extended thinking blocks |
|
|
139
|
+
| `clawtrace list` | List all projects with exclusion status |
|
|
140
|
+
| `clawtrace status` | Show current stage and next steps (JSON) |
|
|
141
|
+
| `clawtrace update-skill claude` | Install/update the clawtrace skill for Claude Code |
|
|
142
|
+
|
|
143
|
+
</details>
|
|
144
|
+
|
|
145
|
+
<details>
|
|
146
|
+
<summary><b>What gets exported</b></summary>
|
|
147
|
+
|
|
148
|
+
| Data | Included | Notes |
|
|
149
|
+
|------|----------|-------|
|
|
150
|
+
| User messages | Yes | Full text (including voice transcripts) |
|
|
151
|
+
| Assistant responses | Yes | Full text output |
|
|
152
|
+
| Extended thinking | Yes | Claude's reasoning (opt out with `--no-thinking`) |
|
|
153
|
+
| Tool calls | Yes | Tool name + inputs + outputs |
|
|
154
|
+
| Token usage | Yes | Input/output tokens per session |
|
|
155
|
+
| Model & metadata | Yes | Model name, git branch, timestamps |
|
|
156
|
+
|
|
157
|
+
### Privacy & Redaction
|
|
158
|
+
|
|
159
|
+
ClawTrace applies multiple layers of protection:
|
|
160
|
+
|
|
161
|
+
1. **Path anonymization** — File paths stripped to project-relative
|
|
162
|
+
2. **Username hashing** — Your local OS username (the name of your home directory, on macOS or Linux) + any configured usernames replaced with stable hashes
|
|
163
|
+
3. **Secret detection** — Regex patterns catch JWT tokens, API keys (Anthropic, OpenAI, GitHub, AWS, etc.), database passwords, private keys, Discord webhooks, and more
|
|
164
|
+
4. **Entropy analysis** — Long high-entropy strings in quotes are flagged as potential secrets
|
|
165
|
+
5. **Email redaction** — Personal email addresses removed
|
|
166
|
+
6. **Custom redaction** — You can configure additional strings and usernames to redact
|
|
167
|
+
7. **Tool call redaction** — Secrets in tool inputs and outputs are redacted
|
|
168
|
+
|
|
169
|
+
**This is NOT foolproof.** Always review your exported data before sharing.
|
|
170
|
+
Automated redaction cannot catch everything — especially service-specific
|
|
171
|
+
identifiers, third-party PII, or secrets in unusual formats.
|
|
172
|
+
|
|
173
|
+
To help improve redaction, report issues: https://github.com/kaiaiagent/clawtrace/issues
|
|
174
|
+
|
|
175
|
+
</details>
|
|
176
|
+
|
|
177
|
+
<details>
|
|
178
|
+
<summary><b>Data schema</b></summary>
|
|
179
|
+
|
|
180
|
+
Each line in `conversations.jsonl` is one session:
|
|
181
|
+
|
|
182
|
+
```json
|
|
183
|
+
{
|
|
184
|
+
"session_id": "abc-123",
|
|
185
|
+
"project": "my-project",
|
|
186
|
+
"model": "claude-opus-4-6",
|
|
187
|
+
"git_branch": "main",
|
|
188
|
+
"start_time": "2025-06-15T10:00:00+00:00",
|
|
189
|
+
"end_time": "2025-06-15T10:30:00+00:00",
|
|
190
|
+
"messages": [
|
|
191
|
+
{"role": "user", "content": "Fix the login bug", "timestamp": "..."},
|
|
192
|
+
{
|
|
193
|
+
"role": "assistant",
|
|
194
|
+
"content": "I'll investigate the login flow.",
|
|
195
|
+
"thinking": "The user wants me to look at...",
|
|
196
|
+
"tool_uses": [
|
|
197
|
+
{
|
|
198
|
+
"tool": "bash",
|
|
199
|
+
"input": {"command": "grep -r 'login' src/"},
|
|
200
|
+
"output": {"text": "src/auth.py:42: def login(user, password):"},
|
|
201
|
+
"status": "success"
|
|
202
|
+
}
|
|
203
|
+
],
|
|
204
|
+
"timestamp": "..."
|
|
205
|
+
}
|
|
206
|
+
],
|
|
207
|
+
"stats": {
|
|
208
|
+
"user_messages": 5, "assistant_messages": 8,
|
|
209
|
+
"tool_uses": 20, "input_tokens": 50000, "output_tokens": 3000
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
</details>
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
<details>
|
|
218
|
+
<summary><b>Gotchas</b></summary>
|
|
219
|
+
|
|
220
|
+
- **`--exclude`, `--redact`, `--redact-usernames` APPEND** — they never overwrite. Safe to call repeatedly.
|
|
221
|
+
- **Source selection is REQUIRED before export** — set `clawtrace config --source claude|codex|gemini|opencode|openclaw|kimi|all`.
|
|
222
|
+
- **PII audit is critical** — automated redaction is not foolproof.
|
|
223
|
+
- **Large exports take time** — 500+ sessions may take 1-3 minutes.
|
|
224
|
+
|
|
225
|
+
</details>
|
|
226
|
+
|
|
227
|
+
## License
|
|
228
|
+
|
|
229
|
+
MIT
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Anonymize PII in Claude Code log data."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _hash_username(username: str) -> str:
    """Return a stable pseudonym for *username*.

    The result is ``user_`` followed by the first eight hex characters of
    the SHA-256 digest of the encoded username, so the same input always
    maps to the same placeholder.
    """
    digest = hashlib.sha256(username.encode()).hexdigest()
    return f"user_{digest[:8]}"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _detect_home_dir() -> tuple[str, str]:
    """Return ``(home, username)`` for the current user.

    ``home`` is the expansion of ``~``; ``username`` is the final path
    component of that directory.
    """
    home_dir = os.path.expanduser("~")
    return home_dir, os.path.basename(home_dir)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def anonymize_path(path: str, username: str, username_hash: str, home: str | None = None) -> str:
|
|
19
|
+
"""Strip a path to project-relative and hash the username."""
|
|
20
|
+
if not path:
|
|
21
|
+
return path
|
|
22
|
+
|
|
23
|
+
if home is None:
|
|
24
|
+
home = os.path.expanduser("~")
|
|
25
|
+
prefixes = set()
|
|
26
|
+
for base in (f"/Users/{username}", f"/home/{username}", home):
|
|
27
|
+
for subdir in ("Documents", "Downloads", "Desktop"):
|
|
28
|
+
prefixes.add(f"{base}/{subdir}/")
|
|
29
|
+
prefixes.add(f"{base}/")
|
|
30
|
+
|
|
31
|
+
# Try longest prefixes first (subdirectory matches before bare home)
|
|
32
|
+
home_patterns = sorted(prefixes, key=len, reverse=True)
|
|
33
|
+
|
|
34
|
+
for prefix in home_patterns:
|
|
35
|
+
if path.startswith(prefix):
|
|
36
|
+
rest = path[len(prefix):]
|
|
37
|
+
if "/Documents/" in prefix or "/Downloads/" in prefix or "/Desktop/" in prefix:
|
|
38
|
+
return rest
|
|
39
|
+
return f"{username_hash}/{rest}"
|
|
40
|
+
|
|
41
|
+
path = path.replace(f"/Users/{username}/", f"/{username_hash}/")
|
|
42
|
+
path = path.replace(f"/home/{username}/", f"/{username_hash}/")
|
|
43
|
+
|
|
44
|
+
return path
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def anonymize_text(text: str, username: str, username_hash: str) -> str:
    """Replace occurrences of *username* in free-form text with *username_hash*.

    The text is returned unchanged when either ``text`` or ``username`` is
    empty. Otherwise these passes run in order:

    1. Temp-dir paths ``claude-<digits>/-Users-<username>`` become
       ``claude-XXX/-Users-<username_hash>``.
    2. ``/Users/<username>`` and ``/home/<username>`` become
       ``/<username_hash>``.
    3. Hyphen-encoded ``-Users-<username>`` / ``-home-<username>`` are
       rewritten, including the spelling where underscores in the username
       appear as hyphens.
    4. Any remaining whole-word occurrence of the username is replaced,
       but only for usernames of four or more characters.
    """
    if not text or not username:
        return text

    escaped = re.escape(username)

    # Catch temp paths like /private/tmp/claude-501/-Users-peteromalley/.
    # This must run before the generic "-Users-<name>" pass below: that pass
    # rewrites the username, after which this pattern can no longer match
    # and the numeric "claude-<n>" segment would be left in place.
    text = re.sub(rf"claude-\d+/-Users-{escaped}", f"claude-XXX/-Users-{username_hash}", text)

    # Replace /Users/<username> and /home/<username>; the lookahead stops a
    # longer name that merely starts with the username from being rewritten.
    text = re.sub(rf"/Users/{escaped}(?=/|[^a-zA-Z0-9_-]|$)", f"/{username_hash}", text)
    text = re.sub(rf"/home/{escaped}(?=/|[^a-zA-Z0-9_-]|$)", f"/{username_hash}", text)

    # Catch hyphen-encoded paths: -Users-peteromalley- or -Users-peteromalley/
    text = re.sub(rf"-Users-{escaped}(?=-|/|$)", f"-Users-{username_hash}", text)
    text = re.sub(rf"-home-{escaped}(?=-|/|$)", f"-home-{username_hash}", text)

    # Also handle underscore-to-hyphen encoding: kaid_aiagent -> kaid-aiagent
    if "_" in username:
        hyphen_escaped = re.escape(username.replace("_", "-"))
        text = re.sub(rf"-Users-{hyphen_escaped}(?=-|/|$)", f"-Users-{username_hash}", text)
        text = re.sub(rf"-home-{hyphen_escaped}(?=-|/|$)", f"-home-{username_hash}", text)

    # Final pass: replace bare username in remaining contexts (ls output,
    # prose, etc.). Only if username is >= 4 chars to avoid false positives.
    if len(username) >= 4:
        text = re.sub(rf"\b{escaped}\b", username_hash, text)

    return text
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class Anonymizer:
    """Anonymizer bound to the current user plus optional extra handles.

    The home directory, username and username hash are resolved once at
    construction, so every call maps a given name to the same placeholder.
    """

    def __init__(self, extra_usernames: list[str] | None = None):
        self.home, self.username = _detect_home_dir()
        self.username_hash = _hash_username(self.username)

        # Additional usernames to anonymize (GitHub handles, Discord names,
        # etc.), stored as (name, hash) pairs. Blank entries and the primary
        # username itself are skipped.
        cleaned = (raw.strip() for raw in (extra_usernames or []))
        self._extra: list[tuple[str, str]] = [
            (alias, _hash_username(alias))
            for alias in cleaned
            if alias and alias != self.username
        ]

    def _scrub_extra(self, value: str) -> str:
        """Apply every configured extra-username replacement to *value*."""
        for alias, alias_hash in self._extra:
            value = _replace_username(value, alias, alias_hash)
        return value

    def path(self, file_path: str) -> str:
        """Anonymize a file path, then scrub it like ordinary text."""
        shortened = anonymize_path(file_path, self.username, self.username_hash, self.home)
        scrubbed = anonymize_text(shortened, self.username, self.username_hash)
        return self._scrub_extra(scrubbed)

    def text(self, content: str) -> str:
        """Anonymize free-form text for the primary and extra usernames."""
        scrubbed = anonymize_text(content, self.username, self.username_hash)
        return self._scrub_extra(scrubbed)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _replace_username(text: str, username: str, username_hash: str) -> str:
    """Replace every case-insensitive occurrence of *username* in *text*.

    Matching is by substring, with no word-boundary requirement. The text
    is returned untouched when it is empty, when the username is empty, or
    when the username is shorter than three characters.
    """
    if text and username and len(username) >= 3:
        return re.sub(re.escape(username), username_hash, text, flags=re.IGNORECASE)
    return text
|