clawtrace 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. clawtrace-0.1.0/LICENSE +21 -0
  2. clawtrace-0.1.0/MANIFEST.in +2 -0
  3. clawtrace-0.1.0/PKG-INFO +251 -0
  4. clawtrace-0.1.0/README.md +229 -0
  5. clawtrace-0.1.0/clawtrace/__init__.py +3 -0
  6. clawtrace-0.1.0/clawtrace/anonymizer.py +112 -0
  7. clawtrace-0.1.0/clawtrace/badges.py +523 -0
  8. clawtrace-0.1.0/clawtrace/cli.py +3299 -0
  9. clawtrace-0.1.0/clawtrace/config.py +65 -0
  10. clawtrace-0.1.0/clawtrace/daemon.py +981 -0
  11. clawtrace-0.1.0/clawtrace/data/SKILL.md +273 -0
  12. clawtrace-0.1.0/clawtrace/index.py +1008 -0
  13. clawtrace-0.1.0/clawtrace/parser.py +2038 -0
  14. clawtrace-0.1.0/clawtrace/pii.py +523 -0
  15. clawtrace-0.1.0/clawtrace/scoring.py +527 -0
  16. clawtrace-0.1.0/clawtrace/secrets.py +282 -0
  17. clawtrace-0.1.0/clawtrace/web/frontend/dist/assets/index-CC1ZQXpr.js +11 -0
  18. clawtrace-0.1.0/clawtrace/web/frontend/dist/assets/index-CbWkARoN.css +1 -0
  19. clawtrace-0.1.0/clawtrace/web/frontend/dist/favicon.svg +1 -0
  20. clawtrace-0.1.0/clawtrace/web/frontend/dist/icons.svg +24 -0
  21. clawtrace-0.1.0/clawtrace/web/frontend/dist/index.html +14 -0
  22. clawtrace-0.1.0/clawtrace/web/frontend/node_modules/flatted/python/flatted.py +144 -0
  23. clawtrace-0.1.0/clawtrace.egg-info/PKG-INFO +251 -0
  24. clawtrace-0.1.0/clawtrace.egg-info/SOURCES.txt +38 -0
  25. clawtrace-0.1.0/clawtrace.egg-info/dependency_links.txt +1 -0
  26. clawtrace-0.1.0/clawtrace.egg-info/entry_points.txt +2 -0
  27. clawtrace-0.1.0/clawtrace.egg-info/requires.txt +3 -0
  28. clawtrace-0.1.0/clawtrace.egg-info/top_level.txt +1 -0
  29. clawtrace-0.1.0/pyproject.toml +38 -0
  30. clawtrace-0.1.0/setup.cfg +4 -0
  31. clawtrace-0.1.0/tests/test_anonymizer.py +223 -0
  32. clawtrace-0.1.0/tests/test_badges.py +242 -0
  33. clawtrace-0.1.0/tests/test_cli.py +1160 -0
  34. clawtrace-0.1.0/tests/test_config.py +71 -0
  35. clawtrace-0.1.0/tests/test_daemon.py +447 -0
  36. clawtrace-0.1.0/tests/test_index.py +227 -0
  37. clawtrace-0.1.0/tests/test_parser.py +1780 -0
  38. clawtrace-0.1.0/tests/test_pii.py +304 -0
  39. clawtrace-0.1.0/tests/test_scoring.py +291 -0
  40. clawtrace-0.1.0/tests/test_secrets.py +431 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Banodoco
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ recursive-include clawtrace/web/frontend/dist *
2
+ recursive-include clawtrace/data *
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.4
2
+ Name: clawtrace
3
+ Version: 0.1.0
4
+ Summary: Review, score, and curate your coding agent conversation traces locally
5
+ Author: kaiaiagent
6
+ License-Expression: MIT
7
+ Keywords: claude-code,codex,gemini-cli,opencode,openclaw,dataset,conversations
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.10
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Provides-Extra: dev
20
+ Requires-Dist: pytest; extra == "dev"
21
+ Dynamic: license-file
22
+
23
+ # ClawTrace
24
+
25
+ Review, score, and curate your coding agent conversation traces locally. ClawTrace indexes session logs from Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, Kimi CLI, and Cline, redacts secrets and PII, and gives you a browser workbench to triage and export the results.
26
+
27
+ Requires Python 3.10+.
28
+
29
+ ## Review & score your traces
30
+
31
+ Index your local sessions, auto-score them for quality, and open a browser workbench — all in three commands:
32
+
33
+ ```bash
34
+ pip install clawtrace
35
+ clawtrace scan # Index all local sessions
36
+ clawtrace score --batch --auto-triage # AI-score sessions, auto-approve 4-5, auto-block 1-2
37
+ clawtrace serve # Open workbench at localhost:8384
38
+ ```
39
+
40
+ `score --batch` sends each session to Claude for evaluation (1-5 quality rating) and `--auto-triage` automatically approves high-quality sessions and blocks low-quality ones, leaving score-3 sessions for your manual review. The workbench gives you a full browser UI to read transcripts, adjust scores, search across sessions, and bundle approved traces for export.
41
+
42
+ ## Give this to your agent
43
+
44
+ Paste this into Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, or any coding agent:
45
+
46
+ ```
47
+ Help me review and curate my coding agent traces using ClawTrace.
48
+ Install it, set up the skill, then walk me through the process.
49
+
50
+ STEP 1 — INSTALL
51
+ pip install clawtrace
52
+ If that fails, ask the user where the source is.
53
+
54
+ STEP 2 — INSTALL SKILL
55
+ clawtrace update-skill claude
56
+
57
+ STEP 3 — SCAN & SCORE
58
+ clawtrace scan # Index sessions into local DB
59
+ clawtrace score --batch --auto-triage # AI-score + auto-approve/block
60
+
61
+ STEP 4 — REVIEW
62
+ clawtrace serve # Open workbench at localhost:8384
63
+
64
+ Tell the user: "Your workbench is open at localhost:8384. Everything is 100% local.
65
+ Use the Inbox to triage traces, Search to find sessions, and Bundles to assemble exports."
66
+ ```
67
+
68
+ <details>
69
+ <summary><b>Terminal workflow (works on remote VMs — no browser needed)</b></summary>
70
+
71
+ The entire review-and-share workflow runs in your terminal. Your coding agent drives these commands for you.
72
+
73
+ ```bash
74
+ # 1. Scan — discover and index sessions
75
+ clawtrace scan
76
+
77
+ # 2. Review — browse and triage
78
+ clawtrace inbox --json --limit 20
79
+ clawtrace search "refactor auth" --json
80
+
81
+ # 3. Triage — approve or block
82
+ clawtrace approve <session_id> --reason "clean trace"
83
+ clawtrace block <session_id> --reason "proprietary code"
84
+
85
+ # 4. Score (optional) — AI-assisted quality scoring
86
+ clawtrace score --batch --auto-triage
87
+
88
+ # 5. Preview — review what will be shared (shows summaries + risk flags)
89
+ clawtrace share --status approved --preview
90
+
91
+ # 6. Share — after user confirms
92
+ clawtrace share --status approved --note "week 12 traces"
93
+ ```
94
+
95
+ For a visual review experience: `clawtrace serve` (local) or `clawtrace serve --remote` (prints SSH tunnel command for remote VMs).
96
+
97
+ </details>
98
+
99
+ <details>
100
+ <summary><b>Manual usage (without an agent)</b></summary>
101
+
102
+ ### Quick start
103
+
104
+ ```bash
105
+ pip install clawtrace
106
+
107
+ # Scan and score
108
+ clawtrace scan
109
+ clawtrace score --batch --auto-triage
110
+
111
+ # Open the workbench
112
+ clawtrace serve
113
+
114
+ # Or triage from the terminal
115
+ clawtrace inbox --json --limit 20
116
+ clawtrace approve <session-id> --reason "good trace"
117
+ clawtrace block <session-id> --reason "low quality"
118
+
119
+ # Configure redactions and exclusions
120
+ clawtrace config --exclude "personal-stuff,scratch"
121
+ clawtrace config --redact-usernames "my_github_handle,my_discord_name"
122
+ clawtrace config --redact "my-domain.com,my-secret-project"
123
+
124
+ # Export locally
125
+ clawtrace export --output /tmp/clawtrace_export.jsonl
126
+
127
+ # Optional: generate structured PII findings (hybrid = rules + Claude)
128
+ clawtrace export --output /tmp/clawtrace_export.jsonl --pii-review --pii-provider hybrid
129
+
130
+ # Optional: also produce a sanitized JSONL automatically
131
+ clawtrace export --output /tmp/clawtrace_export.jsonl --pii-review --pii-apply --pii-provider hybrid
132
+ ```
133
+
134
+ ### Commands
135
+
136
+ | Command | Description |
137
+ |---------|-------------|
138
+ | `clawtrace scan` | Index local sessions into workbench DB |
139
+ | `clawtrace score --batch --auto-triage` | AI-score all unscored sessions, auto-approve 4-5 and block 1-2 |
140
+ | `clawtrace score --batch --limit 20` | AI-score up to 20 sessions without triage |
141
+ | `clawtrace serve` | Open workbench UI at localhost:8384 |
142
+ | `clawtrace serve --remote` | Print SSH tunnel command for remote VM access |
143
+ | `clawtrace inbox --json --limit 20` | List sessions as JSON (for agent parsing) |
144
+ | `clawtrace search <query> --json` | Full-text search across sessions |
145
+ | `clawtrace approve <id> [id ...]` | Approve sessions by ID |
146
+ | `clawtrace block <id> [id ...]` | Block sessions by ID |
147
+ | `clawtrace shortlist <id> [id ...]` | Shortlist sessions for review |
148
+ | `clawtrace bundle-create --status approved` | Create bundle from all approved sessions |
149
+ | `clawtrace bundle-list` | List all bundles |
150
+ | `clawtrace bundle-view <bundle_id>` | View bundle details and sessions |
151
+ | `clawtrace bundle-export <bundle_id>` | Export bundle to disk (JSONL + manifest) |
152
+ | `clawtrace bundle-share <bundle_id>` | Upload bundle to ClawTrace ingest service |
153
+ | `clawtrace share --status approved` | One-step: bundle + export + share |
154
+ | `clawtrace export --pii-review --pii-apply` | Export, generate findings, and produce sanitized JSONL |
155
+ | `clawtrace config --source all` | Select source scope (`claude`, `codex`, `gemini`, `opencode`, `openclaw`, `kimi`, or `all`) |
156
+ | `clawtrace config --exclude "a,b"` | Add excluded projects (appends) |
157
+ | `clawtrace config --redact "str1,str2"` | Add strings to always redact (appends) |
158
+ | `clawtrace config --redact-usernames "u1,u2"` | Add usernames to anonymize (appends) |
159
+ | `clawtrace export` | Export to local JSONL |
160
+ | `clawtrace export --no-thinking` | Exclude extended thinking blocks |
161
+ | `clawtrace list` | List all projects with exclusion status |
162
+ | `clawtrace status` | Show current stage and next steps (JSON) |
163
+ | `clawtrace update-skill claude` | Install/update the clawtrace skill for Claude Code |
164
+
165
+ </details>
166
+
167
+ <details>
168
+ <summary><b>What gets exported</b></summary>
169
+
170
+ | Data | Included | Notes |
171
+ |------|----------|-------|
172
+ | User messages | Yes | Full text (including voice transcripts) |
173
+ | Assistant responses | Yes | Full text output |
174
+ | Extended thinking | Yes | Claude's reasoning (opt out with `--no-thinking`) |
175
+ | Tool calls | Yes | Tool name + inputs + outputs |
176
+ | Token usage | Yes | Input/output tokens per session |
177
+ | Model & metadata | Yes | Model name, git branch, timestamps |
178
+
179
+ ### Privacy & Redaction
180
+
181
+ ClawTrace applies multiple layers of protection:
182
+
183
+ 1. **Path anonymization** — File paths stripped to project-relative
184
+ 2. **Username hashing** — Your local OS username (taken from your home directory, e.g. `/Users/<name>` or `/home/<name>`) + any configured usernames replaced with stable hashes
185
+ 3. **Secret detection** — Regex patterns catch JWT tokens, API keys (Anthropic, OpenAI, GitHub, AWS, etc.), database passwords, private keys, Discord webhooks, and more
186
+ 4. **Entropy analysis** — Long high-entropy strings in quotes are flagged as potential secrets
187
+ 5. **Email redaction** — Personal email addresses removed
188
+ 6. **Custom redaction** — You can configure additional strings and usernames to redact
189
+ 7. **Tool call redaction** — Secrets in tool inputs and outputs are redacted
190
+
191
+ **This is NOT foolproof.** Always review your exported data before sharing.
192
+ Automated redaction cannot catch everything — especially service-specific
193
+ identifiers, third-party PII, or secrets in unusual formats.
194
+
195
+ To help improve redaction, report issues: https://github.com/kaiaiagent/clawtrace/issues
196
+
197
+ </details>
198
+
199
+ <details>
200
+ <summary><b>Data schema</b></summary>
201
+
202
+ Each line in `conversations.jsonl` is one session:
203
+
204
+ ```json
205
+ {
206
+ "session_id": "abc-123",
207
+ "project": "my-project",
208
+ "model": "claude-opus-4-6",
209
+ "git_branch": "main",
210
+ "start_time": "2025-06-15T10:00:00+00:00",
211
+ "end_time": "2025-06-15T10:30:00+00:00",
212
+ "messages": [
213
+ {"role": "user", "content": "Fix the login bug", "timestamp": "..."},
214
+ {
215
+ "role": "assistant",
216
+ "content": "I'll investigate the login flow.",
217
+ "thinking": "The user wants me to look at...",
218
+ "tool_uses": [
219
+ {
220
+ "tool": "bash",
221
+ "input": {"command": "grep -r 'login' src/"},
222
+ "output": {"text": "src/auth.py:42: def login(user, password):"},
223
+ "status": "success"
224
+ }
225
+ ],
226
+ "timestamp": "..."
227
+ }
228
+ ],
229
+ "stats": {
230
+ "user_messages": 5, "assistant_messages": 8,
231
+ "tool_uses": 20, "input_tokens": 50000, "output_tokens": 3000
232
+ }
233
+ }
234
+ ```
235
+
236
+ </details>
237
+
238
+
239
+ <details>
240
+ <summary><b>Gotchas</b></summary>
241
+
242
+ - **`--exclude`, `--redact`, `--redact-usernames` APPEND** — they never overwrite. Safe to call repeatedly.
243
+ - **Source selection is REQUIRED before export** — set `clawtrace config --source claude|codex|gemini|opencode|openclaw|kimi|all`.
244
+ - **PII audit is critical** — automated redaction is not foolproof.
245
+ - **Large exports take time** — 500+ sessions may take 1-3 minutes.
246
+
247
+ </details>
248
+
249
+ ## License
250
+
251
+ MIT
@@ -0,0 +1,229 @@
1
+ # ClawTrace
2
+
3
+ Review, score, and curate your coding agent conversation traces locally. ClawTrace indexes session logs from Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, Kimi CLI, and Cline, redacts secrets and PII, and gives you a browser workbench to triage and export the results.
4
+
5
+ Requires Python 3.10+.
6
+
7
+ ## Review & score your traces
8
+
9
+ Index your local sessions, auto-score them for quality, and open a browser workbench — all in three commands:
10
+
11
+ ```bash
12
+ pip install clawtrace
13
+ clawtrace scan # Index all local sessions
14
+ clawtrace score --batch --auto-triage # AI-score sessions, auto-approve 4-5, auto-block 1-2
15
+ clawtrace serve # Open workbench at localhost:8384
16
+ ```
17
+
18
+ `score --batch` sends each session to Claude for evaluation (1-5 quality rating) and `--auto-triage` automatically approves high-quality sessions and blocks low-quality ones, leaving score-3 sessions for your manual review. The workbench gives you a full browser UI to read transcripts, adjust scores, search across sessions, and bundle approved traces for export.
19
+
20
+ ## Give this to your agent
21
+
22
+ Paste this into Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, or any coding agent:
23
+
24
+ ```
25
+ Help me review and curate my coding agent traces using ClawTrace.
26
+ Install it, set up the skill, then walk me through the process.
27
+
28
+ STEP 1 — INSTALL
29
+ pip install clawtrace
30
+ If that fails, ask the user where the source is.
31
+
32
+ STEP 2 — INSTALL SKILL
33
+ clawtrace update-skill claude
34
+
35
+ STEP 3 — SCAN & SCORE
36
+ clawtrace scan # Index sessions into local DB
37
+ clawtrace score --batch --auto-triage # AI-score + auto-approve/block
38
+
39
+ STEP 4 — REVIEW
40
+ clawtrace serve # Open workbench at localhost:8384
41
+
42
+ Tell the user: "Your workbench is open at localhost:8384. Everything is 100% local.
43
+ Use the Inbox to triage traces, Search to find sessions, and Bundles to assemble exports."
44
+ ```
45
+
46
+ <details>
47
+ <summary><b>Terminal workflow (works on remote VMs — no browser needed)</b></summary>
48
+
49
+ The entire review-and-share workflow runs in your terminal. Your coding agent drives these commands for you.
50
+
51
+ ```bash
52
+ # 1. Scan — discover and index sessions
53
+ clawtrace scan
54
+
55
+ # 2. Review — browse and triage
56
+ clawtrace inbox --json --limit 20
57
+ clawtrace search "refactor auth" --json
58
+
59
+ # 3. Triage — approve or block
60
+ clawtrace approve <session_id> --reason "clean trace"
61
+ clawtrace block <session_id> --reason "proprietary code"
62
+
63
+ # 4. Score (optional) — AI-assisted quality scoring
64
+ clawtrace score --batch --auto-triage
65
+
66
+ # 5. Preview — review what will be shared (shows summaries + risk flags)
67
+ clawtrace share --status approved --preview
68
+
69
+ # 6. Share — after user confirms
70
+ clawtrace share --status approved --note "week 12 traces"
71
+ ```
72
+
73
+ For a visual review experience: `clawtrace serve` (local) or `clawtrace serve --remote` (prints SSH tunnel command for remote VMs).
74
+
75
+ </details>
76
+
77
+ <details>
78
+ <summary><b>Manual usage (without an agent)</b></summary>
79
+
80
+ ### Quick start
81
+
82
+ ```bash
83
+ pip install clawtrace
84
+
85
+ # Scan and score
86
+ clawtrace scan
87
+ clawtrace score --batch --auto-triage
88
+
89
+ # Open the workbench
90
+ clawtrace serve
91
+
92
+ # Or triage from the terminal
93
+ clawtrace inbox --json --limit 20
94
+ clawtrace approve <session-id> --reason "good trace"
95
+ clawtrace block <session-id> --reason "low quality"
96
+
97
+ # Configure redactions and exclusions
98
+ clawtrace config --exclude "personal-stuff,scratch"
99
+ clawtrace config --redact-usernames "my_github_handle,my_discord_name"
100
+ clawtrace config --redact "my-domain.com,my-secret-project"
101
+
102
+ # Export locally
103
+ clawtrace export --output /tmp/clawtrace_export.jsonl
104
+
105
+ # Optional: generate structured PII findings (hybrid = rules + Claude)
106
+ clawtrace export --output /tmp/clawtrace_export.jsonl --pii-review --pii-provider hybrid
107
+
108
+ # Optional: also produce a sanitized JSONL automatically
109
+ clawtrace export --output /tmp/clawtrace_export.jsonl --pii-review --pii-apply --pii-provider hybrid
110
+ ```
111
+
112
+ ### Commands
113
+
114
+ | Command | Description |
115
+ |---------|-------------|
116
+ | `clawtrace scan` | Index local sessions into workbench DB |
117
+ | `clawtrace score --batch --auto-triage` | AI-score all unscored sessions, auto-approve 4-5 and block 1-2 |
118
+ | `clawtrace score --batch --limit 20` | AI-score up to 20 sessions without triage |
119
+ | `clawtrace serve` | Open workbench UI at localhost:8384 |
120
+ | `clawtrace serve --remote` | Print SSH tunnel command for remote VM access |
121
+ | `clawtrace inbox --json --limit 20` | List sessions as JSON (for agent parsing) |
122
+ | `clawtrace search <query> --json` | Full-text search across sessions |
123
+ | `clawtrace approve <id> [id ...]` | Approve sessions by ID |
124
+ | `clawtrace block <id> [id ...]` | Block sessions by ID |
125
+ | `clawtrace shortlist <id> [id ...]` | Shortlist sessions for review |
126
+ | `clawtrace bundle-create --status approved` | Create bundle from all approved sessions |
127
+ | `clawtrace bundle-list` | List all bundles |
128
+ | `clawtrace bundle-view <bundle_id>` | View bundle details and sessions |
129
+ | `clawtrace bundle-export <bundle_id>` | Export bundle to disk (JSONL + manifest) |
130
+ | `clawtrace bundle-share <bundle_id>` | Upload bundle to ClawTrace ingest service |
131
+ | `clawtrace share --status approved` | One-step: bundle + export + share |
132
+ | `clawtrace export --pii-review --pii-apply` | Export, generate findings, and produce sanitized JSONL |
133
+ | `clawtrace config --source all` | Select source scope (`claude`, `codex`, `gemini`, `opencode`, `openclaw`, `kimi`, or `all`) |
134
+ | `clawtrace config --exclude "a,b"` | Add excluded projects (appends) |
135
+ | `clawtrace config --redact "str1,str2"` | Add strings to always redact (appends) |
136
+ | `clawtrace config --redact-usernames "u1,u2"` | Add usernames to anonymize (appends) |
137
+ | `clawtrace export` | Export to local JSONL |
138
+ | `clawtrace export --no-thinking` | Exclude extended thinking blocks |
139
+ | `clawtrace list` | List all projects with exclusion status |
140
+ | `clawtrace status` | Show current stage and next steps (JSON) |
141
+ | `clawtrace update-skill claude` | Install/update the clawtrace skill for Claude Code |
142
+
143
+ </details>
144
+
145
+ <details>
146
+ <summary><b>What gets exported</b></summary>
147
+
148
+ | Data | Included | Notes |
149
+ |------|----------|-------|
150
+ | User messages | Yes | Full text (including voice transcripts) |
151
+ | Assistant responses | Yes | Full text output |
152
+ | Extended thinking | Yes | Claude's reasoning (opt out with `--no-thinking`) |
153
+ | Tool calls | Yes | Tool name + inputs + outputs |
154
+ | Token usage | Yes | Input/output tokens per session |
155
+ | Model & metadata | Yes | Model name, git branch, timestamps |
156
+
157
+ ### Privacy & Redaction
158
+
159
+ ClawTrace applies multiple layers of protection:
160
+
161
+ 1. **Path anonymization** — File paths stripped to project-relative
162
+ 2. **Username hashing** — Your local OS username (taken from your home directory, e.g. `/Users/<name>` or `/home/<name>`) + any configured usernames replaced with stable hashes
163
+ 3. **Secret detection** — Regex patterns catch JWT tokens, API keys (Anthropic, OpenAI, GitHub, AWS, etc.), database passwords, private keys, Discord webhooks, and more
164
+ 4. **Entropy analysis** — Long high-entropy strings in quotes are flagged as potential secrets
165
+ 5. **Email redaction** — Personal email addresses removed
166
+ 6. **Custom redaction** — You can configure additional strings and usernames to redact
167
+ 7. **Tool call redaction** — Secrets in tool inputs and outputs are redacted
168
+
169
+ **This is NOT foolproof.** Always review your exported data before sharing.
170
+ Automated redaction cannot catch everything — especially service-specific
171
+ identifiers, third-party PII, or secrets in unusual formats.
172
+
173
+ To help improve redaction, report issues: https://github.com/kaiaiagent/clawtrace/issues
174
+
175
+ </details>
176
+
177
+ <details>
178
+ <summary><b>Data schema</b></summary>
179
+
180
+ Each line in `conversations.jsonl` is one session:
181
+
182
+ ```json
183
+ {
184
+ "session_id": "abc-123",
185
+ "project": "my-project",
186
+ "model": "claude-opus-4-6",
187
+ "git_branch": "main",
188
+ "start_time": "2025-06-15T10:00:00+00:00",
189
+ "end_time": "2025-06-15T10:30:00+00:00",
190
+ "messages": [
191
+ {"role": "user", "content": "Fix the login bug", "timestamp": "..."},
192
+ {
193
+ "role": "assistant",
194
+ "content": "I'll investigate the login flow.",
195
+ "thinking": "The user wants me to look at...",
196
+ "tool_uses": [
197
+ {
198
+ "tool": "bash",
199
+ "input": {"command": "grep -r 'login' src/"},
200
+ "output": {"text": "src/auth.py:42: def login(user, password):"},
201
+ "status": "success"
202
+ }
203
+ ],
204
+ "timestamp": "..."
205
+ }
206
+ ],
207
+ "stats": {
208
+ "user_messages": 5, "assistant_messages": 8,
209
+ "tool_uses": 20, "input_tokens": 50000, "output_tokens": 3000
210
+ }
211
+ }
212
+ ```
213
+
214
+ </details>
215
+
216
+
217
+ <details>
218
+ <summary><b>Gotchas</b></summary>
219
+
220
+ - **`--exclude`, `--redact`, `--redact-usernames` APPEND** — they never overwrite. Safe to call repeatedly.
221
+ - **Source selection is REQUIRED before export** — set `clawtrace config --source claude|codex|gemini|opencode|openclaw|kimi|all`.
222
+ - **PII audit is critical** — automated redaction is not foolproof.
223
+ - **Large exports take time** — 500+ sessions may take 1-3 minutes.
224
+
225
+ </details>
226
+
227
+ ## License
228
+
229
+ MIT
@@ -0,0 +1,3 @@
1
+ """ClawTrace — Export and manage coding agent conversation data."""
2
+
3
# Package version string.
# NOTE(review): presumably meant to match the distribution version declared in
# the packaging metadata — confirm when bumping releases.
__version__ = "0.1.0"
@@ -0,0 +1,112 @@
1
+ """Anonymize PII in Claude Code log data."""
2
+
3
+ import hashlib
4
+ import os
5
+ import re
6
+
7
+
8
def _hash_username(username: str) -> str:
    """Return a stable pseudonym for *username*.

    The pseudonym is ``user_`` followed by the first eight hex characters of
    the SHA-256 digest of the encoded name, so equal names always map to the
    same token.
    """
    digest = hashlib.sha256(username.encode()).hexdigest()
    return f"user_{digest[:8]}"
10
+
11
+
12
def _detect_home_dir() -> tuple[str, str]:
    """Return ``(home, username)`` for the current user.

    ``home`` is the result of ``os.path.expanduser("~")`` and ``username`` is
    the final path component of that directory.
    """
    home_dir = os.path.expanduser("~")
    return home_dir, os.path.basename(home_dir)
16
+
17
+
18
+ def anonymize_path(path: str, username: str, username_hash: str, home: str | None = None) -> str:
19
+ """Strip a path to project-relative and hash the username."""
20
+ if not path:
21
+ return path
22
+
23
+ if home is None:
24
+ home = os.path.expanduser("~")
25
+ prefixes = set()
26
+ for base in (f"/Users/{username}", f"/home/{username}", home):
27
+ for subdir in ("Documents", "Downloads", "Desktop"):
28
+ prefixes.add(f"{base}/{subdir}/")
29
+ prefixes.add(f"{base}/")
30
+
31
+ # Try longest prefixes first (subdirectory matches before bare home)
32
+ home_patterns = sorted(prefixes, key=len, reverse=True)
33
+
34
+ for prefix in home_patterns:
35
+ if path.startswith(prefix):
36
+ rest = path[len(prefix):]
37
+ if "/Documents/" in prefix or "/Downloads/" in prefix or "/Desktop/" in prefix:
38
+ return rest
39
+ return f"{username_hash}/{rest}"
40
+
41
+ path = path.replace(f"/Users/{username}/", f"/{username_hash}/")
42
+ path = path.replace(f"/home/{username}/", f"/{username_hash}/")
43
+
44
+ return path
45
+
46
+
47
def anonymize_text(text: str, username: str, username_hash: str) -> str:
    """Replace occurrences of *username* in free-form *text* with *username_hash*.

    Args:
        text: Text to scrub. Falsy values are returned unchanged.
        username: Name to remove. When empty, *text* is returned unchanged.
        username_hash: Replacement token.

    Returns:
        The text after these substitutions, applied in order:

        1. ``/Users/<name>`` and ``/home/<name>`` become ``/<hash>``.
        2. ``claude-<digits>/-Users-<name>`` becomes
           ``claude-XXX/-Users-<hash>``.
        3. Hyphen-encoded ``-Users-<name>`` / ``-home-<name>`` (including the
           variant where underscores in the name were turned into hyphens).
        4. Any remaining whole-word occurrence of the name, only when the
           name has at least four characters.
    """
    if not text or not username:
        return text

    escaped = re.escape(username)

    # Replace /Users/<username> and /home/<username>
    text = re.sub(rf"/Users/{escaped}(?=/|[^a-zA-Z0-9_-]|$)", f"/{username_hash}", text)
    text = re.sub(rf"/home/{escaped}(?=/|[^a-zA-Z0-9_-]|$)", f"/{username_hash}", text)

    # Catch temp paths like /private/tmp/claude-501/-Users-peteromalley/
    # This must run BEFORE the generic hyphen-encoded pass below: that pass
    # rewrites "-Users-<name>" and would leave nothing for this pattern to
    # match, so the numeric id after "claude-" would never be masked.
    text = re.sub(rf"claude-\d+/-Users-{escaped}", f"claude-XXX/-Users-{username_hash}", text)

    # Catch hyphen-encoded paths: -Users-peteromalley- or -Users-peteromalley/
    text = re.sub(rf"-Users-{escaped}(?=-|/|$)", f"-Users-{username_hash}", text)
    text = re.sub(rf"-home-{escaped}(?=-|/|$)", f"-home-{username_hash}", text)

    # Also handle underscore-to-hyphen encoding: kaid_aiagent → kaid-aiagent
    if "_" in username:
        hyphen_variant = username.replace("_", "-")
        hyphen_escaped = re.escape(hyphen_variant)
        text = re.sub(rf"-Users-{hyphen_escaped}(?=-|/|$)", f"-Users-{username_hash}", text)
        text = re.sub(rf"-home-{hyphen_escaped}(?=-|/|$)", f"-home-{username_hash}", text)

    # Final pass: replace bare username in remaining contexts (ls output, prose, etc.)
    # Only if username is >= 4 chars to avoid false positives
    if len(username) >= 4:
        text = re.sub(rf"\b{escaped}\b", username_hash, text)

    return text
77
+
78
+
79
class Anonymizer:
    """Stateful anonymizer that consistently hashes usernames.

    On construction it records the current home directory and username (via
    ``_detect_home_dir``), the hash of that username, and a list of
    ``(name, hash)`` pairs for any extra usernames supplied by the caller.
    """

    def __init__(self, extra_usernames: list[str] | None = None):
        """Capture the local user and pre-hash any additional usernames.

        Args:
            extra_usernames: Optional extra names to anonymize. Entries are
                stripped; empty entries and entries equal to the detected
                username are skipped.
        """
        self.home, self.username = _detect_home_dir()
        self.username_hash = _hash_username(self.username)

        # Additional usernames to anonymize (GitHub handles, Discord names, etc.)
        self._extra: list[tuple[str, str]] = []
        for name in (extra_usernames or []):
            name = name.strip()
            if name and name != self.username:
                self._extra.append((name, _hash_username(name)))

    def _apply_extra(self, result: str) -> str:
        """Replace every extra username in *result*, longest name first.

        ``_replace_username`` matches substrings, so replacing a short name
        before a longer name that contains it (e.g. "sam" before "samantha")
        would leave the tail of the longer name in the output. Ordering by
        length avoids that; ``sorted`` is stable, so equal-length names keep
        their configured order. ``self._extra`` itself is not reordered.
        """
        for name, hashed in sorted(self._extra, key=lambda pair: len(pair[0]), reverse=True):
            result = _replace_username(result, name, hashed)
        return result

    def path(self, file_path: str) -> str:
        """Anonymize a file path: path rules, then text rules, then extras."""
        result = anonymize_path(file_path, self.username, self.username_hash, self.home)
        result = anonymize_text(result, self.username, self.username_hash)
        return self._apply_extra(result)

    def text(self, content: str) -> str:
        """Anonymize free-form text: text rules for the local user, then extras."""
        result = anonymize_text(content, self.username, self.username_hash)
        return self._apply_extra(result)
105
+
106
+
107
def _replace_username(text: str, username: str, username_hash: str) -> str:
    """Replace every case-insensitive occurrence of *username* in *text*.

    Matching is a literal substring match with no word boundaries. Empty
    text, an empty username, or a username shorter than three characters
    leaves *text* untouched.
    """
    if text and username and len(username) >= 3:
        pattern = re.compile(re.escape(username), flags=re.IGNORECASE)
        return pattern.sub(username_hash, text)
    return text