claude-self-reflect 7.1.9 → 7.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Extract structured data from Claude Code conversation JSONL.
4
+ Handles large files by trimming to stay within token budgets.
5
+ """
6
+
7
+ import json
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Dict, List, Any
11
+ from datetime import datetime
12
+
13
+
14
def estimate_tokens(text: str) -> int:
    """Rough token estimate: roughly one token per four characters."""
    char_count = len(text)
    return char_count // 4
17
+
18
+
19
def trim_conversation(messages: List[Dict], max_tokens: int = 150000) -> List[Dict]:
    """
    Trim a conversation to fit within a token budget.

    Strategy: keep the first 20% and the last 50% of messages (solutions
    usually appear near the end), replacing the middle with a single
    placeholder message.

    Args:
        messages: Parsed JSONL message dicts, in conversation order.
        max_tokens: Approximate token budget for the serialized conversation.

    Returns:
        The original list when it already fits the budget (or cannot be
        shrunk), otherwise a new, shorter list.
    """
    if not messages:
        return []

    # Estimate total tokens from the serialized form.
    total_tokens = estimate_tokens(json.dumps(messages))

    if total_tokens <= max_tokens:
        return messages

    # Keep first 20% and last 50% of messages.
    n = len(messages)
    first_count = max(10, int(n * 0.2))
    last_count = max(20, int(n * 0.5))

    # BUG FIX: for small conversations the head and tail windows can
    # overlap (e.g. n=25 -> 10 + 20 = 30 > 25). The old code then
    # duplicated messages and reported a negative "omitted" count.
    # Nothing can be dropped in that case, so return the list unchanged.
    if first_count + last_count >= n:
        return messages

    trimmed = messages[:first_count] + [
        {
            "role": "assistant",
            "content": f"[... {n - first_count - last_count} messages omitted for brevity ...]",
            "type": "text"
        }
    ] + messages[-last_count:]

    # Progress note goes to stderr so stdout stays clean JSON.
    print(f"Trimmed conversation: {n} → {len(trimmed)} messages (~{estimate_tokens(json.dumps(trimmed))} tokens)", file=sys.stderr)

    return trimmed
50
+
51
+
52
def extract_files(messages: List[Dict]) -> Dict[str, List[str]]:
    """Collect file paths that tool calls read, edited, or created."""
    # Substring of the tool name -> bucket it feeds; order mirrors the
    # original elif precedence (read before edit before write).
    dispatch = (("read", "read"), ("edit", "edited"), ("write", "created"))
    buckets: Dict[str, set] = {"read": set(), "edited": set(), "created": set()}

    for message in messages:
        if message.get("type") != "tool_use":
            continue

        payload = message.get("content", [])
        if isinstance(payload, str):
            continue
        entries = payload if isinstance(payload, list) else [payload]

        for entry in entries:
            if not isinstance(entry, dict):
                continue

            tool_name = (entry.get("name") or entry.get("type", "")).lower()
            tool_input = entry.get("input", {})
            if "file_path" not in tool_input:
                continue

            for needle, bucket in dispatch:
                if needle in tool_name:
                    buckets[bucket].add(entry["input"]["file_path"])
                    break

    return {bucket: sorted(paths) for bucket, paths in buckets.items()}
81
+
82
+
83
def extract_tools(messages: List[Dict]) -> Dict[str, int]:
    """Count how many times each tool was used, most frequent first."""
    counts: Dict[str, int] = {}

    for message in messages:
        if message.get("type") != "tool_use":
            continue

        payload = message.get("content", [])
        if isinstance(payload, str):
            continue

        entries = payload if isinstance(payload, list) else [payload]
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            name = entry.get("name") or entry.get("type", "unknown")
            counts[name] = counts.get(name, 0) + 1

    # Present the busiest tools first.
    ordered = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ordered)
103
+
104
+
105
def extract_errors(messages: List[Dict]) -> List[Dict[str, Any]]:
    """Find messages that look like errors and whether they were resolved."""
    error_words = ("error", "failed", "exception", "traceback")
    success_words = ("success", "fixed", "working", "resolved")
    found: List[Dict[str, Any]] = []

    for idx, message in enumerate(messages):
        blob = json.dumps(message.get("content", "")).lower()

        if not any(word in blob for word in error_words):
            continue

        # Look at up to the next four messages for a resolution signal.
        window = messages[idx + 1:idx + 5]
        resolved = any(
            any(word in json.dumps(later.get("content", "")).lower() for word in success_words)
            for later in window
        )

        found.append({
            "message_index": idx,
            "preview": blob[:200],
            "resolved": resolved
        })

    return found
129
+
130
+
131
def extract_structured_data(jsonl_path: Path, max_tokens: int = 150000) -> Dict[str, Any]:
    """
    Extract structured data from a conversation JSONL file.

    Args:
        jsonl_path: Path to a Claude Code conversation .jsonl file.
        max_tokens: Approximate token budget forwarded to trim_conversation.

    Returns:
        A JSON-serializable dict (messages, files touched, tool counts,
        errors, metadata) suitable for LLM analysis.
    """
    messages: List[Dict] = []

    # Read JSONL, skipping blank lines and unparseable entries.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                messages.append(json.loads(line))
            except json.JSONDecodeError:
                continue

    # BUG FIX: remember the pre-trim message count here instead of
    # re-opening the file at the end. The old code leaked an unclosed
    # file handle and compared against the raw line count (including
    # blank/invalid lines), so "trimmed" could report True even when
    # nothing was trimmed.
    original_count = len(messages)

    # Trim if needed.
    messages = trim_conversation(messages, max_tokens)

    # Extract components from the (possibly trimmed) messages.
    files = extract_files(messages)
    tools = extract_tools(messages)
    errors = extract_errors(messages)

    # Build structured data.
    return {
        "conversation_id": jsonl_path.stem,
        "total_messages": len(messages),
        "messages": messages,  # Trimmed messages
        "files": files,
        "tools_used": tools,
        "errors": errors,
        "has_code": any("```" in json.dumps(msg.get("content", "")) for msg in messages),
        "metadata": {
            "source_file": str(jsonl_path),
            "extracted_at": datetime.now().isoformat(),
            "trimmed": len(messages) < original_count
        }
    }
173
+
174
+
175
if __name__ == "__main__":
    # Exactly one positional argument is required: the JSONL path.
    args = sys.argv[1:]
    if not args:
        print("Usage: python extract_structured.py <conversation.jsonl>", file=sys.stderr)
        sys.exit(1)

    jsonl_path = Path(args[0])
    if not jsonl_path.exists():
        print(f"Error: File not found: {jsonl_path}", file=sys.stderr)
        sys.exit(1)

    # Emit the structured result as pretty-printed JSON on stdout.
    print(json.dumps(extract_structured_data(jsonl_path), indent=2))