claude-self-reflect 5.0.2 → 5.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,262 @@
+ """
+ Metadata extractor using message processors to reduce complexity.
+ Refactored from extract_metadata_single_pass function.
+ """
+
+ import json
+ import os
+ import logging
+ from pathlib import Path
+ from typing import Dict, Any, Tuple, Optional
+ from datetime import datetime
+
+ from message_processors import (
+     MessageProcessorFactory,
+     extract_concepts,
+     MAX_CONCEPT_MESSAGES,
+     MAX_FILES_ANALYZED,
+     MAX_FILES_EDITED,
+     MAX_TOOLS_USED,
+     MAX_AST_ELEMENTS
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class MetadataExtractor:
+     """Extract metadata from JSONL conversation files."""
+
+     def __init__(self):
+         self.processor_factory = MessageProcessorFactory()
+
+     def extract_metadata_from_file(self, file_path: str) -> Tuple[Dict[str, Any], str, int]:
+         """
+         Extract metadata from a JSONL file in a single pass.
+         Returns: (metadata, first_timestamp, message_count)
+         """
+         metadata = self._initialize_metadata()
+         first_timestamp = None
+         message_count = 0
+         all_text = []
+
+         try:
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 for line in f:
+                     if not line.strip():
+                         continue
+
+                     result = self._process_line(line, metadata)
+                     if result:
+                         text_content, is_message = result
+
+                         # Update timestamp and counts
+                         if first_timestamp is None:
+                             first_timestamp = self._extract_timestamp(line)
+
+                         if is_message:
+                             message_count += 1
+
+                         if text_content:
+                             # Limit text accumulation to prevent memory issues
+                             if len(all_text) < MAX_CONCEPT_MESSAGES:
+                                 all_text.append(text_content[:1000])
+
+         except (IOError, OSError) as e:
+             logger.warning(f"Error reading file {file_path}: {e}")
+         except (json.JSONDecodeError, ValueError) as e:
+             logger.warning(f"Error parsing JSON in {file_path}: {e}")
+         except Exception as e:
+             logger.error(f"Unexpected error extracting metadata from {file_path}: {e}")
+
+         # Post-process collected data
+         self._post_process_metadata(metadata, all_text, file_path)
+
+         # Apply limits to arrays
+         self._apply_metadata_limits(metadata)
+
+         return metadata, first_timestamp or datetime.now().isoformat(), message_count
+
+     def _initialize_metadata(self) -> Dict[str, Any]:
+         """Initialize empty metadata structure."""
+         return {
+             "files_analyzed": [],
+             "files_edited": [],
+             "tools_used": [],
+             "concepts": [],
+             "ast_elements": [],
+             "has_code_blocks": False,
+             "total_messages": 0,
+             "project_path": None,
+             "pattern_analysis": {},
+             "avg_quality_score": 0.0
+         }
+
+     def _process_line(self, line: str, metadata: Dict[str, Any]) -> Optional[Tuple[str, bool]]:
+         """
+         Process a single line from the JSONL file.
+         Returns: (text_content, is_message) or None
+         """
+         try:
+             data = json.loads(line)
+
+             # Extract project path from cwd
+             if metadata["project_path"] is None and 'cwd' in data:
+                 metadata["project_path"] = data.get('cwd')
+
+             # Handle message entries
+             if 'message' in data and data['message']:
+                 return self._process_message_entry(data['message'], metadata)
+
+             # Handle top-level tool entries
+             entry_type = data.get('type')
+             if entry_type in ('tool_result', 'tool_use'):
+                 return self._process_tool_entry(data, metadata)
+
+         except json.JSONDecodeError:
+             # Expected for non-JSON lines, skip silently
+             pass
+         except (KeyError, TypeError, ValueError) as e:
+             # Log specific parsing errors for debugging
+             logger.debug(f"Error parsing line: {e}")
+
+         return None
+
+     def _process_message_entry(self, message: Dict[str, Any], metadata: Dict[str, Any]) -> Optional[Tuple[str, bool]]:
+         """Process a message entry."""
+         role = message.get('role')
+         content = message.get('content')
+
+         if not role or not content:
+             return None
+
+         # Check if it's a countable message
+         is_user_or_assistant = role in ['user', 'assistant']
+
+         # Process content
+         text_content = self.processor_factory.process_content(content, metadata)
+
+         return text_content, is_user_or_assistant
+
+     def _process_tool_entry(self, data: Dict[str, Any], metadata: Dict[str, Any]) -> Optional[Tuple[str, bool]]:
+         """Process a top-level tool entry."""
+         entry_type = data.get('type')
+         text_parts = []
+
+         if entry_type == 'tool_use':
+             tool_name = data.get('name', 'unknown')
+             tool_input = str(data.get('input', ''))[:500]
+             text_parts.append(f"[Tool: {tool_name}] {tool_input}")
+
+             # Track tool usage
+             if tool_name and tool_name not in metadata['tools_used']:
+                 metadata['tools_used'].append(tool_name)
+
+         elif entry_type == 'tool_result':
+             result_content = self._extract_tool_result_content(data)
+             text_parts.append(f"[Result] {result_content[:1000]}")
+
+         content = "\n".join(text_parts)
+         # Tool entries should not count as messages (only user/assistant messages count)
+         return (content, False) if content else None
+
+     def _extract_tool_result_content(self, data: Dict[str, Any]) -> str:
+         """Extract content from tool result data."""
+         result_content = data.get('content')
+
+         if isinstance(result_content, list):
+             flat = []
+             for item in result_content:
+                 if isinstance(item, dict) and item.get('type') == 'text':
+                     flat.append(item.get('text', ''))
+                 elif isinstance(item, str):
+                     flat.append(item)
+             result_content = "\n".join(flat)
+
+         if not result_content:
+             result_content = data.get('result', '')
+
+         return str(result_content)
+
+     def _extract_timestamp(self, line: str) -> Optional[str]:
+         """Extract timestamp from a line if present."""
+         try:
+             data = json.loads(line)
+             return data.get('timestamp')
+         except (json.JSONDecodeError, TypeError) as e:
+             logger.debug(f"Failed to extract timestamp: {e}")
+             return None
+
+     def _post_process_metadata(self, metadata: Dict[str, Any], all_text: list, file_path: str):
+         """Post-process collected metadata."""
+         # Extract concepts from collected text
+         if all_text:
+             combined_text = ' '.join(all_text[:MAX_CONCEPT_MESSAGES])
+             metadata['concepts'] = extract_concepts(combined_text)
+
+         # Run AST-GREP pattern analysis if available
+         self._run_pattern_analysis(metadata)
+
+     def _run_pattern_analysis(self, metadata: Dict[str, Any]):
+         """Run AST-GREP pattern analysis on mentioned files."""
+         pattern_quality = {}
+         avg_quality_score = 0.0
+
+         try:
+             # Update patterns first
+             from update_patterns import check_and_update_patterns
+             check_and_update_patterns()
+
+             # Import analyzer
+             from ast_grep_final_analyzer import FinalASTGrepAnalyzer
+             analyzer = FinalASTGrepAnalyzer()
+
+             # Analyze files
+             files_to_analyze = list(set(
+                 metadata['files_edited'] + metadata['files_analyzed'][:10]
+             ))
+             quality_scores = []
+
+             for file_path in files_to_analyze:
+                 # Expand file path for proper checking
+                 expanded_path = os.path.expanduser(file_path) if file_path.startswith('~') else file_path
+                 if self._is_code_file(expanded_path) and os.path.exists(expanded_path):
+                     try:
+                         result = analyzer.analyze_file(expanded_path)
+                         metrics = result['quality_metrics']
+                         pattern_quality[file_path] = {
+                             'score': metrics['quality_score'],
+                             'good_patterns': metrics['good_patterns_found'],
+                             'bad_patterns': metrics['bad_patterns_found'],
+                             'issues': metrics['total_issues']
+                         }
+                         quality_scores.append(metrics['quality_score'])
+                     except (IOError, OSError) as e:
+                         logger.debug(f"Could not read file {file_path}: {e}")
+                     except (KeyError, ValueError) as e:
+                         logger.debug(f"Error parsing AST results for {file_path}: {e}")
+                     except Exception as e:
+                         logger.warning(f"Unexpected error analyzing {file_path}: {e}")
+
+             # Calculate average quality
+             if quality_scores:
+                 avg_quality_score = sum(quality_scores) / len(quality_scores)
+
+         except Exception as e:
+             logger.debug(f"AST analysis not available: {e}")
+
+         metadata['pattern_analysis'] = pattern_quality
+         metadata['avg_quality_score'] = round(avg_quality_score, 3)
+
+     def _is_code_file(self, file_path: str) -> bool:
+         """Check if file is a code file."""
+         if not file_path:
+             return False
+         extensions = ['.py', '.ts', '.js', '.tsx', '.jsx']
+         return any(file_path.endswith(ext) for ext in extensions)
+
+     def _apply_metadata_limits(self, metadata: Dict[str, Any]):
+         """Apply size limits to metadata arrays."""
+         metadata['files_analyzed'] = metadata['files_analyzed'][:MAX_FILES_ANALYZED]
+         metadata['files_edited'] = metadata['files_edited'][:MAX_FILES_EDITED]
+         metadata['tools_used'] = metadata['tools_used'][:MAX_TOOLS_USED]
+         metadata['ast_elements'] = metadata['ast_elements'][:MAX_AST_ELEMENTS]
@@ -647,6 +647,8 @@ if __name__ == "__main__":
      parser.add_argument('--project-name', help='Name of the project for cache file')
      parser.add_argument('--use-tracker', action='store_true',
                          help='Use session edit tracker for analysis')
+     parser.add_argument('--update-cache-only', action='store_true',
+                         help='Only update cache without printing report')
      args = parser.parse_args()

      # If external project specified, change to that directory
@@ -658,4 +660,12 @@ if __name__ == "__main__":
          # This will be used in the main() function for cache naming
          os.environ['QUALITY_PROJECT_NAME'] = args.project_name

+     # For cache-only mode, suppress output
+     if args.update_cache_only:
+         # Redirect logger to null
+         import os
+         import sys
+         sys.stdout = open(os.devnull, 'w')
+         sys.stderr = open(os.devnull, 'w')
+
      main(use_tracker=args.use_tracker)
@@ -21,7 +21,7 @@ import logging
  import sys
  from pathlib import Path
  from datetime import datetime, timedelta, timezone
- from typing import Dict, Any, Optional, List, Set
+ from typing import Dict, Any, Optional, List, Set, Union
  from contextlib import contextmanager

  # Try to import filelock, fall back to platform-specific implementation
@@ -62,14 +62,17 @@ class UnifiedStateManager:
      LOCK_TIMEOUT = 5.0
      LOCK_EXPIRY = timedelta(seconds=30)

-     def __init__(self, state_file: Optional[Path] = None):
+     def __init__(self, state_file: Optional[Union[Path, str]] = None):
          """
          Initialize the unified state manager.

          Args:
              state_file: Path to the state file (defaults to ~/.claude-self-reflect/config/unified-state.json)
          """
-         self.state_file = state_file or Path.home() / ".claude-self-reflect" / "config" / "unified-state.json"
+         if state_file:
+             self.state_file = Path(state_file) if isinstance(state_file, str) else state_file
+         else:
+             self.state_file = Path.home() / ".claude-self-reflect" / "config" / "unified-state.json"
          self.lock_file = self.state_file.with_suffix('.lock')
          self.temp_file = self.state_file.with_suffix('.tmp')
          self._file_lock = None
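The signature change above widens `state_file` to accept either a `str` or a `Path`; both are normalized to a `Path` before the lock and temp file paths are derived. A small sketch of the calling convention, assuming an importable module name (not shown in this hunk):

```python
# Sketch only; the import path is an assumption for illustration.
from pathlib import Path
from unified_state_manager import UnifiedStateManager  # hypothetical module name

# A plain string and a Path now behave the same way.
mgr_from_str = UnifiedStateManager("/tmp/unified-state.json")
mgr_from_path = UnifiedStateManager(Path("/tmp/unified-state.json"))

# Omitting the argument keeps the documented default location.
mgr_default = UnifiedStateManager()
assert mgr_default.state_file.name == "unified-state.json"
```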
@@ -127,7 +130,7 @@ class UnifiedStateManager:
          if HAS_FILELOCK:
              lock = filelock.FileLock(str(self.lock_file), timeout=timeout)
              try:
-                 with lock.acquire(timeout=timeout):
+                 with lock:
                      yield lock
              except filelock.Timeout:
                  raise TimeoutError(f"Could not acquire lock within {timeout} seconds")
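The change above drops the redundant `acquire(timeout=timeout)` call: the `FileLock` was already constructed with that timeout, so a bare `with lock:` acquires with the same value and still raises `filelock.Timeout` on failure, which the surrounding code converts to `TimeoutError`. A standalone sketch of the same filelock pattern, independent of this class:

```python
# Sketch of the filelock usage pattern, not package code.
import filelock

lock = filelock.FileLock("/tmp/example.lock", timeout=5.0)
try:
    with lock:  # acquires using the timeout passed at construction
        pass    # critical section goes here
except filelock.Timeout:
    raise TimeoutError("Could not acquire lock within 5.0 seconds")
```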
@@ -1,153 +0,0 @@
- #!/usr/bin/env python3
- """
- Test file with intentional quality issues for testing quality-fixer agent.
- This file contains patterns that should be fixed:
- - sync file operations that should be async
- - global variables
- - print statements
- - long functions
- """
-
- import os
- import json
- import asyncio
- import logging
- import aiofiles
- from typing import List, Dict, Any
-
- # Set up logger instead of print statements
- logger = logging.getLogger(__name__)
-
- # Configuration management class instead of global variables
- class ConfigManager:
-     def __init__(self):
-         self.config = None
-         self.counter = 0
-
- async def load_config(config_manager: ConfigManager) -> Dict[str, Any]:
-     """Load config using async file operations."""
-     # Async file operation using aiofiles
-     async with aiofiles.open("config.json", "r") as f:
-         content = await f.read()
-         config_manager.config = json.loads(content)
-
-     logger.info(f"Config loaded: {config_manager.config}")
-     return config_manager.config
-
- async def save_data(data: Dict[str, Any], config_manager: ConfigManager) -> None:
-     """Save data using async operations."""
-     config_manager.counter += 1
-
-     # Async file operation using aiofiles
-     async with aiofiles.open("data.json", "w") as f:
-         await f.write(json.dumps(data))
-
-     logger.info(f"Data saved, counter: {config_manager.counter}")
-
- def validate_items(items: List[str]) -> List[str]:
-     """Validate input items."""
-     valid_items = []
-     for item in items:
-         if not item:
-             logger.warning(f"Invalid item: {item}")
-             continue
-         valid_items.append(item)
-     return valid_items
-
- def process_items(items: List[str]) -> List[str]:
-     """Process each item."""
-     return [item.upper() for item in items]
-
- def filter_results(results: List[str]) -> List[str]:
-     """Filter results by length."""
-     return [result for result in results if len(result) > 3]
-
- def create_summary(items: List[str], results: List[str], filtered: List[str]) -> Dict[str, int]:
-     """Create processing summary."""
-     return {
-         "total": len(items),
-         "processed": len(results),
-         "filtered": len(filtered)
-     }
-
- async def save_results(filtered: List[str]) -> None:
-     """Save results to file asynchronously."""
-     async with aiofiles.open("results.txt", "w") as f:
-         for item in filtered:
-             await f.write(f"{item}\n")
-
- async def process_items_improved(items: List[str], config_manager: ConfigManager) -> Dict[str, Any]:
-     """Improved function broken down into smaller functions."""
-     # Step 1: Validate items
-     valid_items = validate_items(items)
-
-     # Step 2: Process each item
-     results = process_items(valid_items)
-
-     # Step 3: Filter results
-     filtered = filter_results(results)
-
-     # Step 4: Sort results
-     filtered.sort()
-
-     # Step 5: Create summary
-     summary = create_summary(items, results, filtered)
-
-     # Step 6: Log summary
-     logger.info(f"Processing complete: {summary}")
-
-     # Step 7: Save results asynchronously
-     await save_results(filtered)
-
-     # Step 8: Update counter
-     config_manager.counter += len(filtered)
-
-     # Step 9: Create report
-     report = {
-         "summary": summary,
-         "results": filtered,
-         "counter": config_manager.counter
-     }
-
-     return report
-
- async def debug_function() -> None:
-     """Function with debug statements."""
-     logger.debug("Debug: Starting function")
-
-     # Reading file asynchronously
-     if os.path.exists("debug.log"):
-         async with aiofiles.open("debug.log", "r") as f:
-             log_data = await f.read()
-             logger.debug(f"Log data: {log_data}")
-
-     logger.debug("Debug: Function complete")
-
- # Using var instead of let/const (for JS patterns if analyzed)
- var_example = "This would be flagged in JS"
-
- async def main() -> None:
-     """Main execution function."""
-     # Set up logging
-     logging.basicConfig(level=logging.INFO)
-
-     # Initialize config manager
-     config_manager = ConfigManager()
-
-     logger.info("Starting application...")
-
-     try:
-         # Note: These operations would fail without actual files, but structure is correct
-         await load_config(config_manager)
-         await process_items_improved(["test", "data", "example"], config_manager)
-         await debug_function()
-     except FileNotFoundError:
-         logger.warning("Required files not found - this is expected in test context")
-     except Exception as e:
-         logger.error(f"Application error: {e}")
-
-     logger.info("Application complete!")
-
- if __name__ == "__main__":
-     # Run async main function
-     asyncio.run(main())