claude-self-reflect 5.0.4 → 5.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/csr-validator.md +43 -0
- package/.claude/agents/open-source-maintainer.md +77 -0
- package/Dockerfile.importer +4 -0
- package/Dockerfile.safe-watcher +4 -0
- package/package.json +7 -1
- package/scripts/doctor.py +342 -0
- package/scripts/embedding_service.py +241 -0
- package/scripts/import_strategies.py +344 -0
- package/scripts/message_processors.py +248 -0
- package/scripts/metadata_extractor.py +262 -0
- package/scripts/watcher-loop.sh +56 -0
package/scripts/message_processors.py
@@ -0,0 +1,248 @@
"""
Message processor classes for handling different message types in JSONL import.
Refactored from extract_metadata_single_pass to reduce complexity.
"""

import os
import re
import ast
import logging
from abc import ABC, abstractmethod
from typing import Dict, Any, List, Set, Optional

logger = logging.getLogger(__name__)

# Constants for metadata limits (can be overridden via environment variables)
MAX_CONCEPTS = int(os.getenv("MAX_CONCEPTS", "10"))
MAX_AST_ELEMENTS = int(os.getenv("MAX_AST_ELEMENTS", "30"))
MAX_CODE_BLOCKS = int(os.getenv("MAX_CODE_BLOCKS", "5"))
MAX_ELEMENTS_PER_BLOCK = int(os.getenv("MAX_ELEMENTS_PER_BLOCK", "10"))
MAX_FILES_ANALYZED = int(os.getenv("MAX_FILES_ANALYZED", "20"))
MAX_FILES_EDITED = int(os.getenv("MAX_FILES_EDITED", "20"))
MAX_TOOLS_USED = int(os.getenv("MAX_TOOLS_USED", "15"))
MAX_CONCEPT_MESSAGES = int(os.getenv("MAX_CONCEPT_MESSAGES", "50"))


class MessageProcessor(ABC):
    """Abstract base class for message processing."""

    @abstractmethod
    def process(self, item: Any, metadata: Dict[str, Any]) -> Optional[str]:
        """Process a message item and update metadata."""
        pass


class TextMessageProcessor(MessageProcessor):
    """Process text messages and extract code blocks."""

    def process(self, item: Dict[str, Any], metadata: Dict[str, Any]) -> Optional[str]:
        """Process text content and extract code blocks with AST elements."""
        if item.get('type') != 'text':
            return None

        text_content = item.get('text', '')

        # Check for code blocks
        if '```' in text_content:
            metadata['has_code_blocks'] = True
            self._extract_code_ast_elements(text_content, metadata)

        return text_content

    def _extract_code_ast_elements(self, text: str, metadata: Dict[str, Any]):
        """Extract AST elements from code blocks in text."""
        if 'ast_elements' not in metadata:
            metadata['ast_elements'] = []

        if len(metadata['ast_elements']) >= MAX_AST_ELEMENTS:
            return

        # More permissive regex to handle various fence formats
        code_blocks = re.findall(r'```[^`\n]*\n?(.*?)```', text, re.DOTALL)

        for code_block in code_blocks[:MAX_CODE_BLOCKS]:
            if len(metadata['ast_elements']) >= MAX_AST_ELEMENTS:
                break

            ast_elems = extract_ast_elements(code_block)
            for elem in list(ast_elems)[:MAX_ELEMENTS_PER_BLOCK]:
                if elem not in metadata['ast_elements'] and len(metadata['ast_elements']) < MAX_AST_ELEMENTS:
                    metadata['ast_elements'].append(elem)


class ThinkingMessageProcessor(MessageProcessor):
    """Process thinking messages."""

    def process(self, item: Dict[str, Any], metadata: Dict[str, Any]) -> Optional[str]:
        """Process thinking content."""
        if item.get('type') != 'thinking':
            return None

        return item.get('thinking', '')


class ToolMessageProcessor(MessageProcessor):
    """Process tool use messages and extract file references."""

    def process(self, item: Dict[str, Any], metadata: Dict[str, Any]) -> Optional[str]:
        """Process tool use and extract file references."""
        if item.get('type') != 'tool_use':
            return None

        tool_name = item.get('name', '')

        # Track tool usage
        if 'tools_used' not in metadata:
            metadata['tools_used'] = []

        if tool_name and tool_name not in metadata['tools_used']:
            if len(metadata['tools_used']) < MAX_TOOLS_USED:
                metadata['tools_used'].append(tool_name)

        # Extract file references
        if 'input' in item:
            self._extract_file_references(item['input'], tool_name, metadata)

        # Return tool use as text
        tool_input = str(item.get('input', ''))[:500]
        return f"[Tool: {tool_name}] {tool_input}"

    def _extract_file_references(self, input_data: Any, tool_name: str, metadata: Dict[str, Any]):
        """Extract file references from tool input."""
        if not isinstance(input_data, dict):
            return

        # Initialize metadata lists if not present
        if 'files_edited' not in metadata:
            metadata['files_edited'] = []
        if 'files_analyzed' not in metadata:
            metadata['files_analyzed'] = []

        is_edit = tool_name in ['Edit', 'Write', 'MultiEdit', 'NotebookEdit']

        # Check file_path field
        if 'file_path' in input_data:
            file_ref = input_data['file_path']
            if is_edit:
                if file_ref not in metadata['files_edited'] and len(metadata['files_edited']) < MAX_FILES_EDITED:
                    metadata['files_edited'].append(file_ref)
            else:
                if file_ref not in metadata['files_analyzed'] and len(metadata['files_analyzed']) < MAX_FILES_ANALYZED:
                    metadata['files_analyzed'].append(file_ref)

        # Check path field (for non-edit tools)
        if 'path' in input_data and not is_edit:
            file_ref = input_data['path']
            if file_ref not in metadata['files_analyzed'] and len(metadata['files_analyzed']) < MAX_FILES_ANALYZED:
                metadata['files_analyzed'].append(file_ref)


class ToolResultProcessor(MessageProcessor):
    """Process tool result messages."""

    def process(self, item: Any, metadata: Dict[str, Any]) -> Optional[str]:
        """Process tool results."""
        # Handle both dict items and top-level tool results
        if isinstance(item, dict):
            if item.get('type') == 'tool_result':
                result_content = str(item.get('content', ''))[:1000]
                return f"[Result] {result_content}"
            elif item.get('type') == 'tool_use':
                # Already handled by ToolMessageProcessor
                return None

        return None


class MessageProcessorFactory:
    """Factory for creating appropriate message processors."""

    def __init__(self):
        self.processors = {
            'text': TextMessageProcessor(),
            'thinking': ThinkingMessageProcessor(),
            'tool_use': ToolMessageProcessor(),
            'tool_result': ToolResultProcessor()
        }

    def get_processor(self, message_type: str) -> Optional[MessageProcessor]:
        """Get the appropriate processor for a message type."""
        return self.processors.get(message_type)

    def process_content(self, content: Any, metadata: Dict[str, Any]) -> str:
        """Process content of various types and return text representation."""
        text_parts = []

        if isinstance(content, list):
            for item in content:
                if isinstance(item, dict):
                    item_type = item.get('type', '')
                    processor = self.get_processor(item_type)
                    if processor:
                        text = processor.process(item, metadata)
                        if text:
                            text_parts.append(text)
                elif isinstance(item, str):
                    text_parts.append(item)
        elif isinstance(content, str):
            text_parts.append(content)

        return '\n'.join(text_parts)


def extract_ast_elements(code_text: str) -> Set[str]:
    """Extract AST elements from Python code."""
    elements = set()

    try:
        tree = ast.parse(code_text)
        for node in ast.walk(tree):
            if isinstance(node, ast.FunctionDef):
                elements.add(f"func:{node.name}")
            elif isinstance(node, ast.ClassDef):
                elements.add(f"class:{node.name}")
            elif isinstance(node, ast.Import):
                for alias in node.names:
                    elements.add(f"import:{alias.name}")
            elif isinstance(node, ast.ImportFrom):
                module = node.module or ''
                for alias in node.names:
                    elements.add(f"from:{module}.{alias.name}")
    except (SyntaxError, ValueError):
        # Not Python code or invalid syntax
        pass

    return elements


def extract_concepts(text: str) -> List[str]:
    """Extract key concepts from text using simple heuristics."""
    concepts = []

    # Common programming concepts
    concept_patterns = [
        (r'\b(async|await|promise|future)\b', 'async-programming'),
        (r'\b(test|spec|jest|pytest|unittest)\b', 'testing'),
        (r'\b(docker|container|kubernetes|k8s)\b', 'containerization'),
        (r'\b(api|rest|graphql|endpoint)\b', 'api-development'),
        (r'\b(react|vue|angular|svelte)\b', 'frontend-framework'),
        (r'\b(database|sql|postgres|mysql|mongodb)\b', 'database'),
        (r'\b(auth|authentication|oauth|jwt)\b', 'authentication'),
        (r'\b(error|exception|bug|fix)\b', 'debugging'),
        (r'\b(refactor|optimize|performance)\b', 'optimization'),
        (r'\b(deploy|ci|cd|pipeline)\b', 'deployment')
    ]

    text_lower = text.lower()
    seen_concepts = set()

    for pattern, concept in concept_patterns:
        if re.search(pattern, text_lower) and concept not in seen_concepts:
            concepts.append(concept)
            seen_concepts.add(concept)
            if len(concepts) >= MAX_CONCEPTS:
                break

    return concepts
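A minimal usage sketch for the classes above (the sample message content is hypothetical; real items come from Claude Code JSONL transcripts, and the script assumes message_processors.py is importable):

```python
from message_processors import MessageProcessorFactory, extract_concepts

fence = '`' * 3  # triple backtick, spelled out so this example stays readable

factory = MessageProcessorFactory()
metadata = {}

# A content list mixing the message types the factory dispatches on.
content = [
    {'type': 'text',
     'text': f"Fixing the auth bug:\n{fence}python\ndef login(): pass\n{fence}"},
    {'type': 'thinking', 'thinking': 'The JWT check looks wrong.'},
    {'type': 'tool_use', 'name': 'Edit', 'input': {'file_path': 'auth.py'}},
]

text = factory.process_content(content, metadata)
print(metadata['tools_used'])      # ['Edit']
print(metadata['files_edited'])    # ['auth.py']
print(metadata['ast_elements'])    # ['func:login']
print(extract_concepts(text))      # ['authentication', 'debugging']
```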
package/scripts/metadata_extractor.py
@@ -0,0 +1,262 @@
"""
Metadata extractor using message processors to reduce complexity.
Refactored from extract_metadata_single_pass function.
"""

import json
import os
import logging
from typing import Dict, Any, Tuple, Optional
from datetime import datetime

from message_processors import (
    MessageProcessorFactory,
    extract_concepts,
    MAX_CONCEPT_MESSAGES,
    MAX_FILES_ANALYZED,
    MAX_FILES_EDITED,
    MAX_TOOLS_USED,
    MAX_AST_ELEMENTS
)

logger = logging.getLogger(__name__)


class MetadataExtractor:
    """Extract metadata from JSONL conversation files."""

    def __init__(self):
        self.processor_factory = MessageProcessorFactory()

    def extract_metadata_from_file(self, file_path: str) -> Tuple[Dict[str, Any], str, int]:
        """
        Extract metadata from a JSONL file in a single pass.
        Returns: (metadata, first_timestamp, message_count)
        """
        metadata = self._initialize_metadata()
        first_timestamp = None
        message_count = 0
        all_text = []

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    if not line.strip():
                        continue

                    result = self._process_line(line, metadata)
                    if result:
                        text_content, is_message = result

                        # Update timestamp and counts
                        if first_timestamp is None:
                            first_timestamp = self._extract_timestamp(line)

                        if is_message:
                            message_count += 1

                        if text_content:
                            # Limit text accumulation to prevent memory issues
                            if len(all_text) < MAX_CONCEPT_MESSAGES:
                                all_text.append(text_content[:1000])

        except (IOError, OSError) as e:
            logger.warning(f"Error reading file {file_path}: {e}")
        except (json.JSONDecodeError, ValueError) as e:
            logger.warning(f"Error parsing JSON in {file_path}: {e}")
        except Exception as e:
            logger.error(f"Unexpected error extracting metadata from {file_path}: {e}")

        # Post-process collected data
        self._post_process_metadata(metadata, all_text, file_path)

        # Apply limits to arrays
        self._apply_metadata_limits(metadata)

        return metadata, first_timestamp or datetime.now().isoformat(), message_count

    def _initialize_metadata(self) -> Dict[str, Any]:
        """Initialize empty metadata structure."""
        return {
            "files_analyzed": [],
            "files_edited": [],
            "tools_used": [],
            "concepts": [],
            "ast_elements": [],
            "has_code_blocks": False,
            "total_messages": 0,
            "project_path": None,
            "pattern_analysis": {},
            "avg_quality_score": 0.0
        }

    def _process_line(self, line: str, metadata: Dict[str, Any]) -> Optional[Tuple[str, bool]]:
        """
        Process a single line from the JSONL file.
        Returns: (text_content, is_message) or None
        """
        try:
            data = json.loads(line)

            # Extract project path from cwd
            if metadata["project_path"] is None and 'cwd' in data:
                metadata["project_path"] = data.get('cwd')

            # Handle message entries
            if 'message' in data and data['message']:
                return self._process_message_entry(data['message'], metadata)

            # Handle top-level tool entries
            entry_type = data.get('type')
            if entry_type in ('tool_result', 'tool_use'):
                return self._process_tool_entry(data, metadata)

        except json.JSONDecodeError:
            # Expected for non-JSON lines, skip silently
            pass
        except (KeyError, TypeError, ValueError) as e:
            # Log specific parsing errors for debugging
            logger.debug(f"Error parsing line: {e}")

        return None

    def _process_message_entry(self, message: Dict[str, Any], metadata: Dict[str, Any]) -> Optional[Tuple[str, bool]]:
        """Process a message entry."""
        role = message.get('role')
        content = message.get('content')

        if not role or not content:
            return None

        # Check if it's a countable message
        is_user_or_assistant = role in ['user', 'assistant']

        # Process content
        text_content = self.processor_factory.process_content(content, metadata)

        return text_content, is_user_or_assistant

    def _process_tool_entry(self, data: Dict[str, Any], metadata: Dict[str, Any]) -> Optional[Tuple[str, bool]]:
        """Process a top-level tool entry."""
        entry_type = data.get('type')
        text_parts = []

        if entry_type == 'tool_use':
            tool_name = data.get('name', 'unknown')
            tool_input = str(data.get('input', ''))[:500]
            text_parts.append(f"[Tool: {tool_name}] {tool_input}")

            # Track tool usage
            if tool_name and tool_name not in metadata['tools_used']:
                metadata['tools_used'].append(tool_name)

        elif entry_type == 'tool_result':
            result_content = self._extract_tool_result_content(data)
            text_parts.append(f"[Result] {result_content[:1000]}")

        content = "\n".join(text_parts)
        # Tool entries should not count as messages (only user/assistant messages count)
        return (content, False) if content else None

    def _extract_tool_result_content(self, data: Dict[str, Any]) -> str:
        """Extract content from tool result data."""
        result_content = data.get('content')

        if isinstance(result_content, list):
            flat = []
            for item in result_content:
                if isinstance(item, dict) and item.get('type') == 'text':
                    flat.append(item.get('text', ''))
                elif isinstance(item, str):
                    flat.append(item)
            result_content = "\n".join(flat)

        if not result_content:
            result_content = data.get('result', '')

        return str(result_content)

    def _extract_timestamp(self, line: str) -> Optional[str]:
        """Extract timestamp from a line if present."""
        try:
            data = json.loads(line)
            return data.get('timestamp')
        except (json.JSONDecodeError, TypeError) as e:
            logger.debug(f"Failed to extract timestamp: {e}")
        return None

    def _post_process_metadata(self, metadata: Dict[str, Any], all_text: list, file_path: str):
        """Post-process collected metadata."""
        # Extract concepts from collected text
        if all_text:
            combined_text = ' '.join(all_text[:MAX_CONCEPT_MESSAGES])
            metadata['concepts'] = extract_concepts(combined_text)

        # Run AST-GREP pattern analysis if available
        self._run_pattern_analysis(metadata)

    def _run_pattern_analysis(self, metadata: Dict[str, Any]):
        """Run AST-GREP pattern analysis on mentioned files."""
        pattern_quality = {}
        avg_quality_score = 0.0

        try:
            # Update patterns first
            from update_patterns import check_and_update_patterns
            check_and_update_patterns()

            # Import analyzer
            from ast_grep_final_analyzer import FinalASTGrepAnalyzer
            analyzer = FinalASTGrepAnalyzer()

            # Analyze files
            files_to_analyze = list(set(
                metadata['files_edited'] + metadata['files_analyzed'][:10]
            ))
            quality_scores = []

            for file_path in files_to_analyze:
                # Expand file path for proper checking
                expanded_path = os.path.expanduser(file_path) if file_path.startswith('~') else file_path
                if self._is_code_file(expanded_path) and os.path.exists(expanded_path):
                    try:
                        result = analyzer.analyze_file(expanded_path)
                        metrics = result['quality_metrics']
                        pattern_quality[file_path] = {
                            'score': metrics['quality_score'],
                            'good_patterns': metrics['good_patterns_found'],
                            'bad_patterns': metrics['bad_patterns_found'],
                            'issues': metrics['total_issues']
                        }
                        quality_scores.append(metrics['quality_score'])
                    except (IOError, OSError) as e:
                        logger.debug(f"Could not read file {file_path}: {e}")
                    except (KeyError, ValueError) as e:
                        logger.debug(f"Error parsing AST results for {file_path}: {e}")
                    except Exception as e:
                        logger.warning(f"Unexpected error analyzing {file_path}: {e}")

            # Calculate average quality
            if quality_scores:
                avg_quality_score = sum(quality_scores) / len(quality_scores)

        except Exception as e:
            logger.debug(f"AST analysis not available: {e}")

        metadata['pattern_analysis'] = pattern_quality
        metadata['avg_quality_score'] = round(avg_quality_score, 3)

    def _is_code_file(self, file_path: str) -> bool:
        """Check if file is a code file."""
        if not file_path:
            return False
        extensions = ['.py', '.ts', '.js', '.tsx', '.jsx']
        return any(file_path.endswith(ext) for ext in extensions)

    def _apply_metadata_limits(self, metadata: Dict[str, Any]):
        """Apply size limits to metadata arrays."""
        metadata['files_analyzed'] = metadata['files_analyzed'][:MAX_FILES_ANALYZED]
        metadata['files_edited'] = metadata['files_edited'][:MAX_FILES_EDITED]
        metadata['tools_used'] = metadata['tools_used'][:MAX_TOOLS_USED]
        metadata['ast_elements'] = metadata['ast_elements'][:MAX_AST_ELEMENTS]
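A minimal sketch of driving the extractor end-to-end over a synthetic transcript; the two JSONL entries below are hypothetical stand-ins for real Claude Code transcript lines, and the script assumes metadata_extractor.py is importable:

```python
import json
import os
import tempfile
from metadata_extractor import MetadataExtractor

lines = [
    {"cwd": "/home/user/project", "timestamp": "2025-01-01T00:00:00Z",
     "message": {"role": "user", "content": "How do I fix this pytest error?"}},
    {"message": {"role": "assistant", "content": [
        {"type": "tool_use", "name": "Read",
         "input": {"file_path": "tests/test_auth.py"}}
    ]}},
]

# Write the entries as one JSON object per line, the JSONL shape the extractor reads.
with tempfile.NamedTemporaryFile('w', suffix='.jsonl', delete=False) as f:
    f.write('\n'.join(json.dumps(entry) for entry in lines))
    path = f.name

metadata, first_ts, count = MetadataExtractor().extract_metadata_from_file(path)
print(count)                       # 2 (one user + one assistant message)
print(first_ts)                    # 2025-01-01T00:00:00Z
print(metadata['project_path'])    # /home/user/project
print(metadata['files_analyzed'])  # ['tests/test_auth.py'] (Read is not an edit tool)
print(metadata['concepts'])        # includes 'testing' and 'debugging'
os.unlink(path)
```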
package/scripts/watcher-loop.sh
@@ -0,0 +1,56 @@
#!/bin/bash
# Watcher loop for Docker container
# Runs streaming-watcher.py with HOT/WARM/COLD prioritization

# Don't use set -e in retry loops - it can cause premature exits

echo "Starting Claude Self-Reflect Streaming Watcher v3.0.0"
echo "HOT/WARM/COLD prioritization enabled"
echo "=========================================="

# Ensure config directory exists
mkdir -p /config

# Set Python path to include scripts directory
export PYTHONPATH=/app/scripts:$PYTHONPATH

# Main loop - restart on failure with backoff
RETRY_COUNT=0
MAX_RETRIES=10
BACKOFF_SECONDS=5

while true; do
    echo "[$(date)] Starting watcher (attempt $((RETRY_COUNT + 1))/$MAX_RETRIES)..."

    # Run the streaming watcher
    python /app/scripts/streaming-watcher.py
    EXIT_CODE=$?

    if [ $EXIT_CODE -eq 0 ]; then
        echo "[$(date)] Watcher exited cleanly"
        RETRY_COUNT=0
        BACKOFF_SECONDS=5
    else
        echo "[$(date)] Watcher exited with code $EXIT_CODE"

        RETRY_COUNT=$((RETRY_COUNT + 1))
        if [ $RETRY_COUNT -ge $MAX_RETRIES ]; then
            echo "[$(date)] Maximum retries reached. Exiting."
            exit 1
        fi

        # Add jitter to prevent thundering herd (roughly ±10% of backoff)
        JITTER=$(( (RANDOM % (BACKOFF_SECONDS / 5 + 1)) - (BACKOFF_SECONDS / 10) ))
        SLEEP_TIME=$((BACKOFF_SECONDS + JITTER))
        [ $SLEEP_TIME -lt 1 ] && SLEEP_TIME=1

        echo "[$(date)] Restarting in $SLEEP_TIME seconds (base: $BACKOFF_SECONDS, jitter: $JITTER)..."
        sleep $SLEEP_TIME

        # Exponential backoff (max 300 seconds)
        BACKOFF_SECONDS=$((BACKOFF_SECONDS * 2))
        if [ $BACKOFF_SECONDS -gt 300 ]; then
            BACKOFF_SECONDS=300
        fi
    fi
done
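The restart policy is plain exponential backoff with integer jitter: a 5-second base that doubles after each failed attempt, capped at 300 seconds, with the sleep never dropping below 1 second. As a reference, here is an illustrative Python rendering of the same arithmetic; it is not part of the package:

```python
import random

backoff = 5
for attempt in range(1, 11):              # MAX_RETRIES=10
    # Mirrors $(( (RANDOM % (BACKOFF_SECONDS / 5 + 1)) - (BACKOFF_SECONDS / 10) )):
    # an integer jitter in [-backoff//10, backoff//5 - backoff//10], i.e. roughly +/-10%.
    jitter = random.randrange(backoff // 5 + 1) - backoff // 10
    sleep_time = max(1, backoff + jitter)
    print(f"attempt {attempt}: sleep ~{sleep_time}s (base {backoff}s)")
    backoff = min(backoff * 2, 300)       # doubles each failure, capped at 300s
```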