claude-self-reflect 3.3.0 → 3.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/claude-self-reflect-test.md +426 -11
- package/installer/cli.js +16 -0
- package/installer/postinstall.js +14 -0
- package/installer/statusline-setup.js +289 -0
- package/mcp-server/run-mcp.sh +28 -4
- package/mcp-server/src/parallel_search.py +16 -82
- package/mcp-server/src/reflection_tools.py +13 -8
- package/mcp-server/src/search_tools.py +90 -42
- package/mcp-server/src/temporal_tools.py +10 -3
- package/package.json +6 -1
- package/scripts/ast_grep_final_analyzer.py +325 -0
- package/scripts/ast_grep_unified_registry.py +556 -0
- package/scripts/csr-status +366 -0
- package/scripts/import-conversations-unified.py +104 -23
- package/scripts/session_quality_tracker.py +481 -0
- package/scripts/streaming-watcher.py +140 -5
- package/scripts/update_patterns.py +334 -0
package/scripts/csr-status (new file)
@@ -0,0 +1,366 @@
+#!/usr/bin/env python3
+"""
+Claude Self-Reflect Status for CC Statusline
+Standalone script that doesn't require venv activation.
+"""
+
+import json
+import time
+from pathlib import Path
+from datetime import datetime, timedelta
+import sys
+
+# Configuration
+CYCLE_FILE = Path.home() / ".claude-self-reflect" / "statusline_cycle.json"
+CYCLE_INTERVAL = 5  # seconds between cycles
+
+
+def get_import_status():
+    """Get current import/indexing status."""
+    state_file = Path.home() / ".claude-self-reflect" / "config" / "imported-files.json"
+
+    if not state_file.exists():
+        return "📚 CSR: Not configured"
+
+    try:
+        with open(state_file, 'r') as f:
+            state = json.load(f)
+
+        imported = len(state.get("imported_files", {}))
+
+        # Count total JSONL files
+        claude_dir = Path.home() / ".claude" / "projects"
+        total = 0
+        if claude_dir.exists():
+            for project_dir in claude_dir.iterdir():
+                if project_dir.is_dir():
+                    total += len(list(project_dir.glob("*.jsonl")))
+
+        if total == 0:
+            return "📚 CSR: No files"
+
+        percent = min(100, (imported / total * 100))
+
+        # Color coding
+        if percent >= 95:
+            emoji = "✅"
+        elif percent >= 50:
+            emoji = "🔄"
+        else:
+            emoji = "⏳"
+
+        return f"{emoji} CSR: {percent:.0f}% indexed"
+
+    except Exception:
+        return "📚 CSR: Error"
+
+
+def get_session_health():
+    """Get cached session health."""
+    cache_file = Path.home() / ".claude-self-reflect" / "session_quality.json"
+
+    if not cache_file.exists():
+        # Fall back to import status if no health data
+        return get_import_status()
+
+    try:
+        # Check cache age
+        mtime = datetime.fromtimestamp(cache_file.stat().st_mtime)
+        age = datetime.now() - mtime
+
+        if age > timedelta(minutes=5):
+            # Fall back to import status if stale
+            return get_import_status()
+
+        with open(cache_file, 'r') as f:
+            data = json.load(f)
+
+        if data.get('status') != 'success':
+            # Fall back to import status if no session
+            return get_import_status()
+
+        summary = data['summary']
+        grade = summary['quality_grade']
+        issues = summary['total_issues']
+
+        # Color coding
+        if grade in ['A+', 'A']:
+            emoji = '🟢'
+        elif grade in ['B', 'C']:
+            emoji = '🟡'
+        else:
+            emoji = '🔴'
+
+        if issues > 0:
+            return f"{emoji} Code: {grade} ({issues})"
+        else:
+            return f"{emoji} Code: {grade}"
+
+    except Exception:
+        return get_import_status()
+
+
+def get_current_cycle():
+    """Determine which metric to show based on cycle."""
+    # Read or create cycle state
+    cycle_state = {"last_update": 0, "current": "import"}
+
+    if CYCLE_FILE.exists():
+        try:
+            with open(CYCLE_FILE, 'r') as f:
+                cycle_state = json.load(f)
+        except:
+            pass
+
+    # Check if it's time to cycle
+    now = time.time()
+    if now - cycle_state["last_update"] >= CYCLE_INTERVAL:
+        # Toggle between import and health
+        cycle_state["current"] = "health" if cycle_state["current"] == "import" else "import"
+        cycle_state["last_update"] = now
+
+        # Save state
+        CYCLE_FILE.parent.mkdir(exist_ok=True)
+        with open(CYCLE_FILE, 'w') as f:
+            json.dump(cycle_state, f)
+
+    return cycle_state["current"]
+
+
+def get_compact_status():
+    """Get both import and quality in compact format: [100%][🟢:A+]"""
+    import subprocess
+    import os
+    import re
+    import shutil
+
+    # Get project-specific status using claude-self-reflect status command
+    import_pct = "?"
+    time_behind = ""
+
+    try:
+        # Get current working directory to determine project
+        cwd = os.getcwd()
+        project_name = os.path.basename(cwd)
+
+        # Get status from claude-self-reflect with secure path
+        import shutil
+        csr_binary = shutil.which("claude-self-reflect")
+        if not csr_binary or not os.path.isfile(csr_binary):
+            # Fallback if binary not found
+            import_pct = "?"
+            return f"[{import_pct}]"
+
+        result = subprocess.run(
+            [csr_binary, "status"],
+            capture_output=True,
+            text=True,
+            timeout=2
+        )
+
+        if result.returncode == 0:
+            status_data = json.loads(result.stdout)
+
+            # Try to find project-specific percentage
+            project_pct = None
+            encoded_path = None
+
+            # Try exact project name FIRST
+            if project_name in status_data.get("projects", {}):
+                project_pct = status_data["projects"][project_name].get("percentage")
+                encoded_path = project_name  # Use project name for file lookup
+
+            # Only try encoded path if exact match not found
+            elif project_pct is None:
+                # Convert path to encoded format
+                encoded_path = cwd.replace("/", "-")
+                if encoded_path.startswith("-"):
+                    encoded_path = encoded_path[1:]  # Remove leading dash
+
+                if encoded_path in status_data.get("projects", {}):
+                    project_pct = status_data["projects"][encoded_path].get("percentage")
+
+            # Use project percentage if found, otherwise use overall
+            if project_pct is not None:
+                pct = int(project_pct)
+            else:
+                pct = int(status_data.get("overall", {}).get("percentage", 0))
+
+            import_pct = f"{pct}%"
+
+            # Only show time behind if NOT at 100%
+            # This indicates how old the unindexed files are
+            if pct < 100:
+                # Check for newest unindexed file
+                state_file = Path.home() / ".claude-self-reflect" / "config" / "imported-files.json"
+                if state_file.exists():
+                    with open(state_file, 'r') as f:
+                        state = json.load(f)
+
+                    # Find project directory
+                    claude_dir = Path.home() / ".claude" / "projects"
+                    if encoded_path:
+                        project_dir = claude_dir / encoded_path
+                        if not project_dir.exists() and not encoded_path.startswith("-"):
+                            project_dir = claude_dir / f"-{encoded_path}"
+
+                        if project_dir.exists():
+                            # Find the newest UNINDEXED file
+                            newest_unindexed_time = None
+                            for jsonl_file in project_dir.glob("*.jsonl"):
+                                file_key = str(jsonl_file)
+                                # Only check unindexed files
+                                if file_key not in state.get("imported_files", {}):
+                                    file_time = datetime.fromtimestamp(jsonl_file.stat().st_mtime)
+                                    if newest_unindexed_time is None or file_time > newest_unindexed_time:
+                                        newest_unindexed_time = file_time
+
+                            # Calculate how behind we are
+                            if newest_unindexed_time:
+                                age = datetime.now() - newest_unindexed_time
+                                if age < timedelta(minutes=5):
+                                    time_behind = " <5m"
+                                elif age < timedelta(hours=1):
+                                    time_behind = f" {int(age.total_seconds() / 60)}m"
+                                elif age < timedelta(days=1):
+                                    time_behind = f" {int(age.total_seconds() / 3600)}h"
+                                else:
+                                    time_behind = f" {int(age.days)}d"
+    except:
+        # Fallback to simple file counting
+        state_file = Path.home() / ".claude-self-reflect" / "config" / "imported-files.json"
+        if state_file.exists():
+            try:
+                with open(state_file, 'r') as f:
+                    state = json.load(f)
+                imported = len(state.get("imported_files", {}))
+
+                claude_dir = Path.home() / ".claude" / "projects"
+                total = 0
+                if claude_dir.exists():
+                    for project_dir in claude_dir.iterdir():
+                        if project_dir.is_dir():
+                            total += len(list(project_dir.glob("*.jsonl")))
+
+                if total > 0:
+                    pct = min(100, int(imported / total * 100))
+                    import_pct = f"{pct}%"
+            except:
+                pass
+
+    # Get quality grade - PER PROJECT cache
+    # BUG FIX: Cache must be per-project, not global!
+    project_name = os.path.basename(os.getcwd())
+    # Secure sanitization with whitelist approach
+    import re
+    safe_project_name = re.sub(r'[^a-zA-Z0-9_-]', '_', project_name)[:100]
+    cache_dir = Path.home() / ".claude-self-reflect" / "quality_cache"
+    cache_file = cache_dir / f"{safe_project_name}.json"
+
+    # Validate cache file path stays within cache directory
+    if cache_file.exists() and not str(cache_file.resolve()).startswith(str(cache_dir.resolve())):
+        # Security issue - return placeholder
+        grade_str = "[...]"
+    else:
+        cache_file.parent.mkdir(exist_ok=True, parents=True)
+        grade_str = ""
+
+    # Try to get quality data (regenerate if too old or missing)
+    quality_valid = False
+
+    if cache_file.exists():
+        try:
+            mtime = datetime.fromtimestamp(cache_file.stat().st_mtime)
+            age = datetime.now() - mtime
+
+            # Use quality data up to 24 hours old (more reasonable)
+            if age < timedelta(hours=24):
+                with open(cache_file, 'r') as f:
+                    data = json.load(f)
+
+                if data.get('status') == 'success':
+                    summary = data['summary']
+                    grade = summary['quality_grade']
+                    issues = summary.get('total_issues', 0)
+                    scope = data.get('scope_label', 'Core')  # Get scope label
+
+                    # GPT-5 fix: Remove forced downgrades, trust the analyzer's grade
+                    # Grade should reflect actual quality metrics, not arbitrary thresholds
+
+                    # Pick emoji based on grade
+                    if grade in ['A+', 'A']:
+                        emoji = '🟢'
+                    elif grade in ['B', 'C']:
+                        emoji = '🟡'
+                    else:
+                        emoji = '🔴'
+
+                    # Simple, clear display without confusing scope labels
+                    grade_str = f"[{emoji}:{grade}/{issues}]"
+                    quality_valid = True
+        except:
+            pass
+
+    # If no valid quality data, show last known value or placeholder
+    if not quality_valid and not grade_str:
+        # Try to use last known value from cache even if expired
+        try:
+            if cache_file.exists():
+                with open(cache_file, 'r') as f:
+                    old_data = json.load(f)
+                if old_data.get('status') == 'success':
+                    old_grade = old_data['summary']['quality_grade']
+                    old_issues = old_data['summary'].get('total_issues', 0)
+                    # Show with dimmed indicator that it's old
+                    if old_grade in ['A+', 'A']:
+                        emoji = '🟢'
+                    elif old_grade in ['B', 'C']:
+                        emoji = '🟡'
+                    else:
+                        emoji = '🔴'
+                    grade_str = f"[{emoji}:{old_grade}/{old_issues}]"
+                else:
+                    grade_str = "[...]"
+            else:
+                grade_str = "[...]"
+        except:
+            grade_str = "[...]"
+
+    # Add mini progress bar if not 100%
+    bar_str = ""
+    if import_pct != "?" and import_pct != "100%":
+        pct_num = int(import_pct.rstrip('%'))
+        filled = int(pct_num * 4 / 100)  # 4-char mini bar
+        empty = 4 - filled
+        bar_str = "█" * filled + "░" * empty + " "
+
+    # Return compact format with bar, percentage, time behind, and grade
+    return f"[{bar_str}{import_pct}{time_behind}]{grade_str}"
+
+def main():
+    """Main entry point for CC statusline."""
+    # Check for forced mode
+    if len(sys.argv) > 1:
+        if sys.argv[1] == "--import":
+            print(get_import_status())
+        elif sys.argv[1] == "--health":
+            print(get_session_health())
+        elif sys.argv[1] == "--quality-only":
+            # Only show quality, not import (to avoid duplication with MCP status)
+            health = get_session_health()
+            # Only show if it's actual quality data, not fallback to import
+            if "Code:" in health:
+                print(health)
+        elif sys.argv[1] == "--compact":
+            print(get_compact_status())
+        else:
+            # Default to compact mode
+            print(get_compact_status())
+        return
+
+    # Default to compact format (no cycling)
+    print(get_compact_status())
+
+
+if __name__ == "__main__":
+    main()
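For orientation, the compact mode above renders a single bracketed string such as `[██░░ 62% 3h][🟡:B/4]` (progress bar, indexed percentage, age of the newest unindexed file, then quality grade and issue count). A minimal sketch of how a statusline consumer might call it follows; it assumes the script is reachable on PATH as `csr-status` (the actual wiring is presumably done by the new `installer/statusline-setup.js`, not reproduced here), so the command name, timeout, and fallback text are this sketch's assumptions, not package behavior.

```python
# Illustrative only: shell out to the statusline script and fall back quietly.
# Assumptions: the script is on PATH as "csr-status"; "[?]" is just a placeholder.
import subprocess

def read_statusline() -> str:
    try:
        result = subprocess.run(
            ["csr-status", "--compact"],
            capture_output=True, text=True, timeout=2,
        )
        return result.stdout.strip() or "[?]"
    except Exception:
        return "[?]"

if __name__ == "__main__":
    # Example output while indexing is catching up: [██░░ 62% 3h][🟡:B/4]
    print(read_statusline())
```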
package/scripts/import-conversations-unified.py
@@ -147,7 +147,15 @@ def generate_embeddings(texts: List[str]) -> List[List[float]]:
 147 147      """Generate embeddings for texts."""
 148 148      # Use the global embedding_provider which gets updated by command-line args
 149 149      if PREFER_LOCAL_EMBEDDINGS:
 150      -
     150  +        # FastEmbed uses 'embed' method, not 'passage_embed'
     151  +        # Try 'embed' first, fall back to 'passage_embed' for compatibility
     152  +        if hasattr(embedding_provider, 'embed'):
     153  +            embeddings = list(embedding_provider.embed(texts))
     154  +        elif hasattr(embedding_provider, 'passage_embed'):
     155  +            # Fallback for older versions (shouldn't exist but kept for safety)
     156  +            embeddings = list(embedding_provider.passage_embed(texts))
     157  +        else:
     158  +            raise AttributeError("FastEmbed provider has neither 'embed' nor 'passage_embed' method")
 151 159          return [emb.tolist() if hasattr(emb, 'tolist') else emb for emb in embeddings]
 152 160      else:
 153 161          response = embedding_provider.embed(texts, model="voyage-3")
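The hunk above guards between FastEmbed method names before embedding locally. As a point of reference, a minimal standalone sketch of the `embed` path is below; the model name is an assumption for illustration and is not taken from this diff.

```python
# Minimal sketch (not package code): fastembed's TextEmbedding.embed() yields
# numpy arrays, which is why generate_embeddings() normalizes with .tolist().
from fastembed import TextEmbedding

model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")  # assumed model
vectors = [vec.tolist() for vec in model.embed(["hello world", "claude self reflect"])]
print(len(vectors), len(vectors[0]))  # 2 vectors of plain Python floats
```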
@@ -368,7 +376,8 @@ def extract_metadata_single_pass(file_path: str) -> tuple[Dict[str, Any], str, int]:
 368 376              # Extract code for AST analysis with bounds checking
 369 377              if len(metadata['ast_elements']) < MAX_AST_ELEMENTS:
 370 378                  # Fix: More permissive regex to handle various fence formats
 371      -
     379  +                # Handles both ```\n and ```python\n cases, with optional newline
     380  +                code_blocks = re.findall(r'```[^`\n]*\n?(.*?)```', item.get('text', ''), re.DOTALL)
 372 381                  for code_block in code_blocks[:MAX_CODE_BLOCKS]:  # Use defined constant
 373 382                      if len(metadata['ast_elements']) >= MAX_AST_ELEMENTS:
 374 383                          break
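A quick, self-contained check of the more permissive fence regex introduced above; the sample strings are invented for illustration.

```python
# Demonstrates what the new fence regex captures: the bodies of both bare ```
# fences and language-tagged ```python fences.
import re

sample = "Intro\n```python\nprint('hi')\n```\nand\n```\nx = 1\n```"
blocks = re.findall(r'```[^`\n]*\n?(.*?)```', sample, re.DOTALL)
print(blocks)  # ["print('hi')\n", 'x = 1\n']
```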
@@ -376,7 +385,11 @@ def extract_metadata_single_pass(file_path: str) -> tuple[Dict[str, Any], str, int]:
 376 385                      for elem in list(ast_elems)[:MAX_ELEMENTS_PER_BLOCK]:  # Use defined constant
 377 386                          if elem not in metadata['ast_elements'] and len(metadata['ast_elements']) < MAX_AST_ELEMENTS:
 378 387                              metadata['ast_elements'].append(elem)
 379      -
     388  +
     389  +            elif item.get('type') == 'thinking':
     390  +                # Also include thinking content in metadata extraction
     391  +                text_content += item.get('thinking', '')
     392  +
 380 393              elif item.get('type') == 'tool_use':
 381 394                  tool_name = item.get('name', '')
 382 395                  if tool_name and tool_name not in metadata['tools_used']:
@@ -423,39 +436,77 @@ def extract_metadata_single_pass(file_path: str) -> tuple[Dict[str, Any], str, int]:
 423 436      if all_text:
 424 437          combined_text = ' '.join(all_text[:MAX_CONCEPT_MESSAGES])  # Limit messages for concept extraction
 425 438          metadata['concepts'] = extract_concepts(combined_text)
 426      -
     439  +
     440  +    # MANDATORY: AST-GREP Pattern Analysis
     441  +    # Analyze code quality for files mentioned in conversation
     442  +    pattern_quality = {}
     443  +    avg_quality_score = 0.0
     444  +
     445  +    try:
     446  +        # Update patterns first (uses 24h cache, <100ms)
     447  +        from update_patterns import check_and_update_patterns
     448  +        check_and_update_patterns()
     449  +
     450  +        # Import analyzer
     451  +        from ast_grep_final_analyzer import FinalASTGrepAnalyzer
     452  +        analyzer = FinalASTGrepAnalyzer()
     453  +
     454  +        # Analyze edited and analyzed files
     455  +        files_to_analyze = list(set(metadata['files_edited'] + metadata['files_analyzed'][:10]))
     456  +        quality_scores = []
     457  +
     458  +        for file_path in files_to_analyze:
     459  +            # Only analyze code files
     460  +            if file_path and any(file_path.endswith(ext) for ext in ['.py', '.ts', '.js', '.tsx', '.jsx']):
     461  +                try:
     462  +                    # Check if file exists and is accessible
     463  +                    if os.path.exists(file_path):
     464  +                        result = analyzer.analyze_file(file_path)
     465  +                        metrics = result['quality_metrics']
     466  +                        pattern_quality[file_path] = {
     467  +                            'score': metrics['quality_score'],
     468  +                            'good_patterns': metrics['good_patterns_found'],
     469  +                            'bad_patterns': metrics['bad_patterns_found'],
     470  +                            'issues': metrics['total_issues']
     471  +                        }
     472  +                        quality_scores.append(metrics['quality_score'])
     473  +                except Exception as e:
     474  +                    logger.debug(f"Could not analyze {file_path}: {e}")
     475  +
     476  +        # Calculate average quality
     477  +        if quality_scores:
     478  +            avg_quality_score = sum(quality_scores) / len(quality_scores)
     479  +
     480  +    except Exception as e:
     481  +        logger.debug(f"AST analysis not available: {e}")
     482  +
     483  +    # Add pattern analysis to metadata
     484  +    metadata['pattern_analysis'] = pattern_quality
     485  +    metadata['avg_quality_score'] = round(avg_quality_score, 3)
     486  +
 427 487      # Set total messages
 428 488      metadata['total_messages'] = message_count
 429      -
     489  +
 430 490      # Limit arrays
 431 491      metadata['files_analyzed'] = metadata['files_analyzed'][:MAX_FILES_ANALYZED]
 432 492      metadata['files_edited'] = metadata['files_edited'][:MAX_FILES_EDITED]
 433 493      metadata['tools_used'] = metadata['tools_used'][:MAX_TOOLS_USED]
 434 494      metadata['ast_elements'] = metadata['ast_elements'][:MAX_AST_ELEMENTS]
 435      -
     495  +
 436 496      return metadata, first_timestamp or datetime.now().isoformat(), message_count
 437 497
 438 498  def stream_import_file(jsonl_file: Path, collection_name: str, project_path: Path) -> int:
 439 499      """Stream import a single JSONL file without loading it into memory."""
 440 500      logger.info(f"Streaming import of {jsonl_file.name}")
 441      -
 442      -    #
     501  +
     502  +    # Extract conversation ID
 443 503      conversation_id = jsonl_file.stem
 444      -
 445      -        from qdrant_client.models import Filter, FieldCondition, MatchValue
 446      -        client.delete(
 447      -            collection_name=collection_name,
 448      -            points_selector=Filter(
 449      -                must=[FieldCondition(key="conversation_id", match=MatchValue(value=conversation_id))]
 450      -            ),
 451      -            wait=True
 452      -        )
 453      -        logger.info(f"Deleted existing points for conversation {conversation_id}")
 454      -    except Exception as e:
 455      -        logger.warning(f"Could not delete existing points for {conversation_id}: {e}")
 456      -
     504  +
 457 505      # Extract metadata in first pass (lightweight)
 458 506      metadata, created_at, total_messages = extract_metadata_single_pass(str(jsonl_file))
     507  +
     508  +    # Track whether we should delete old points (only after successful import)
     509  +    should_delete_old = False
 459 510
 460 511      # Reset counters for each conversation (critical for correct indexing)
 461 512      current_message_index = 0  # Must be reset before processing each conversation
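To make the new fields concrete, this is the rough shape they take in `metadata` after the AST-grep pass; only the keys come from the code above, while the file name and numbers are invented.

```python
# Invented values; key names mirror the hunk above.
metadata_example = {
    "pattern_analysis": {
        "src/search_tools.py": {        # hypothetical analyzed file
            "score": 0.82,              # metrics['quality_score']
            "good_patterns": 14,        # metrics['good_patterns_found']
            "bad_patterns": 3,          # metrics['bad_patterns_found']
            "issues": 3,                # metrics['total_issues']
        }
    },
    "avg_quality_score": 0.82,          # rounded to 3 decimal places
}
```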
@@ -493,6 +544,11 @@ def stream_import_file(jsonl_file: Path, collection_name: str, project_path: Path) -> int:
 493 544                      item_type = item.get('type', '')
 494 545                      if item_type == 'text':
 495 546                          text_parts.append(item.get('text', ''))
     547  +                    elif item_type == 'thinking':
     548  +                        # Include thinking content (from Claude's thinking blocks)
     549  +                        thinking_content = item.get('thinking', '')
     550  +                        if thinking_content:
     551  +                            text_parts.append(f"[Thinking] {thinking_content[:1000]}")  # Limit size
 496 552                      elif item_type == 'tool_use':
 497 553                          # Include tool use information
 498 554                          tool_name = item.get('name', 'unknown')
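For reference, a content item of this kind has roughly the shape below (inferred from the `item.get('type')` / `item.get('thinking')` accessors above; the text is invented), and this is the chunk text it contributes.

```python
# Shape inferred from the accessors in the hunk above; values are invented.
item = {"type": "thinking", "thinking": "User wants thinking blocks indexed too..."}

if item.get("type") == "thinking" and item.get("thinking"):
    chunk_text = f"[Thinking] {item['thinking'][:1000]}"
    print(chunk_text)  # [Thinking] User wants thinking blocks indexed too...
```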
@@ -594,10 +650,35 @@ def stream_import_file(jsonl_file: Path, collection_name: str, project_path: Path) -> int:
 594 650              created_at, metadata, collection_name, project_path, total_messages
 595 651          )
 596 652          total_chunks += chunks
 597      -
     653  +
     654  +        # Only delete old points after successful import verification
     655  +        if total_chunks > 0:
     656  +            try:
     657  +                from qdrant_client.models import Filter, FieldCondition, MatchValue
     658  +                # Count old points before deletion for verification
     659  +                old_count_filter = Filter(
     660  +                    must=[FieldCondition(key="conversation_id", match=MatchValue(value=conversation_id))]
     661  +                )
     662  +                old_points = client.scroll(
     663  +                    collection_name=collection_name,
     664  +                    scroll_filter=old_count_filter,
     665  +                    limit=1
     666  +                )[0]
     667  +
     668  +                if len(old_points) > total_chunks + 5:  # Allow some tolerance
     669  +                    # Only delete if we have significantly more old points than new
     670  +                    client.delete(
     671  +                        collection_name=collection_name,
     672  +                        points_selector=old_count_filter,
     673  +                        wait=True
     674  +                    )
     675  +                    logger.info(f"Deleted old points for conversation {conversation_id} after verifying new import")
     676  +            except Exception as e:
     677  +                logger.warning(f"Could not clean up old points for {conversation_id}: {e}")
     678  +
 598 679          logger.info(f"Imported {total_chunks} chunks from {jsonl_file.name}")
 599 680          return total_chunks
 600      -
     681  +
 601 682      except Exception as e:
 602 683          logger.error(f"Failed to import {jsonl_file}: {e}")
 603 684          return 0
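The cleanup step above probes for leftover points with a `scroll` call before deleting. For comparison, a hedged sketch of counting the matching points directly with qdrant-client's count API is below; the Qdrant URL, collection name, and conversation id are assumptions for illustration, not values from the package.

```python
# Sketch only: count the points stored for one conversation with qdrant-client.
# URL, collection name, and conversation_id here are assumed, not package config.
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

client = QdrantClient(url="http://localhost:6333")
flt = Filter(must=[FieldCondition(key="conversation_id", match=MatchValue(value="abc123"))])
total = client.count(collection_name="conversations_example", count_filter=flt, exact=True).count
print(f"{total} points currently stored for this conversation")
```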