claude-self-reflect 3.2.3 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.claude/agents/claude-self-reflect-test.md +595 -528
  2. package/.claude/agents/documentation-writer.md +1 -1
  3. package/.claude/agents/qdrant-specialist.md +2 -2
  4. package/.claude/agents/reflection-specialist.md +61 -5
  5. package/.claude/agents/search-optimizer.md +9 -7
  6. package/README.md +16 -9
  7. package/mcp-server/pyproject.toml +1 -1
  8. package/mcp-server/run-mcp.sh +49 -5
  9. package/mcp-server/src/app_context.py +64 -0
  10. package/mcp-server/src/config.py +57 -0
  11. package/mcp-server/src/connection_pool.py +286 -0
  12. package/mcp-server/src/decay_manager.py +106 -0
  13. package/mcp-server/src/embedding_manager.py +64 -40
  14. package/mcp-server/src/embeddings_old.py +141 -0
  15. package/mcp-server/src/models.py +64 -0
  16. package/mcp-server/src/parallel_search.py +371 -0
  17. package/mcp-server/src/project_resolver.py +33 -46
  18. package/mcp-server/src/reflection_tools.py +206 -0
  19. package/mcp-server/src/rich_formatting.py +196 -0
  20. package/mcp-server/src/search_tools.py +826 -0
  21. package/mcp-server/src/server.py +140 -1715
  22. package/mcp-server/src/temporal_design.py +132 -0
  23. package/mcp-server/src/temporal_tools.py +597 -0
  24. package/mcp-server/src/temporal_utils.py +384 -0
  25. package/mcp-server/src/utils.py +150 -67
  26. package/package.json +11 -1
  27. package/scripts/add-timestamp-indexes.py +134 -0
  28. package/scripts/check-collections.py +29 -0
  29. package/scripts/debug-august-parsing.py +76 -0
  30. package/scripts/debug-import-single.py +91 -0
  31. package/scripts/debug-project-resolver.py +82 -0
  32. package/scripts/debug-temporal-tools.py +135 -0
  33. package/scripts/delta-metadata-update.py +547 -0
  34. package/scripts/import-conversations-unified.py +65 -6
  35. package/scripts/importer/utils/project_normalizer.py +22 -9
  36. package/scripts/precompact-hook.sh +33 -0
  37. package/scripts/streaming-watcher.py +1443 -0
  38. package/scripts/utils.py +39 -0
  39. package/shared/__init__.py +5 -0
  40. package/shared/normalization.py +54 -0
package/mcp-server/src/project_resolver.py
@@ -6,11 +6,21 @@ Handles mapping between user-friendly names and internal collection names.
 import hashlib
 import logging
 import re
+import sys
 from pathlib import Path
 from typing import List, Dict, Optional, Set
 from time import time
 from qdrant_client import QdrantClient
 
+# Import from shared module for consistent normalization
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+try:
+    from shared.normalization import normalize_project_name
+except ImportError:
+    # Fall back to the local implementation below if the shared module is not found
+    logging.warning("Could not import shared normalization module")
+    normalize_project_name = None
+
 logger = logging.getLogger(__name__)
 
 # Project discovery markers - common parent directories that indicate project roots
@@ -59,6 +69,11 @@ class ProjectResolver:
         Returns:
             List of collection names that match the project
         """
+        # Special case: 'all' returns all conversation collections
+        if user_project_name == 'all':
+            collection_names = self._get_collection_names()
+            return collection_names  # Return all conv_ collections
+
         if user_project_name in self._cache:
            # Check if cache entry is still valid
            if time() - self._cache_ttl.get(user_project_name, 0) < self._cache_duration:
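Aside: the cache guard in the context lines above is a plain TTL check. A minimal standalone sketch of the same pattern (the class is hypothetical; only the attribute names mirror the resolver's):

    from time import time

    class TTLCache:
        """Toy version of the resolver's cache guard, for illustration only."""

        def __init__(self, ttl_seconds: float = 300.0):
            self._cache = {}        # project name -> resolved collections
            self._cache_ttl = {}    # project name -> time the entry was stored
            self._cache_duration = ttl_seconds

        def get(self, name):
            # Serve the entry only while it is younger than the TTL
            if name in self._cache and time() - self._cache_ttl.get(name, 0) < self._cache_duration:
                return self._cache[name]
            return None

        def put(self, name, value):
            self._cache[name] = value
            self._cache_ttl[name] = time()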
@@ -244,59 +259,31 @@
     def _normalize_project_name(self, project_path: str) -> str:
         """
         Normalize project name for consistent hashing.
-        Extracts the actual project name from various path formats.
+        Uses the shared normalization module to ensure consistency
+        with import scripts.
         """
+        # Use the shared normalization function if available
+        if normalize_project_name:
+            return normalize_project_name(project_path)
+
+        # Fallback implementation - EXACT copy of shared module
         if not project_path:
             return ""
 
-        # Remove trailing slashes
-        project_path = project_path.rstrip('/')
+        path = Path(project_path.rstrip('/'))
 
-        # Handle Claude logs format (starts with dash)
-        if project_path.startswith('-'):
-            # Split on dashes but don't convert to path separators
-            # This preserves project names that contain dashes
-            path_str = project_path[1:]  # Remove leading dash
-            path_parts = path_str.split('-')  # Split on dashes, not path separators
-
-            # Look for common project parent directories
-            project_parents = {'projects', 'code', 'Code', 'repos', 'repositories',
-                               'dev', 'Development', 'work', 'src', 'github'}
-
-            # Find the project name after a known parent directory
-            for i, part in enumerate(path_parts):
-                if part.lower() in project_parents and i + 1 < len(path_parts):
-                    # Return everything after the parent directory
-                    remaining = path_parts[i + 1:]
-
-                    # Use segment-based approach for complex paths
-                    # Return the most likely project name from remaining segments
-                    if remaining:
-                        # If it's a single segment, return it
-                        if len(remaining) == 1:
-                            return remaining[0]
-                        # For multiple segments, look for project-like patterns
-                        for r in remaining:
-                            r_lower = r.lower()
-                            # Prioritize segments with project indicators
-                            if any(ind in r_lower for ind in ['app', 'service', 'project', 'api', 'client']):
-                                return r
-
-                    # Otherwise join remaining parts
-                    return '-'.join(remaining)
-
-            # Fallback: use the last component
-            return path_parts[-1] if path_parts else project_path
+        # Extract the final directory name
+        final_component = path.name
 
-        # For regular paths or simple names
-        path_obj = Path(project_path)
+        # If it's Claude's dash-separated format, extract project name
+        if final_component.startswith('-') and 'projects' in final_component:
+            # Find the last occurrence of 'projects-' to handle edge cases
+            idx = final_component.rfind('projects-')
+            if idx != -1:
+                return final_component[idx + len('projects-'):]
 
-        # If it's already a simple name, return it
-        if '/' not in project_path and '\\' not in project_path:
-            return project_path
-
-        # Otherwise extract from path
-        return path_obj.name
+        # For regular paths, just return the directory name
+        return final_component if final_component else path.parent.name
 
     def _project_matches(self, stored_project: str, target_project: str) -> bool:
         """
package/mcp-server/src/reflection_tools.py (new file)
@@ -0,0 +1,206 @@
+"""Reflection tools for Claude Self Reflect MCP server."""
+
+import os
+import json
+import hashlib
+import logging
+from typing import Optional, List, Dict, Any
+from datetime import datetime, timezone
+from pathlib import Path
+import uuid
+
+from fastmcp import Context
+from pydantic import Field
+from qdrant_client import AsyncQdrantClient
+from qdrant_client.models import PointStruct, VectorParams, Distance
+
+logger = logging.getLogger(__name__)
+
+
+class ReflectionTools:
+    """Handles reflection storage and conversation retrieval operations."""
+
+    def __init__(
+        self,
+        qdrant_client: AsyncQdrantClient,
+        qdrant_url: str,
+        get_embedding_manager,
+        normalize_project_name
+    ):
+        """Initialize reflection tools with dependencies."""
+        self.qdrant_client = qdrant_client
+        self.qdrant_url = qdrant_url
+        self.get_embedding_manager = get_embedding_manager
+        self.normalize_project_name = normalize_project_name
+
+    async def store_reflection(
+        self,
+        ctx: Context,
+        content: str,
+        tags: List[str] = []
+    ) -> str:
+        """Store an important insight or reflection for future reference."""
+
+        await ctx.debug(f"Storing reflection with {len(tags)} tags")
+
+        try:
+            # Determine collection name based on embedding type
+            embedding_manager = self.get_embedding_manager()
+            embedding_type = "local" if embedding_manager.prefer_local else "voyage"
+            collection_name = f"reflections_{embedding_type}"
+
+            # Ensure reflections collection exists
+            try:
+                await self.qdrant_client.get_collection(collection_name)
+                await ctx.debug(f"Using existing {collection_name} collection")
+            except Exception:
+                # Collection doesn't exist, create it
+                await ctx.debug(f"Creating {collection_name} collection")
+
+                # Determine embedding dimensions
+                embedding_dim = embedding_manager.get_vector_dimension()
+
+                await self.qdrant_client.create_collection(
+                    collection_name=collection_name,
+                    vectors_config=VectorParams(
+                        size=embedding_dim,
+                        distance=Distance.COSINE
+                    )
+                )
+
+            # Generate embedding for the reflection
+            embedding_manager = self.get_embedding_manager()
+            embedding = await embedding_manager.generate_embedding(content)
+
+            # Create unique ID
+            reflection_id = hashlib.md5(f"{content}{datetime.now().isoformat()}".encode()).hexdigest()
+
+            # Prepare metadata
+            metadata = {
+                "content": content,
+                "tags": tags,
+                "timestamp": datetime.now(timezone.utc).isoformat(),
+                "type": "reflection"
+            }
+
+            # Store in Qdrant
+            await self.qdrant_client.upsert(
+                collection_name=collection_name,
+                points=[
+                    PointStruct(
+                        id=reflection_id,
+                        vector=embedding,
+                        payload=metadata
+                    )
+                ]
+            )
+
+            await ctx.debug(f"Stored reflection with ID {reflection_id}")
+
+            return f"""Reflection stored successfully.
+ID: {reflection_id}
+Tags: {', '.join(tags) if tags else 'none'}
+Timestamp: {metadata['timestamp']}"""
+
+        except Exception as e:
+            logger.error(f"Failed to store reflection: {e}", exc_info=True)
+            return f"Failed to store reflection: {str(e)}"
+
+    async def get_full_conversation(
+        self,
+        ctx: Context,
+        conversation_id: str,
+        project: Optional[str] = None
+    ) -> str:
+        """Get the full JSONL conversation file path for a conversation ID.
+        This allows agents to read complete conversations instead of truncated excerpts."""
+
+        await ctx.debug(f"Getting full conversation for ID: {conversation_id}, project: {project}")
+
+        try:
+            # Base path for conversations
+            base_path = Path.home() / '.claude' / 'projects'
+
+            # If project is specified, try to find it in that project
+            if project:
+                # Normalize project name for path matching
+                project_normalized = self.normalize_project_name(project)
+
+                # Look for project directories that match
+                for project_dir in base_path.glob('*'):
+                    if project_normalized in project_dir.name.lower():
+                        # Look for JSONL files in this project
+                        for jsonl_file in project_dir.glob('*.jsonl'):
+                            # Check if filename matches conversation_id (with or without .jsonl)
+                            if conversation_id in jsonl_file.stem or conversation_id == jsonl_file.stem:
+                                await ctx.debug(f"Found conversation by filename in {jsonl_file}")
+                                return f"""<conversation_file>
+<conversation_id>{conversation_id}</conversation_id>
+<file_path>{str(jsonl_file)}</file_path>
+<project>{project_dir.name}</project>
+<message>Use the Read tool with this file path to read the complete conversation.</message>
+</conversation_file>"""
+
+            # If not found in specific project or no project specified, search all
+            await ctx.debug("Searching all projects for conversation")
+            for project_dir in base_path.glob('*'):
+                for jsonl_file in project_dir.glob('*.jsonl'):
+                    # Check if filename matches conversation_id (with or without .jsonl)
+                    if conversation_id in jsonl_file.stem or conversation_id == jsonl_file.stem:
+                        await ctx.debug(f"Found conversation by filename in {jsonl_file}")
+                        return f"""<conversation_file>
+<conversation_id>{conversation_id}</conversation_id>
+<file_path>{str(jsonl_file)}</file_path>
+<project>{project_dir.name}</project>
+<message>Use the Read tool with this file path to read the complete conversation.</message>
+</conversation_file>"""
+
+            # Not found
+            return f"""<conversation_file>
+<error>Conversation ID '{conversation_id}' not found in any project.</error>
+<suggestion>The conversation may not have been imported yet, or the ID may be incorrect.</suggestion>
+</conversation_file>"""
+
+        except Exception as e:
+            logger.error(f"Failed to get conversation file: {e}", exc_info=True)
+            return f"""<conversation_file>
+<error>Failed to locate conversation: {str(e)}</error>
+</conversation_file>"""
+
+
+def register_reflection_tools(
+    mcp,
+    qdrant_client: AsyncQdrantClient,
+    qdrant_url: str,
+    get_embedding_manager,
+    normalize_project_name
+):
+    """Register reflection tools with the MCP server."""
+
+    tools = ReflectionTools(
+        qdrant_client,
+        qdrant_url,
+        get_embedding_manager,
+        normalize_project_name
+    )
+
+    @mcp.tool()
+    async def store_reflection(
+        ctx: Context,
+        content: str = Field(description="The insight or reflection to store"),
+        tags: List[str] = Field(default=[], description="Tags to categorize this reflection")
+    ) -> str:
+        """Store an important insight or reflection for future reference."""
+        return await tools.store_reflection(ctx, content, tags)
+
+    @mcp.tool()
+    async def get_full_conversation(
+        ctx: Context,
+        conversation_id: str = Field(description="The conversation ID from search results (cid)"),
+        project: Optional[str] = Field(default=None, description="Optional project name to help locate the file")
+    ) -> str:
+        """Get the full JSONL conversation file path for a conversation ID.
+        This allows agents to read complete conversations instead of truncated excerpts."""
+        return await tools.get_full_conversation(ctx, conversation_id, project)
+
+    logger.info("Reflection tools registered successfully")
package/mcp-server/src/rich_formatting.py (new file)
@@ -0,0 +1,196 @@
+"""Rich formatting for search results with emojis and enhanced display."""
+
+import json
+import time
+from datetime import datetime, timezone
+from typing import List, Dict, Any, Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def format_search_results_rich(
+    results: List[Dict],
+    query: str,
+    target_project: str,
+    collections_searched: int,
+    timing_info: Dict[str, float],
+    start_time: float,
+    brief: bool = False,
+    include_raw: bool = False,
+    indexing_status: Optional[Dict] = None
+) -> str:
+    """Format search results with rich formatting including emojis and performance metrics."""
+
+    # Initialize upfront summary
+    upfront_summary = ""
+
+    # Show result summary with emojis
+    if results:
+        score_info = "high" if results[0]['score'] >= 0.85 else "good" if results[0]['score'] >= 0.75 else "partial"
+        upfront_summary += f"🎯 RESULTS: {len(results)} matches ({score_info} relevance, top score: {results[0]['score']:.3f})\n"
+
+        # Show performance metrics
+        total_time = time.time() - start_time
+        indexing_info = ""
+        if indexing_status and indexing_status.get("percentage", 100) < 100.0:
+            indexing_info = f" | 📊 {indexing_status['indexed_conversations']}/{indexing_status['total_conversations']} indexed"
+        upfront_summary += f"⚡ PERFORMANCE: {int(total_time * 1000)}ms ({collections_searched} collections searched{indexing_info})\n"
+    else:
+        upfront_summary += f"❌ NO RESULTS: No conversations found matching '{query}'\n"
+
+    # Start XML format with upfront summary
+    result_text = upfront_summary + "\n<search>\n"
+
+    # Add indexing status if not fully baselined
+    if indexing_status and indexing_status.get("percentage", 100) < 95.0:
+        result_text += f'  <info status="indexing" progress="{indexing_status["percentage"]:.1f}%" backlog="{indexing_status.get("backlog_count", 0)}">\n'
+        result_text += f'    <message>📊 Indexing: {indexing_status["indexed_conversations"]}/{indexing_status["total_conversations"]} conversations ({indexing_status["percentage"]:.1f}% complete)</message>\n'
+        result_text += f"  </info>\n"
+
+    # Add high-level result summary
+    if results:
+        # Count time-based results
+        now = datetime.now(timezone.utc)
+        today_count = 0
+        yesterday_count = 0
+        week_count = 0
+
+        for result in results:
+            timestamp_str = result.get('timestamp', '')
+            if timestamp_str:
+                try:
+                    # Clean timestamp
+                    timestamp_clean = timestamp_str.replace('Z', '+00:00') if timestamp_str.endswith('Z') else timestamp_str
+                    timestamp_dt = datetime.fromisoformat(timestamp_clean)
+                    if timestamp_dt.tzinfo is None:
+                        timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
+
+                    days_ago = (now - timestamp_dt).days
+                    if days_ago == 0:
+                        today_count += 1
+                    elif days_ago == 1:
+                        yesterday_count += 1
+                    if days_ago <= 7:
+                        week_count += 1
+                except:
+                    pass
+
+        # Compact summary with key info
+        time_info = ""
+        if today_count > 0:
+            time_info = f"{today_count} today"
+        elif yesterday_count > 0:
+            time_info = f"{yesterday_count} yesterday"
+        elif week_count > 0:
+            time_info = f"{week_count} this week"
+        else:
+            time_info = "older results"
+
+        score_info = "high" if results[0]['score'] >= 0.85 else "good" if results[0]['score'] >= 0.75 else "partial"
+
+        result_text += f'  <summary count="{len(results)}" relevance="{score_info}" recency="{time_info}" top-score="{results[0]["score"]:.3f}">\n'
+
+        # Short preview of top result
+        top_excerpt = results[0].get('excerpt', results[0].get('content', ''))[:100].strip()
+        if '...' not in top_excerpt:
+            top_excerpt += "..."
+        result_text += f'    <preview>{top_excerpt}</preview>\n'
+        result_text += f"  </summary>\n"
+    else:
+        result_text += f"  <result-summary>\n"
+        result_text += f"    <headline>No matches found</headline>\n"
+        result_text += f"    <relevance>No conversations matched your query</relevance>\n"
+        result_text += f"  </result-summary>\n"
+
+    # Add metadata
+    result_text += f"  <meta>\n"
+    result_text += f"    <q>{query}</q>\n"
+    result_text += f"    <scope>{target_project if target_project != 'all' else 'all'}</scope>\n"
+    result_text += f"    <count>{len(results)}</count>\n"
+    if results:
+        result_text += f"    <range>{results[-1]['score']:.3f}-{results[0]['score']:.3f}</range>\n"
+
+    # Add performance metadata
+    total_time = time.time() - start_time
+    result_text += f"    <perf>\n"
+    result_text += f"      <ttl>{int(total_time * 1000)}</ttl>\n"
+    result_text += f"      <emb>{int((timing_info.get('embedding_end', 0) - timing_info.get('embedding_start', 0)) * 1000)}</emb>\n"
+    result_text += f"      <srch>{int((timing_info.get('search_all_end', 0) - timing_info.get('search_all_start', 0)) * 1000)}</srch>\n"
+    result_text += f"      <cols>{collections_searched}</cols>\n"
+    result_text += f"    </perf>\n"
+    result_text += f"  </meta>\n"
+
+    # Add individual results
+    result_text += "  <results>\n"
+    for i, result in enumerate(results):
+        result_text += f'    <r rank="{i+1}">\n'
+        result_text += f"      <s>{result['score']:.3f}</s>\n"
+        result_text += f"      <p>{result.get('project_name', 'unknown')}</p>\n"
+
+        # Calculate relative time
+        timestamp_str = result.get('timestamp', '')
+        if timestamp_str:
+            try:
+                timestamp_clean = timestamp_str.replace('Z', '+00:00') if timestamp_str.endswith('Z') else timestamp_str
+                timestamp_dt = datetime.fromisoformat(timestamp_clean)
+                if timestamp_dt.tzinfo is None:
+                    timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
+                now = datetime.now(timezone.utc)
+                days_ago = (now - timestamp_dt).days
+                if days_ago == 0:
+                    time_str = "today"
+                elif days_ago == 1:
+                    time_str = "yesterday"
+                else:
+                    time_str = f"{days_ago}d"
+                result_text += f"      <t>{time_str}</t>\n"
+            except:
+                result_text += f"      <t>unknown</t>\n"
+
+        # Get excerpt/content
+        excerpt = result.get('excerpt', result.get('content', ''))
+
+        if not brief and excerpt:
+            # Extract title from first line of excerpt
+            excerpt_lines = excerpt.split('\n')
+            title = excerpt_lines[0][:80] + "..." if len(excerpt_lines[0]) > 80 else excerpt_lines[0]
+            result_text += f"      <title>{title}</title>\n"
+
+            # Key finding - summarize the main point
+            key_finding = excerpt[:100] + "..." if len(excerpt) > 100 else excerpt
+            result_text += f"      <key-finding>{key_finding.strip()}</key-finding>\n"
+
+        # Always include excerpt
+        if brief:
+            brief_excerpt = excerpt[:100] + "..." if len(excerpt) > 100 else excerpt
+            result_text += f"      <excerpt>{brief_excerpt.strip()}</excerpt>\n"
+        else:
+            result_text += f"      <excerpt><![CDATA[{excerpt}]]></excerpt>\n"
+
+        # Add conversation ID if present
+        if result.get('conversation_id'):
+            result_text += f"      <cid>{result['conversation_id']}</cid>\n"
+
+        # Include raw data if requested
+        if include_raw and result.get('raw_payload'):
+            result_text += "      <raw>\n"
+            payload = result['raw_payload']
+            result_text += f"        <txt><![CDATA[{payload.get('text', '')}]]></txt>\n"
+            result_text += f"        <id>{result.get('id', '')}</id>\n"
+            result_text += "      </raw>\n"
+
+        # Add metadata fields if present
+        if result.get('files_analyzed'):
+            result_text += f"      <files>{', '.join(result['files_analyzed'][:5])}</files>\n"
+        if result.get('tools_used'):
+            result_text += f"      <tools>{', '.join(result['tools_used'][:5])}</tools>\n"
+        if result.get('concepts'):
+            result_text += f"      <concepts>{', '.join(result['concepts'][:5])}</concepts>\n"
+
+        result_text += "    </r>\n"
+
+    result_text += "  </results>\n"
+    result_text += "</search>\n"
+
+    return result_text
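A quick usage sketch for the formatter. The call matches the signature above; the result dict keys ('score', 'timestamp', 'excerpt', 'project_name', 'conversation_id') are the ones the function reads, but the sample values and the import path are assumptions:

    import time
    from rich_formatting import format_search_results_rich   # path assumed

    # Hypothetical search hit shaped the way the formatter expects
    results = [{
        "score": 0.87,
        "project_name": "my-app",
        "timestamp": "2025-08-01T12:00:00Z",
        "excerpt": "Fixed the Qdrant timeout by raising the client's connection pool size.",
        "conversation_id": "abc123",
    }]

    start = time.time()
    print(format_search_results_rich(
        results,
        query="qdrant timeout",
        target_project="my-app",
        collections_searched=3,
        timing_info={},      # per-phase timings; missing keys fall back to 0ms
        start_time=start,
    ))

This prints the emoji summary lines followed by the <search> XML block, with the excerpt wrapped in CDATA since brief defaults to False.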