npm - claude-self-reflect - Versions diffs - 3.3.0 → 4.0.0 - Mend

claude-self-reflect 3.3.0 → 4.0.0

This diff compares the contents of two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (29) hide show

package/.claude/agents/claude-self-reflect-test.md +525 -11
package/.claude/agents/quality-fixer.md +314 -0
package/.claude/agents/reflection-specialist.md +40 -1
package/installer/cli.js +16 -0
package/installer/postinstall.js +14 -0
package/installer/statusline-setup.js +289 -0
package/mcp-server/run-mcp.sh +45 -7
package/mcp-server/src/code_reload_tool.py +271 -0
package/mcp-server/src/embedding_manager.py +60 -26
package/mcp-server/src/enhanced_tool_registry.py +407 -0
package/mcp-server/src/mode_switch_tool.py +181 -0
package/mcp-server/src/parallel_search.py +24 -85
package/mcp-server/src/project_resolver.py +20 -2
package/mcp-server/src/reflection_tools.py +60 -13
package/mcp-server/src/rich_formatting.py +103 -0
package/mcp-server/src/search_tools.py +180 -79
package/mcp-server/src/security_patches.py +555 -0
package/mcp-server/src/server.py +318 -240
package/mcp-server/src/status.py +13 -8
package/mcp-server/src/temporal_tools.py +10 -3
package/mcp-server/src/test_quality.py +153 -0
package/package.json +6 -1
package/scripts/ast_grep_final_analyzer.py +328 -0
package/scripts/ast_grep_unified_registry.py +710 -0
package/scripts/csr-status +511 -0
package/scripts/import-conversations-unified.py +114 -28
package/scripts/session_quality_tracker.py +661 -0
package/scripts/streaming-watcher.py +140 -5
package/scripts/update_patterns.py +334 -0

package/mcp-server/src/search_tools.py CHANGED Viewed

@@ -4,6 +4,7 @@ import os
 import json
 import logging
 import time
+import html
 from typing import Optional, List, Dict, Any
 from datetime import datetime, timezone
 from pathlib import Path
@@ -82,10 +83,20 @@ class SearchTools:
             # Generate embedding for query
             embedding_manager = self.get_embedding_manager()
-            # Determine embedding type based on collection name
-            embedding_type = 'voyage' if collection_name.endswith('_voyage') else 'local'
+            # Determine embedding type based on collection name (v3 and v4 compatible)
+            # v4 format: csr_project_mode_dims (e.g., csr_project_cloud_1024d)
+            # v3 format: project_suffix (e.g., project_voyage)
+            if '_cloud_' in collection_name or collection_name.endswith('_1024d') or collection_name.endswith('_voyage'):
+                embedding_type = 'voyage'
+            else:
+                embedding_type = 'local'
             query_embedding = await embedding_manager.generate_embedding(query, force_type=embedding_type)
+            # FIX: Validate embedding before search
+            if query_embedding is None:
+                logger.warning(f"Embedding generation failed for query in {collection_name}")
+                return []
             # Search the collection
             search_results = await self.qdrant_client.search(
                 collection_name=collection_name,
@@ -131,9 +142,9 @@ class SearchTools:
                     # Apply exponential decay
                     decay_factor = pow(2, -age / self.decay_scale_days)
-                    # Adjust score
+                    # Adjust score - FIX: Maintain comparable scale
                     original_score = result['score']
-                    result['score'] = original_score * (1 - self.decay_weight) + decay_factor * self.decay_weight
+                    result['score'] = original_score * ((1 - self.decay_weight) + self.decay_weight * decay_factor)
                     result['original_score'] = original_score
                     result['decay_factor'] = decay_factor
@@ -168,20 +179,23 @@ class SearchTools:
                 if include_raw:
                     output += f"**Raw Payload:**\n```json\n{json.dumps(result.get('payload', {}), indent=2)}\n```\n\n"
         else:
-            # XML format (default)
-            output = f"<search_results>\n<query>{query}</query>\n<count>{len(results)}</count>\n"
+            # XML format (default) with proper escaping
+            def _esc(x): return html.escape(str(x), quote=False)
+            output = f"<search_results>\n<query>{_esc(query)}</query>\n<count>{len(results)}</count>\n"
             for i, result in enumerate(results, 1):
                 output += f"<result index=\"{i}\">\n"
                 output += f"  <score>{result['score']:.3f}</score>\n"
-                output += f"  <timestamp>{result.get('timestamp', 'N/A')}</timestamp>\n"
-                output += f"  <conversation_id>{result.get('conversation_id', 'N/A')}</conversation_id>\n"
+                output += f"  <timestamp>{_esc(result.get('timestamp', 'N/A'))}</timestamp>\n"
+                output += f"  <conversation_id>{_esc(result.get('conversation_id', 'N/A'))}</conversation_id>\n"
                 if not brief:
                     # Handle both 'content' and 'excerpt' fields
-                    content = result.get('content', result.get('excerpt', ''))
+                    content = result.get('content', result.get('excerpt', result.get('text', '')))
                     truncated = content[:500] + ('...' if len(content) > 500 else '')
                     output += f"  <content><![CDATA[{truncated}]]></content>\n"
                 if include_raw:
-                    output += f"  <raw_payload>{json.dumps(result.get('payload', {}))}</raw_payload>\n"
+                    # Use CDATA for large JSON payloads
+                    output += f"  <raw_payload><![CDATA[{json.dumps(result.get('payload', {}), ensure_ascii=False)}]]></raw_payload>\n"
                 output += "</result>\n"
             output += "</search_results>"
@@ -238,12 +252,14 @@ class SearchTools:
                 ]
                 await ctx.debug(f"Filtered to {len(filtered_collections)} collections from {len(all_collections)} total")
             else:
-                # Use all collections except reflections
+                # Use all collections INCLUDING reflections (with decay)
                 collections_response = await self.qdrant_client.get_collections()
                 collections = collections_response.collections
+                # Include both conversation collections and reflection collections
                 filtered_collections = [
-                    c for c in collections
-                    if not c.name.startswith('reflections')
+                    c for c in collections
+                    if (c.name.endswith('_local') or c.name.endswith('_voyage') or
+                        c.name.startswith('reflections'))
                 ]
                 await ctx.debug(f"Searching across {len(filtered_collections)} collections")
@@ -358,12 +374,14 @@ class SearchTools:
                     if c.name in collection_names
                 ]
             else:
-                # Use all collections except reflections
+                # Use all collections INCLUDING reflections (with decay)
                 collections_response = await self.qdrant_client.get_collections()
                 collections = collections_response.collections
+                # Include both conversation collections and reflection collections
                 filtered_collections = [
-                    c for c in collections
-                    if not c.name.startswith('reflections')
+                    c for c in collections
+                    if (c.name.endswith('_local') or c.name.endswith('_voyage') or
+                        c.name.startswith('reflections'))
                 ]
             # Quick PARALLEL count across collections
@@ -394,22 +412,29 @@ class SearchTools:
             top_result = max(all_results, key=lambda x: x.get('score', 0)) if all_results else None
             top_score = top_result.get('score', 0) if top_result else 0
-            # Format quick search response
+            # Format quick search response with proper XML escaping
+            def _esc(x): return html.escape(str(x), quote=False)
             if not top_result:
                 return "<quick_search><count>0</count><message>No matches found</message></quick_search>"
+            # Get preview text and ensure we have content fallbacks
+            preview_text = top_result.get('excerpt', top_result.get('content', top_result.get('text', '')))[:200]
             return f"""<quick_search>
-<count>{collections_with_matches} collections with matches</count>
+<count>{collections_with_matches}</count>
+<collections_with_matches>{collections_with_matches}</collections_with_matches>
 <top_result>
   <score>{top_result['score']:.3f}</score>
-  <timestamp>{top_result.get('timestamp', 'N/A')}</timestamp>
-  <preview>{top_result.get('excerpt', top_result.get('content', ''))[:200]}...</preview>
+  <timestamp>{_esc(top_result.get('timestamp', 'N/A'))}</timestamp>
+  <preview><![CDATA[{preview_text}...]]></preview>
 </top_result>
 </quick_search>"""
         except Exception as e:
             logger.error(f"Quick search failed: {e}", exc_info=True)
-            return f"<quick_search><error>Quick search failed: {str(e)}</error></quick_search>"
+            def _esc(x): return html.escape(str(x), quote=False)
+            return f"<quick_search><error>Quick search failed: {_esc(str(e))}</error></quick_search>"
     async def search_summary(
         self,
@@ -439,12 +464,14 @@ class SearchTools:
                     if c.name in collection_names
                 ]
             else:
-                # Use all collections except reflections
+                # Use all collections INCLUDING reflections (with decay)
                 collections_response = await self.qdrant_client.get_collections()
                 collections = collections_response.collections
+                # Include both conversation collections and reflection collections
                 filtered_collections = [
-                    c for c in collections
-                    if not c.name.startswith('reflections')
+                    c for c in collections
+                    if (c.name.endswith('_local') or c.name.endswith('_voyage') or
+                        c.name.startswith('reflections'))
                 ]
             # Gather results for summary using PARALLEL search
@@ -534,12 +561,14 @@ class SearchTools:
                     if c.name in collection_names
                 ]
             else:
-                # Use all collections except reflections
+                # Use all collections INCLUDING reflections (with decay)
                 collections_response = await self.qdrant_client.get_collections()
                 collections = collections_response.collections
+                # Include both conversation collections and reflection collections
                 filtered_collections = [
-                    c for c in collections
-                    if not c.name.startswith('reflections')
+                    c for c in collections
+                    if (c.name.endswith('_local') or c.name.endswith('_voyage') or
+                        c.name.startswith('reflections'))
                 ]
             # Gather all results using PARALLEL search
@@ -606,61 +635,102 @@ class SearchTools:
         project: Optional[str] = None
     ) -> str:
         """Search for conversations that analyzed a specific file."""
         await ctx.debug(f"Searching for file: {file_path}, project={project}")
         try:
-            # Normalize file path
-            normalized_path = str(Path(file_path).resolve())
+            # Create multiple path variants to match how paths are stored
+            # Import uses normalize_file_path which replaces /Users/ with ~/
+            path_variants = set()
+            # Original path
+            path_variants.add(file_path)
+            # Basename only
+            path_variants.add(os.path.basename(file_path))
+            # Try to resolve if it's a valid path
+            try:
+                resolved_path = str(Path(file_path).resolve())
+                path_variants.add(resolved_path)
+                # Convert resolved path to ~/ format (matching how import stores it)
+                home_dir = str(Path.home())
+                if resolved_path.startswith(home_dir):
+                    tilde_path = resolved_path.replace(home_dir, '~', 1)
+                    path_variants.add(tilde_path)
+                # Also try with /Users/ replaced by ~/
+                if '/Users/' in resolved_path:
+                    path_variants.add(resolved_path.replace('/Users/', '~/', 1))
+            except:
+                pass
+            # If path starts with ~, also try expanded version
+            if file_path.startswith('~'):
+                expanded = os.path.expanduser(file_path)
+                path_variants.add(expanded)
+            # Convert all to forward slashes for consistency
+            path_variants = {p.replace('\\', '/') for p in path_variants if p}
+            await ctx.debug(f"Searching with path variants: {list(path_variants)}")
             # Search for file mentions in metadata
             collections_response = await self.qdrant_client.get_collections()
             collections = collections_response.collections
-            # Define async function to search a single collection
+            # Define async function to search a single collection using scroll
             async def search_collection(collection_name: str):
                 try:
-                    # Search by payload filter
-                    search_results = await self.qdrant_client.search(
+                    from qdrant_client import models
+                    # Use scroll with proper filter for metadata-only search
+                    results, _ = await self.qdrant_client.scroll(
                         collection_name=collection_name,
-                        query_vector=[0] * 384,  # Dummy vector for metadata search
-                        limit=limit,
-                        query_filter={
-                            "must": [
-                                {
-                                    "key": "files_analyzed",
-                                    "match": {"any": [normalized_path]}
-                                }
+                        scroll_filter=models.Filter(
+                            should=[
+                                models.FieldCondition(
+                                    key="files_analyzed",
+                                    match=models.MatchValue(value=path_variant)
+                                )
+                                for path_variant in path_variants
                             ]
-                        }
+                        ),
+                        limit=limit,
+                        with_payload=True
                     )
-                    results = []
-                    for result in search_results:
-                        results.append({
-                            'conversation_id': result.payload.get('conversation_id'),
-                            'timestamp': result.payload.get('timestamp'),
-                            'content': result.payload.get('content', ''),
-                            'files_analyzed': result.payload.get('files_analyzed', []),
-                            'score': result.score
+                    formatted_results = []
+                    for point in results:
+                        formatted_results.append({
+                            'conversation_id': point.payload.get('conversation_id'),
+                            'timestamp': point.payload.get('timestamp'),
+                            'content': point.payload.get('content', point.payload.get('text', '')),
+                            'files_analyzed': point.payload.get('files_analyzed', []),
+                            'score': 1.0  # No score in scroll, use 1.0 for found items
                         })
-                    return results
+                    return formatted_results
                 except Exception as e:
                     await ctx.debug(f"Error searching {collection_name}: {e}")
                     return []
-            # Use asyncio.gather for PARALLEL search across all collections
+            # SECURITY FIX: Use proper concurrency limiting
             import asyncio
+            from .security_patches import ConcurrencyLimiter
             search_tasks = [search_collection(c.name) for c in collections]
-            # Limit concurrent searches to avoid overload
-            batch_size = 20
+            # Use semaphore-based limiting instead of batching
             all_results = []
-            for i in range(0, len(search_tasks), batch_size):
-                batch = search_tasks[i:i+batch_size]
-                batch_results = await asyncio.gather(*batch)
-                for results in batch_results:
+            batch_results = await ConcurrencyLimiter.limited_gather(search_tasks, limit=10)
+            for results in batch_results:
+                if isinstance(results, Exception):
+                    logger.error(f"Search task failed: {type(results).__name__}: {results}")
+                    await ctx.debug(f"Search task error: {results}")
+                    continue
+                if results:
                     all_results.extend(results)
             # Format results
@@ -743,7 +813,7 @@ def register_search_tools(
         project_resolver  # Pass the resolver
     )
-    @mcp.tool()
+    @mcp.tool(name="csr_reflect_on_past")
     async def reflect_on_past(
         ctx: Context,
         query: str = Field(description="The search query to find semantically similar conversations"),
@@ -756,29 +826,45 @@ def register_search_tools(
         include_raw: bool = Field(default=False, description="Include raw Qdrant payload data for debugging (increases response size)"),
         response_format: str = Field(default="xml", description="Response format: 'xml' or 'markdown'")
     ) -> str:
-        """Search for relevant past conversations using semantic search with optional time decay."""
+        """Search past Claude conversations semantically to find relevant context.
+        WHEN TO USE: User asks 'what did we discuss about X?', 'find conversations about Y',
+        mentions 'remember when' or 'last time', debugging issues that may have been solved before,
+        or finding implementation patterns used in the project.
+        This is the PRIMARY tool for conversation memory - use it liberally!"""
         return await tools.reflect_on_past(ctx, query, limit, min_score, use_decay, project, mode, brief, include_raw, response_format)
-    @mcp.tool()
+    @mcp.tool(name="csr_quick_check")
     async def quick_search(
         ctx: Context,
         query: str = Field(description="The search query to find semantically similar conversations"),
         min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
         project: Optional[str] = Field(default=None, description="Search specific project only. If not provided, searches current project based on working directory. Use 'all' to search across all projects.")
     ) -> str:
-        """Quick search that returns only the count and top result for fast overview."""
+        """Quick check if a topic was discussed before (returns count + top match only).
+        WHEN TO USE: User asks 'have we discussed X?' or 'is there anything about Y?',
+        need a yes/no answer about topic existence, checking if a problem was encountered before.
+        Much faster than full search - use for existence checks!"""
         return await tools.quick_search(ctx, query, min_score, project)
-    @mcp.tool()
+    @mcp.tool(name="csr_search_insights")
     async def search_summary(
         ctx: Context,
         query: str = Field(description="The search query to find semantically similar conversations"),
         project: Optional[str] = Field(default=None, description="Search specific project only. If not provided, searches current project based on working directory. Use 'all' to search across all projects.")
     ) -> str:
-        """Get aggregated insights from search results without individual result details."""
+        """Get aggregated insights and patterns from search results.
+        WHEN TO USE: User wants patterns or trends, analyzing topic evolution,
+        understanding common themes, getting high-level view without details.
+        Provides analysis, not just search results!"""
         return await tools.search_summary(ctx, query, project)
-    @mcp.tool()
+    @mcp.tool(name="csr_get_more")
     async def get_more_results(
         ctx: Context,
         query: str = Field(description="The original search query"),
@@ -787,20 +873,30 @@ def register_search_tools(
         min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
         project: Optional[str] = Field(default=None, description="Search specific project only")
     ) -> str:
-        """Get additional search results after an initial search (pagination support)."""
+        """Get additional search results for paginated exploration.
+        WHEN TO USE: User says 'show me more' after a search, initial results weren't sufficient,
+        deep diving into a topic, user wants comprehensive coverage.
+        Use after initial search when more context is needed!"""
         return await tools.get_more_results(ctx, query, offset, limit, min_score, project)
-    @mcp.tool()
+    @mcp.tool(name="csr_search_by_file")
     async def search_by_file(
         ctx: Context,
         file_path: str = Field(description="The file path to search for in conversations"),
         limit: int = Field(default=10, description="Maximum number of results to return"),
         project: Optional[str] = Field(default=None, description="Search specific project only. Use 'all' to search across all projects.")
     ) -> str:
-        """Search for conversations that analyzed a specific file."""
+        """Find all conversations that analyzed or modified a specific file.
+        WHEN TO USE: User asks 'when did we modify X file?', investigating file history,
+        understanding why changes were made, finding discussions about specific code files.
+        Perfect for code archaeology and understanding file evolution!"""
         return await tools.search_by_file(ctx, file_path, limit, project)
-    @mcp.tool()
+    @mcp.tool(name="csr_search_by_concept")
     async def search_by_concept(
         ctx: Context,
         concept: str = Field(description="The concept to search for (e.g., 'security', 'docker', 'testing')"),
@@ -808,7 +904,12 @@ def register_search_tools(
         project: Optional[str] = Field(default=None, description="Search specific project only. Use 'all' to search across all projects."),
         include_files: bool = Field(default=True, description="Include file information in results")
     ) -> str:
-        """Search for conversations about a specific development concept."""
+        """Search for conversations about specific development concepts or themes.
+        WHEN TO USE: User asks about broad topics like 'security', 'testing', 'performance',
+        looking for all discussions on a technical theme, gathering knowledge about a concept.
+        Ideal for thematic analysis and knowledge gathering!"""
         return await tools.search_by_concept(ctx, concept, limit, project, include_files)
     @mcp.tool()