npm - claude-self-reflect - Versions diffs - 2.4.15 → 2.5.2 - Mend

claude-self-reflect 2.4.15 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/.claude/agents/claude-self-reflect-test.md +528 -0
package/.claude/agents/import-debugger.md +4 -1
package/.claude/agents/mcp-integration.md +4 -2
package/.claude/agents/qdrant-specialist.md +6 -3
package/Dockerfile.streaming-importer +21 -8
package/Dockerfile.watcher +6 -2
package/docker-compose.yaml +40 -5
package/installer/setup-wizard-docker.js +30 -5
package/mcp-server/pyproject.toml +1 -1
package/mcp-server/src/server.py +246 -7
package/mcp-server/src/utils.py +21 -2
package/package.json +1 -1
package/scripts/import-conversations-enhanced.py +672 -0
package/scripts/import-conversations-unified.py +15 -6
package/scripts/import-watcher.py +0 -88

package/docker-compose.yaml CHANGED Viewed

@@ -18,12 +18,13 @@ services:
       - "${QDRANT_PORT:-6333}:6333"
     volumes:
       - qdrant_data:/qdrant/storage
+      - ./config/qdrant-config.yaml:/qdrant/config/config.yaml:ro
     environment:
       - QDRANT__LOG_LEVEL=INFO
       - QDRANT__SERVICE__HTTP_PORT=6333
     restart: unless-stopped
-    mem_limit: ${QDRANT_MEMORY:-2g}
-    memswap_limit: ${QDRANT_MEMORY:-2g}
+    mem_limit: ${QDRANT_MEMORY:-4g}
+    memswap_limit: ${QDRANT_MEMORY:-4g}
   # One-time import service (runs once then exits)
   importer:
@@ -53,7 +54,7 @@ services:
     profiles: ["import"]
     command: python /scripts/import-conversations-unified.py
-  # Continuous watcher service (optional)
+  # Continuous watcher service (optional) - DEPRECATED, use streaming-importer
   watcher:
     build:
       context: .
@@ -73,10 +74,44 @@ services:
       - OPENAI_API_KEY=${OPENAI_API_KEY:-}
       - VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
       - VOYAGE_KEY=${VOYAGE_KEY:-}
-      - PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-false}
+      - PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-true}
       - EMBEDDING_MODEL=${EMBEDDING_MODEL:-voyage-3}
-      - WATCH_INTERVAL=${WATCH_INTERVAL:-60}
+      - WATCH_INTERVAL=${WATCH_INTERVAL:-5}
+      - MAX_MEMORY_MB=${MAX_MEMORY_MB:-250}
+      - CHUNK_SIZE=${CHUNK_SIZE:-5}
+      - PYTHONUNBUFFERED=1
+    restart: unless-stopped
+    profiles: ["watch-old"]
+    mem_limit: 500m
+    memswap_limit: 500m
+  # Streaming importer service - Low memory continuous import
+  streaming-importer:
+    build:
+      context: .
+      dockerfile: Dockerfile.streaming-importer
+    container_name: claude-reflection-streaming
+    depends_on:
+      - init-permissions
+      - qdrant
+    volumes:
+      - ${CLAUDE_LOGS_PATH:-~/.claude/projects}:/logs:ro
+      - ${CONFIG_PATH:-~/.claude-self-reflect/config}:/config
+      - ./scripts:/scripts:ro
+    environment:
+      - QDRANT_URL=http://qdrant:6333
+      - STATE_FILE=/config/imported-files.json
+      - VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
+      - VOYAGE_KEY=${VOYAGE_KEY:-}
+      - PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-true}
+      - WATCH_INTERVAL=${WATCH_INTERVAL:-5}  # Testing with 5 second interval
+      - MAX_MEMORY_MB=${MAX_MEMORY_MB:-350}  # Total memory including model
+      - OPERATIONAL_MEMORY_MB=${OPERATIONAL_MEMORY_MB:-100}  # Memory for operations (increased for large file handling)
+      - CHUNK_SIZE=${CHUNK_SIZE:-5}
       - PYTHONUNBUFFERED=1
+      - LOGS_DIR=/logs
+      - FASTEMBED_CACHE_PATH=/root/.cache/fastembed
+      - CURRENT_PROJECT_PATH=${PWD}  # Pass current project path for prioritization
     restart: unless-stopped
     profiles: ["watch"]
     mem_limit: 1g

package/installer/setup-wizard-docker.js CHANGED Viewed

@@ -340,12 +340,32 @@ function showManualConfig(mcpScript) {
 }
 async function importConversations() {
-  console.log('\n📚 Importing conversations...');
+  console.log('\n📚 Checking conversation baseline...');
-  const answer = await question('Would you like to import your existing Claude conversations? (y/n): ');
+  // Check if baseline exists by looking for imported files state
+  const stateFile = path.join(configDir, 'imported-files.json');
+  let hasBaseline = false;
+  try {
+    if (fs.existsSync(stateFile)) {
+      const state = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
+      hasBaseline = state.imported_files && Object.keys(state.imported_files).length > 0;
+    }
+  } catch (e) {
+    // State file doesn't exist or is invalid
+  }
+  if (!hasBaseline) {
+    console.log('\n⚠️  No baseline detected. Initial import STRONGLY recommended.');
+    console.log('   Without this, historical conversations won\'t be searchable.');
+    console.log('   The watcher only handles NEW conversations going forward.');
+  }
+  const answer = await question('\nImport existing Claude conversations? (y/n) [recommended: y]: ');
   if (answer.toLowerCase() === 'y') {
-    console.log('🔄 Starting import process...');
+    console.log('🔄 Starting baseline import...');
+    console.log('   This ensures ALL your conversations are searchable');
     console.log('   This may take a few minutes depending on your conversation history');
     try {
@@ -353,12 +373,17 @@ async function importConversations() {
         cwd: projectRoot,
         stdio: 'inherit'
       });
-      console.log('\n✅ Import completed!');
+      console.log('\n✅ Baseline import completed!');
+      console.log('   Historical conversations are now searchable');
     } catch {
       console.log('\n⚠️  Import had some issues, but you can continue');
     }
   } else {
-    console.log('📝 Skipping import. You can import later with:');
+    console.log('\n❌ WARNING: Skipping baseline import means:');
+    console.log('   • Historical conversations will NOT be searchable');
+    console.log('   • Only NEW conversations from now on will be indexed');
+    console.log('   • You may see "BASELINE_NEEDED" warnings in logs');
+    console.log('\n📝 You can run baseline import later with:');
     console.log('   docker compose run --rm importer');
   }
 }

package/mcp-server/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "claude-self-reflect-mcp"
-version = "2.4.11"
+version = "2.5.1"
 description = "MCP server for Claude self-reflection with memory decay"
 # readme = "README.md"
 requires-python = ">=3.10"

package/mcp-server/src/server.py CHANGED Viewed

@@ -108,9 +108,16 @@ async def get_all_collections() -> List[str]:
     return [c.name for c in collections.collections
             if c.name.endswith('_voyage') or c.name.endswith('_local') or c.name.startswith('reflections')]
-async def generate_embedding(text: str) -> List[float]:
-    """Generate embedding using configured provider."""
-    if PREFER_LOCAL_EMBEDDINGS or not voyage_client:
+async def generate_embedding(text: str, force_type: Optional[str] = None) -> List[float]:
+    """Generate embedding using configured provider or forced type.
+    Args:
+        text: Text to embed
+        force_type: Force specific embedding type ('local' or 'voyage')
+    """
+    use_local = force_type == 'local' if force_type else (PREFER_LOCAL_EMBEDDINGS or not voyage_client)
+    if use_local:
         # Use local embeddings
         if not local_embedding_model:
             raise ValueError("Local embedding model not initialized")
@@ -123,6 +130,8 @@ async def generate_embedding(text: str) -> List[float]:
         return embeddings[0].tolist()
     else:
         # Use Voyage AI
+        if not voyage_client:
+            raise ValueError("Voyage client not initialized")
         result = voyage_client.embed(
             texts=[text],
             model="voyage-3-large",
@@ -218,10 +227,10 @@ async def reflect_on_past(
     await ctx.debug(f"DECAY_WEIGHT: {DECAY_WEIGHT}, DECAY_SCALE_DAYS: {DECAY_SCALE_DAYS}")
     try:
-        # Generate embedding
-        timing_info['embedding_start'] = time.time()
-        query_embedding = await generate_embedding(query)
-        timing_info['embedding_end'] = time.time()
+        # We'll generate embeddings on-demand per collection type
+        timing_info['embedding_prep_start'] = time.time()
+        query_embeddings = {}  # Cache embeddings by type
+        timing_info['embedding_prep_end'] = time.time()
         # Get all collections
         timing_info['get_collections_start'] = time.time()
@@ -237,6 +246,7 @@ async def reflect_on_past(
             # Generate the collection name pattern for this project using normalized name
             normalized_name = normalize_project_name(target_project)
             project_hash = hashlib.md5(normalized_name.encode()).hexdigest()[:8]
+            # Search BOTH local and voyage collections for this project
             project_collections = [
                 c for c in all_collections
                 if c.startswith(f"conv_{project_hash}_")
@@ -276,6 +286,18 @@ async def reflect_on_past(
             )
             try:
+                # Determine embedding type for this collection
+                embedding_type_for_collection = 'voyage' if collection_name.endswith('_voyage') else 'local'
+                # Generate or retrieve cached embedding for this type
+                if embedding_type_for_collection not in query_embeddings:
+                    try:
+                        query_embeddings[embedding_type_for_collection] = await generate_embedding(query, force_type=embedding_type_for_collection)
+                    except Exception as e:
+                        await ctx.debug(f"Failed to generate {embedding_type_for_collection} embedding: {e}")
+                        continue
+                query_embedding = query_embeddings[embedding_type_for_collection]
                 if should_use_decay and USE_NATIVE_DECAY and NATIVE_DECAY_AVAILABLE:
                     # Use native Qdrant decay with newer API
                     await ctx.debug(f"Using NATIVE Qdrant decay (new API) for {collection_name}")
@@ -887,6 +909,223 @@ async def get_more_results(
     return response
+@mcp.tool()
+async def search_by_file(
+    ctx: Context,
+    file_path: str = Field(description="The file path to search for in conversations"),
+    limit: int = Field(default=10, description="Maximum number of results to return"),
+    project: Optional[str] = Field(default=None, description="Search specific project only. Use 'all' to search across all projects.")
+) -> str:
+    """Search for conversations that analyzed or edited a specific file.
+    Args:
+        ctx: MCP request context; used here for debug logging of skipped collections.
+        file_path: Path to look for. Backslashes become '/' and '/Users/' becomes '~/' before matching.
+        limit: Maximum number of results to render.
+        project: Project whose collections are searched; None or 'all' searches every collection.
+    Returns:
+        An XML-style string with one <result> per match (newest first), or an
+        <error>/<message> element when nothing can be searched or nothing matches.
+    """
+    global qdrant_client
+    # Normalize file path
+    normalized_path = file_path.replace("\\", "/").replace("/Users/", "~/")
+    # Determine which collections to search; one listing call covers every case
+    collections = await get_all_collections()
+    if project and project != 'all':
+        # Hash the normalized project name, as reflect_on_past does, so that full
+        # paths and Claude-logs style names resolve to the same collections
+        normalized_name = normalize_project_name(project)
+        project_hash = hashlib.md5(normalized_name.encode()).hexdigest()[:8]
+        collection_prefix = f"conv_{project_hash}_"
+        collections = [c for c in collections if c.startswith(collection_prefix)]
+    if not collections:
+        return "<search_by_file>\n<error>No collections found to search</error>\n</search_by_file>"
+    # Prepare results
+    all_results = []
+    for collection_name in collections:
+        try:
+            # Page through the whole collection and filter in code. A single
+            # scroll call returns at most `limit` points, so without following
+            # the returned offset any match past the first page is missed.
+            next_offset = None
+            while True:
+                points, next_offset = await qdrant_client.scroll(
+                    collection_name=collection_name,
+                    limit=1000,  # Page size
+                    offset=next_offset,
+                    with_payload=True
+                )
+                for point in points:
+                    # Payload or its list fields may be missing/None
+                    payload = point.payload or {}
+                    files_analyzed = payload.get('files_analyzed') or []
+                    files_edited = payload.get('files_edited') or []
+                    if normalized_path in files_analyzed or normalized_path in files_edited:
+                        all_results.append({
+                            'score': 1.0,  # File match is always 1.0
+                            'payload': payload,
+                            'collection': collection_name
+                        })
+                if next_offset is None:
+                    break
+        except Exception as e:
+            # Best effort: keep searching the other collections, but leave a trace
+            await ctx.debug(f"search_by_file: skipping collection {collection_name}: {e}")
+            continue
+    # Sort by timestamp (newest first); a missing/None timestamp sorts last
+    all_results.sort(key=lambda x: x['payload'].get('timestamp') or '', reverse=True)
+    # Format results
+    if not all_results:
+        return f"""<search_by_file>
+<query>{file_path}</query>
+<normalized_path>{normalized_path}</normalized_path>
+<message>No conversations found that analyzed this file</message>
+</search_by_file>"""
+    results_text = []
+    for rank, result in enumerate(all_results[:limit], start=1):
+        payload = result['payload']
+        timestamp = payload.get('timestamp', 'Unknown')
+        conversation_id = payload.get('conversation_id', 'Unknown')
+        # Separate name so the `project` parameter is not shadowed
+        result_project = payload.get('project', 'Unknown')
+        text = payload.get('text') or ''
+        text_preview = text[:200] + '...' if len(text) > 200 else text
+        # Check if file was edited or just read
+        action = "edited" if normalized_path in (payload.get('files_edited') or []) else "analyzed"
+        # Get related tools used
+        tool_summary = payload.get('tool_summary') or {}
+        tools_used = ', '.join(f"{tool}({count})" for tool, count in tool_summary.items())
+        results_text.append(f"""<result rank="{rank}">
+<conversation_id>{conversation_id}</conversation_id>
+<project>{result_project}</project>
+<timestamp>{timestamp}</timestamp>
+<action>{action}</action>
+<tools_used>{tools_used}</tools_used>
+<preview>{text_preview}</preview>
+</result>""")
+    return f"""<search_by_file>
+<query>{file_path}</query>
+<normalized_path>{normalized_path}</normalized_path>
+<count>{len(all_results)}</count>
+<results>
+{''.join(results_text)}
+</results>
+</search_by_file>"""
+@mcp.tool()
+async def search_by_concept(
+    ctx: Context,
+    concept: str = Field(description="The concept to search for (e.g., 'security', 'docker', 'testing')"),
+    include_files: bool = Field(default=True, description="Include file information in results"),
+    limit: int = Field(default=10, description="Maximum number of results to return"),
+    project: Optional[str] = Field(default=None, description="Search specific project only. Use 'all' to search across all projects.")
+) -> str:
+    """Search for conversations about a specific development concept.
+    Args:
+        ctx: MCP request context; used here for debug logging of skipped collections.
+        concept: Concept to search for; compared lowercased against each point's `concepts` payload.
+        include_files: When True, add up to five analyzed files to each result.
+        limit: Maximum number of results to return.
+        project: Project whose collections are searched; None or 'all' searches every collection.
+    Returns:
+        An XML-style string with one <result> per hit (highest score first), or an
+        <error>/<message> element when nothing can be searched or nothing matches.
+    """
+    global qdrant_client
+    concept_key = concept.lower()
+    # Determine which collections to search; one listing call covers every case
+    collections = await get_all_collections()
+    if project and project != 'all':
+        # Hash the normalized project name, as reflect_on_past does, so that full
+        # paths and Claude-logs style names resolve to the same collections
+        normalized_name = normalize_project_name(project)
+        project_hash = hashlib.md5(normalized_name.encode()).hexdigest()[:8]
+        collection_prefix = f"conv_{project_hash}_"
+        collections = [c for c in collections if c.startswith(collection_prefix)]
+    if not collections:
+        return "<search_by_concept>\n<error>No collections found to search</error>\n</search_by_concept>"
+    # Embeddings are generated on demand per collection type and cached, as in
+    # reflect_on_past: a voyage vector cannot be used against a local collection
+    # (and vice versa), so one embedding for all collections makes the
+    # mismatched ones fail.
+    concept_embeddings = {}
+    # Search all collections
+    all_results = []
+    for collection_name in collections:
+        embedding_type = 'voyage' if collection_name.endswith('_voyage') else 'local'
+        if embedding_type not in concept_embeddings:
+            try:
+                concept_embeddings[embedding_type] = await generate_embedding(concept, force_type=embedding_type)
+            except Exception as e:
+                await ctx.debug(f"Failed to generate {embedding_type} embedding: {e}")
+                continue
+        try:
+            # Semantic search restricted by a concept filter.
+            # NOTE(review): a filter holding only `should` conditions appears to
+            # require at least one of them to match, which would make this a hard
+            # filter on `concepts` and the boost below uniform - confirm intent.
+            results = await qdrant_client.search(
+                collection_name=collection_name,
+                query_vector=concept_embeddings[embedding_type],
+                query_filter=models.Filter(
+                    should=[
+                        models.FieldCondition(
+                            key="concepts",
+                            match=models.MatchAny(any=[concept_key])
+                        )
+                    ]
+                ),
+                limit=limit * 2,  # Get more results for better filtering
+                with_payload=True
+            )
+            for point in results:
+                # Payload or its list fields may be missing/None
+                payload = point.payload or {}
+                # Boost score if concept is in the concepts list
+                score_boost = 0.2 if concept_key in (payload.get('concepts') or []) else 0.0
+                all_results.append({
+                    'score': float(point.score) + score_boost,
+                    'payload': payload,
+                    'collection': collection_name
+                })
+        except Exception as e:
+            # Best effort: keep searching the other collections, but leave a trace
+            await ctx.debug(f"search_by_concept: skipping collection {collection_name}: {e}")
+            continue
+    # Sort by score and limit
+    all_results.sort(key=lambda x: x['score'], reverse=True)
+    all_results = all_results[:limit]
+    # Format results
+    if not all_results:
+        return f"""<search_by_concept>
+<concept>{concept}</concept>
+<message>No conversations found about this concept</message>
+</search_by_concept>"""
+    results_text = []
+    for rank, result in enumerate(all_results, start=1):
+        payload = result['payload']
+        score = result['score']
+        timestamp = payload.get('timestamp', 'Unknown')
+        conversation_id = payload.get('conversation_id', 'Unknown')
+        # Separate name so the `project` parameter is not shadowed
+        result_project = payload.get('project', 'Unknown')
+        concepts = payload.get('concepts') or []
+        # Get text preview
+        text = payload.get('text') or ''
+        text_preview = text[:200] + '...' if len(text) > 200 else text
+        # File information
+        files_info = ""
+        if include_files:
+            files_analyzed = (payload.get('files_analyzed') or [])[:5]
+            if files_analyzed:
+                files_info = f"\n<files_analyzed>{', '.join(files_analyzed)}</files_analyzed>"
+        # Related concepts
+        related_concepts = [c for c in concepts if c != concept_key][:5]
+        results_text.append(f"""<result rank="{rank}">
+<score>{score:.3f}</score>
+<conversation_id>{conversation_id}</conversation_id>
+<project>{result_project}</project>
+<timestamp>{timestamp}</timestamp>
+<concepts>{', '.join(concepts)}</concepts>
+<related_concepts>{', '.join(related_concepts)}</related_concepts>{files_info}
+<preview>{text_preview}</preview>
+</result>""")
+    return f"""<search_by_concept>
+<concept>{concept}</concept>
+<count>{len(all_results)}</count>
+<results>
+{''.join(results_text)}
+</results>
+</search_by_concept>"""
 # Debug output
 print(f"[DEBUG] FastMCP server created with name: {mcp.name}")

package/mcp-server/src/utils.py CHANGED Viewed

@@ -9,6 +9,8 @@ def normalize_project_name(project_path: str) -> str:
     Handles various path formats:
     - Claude logs format: -Users-kyle-Code-claude-self-reflect -> claude-self-reflect
+    - File paths in Claude logs: /path/to/-Users-kyle-Code-claude-self-reflect/file.jsonl -> claude-self-reflect
+    - Regular file paths: /path/to/project/file.txt -> project
     - Regular paths: /path/to/project -> project
     - Already normalized: project -> project
@@ -49,5 +51,22 @@ def normalize_project_name(project_path: str) -> str:
         # Fallback: just use the last component
         return path_parts[-1] if path_parts else project_path
-    # Handle regular paths - use basename
-    return Path(project_path).name
+    # Check if this is a file path that contains a Claude logs directory
+    # Pattern: /path/to/-Users-...-projects-..../filename
+    path_obj = Path(project_path)
+    # Look for a parent directory that starts with dash (Claude logs format)
+    for parent in path_obj.parents:
+        parent_name = parent.name
+        if parent_name.startswith("-"):
+            # Found a Claude logs directory, process it
+            return normalize_project_name(parent_name)
+    # Handle regular paths - if it's a file, get the parent directory
+    # Otherwise use the directory/project name itself
+    if path_obj.suffix:  # It's a file (has an extension)
+        # Use the parent directory name
+        return path_obj.parent.name
+    else:
+        # Use the directory name itself
+        return path_obj.name

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-self-reflect",
-  "version": "2.4.15",
+  "version": "2.5.2",
   "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
   "keywords": [
     "claude",