claude-self-reflect 7.1.10 → 7.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -78
- package/docs/design/GRADER_PROMPT.md +81 -0
- package/docs/design/batch_ground_truth_generator.py +496 -0
- package/docs/design/batch_import_all_projects.py +477 -0
- package/docs/design/batch_import_v3.py +278 -0
- package/docs/design/conversation-analyzer/SKILL.md +133 -0
- package/docs/design/conversation-analyzer/SKILL_V2.md +218 -0
- package/docs/design/conversation-analyzer/extract_structured.py +186 -0
- package/docs/design/extract_events_v3.py +533 -0
- package/docs/design/import_existing_batch.py +188 -0
- package/docs/design/recover_all_batches.py +297 -0
- package/docs/design/recover_batch_results.py +287 -0
- package/package.json +4 -1
package/docs/design/batch_import_v3.py

@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+"""Batch import conversations with V3+SKILL_V2 to Qdrant for comparison testing."""
+
+import os
+import sys
+import json
+from pathlib import Path
+from dotenv import load_dotenv
+import time
+
+load_dotenv()
+
+# Add parent dirs to path
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+try:
+    import anthropic
+except ImportError:
+    print("Error: anthropic SDK not found")
+    sys.exit(1)
+
+from docs.design.extract_events_v3 import extract_events_v3
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams, PointStruct
+
+# Try importing FastEmbed
+try:
+    from fastembed import TextEmbedding
+    FASTEMBED_AVAILABLE = True
+except ImportError:
+    FASTEMBED_AVAILABLE = False
+    print("⚠️ FastEmbed not available, will use Voyage AI")
+
+
+def get_embedding(text: str, embedding_model) -> list:
+    """Generate embedding for text."""
+    if FASTEMBED_AVAILABLE and embedding_model:
+        embeddings = list(embedding_model.embed([text]))
+        return embeddings[0].tolist()
+    else:
+        # Fallback to Voyage
+        import voyageai
+        vo = voyageai.Client(api_key=os.getenv('VOYAGE_KEY'))
+        result = vo.embed([text], model="voyage-3", input_type="document")
+        return result.embeddings[0]
+
+
+def process_conversation(jsonl_path: Path, client: anthropic.Anthropic, skill_instructions: str,
+                         qdrant_client: QdrantClient, collection_name: str, embedding_model):
+    """Process single conversation with V3+SKILL_V2 and import to Qdrant."""
+
+    conv_id = jsonl_path.stem
+    print(f"\n{'='*80}")
+    print(f"Processing: {conv_id}")
+    print(f"File: {jsonl_path.name}")
+    print(f"{'='*80}")
+
+    # Read messages
+    messages = []
+    with open(jsonl_path) as f:
+        for line in f:
+            if line.strip():
+                messages.append(json.loads(line))
+
+    print(f"Original messages: {len(messages)}")
+
+    # V3 extraction
+    print("\n🔄 Step 1: V3 Extraction...")
+    result = extract_events_v3(messages)
+
+    print(f" Search index: {result['stats']['search_index_tokens']} tokens")
+    print(f" Context cache: {result['stats']['context_cache_tokens']} tokens")
+    print(f" Total: {result['stats']['total_tokens']} tokens")
+    print(f" Signature: {json.dumps(result['signature'], indent=2)}")
+
+    # Generate narrative with Skill
+    print("\n🔄 Step 2: Generating narrative with SKILL_V2...")
+
+    prompt = f"""You are analyzing a development conversation. Use the SKILL_V2 guidelines to generate a search-optimized narrative.
+
+## Extracted Events
+
+### Search Index
+{result['search_index']}
+
+### Context Cache
+{result['context_cache']}
+
+### Conversation Signature
+```json
+{json.dumps(result['signature'], indent=2)}
+```
+
+Now generate the narrative following SKILL_V2 format exactly."""
+
+    response = client.messages.create(
+        model="claude-sonnet-4-5-20250929",
+        max_tokens=2048,
+        system=skill_instructions,
+        messages=[{"role": "user", "content": prompt}]
+    )
+
+    # Extract narrative
+    narrative = ""
+    for block in response.content:
+        if hasattr(block, 'text'):
+            narrative += block.text
+
+    # Calculate cost
+    input_tokens = response.usage.input_tokens
+    output_tokens = response.usage.output_tokens
+    cost = (input_tokens * 3 + output_tokens * 15) / 1_000_000
+
+    print(f" Tokens: {input_tokens} input, {output_tokens} output")
+    print(f" Cost: ${cost:.6f}")
+
+    # Generate embedding for the narrative
+    print("\n🔄 Step 3: Generating embedding...")
+    embedding = get_embedding(narrative, embedding_model)
+    print(f" Embedding dimensions: {len(embedding)}")
+
+    # Import to Qdrant
+    print("\n🔄 Step 4: Importing to Qdrant...")
+
+    point = PointStruct(
+        id=conv_id,
+        vector=embedding,
+        payload={
+            "conversation_id": conv_id,
+            "project": "claude-self-reflect",
+            "narrative": narrative,
+            "search_index": result['search_index'],
+            "context_cache": result['context_cache'],
+            "signature": result['signature'],
+            "timestamp": time.time(),
+            "extraction_stats": result['stats']
+        }
+    )
+
+    qdrant_client.upsert(
+        collection_name=collection_name,
+        points=[point]
+    )
+
+    print(f" ✅ Imported to collection: {collection_name}")
+
+    return {
+        'conversation_id': conv_id,
+        'narrative': narrative,
+        'stats': result['stats'],
+        'cost': cost,
+        'tokens': {'input': input_tokens, 'output': output_tokens}
+    }
+
+
+def main():
+    """Main batch import process."""
+
+    # Setup - use claude-self-reflect project
+    project_dir = Path.home() / ".claude/projects/-Users-username-projects-claude-self-reflect"
+    skill_v2_path = Path(__file__).parent / "conversation-analyzer" / "SKILL_V2.md"
+
+    if not project_dir.exists():
+        print(f"❌ Project directory not found: {project_dir}")
+        sys.exit(1)
+
+    if not skill_v2_path.exists():
+        print(f"❌ SKILL_V2.md not found: {skill_v2_path}")
+        sys.exit(1)
+
+    # Find all conversations
+    conversations = list(project_dir.glob("*.jsonl"))
+    print(f"\n📊 Found {len(conversations)} conversations in claude-self-reflect")
+
+    # Budget check
+    estimated_cost = len(conversations) * 0.016  # Conservative estimate
+    print(f"💰 Estimated cost: ${estimated_cost:.2f} (budget: $5.00)")
+
+    if estimated_cost > 5.0:
+        print(f"⚠️ Estimated cost exceeds budget!")
+        limit = int(5.0 / 0.016)
+        print(f" Limiting to first {limit} conversations")
+        conversations = conversations[:limit]
+
+    # Initialize clients
+    print("\n🔧 Initializing clients...")
+
+    # Validate API key
+    api_key = os.getenv("ANTHROPIC_API_KEY")
+    if not api_key:
+        raise ValueError(
+            "ANTHROPIC_API_KEY environment variable required. "
+            "Set it in your .env file or export it in your shell."
+        )
+
+    anthropic_client = anthropic.Anthropic(api_key=api_key)
+    qdrant_client = QdrantClient(url=os.getenv("QDRANT_URL", "http://localhost:6333"))
+
+    # Load Skill instructions
+    with open(skill_v2_path) as f:
+        skill_instructions = f.read()
+
+    # Initialize embedding model
+    embedding_model = None
+    vector_size = 384  # Default for FastEmbed
+
+    if FASTEMBED_AVAILABLE:
+        print(" Using FastEmbed (384 dimensions)")
+        embedding_model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
+        vector_size = 384
+    else:
+        print(" Using Voyage AI (1024 dimensions)")
+        vector_size = 1024
+
+    # Create test collection
+    collection_name = "v3_test_csr"
+    print(f"\n🔧 Creating test collection: {collection_name}")
+
+    try:
+        qdrant_client.delete_collection(collection_name)
+        print(f" Deleted existing collection")
+    except:
+        pass
+
+    qdrant_client.create_collection(
+        collection_name=collection_name,
+        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE)
+    )
+    print(f" ✅ Created collection with {vector_size} dimensions")
+
+    # Process all conversations
+    results = []
+    total_cost = 0.0
+
+    for i, conv_path in enumerate(conversations, 1):
+        print(f"\n\n{'='*80}")
+        print(f"CONVERSATION {i}/{len(conversations)}")
+        print(f"{'='*80}")
+
+        try:
+            result = process_conversation(
+                conv_path,
+                anthropic_client,
+                skill_instructions,
+                qdrant_client,
+                collection_name,
+                embedding_model
+            )
+            results.append(result)
+            total_cost += result['cost']
+
+            print(f"\n✅ Success!")
+            print(f" Running cost: ${total_cost:.4f}")
+
+        except Exception as e:
+            print(f"\n❌ Error processing {conv_path.name}: {e}")
+            import traceback
+            traceback.print_exc()
+
+    # Summary
+    print(f"\n\n{'='*80}")
+    print(f"BATCH IMPORT SUMMARY")
+    print(f"{'='*80}")
+    print(f"Total conversations processed: {len(results)}/{len(conversations)}")
+    print(f"Total cost: ${total_cost:.4f}")
+    print(f"Average cost per conversation: ${total_cost/len(results):.4f}")
+    print(f"Collection: {collection_name}")
+    print(f"\n🎯 Ready for comparison testing!")
+    print(f"\nNext steps:")
+    print(f"1. Search old collection: reflect_on_past(query, project='procsolve-website')")
+    print(f"2. Search new collection: qdrant_client.search(collection_name='{collection_name}')")
+    print(f"3. Compare results side-by-side")
+
+    return results
+
+
+if __name__ == "__main__":
+    results = main()
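The script's closing "Next steps" point at comparing search quality between the existing per-project collection and the new `v3_test_csr` collection, but the comparison query itself is not part of this diff. Below is a minimal, hypothetical sketch of such a query, assuming the same FastEmbed model and local Qdrant URL the script uses; the function name and query text are illustrative only.

```python
# Hypothetical comparison query against the v3_test_csr collection created above.
# Assumes the same FastEmbed model (all-MiniLM-L6-v2, 384 dims) and a local Qdrant
# at localhost:6333; not part of the package.
from fastembed import TextEmbedding
from qdrant_client import QdrantClient


def search_v3_collection(query: str, limit: int = 5) -> None:
    model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
    query_vector = list(model.embed([query]))[0].tolist()
    client = QdrantClient(url="http://localhost:6333")
    hits = client.search(
        collection_name="v3_test_csr",
        query_vector=query_vector,
        limit=limit,
    )
    for hit in hits:
        # Each payload carries the generated narrative plus the raw V3 extraction fields.
        print(f"{hit.score:.3f}  {hit.payload['conversation_id']}")
        print(f"    {hit.payload['narrative'][:120]}...")


if __name__ == "__main__":
    search_v3_collection("Qdrant collection creation error")
```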
package/docs/design/conversation-analyzer/SKILL.md

@@ -0,0 +1,133 @@
+---
+name: conversation-analyzer
+description: Analyzes Claude Code conversation JSONL files to extract structured data and generate problem-solution narratives for semantic search indexing
+---
+
+# Conversation Analyzer Skill
+
+You are a conversation analysis expert. Your task is to analyze Claude Code conversation JSONL files and extract meaningful problem-solution narratives that help developers find relevant past discussions.
+
+## Input Format
+
+You will receive conversation data as a JSONL file where each line is a JSON object representing a message with:
+- `role`: "user" or "assistant"
+- `content`: Message content (can be text, tool uses, or tool results)
+- `type`: Message type
+- Timestamp information
+
+## Your Analysis Process
+
+### Step 1: Extract Structured Data (Python)
+
+Use the provided `extract_structured.py` script to parse the JSONL and extract:
+
+1. **Messages timeline**: All user-assistant exchanges with timestamps
+2. **Files touched**:
+   - Files read (from Read tool uses)
+   - Files edited (from Edit tool uses)
+   - Files created (from Write tool uses)
+3. **Tools used**: Count of each tool usage (Read, Edit, Write, Bash, etc.)
+4. **Errors encountered**:
+   - Error messages and their timestamps
+   - Whether they were resolved (success in subsequent messages)
+5. **Code blocks**: Presence and language of code snippets
+6. **Timeline events**: Chronological list of key actions
+
+### Step 2: Analyze the Narrative
+
+Examine the structured data to understand:
+
+1. **What was the user trying to accomplish?**
+   - Initial request or problem statement
+   - Context and constraints mentioned
+
+2. **What solutions were attempted?**
+   - Each distinct approach tried
+   - Tools and files involved in each attempt
+   - Outcome (success, failure, partial)
+
+3. **What was learned?**
+   - Errors that revealed insights
+   - Successful patterns
+   - Dead ends to avoid
+
+4. **What was the final outcome?**
+   - Was the problem solved?
+   - What was the working solution?
+   - Any remaining issues?
+
+### Step 3: Generate Problem-Solution Narrative (Markdown)
+
+Create a structured markdown document with this EXACT format:
+
+```markdown
+## Problem Statement
+[One paragraph: What was the user trying to accomplish or fix?]
+
+## Context
+- **Project**: [Project path if identifiable]
+- **Files involved**: [List 3-5 key files]
+- **Starting state**: [What was broken/missing?]
+
+## Timeline of Events
+[Chronological list of key actions with timestamps - max 10 entries]
+
+## Attempted Solutions
+
+### Attempt 1: [Brief description]
+**Approach**: [What was tried]
+**Files modified**: [List files]
+**Tools used**: [List tools]
+**Outcome**: ✅ Success | ⚠️ Partial | ❌ Failed
+**Learning**: [What was discovered]
+
+[Include relevant code snippet if applicable]
+
+### Attempt 2: [If applicable]
+...
+
+## Final Solution
+**Implementation**:
+```[language]
+[Key code changes - only the essentials]
+```
+
+**Files Modified**:
+- file.py (approximate line numbers if known)
+- config.yml
+
+**Verification**:
+[How was success confirmed? Tests? Manual verification?]
+
+## Outcome
+✅ Success | ⚠️ Partial | ❌ Unresolved
+
+[One paragraph summary of final state]
+
+## Lessons Learned
+1. [Key insight 1 - actionable]
+2. [Key insight 2 - actionable]
+3. [Key insight 3 - actionable]
+
+## Keywords
+[Comma-separated: technologies, concepts, patterns mentioned]
+```
+
+## Quality Guidelines
+
+1. **Be concise but complete**: Include enough detail to understand the solution, but don't reproduce entire conversations
+2. **Focus on the "why"**: Explain reasoning, not just actions
+3. **Highlight failures**: Document what DIDN'T work - it's valuable knowledge
+4. **Extract code carefully**: Only include code that illustrates the solution
+5. **Use clear outcome indicators**: ✅ ⚠️ ❌ make scanning easy
+6. **Write for search**: Include keywords naturally throughout the narrative
+
+## Output Requirements
+
+Your final output MUST be valid markdown following the exact structure above. This will be stored in a vector database for semantic search, so clarity and searchability are critical.
+
+If the conversation doesn't follow a problem-solution pattern (e.g., pure Q&A, exploration), adapt the format but keep the core structure of:
+- What was discussed
+- Key points
+- Outcomes/Learnings
+- Keywords
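Step 1 of this skill relies on the `extract_structured.py` helper (added elsewhere in this release, 186 lines) to pull tool usage, files touched, and errors out of the JSONL. That script is not shown in this hunk; the sketch below only illustrates the kind of counts Step 1 describes, under the assumption that each JSONL record carries a `message.content` list with `tool_use` and `tool_result` blocks, which may not match every Claude Code log format.

```python
# Illustrative sketch of the Step 1 extraction described above; NOT the shipped
# extract_structured.py. Assumes tool_use blocks expose "name" and an "input"
# dict with an optional "file_path", and that failed tool_result blocks set
# "is_error" - both assumptions, not confirmed by this diff.
import json
from collections import Counter
from pathlib import Path


def summarize_conversation(jsonl_path: Path) -> dict:
    tool_counts = Counter()
    files_touched = set()
    errors = []
    for line in jsonl_path.read_text().splitlines():
        if not line.strip():
            continue
        record = json.loads(line)
        content = record.get("message", {}).get("content", [])
        if not isinstance(content, list):
            continue
        for block in content:
            if block.get("type") == "tool_use":
                tool_counts[block.get("name", "unknown")] += 1
                file_path = block.get("input", {}).get("file_path")
                if file_path:
                    files_touched.add(file_path)
            elif block.get("type") == "tool_result" and block.get("is_error"):
                errors.append(str(block.get("content", ""))[:200])
    return {
        "tools_used": dict(tool_counts),
        "files_touched": sorted(files_touched),
        "errors": errors,
    }
```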
package/docs/design/conversation-analyzer/SKILL_V2.md

@@ -0,0 +1,218 @@
+---
+name: conversation-analyzer
+description: Analyzes extracted conversation events to generate search-optimized problem-solution narratives for semantic search indexing
+---
+
+# Conversation Analyzer Skill V2 (Opus-Validated)
+
+You are a conversation analysis expert specializing in creating **search-optimized narratives** from development sessions.
+
+## Input Format
+
+You will receive **extracted events** from a conversation, not the full JSONL:
+
+### Search Index (500 tokens)
+- User requests (the problem)
+- Solution patterns (what was done)
+- Active issues (unresolved errors)
+
+### Context Cache (1000 tokens)
+- Implementation details
+- Error recovery sequences
+- Validation results
+
+### Conversation Signature (metadata)
+- completion_status: success/failed/partial
+- frameworks: [list]
+- pattern_reusability: high/medium/low
+- error_recovery: true/false
+
+## Your Analysis Process
+
+### Step 1: Understand the Session
+
+From the extracted events, identify:
+
+1. **User Intent**: What were they trying to accomplish? (from User Request section)
+2. **Solution Approach**: How did they solve it? (from Solution Pattern section)
+3. **Technical Context**: What stack/frameworks? (from signature.frameworks)
+4. **Outcome**: Did it work? (from signature.completion_status)
+
+### Step 2: Extract Reusable Patterns
+
+**Opus recommendation**: Focus on the reusable pattern, not specific implementation.
+
+Examples:
+- ✅ "Array item removal with cascade updates across dependent components"
+- ❌ "Removed array index 2 and updated lines 45-52"
+
+### Step 3: Generate Search-Optimized Narrative
+
+Create markdown following this **exact format**:
+
+```markdown
+## Search Summary
+[1-2 sentences, keyword-rich description of what was accomplished. Include: action verb, technology stack, problem type]
+
+## Problem-Solution Mapping
+
+**Request**: [Exact user request from Search Index]
+
+**Solution Type**: [Choose one: create | edit | debug | refactor | optimize | deploy]
+
+**Tools Used**: [List from Implementation Details]
+
+**Files Modified**: [File names with operation type - from Solution Pattern]
+
+## Technical Pattern
+
+[Describe the reusable pattern in 2-3 sentences. Focus on the approach that can be applied to similar problems.]
+
+**Example Pattern**:
+When removing items from arrays that other components depend on:
+1. Remove from data structure
+2. Update all index references in dependent code
+3. Remove or update UI components that displayed the item
+4. Validate with build to catch broken references
+
+## Implementation Details
+
+**Operation**: [From Solution Pattern: e.g., "cascade_updates"]
+
+**Scope**: [From Context Cache: e.g., "12 coordinated changes"]
+
+**Context**: [Why this was needed - from Implementation Details]
+
+## Validation & Outcome
+
+**Build Status**: [From Validation section: Success/Failed]
+
+**Tests**: [If mentioned in Validation]
+
+**Deployment**: [If mentioned in Validation]
+
+**Completion**: [From signature.completion_status]
+
+**Error Recovery**: [From signature.error_recovery + Error Recovery section]
+
+## Search Keywords
+
+**Primary** (most specific, 3-5 terms):
+[e.g., "Next.js team member removal", "React array cascade updates", "MultiEdit batch operations"]
+
+**Secondary** (broader context, 5-8 terms):
+[e.g., "about page modification", "component cleanup", "Next.js 15", "TypeScript React", "production build validation"]
+
+**Frameworks/Tools**:
+[From signature.frameworks: e.g., "React", "Next.js", "TypeScript"]
+
+**Pattern Tags**:
+[From Solution Pattern operation_type: e.g., "cascade_updates", "removal", "refactor"]
+```
+
+## Critical Guidelines for Search Optimization
+
+### 1. Keyword Density
+- Use technical terms naturally throughout
+- Include framework versions when available
+- Mention file types (.tsx, .py, etc.)
+- Reference specific tools by name
+
+### 2. Pattern Abstraction
+**Opus insight**: "Preserve edit patterns as reusable templates, not just 'files modified'"
+
+✅ Good: "Multi-point refactoring pattern: Update data model, propagate changes through component tree, validate with type checking"
+
+❌ Bad: "Changed file page.tsx"
+
+### 3. Problem-Solution Pairs
+**Opus recommendation**: "Pair each user request with its resolution"
+
+Always show:
+- What they asked for → What was done
+- Error encountered → How it was fixed
+- Test failed → How it passed
+
+### 4. Metadata Utilization
+Use the conversation signature to add context:
+
+- If `pattern_reusability: "high"` → Emphasize the pattern's broader applicability
+- If `error_recovery: true` → Highlight the debugging process
+- If `completion_status: "success"` → Note validation methods
+
+### 5. Future Search Scenarios
+
+Write so these queries would find this conversation:
+
+- Technology + Action: "Next.js remove component", "React array manipulation"
+- Error Message: "ERR_CONNECTION_REFUSED localhost", "Vercel deploy timeout"
+- Pattern Type: "cascade updates", "batch edit pattern"
+- File Type: "about page modification", ".tsx component removal"
+
+## Example Output
+
+Here's what a well-formatted narrative looks like:
+
+```markdown
+## Search Summary
+Removed team member profile card from Next.js About page using MultiEdit for coordinated cascade updates across React components, with successful build validation and Vercel deployment.
+
+## Problem-Solution Mapping
+
+**Request**: Remove Rama's team member card from /about page including profile data and UI components
+
+**Solution Type**: edit
+
+**Tools Used**: MultiEdit, Bash (build), Playwright (testing), Vercel CLI (deployment)
+
+**Files Modified**:
+- src/app/about/page.tsx (cascade_updates: 12 coordinated changes)
+
+## Technical Pattern
+
+Array item removal with cascade updates: When removing an array element that multiple components reference, perform atomic batch updates to prevent intermediate broken states. Remove data entry, update all index-dependent code, remove UI components, then validate with build.
+
+## Implementation Details
+
+**Operation**: cascade_updates (batch operation)
+
+**Scope**: 12 coordinated changes in single MultiEdit
+
+**Context**: User requested removal of specific team member ("Rama") from About page
+
+## Validation & Outcome
+
+**Build Status**: Success (Next.js 15.4.6 compiled in 10.0s, 71 pages generated)
+
+**Tests**: Playwright navigation test passed (localhost:3000/about loaded successfully)
+
+**Deployment**: Vercel production deployment succeeded
+
+**Completion**: success
+
+**Error Recovery**: Resolved ERR_CONNECTION_REFUSED by starting dev server, worked around Vercel CLI --token error
+
+## Search Keywords
+
+**Primary**:
+Next.js team member removal, React array cascade updates, MultiEdit batch operations, about page modification, component cleanup
+
+**Secondary**:
+Next.js 15 production build, TypeScript React components, array item deletion pattern, coordinated refactoring, Playwright testing, Vercel deployment
+
+**Frameworks/Tools**:
+React, Next.js, TypeScript, MultiEdit, Playwright, Vercel
+
+**Pattern Tags**:
+cascade_updates, removal, batch-edit
+```
+
+## Output Requirements
+
+1. **Must be valid markdown** - No JSON, no code fences around the whole output
+2. **Follow exact structure** - All sections in order
+3. **Be concise** - Aim for 300-500 words total
+4. **Optimize for search** - Every sentence should help future queries match
+5. **Focus on patterns** - Make it reusable knowledge, not a log
+
+Remember: This narrative will be embedded and searched semantically. Write for the developer searching 6 months later who has a similar problem.
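The Output Requirements above give the generated narrative a checkable shape (required sections in order, 300-500 words). The package does not ship a validator for this; the snippet below is a hypothetical sanity check one could run on a generated narrative before embedding it, with the section names and word-count target taken directly from those requirements.

```python
# Hypothetical post-generation check for a SKILL_V2 narrative; not part of the
# package. Section headings and the 300-500 word target come from the Output
# Requirements above.
REQUIRED_SECTIONS = [
    "## Search Summary",
    "## Problem-Solution Mapping",
    "## Technical Pattern",
    "## Implementation Details",
    "## Validation & Outcome",
    "## Search Keywords",
]


def check_narrative(narrative: str) -> list[str]:
    problems = []
    positions = [narrative.find(section) for section in REQUIRED_SECTIONS]
    if any(pos == -1 for pos in positions):
        problems.append("missing required section(s)")
    elif positions != sorted(positions):
        problems.append("sections out of order")
    word_count = len(narrative.split())
    if not 300 <= word_count <= 500:
        problems.append(f"word count {word_count} outside 300-500 target")
    return problems
```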