npm - cursor-recursive-rag - Versions diffs - 0.2.0-alpha.2 → 0.2.0 - Mend

cursor-recursive-rag 0.2.0-alpha.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (210) hide show

package/README.md +179 -203
package/dist/adapters/llm/anthropic.d.ts +27 -0
package/dist/adapters/llm/anthropic.d.ts.map +1 -0
package/dist/adapters/llm/anthropic.js +287 -0
package/dist/adapters/llm/anthropic.js.map +1 -0
package/dist/adapters/llm/base.d.ts +62 -0
package/dist/adapters/llm/base.d.ts.map +1 -0
package/dist/adapters/llm/base.js +140 -0
package/dist/adapters/llm/base.js.map +1 -0
package/dist/adapters/llm/deepseek.d.ts +24 -0
package/dist/adapters/llm/deepseek.d.ts.map +1 -0
package/dist/adapters/llm/deepseek.js +228 -0
package/dist/adapters/llm/deepseek.js.map +1 -0
package/dist/adapters/llm/groq.d.ts +25 -0
package/dist/adapters/llm/groq.d.ts.map +1 -0
package/dist/adapters/llm/groq.js +265 -0
package/dist/adapters/llm/groq.js.map +1 -0
package/dist/adapters/llm/index.d.ts +62 -0
package/dist/adapters/llm/index.d.ts.map +1 -0
package/dist/adapters/llm/index.js +380 -0
package/dist/adapters/llm/index.js.map +1 -0
package/dist/adapters/llm/ollama.d.ts +23 -0
package/dist/adapters/llm/ollama.d.ts.map +1 -0
package/dist/adapters/llm/ollama.js +261 -0
package/dist/adapters/llm/ollama.js.map +1 -0
package/dist/adapters/llm/openai.d.ts +22 -0
package/dist/adapters/llm/openai.d.ts.map +1 -0
package/dist/adapters/llm/openai.js +232 -0
package/dist/adapters/llm/openai.js.map +1 -0
package/dist/adapters/llm/openrouter.d.ts +27 -0
package/dist/adapters/llm/openrouter.d.ts.map +1 -0
package/dist/adapters/llm/openrouter.js +305 -0
package/dist/adapters/llm/openrouter.js.map +1 -0
package/dist/adapters/vector/index.d.ts.map +1 -1
package/dist/adapters/vector/index.js +8 -0
package/dist/adapters/vector/index.js.map +1 -1
package/dist/adapters/vector/redis-native.d.ts +35 -0
package/dist/adapters/vector/redis-native.d.ts.map +1 -0
package/dist/adapters/vector/redis-native.js +170 -0
package/dist/adapters/vector/redis-native.js.map +1 -0
package/dist/cli/commands/chat.d.ts +4 -0
package/dist/cli/commands/chat.d.ts.map +1 -0
package/dist/cli/commands/chat.js +374 -0
package/dist/cli/commands/chat.js.map +1 -0
package/dist/cli/commands/maintenance.d.ts +4 -0
package/dist/cli/commands/maintenance.d.ts.map +1 -0
package/dist/cli/commands/maintenance.js +237 -0
package/dist/cli/commands/maintenance.js.map +1 -0
package/dist/cli/commands/rules.d.ts +9 -0
package/dist/cli/commands/rules.d.ts.map +1 -0
package/dist/cli/commands/rules.js +639 -0
package/dist/cli/commands/rules.js.map +1 -0
package/dist/cli/commands/setup.js +5 -4
package/dist/cli/commands/setup.js.map +1 -1
package/dist/cli/index.js +6 -0
package/dist/cli/index.js.map +1 -1
package/dist/config/memoryConfig.d.ts +427 -0
package/dist/config/memoryConfig.d.ts.map +1 -0
package/dist/config/memoryConfig.js +258 -0
package/dist/config/memoryConfig.js.map +1 -0
package/dist/config/rulesConfig.d.ts +486 -0
package/dist/config/rulesConfig.d.ts.map +1 -0
package/dist/config/rulesConfig.js +345 -0
package/dist/config/rulesConfig.js.map +1 -0
package/dist/dashboard/coreTools.d.ts +14 -0
package/dist/dashboard/coreTools.d.ts.map +1 -0
package/dist/dashboard/coreTools.js +413 -0
package/dist/dashboard/coreTools.js.map +1 -0
package/dist/dashboard/public/index.html +1982 -13
package/dist/dashboard/server.d.ts +1 -8
package/dist/dashboard/server.d.ts.map +1 -1
package/dist/dashboard/server.js +846 -13
package/dist/dashboard/server.js.map +1 -1
package/dist/dashboard/toolRegistry.d.ts +192 -0
package/dist/dashboard/toolRegistry.d.ts.map +1 -0
package/dist/dashboard/toolRegistry.js +322 -0
package/dist/dashboard/toolRegistry.js.map +1 -0
package/dist/proxy/index.d.ts +1 -1
package/dist/proxy/index.d.ts.map +1 -1
package/dist/proxy/index.js +9 -6
package/dist/proxy/index.js.map +1 -1
package/dist/server/index.js +21 -0
package/dist/server/index.js.map +1 -1
package/dist/server/tools/crawl.d.ts.map +1 -1
package/dist/server/tools/crawl.js +8 -0
package/dist/server/tools/crawl.js.map +1 -1
package/dist/server/tools/index.d.ts.map +1 -1
package/dist/server/tools/index.js +19 -1
package/dist/server/tools/index.js.map +1 -1
package/dist/server/tools/ingest.d.ts.map +1 -1
package/dist/server/tools/ingest.js +5 -0
package/dist/server/tools/ingest.js.map +1 -1
package/dist/server/tools/memory.d.ts +250 -0
package/dist/server/tools/memory.d.ts.map +1 -0
package/dist/server/tools/memory.js +472 -0
package/dist/server/tools/memory.js.map +1 -0
package/dist/server/tools/recursive-query.d.ts.map +1 -1
package/dist/server/tools/recursive-query.js +6 -0
package/dist/server/tools/recursive-query.js.map +1 -1
package/dist/server/tools/search.d.ts.map +1 -1
package/dist/server/tools/search.js +6 -0
package/dist/server/tools/search.js.map +1 -1
package/dist/services/activity-log.d.ts +10 -0
package/dist/services/activity-log.d.ts.map +1 -0
package/dist/services/activity-log.js +53 -0
package/dist/services/activity-log.js.map +1 -0
package/dist/services/categoryManager.d.ts +110 -0
package/dist/services/categoryManager.d.ts.map +1 -0
package/dist/services/categoryManager.js +549 -0
package/dist/services/categoryManager.js.map +1 -0
package/dist/services/contextEnvironment.d.ts +206 -0
package/dist/services/contextEnvironment.d.ts.map +1 -0
package/dist/services/contextEnvironment.js +481 -0
package/dist/services/contextEnvironment.js.map +1 -0
package/dist/services/conversationProcessor.d.ts +99 -0
package/dist/services/conversationProcessor.d.ts.map +1 -0
package/dist/services/conversationProcessor.js +311 -0
package/dist/services/conversationProcessor.js.map +1 -0
package/dist/services/cursorChatReader.d.ts +129 -0
package/dist/services/cursorChatReader.d.ts.map +1 -0
package/dist/services/cursorChatReader.js +419 -0
package/dist/services/cursorChatReader.js.map +1 -0
package/dist/services/decayCalculator.d.ts +85 -0
package/dist/services/decayCalculator.d.ts.map +1 -0
package/dist/services/decayCalculator.js +182 -0
package/dist/services/decayCalculator.js.map +1 -0
package/dist/services/enhancedVectorStore.d.ts +102 -0
package/dist/services/enhancedVectorStore.d.ts.map +1 -0
package/dist/services/enhancedVectorStore.js +245 -0
package/dist/services/enhancedVectorStore.js.map +1 -0
package/dist/services/hybridScorer.d.ts +120 -0
package/dist/services/hybridScorer.d.ts.map +1 -0
package/dist/services/hybridScorer.js +334 -0
package/dist/services/hybridScorer.js.map +1 -0
package/dist/services/knowledgeExtractor.d.ts +45 -0
package/dist/services/knowledgeExtractor.d.ts.map +1 -0
package/dist/services/knowledgeExtractor.js +436 -0
package/dist/services/knowledgeExtractor.js.map +1 -0
package/dist/services/knowledgeStorage.d.ts +102 -0
package/dist/services/knowledgeStorage.d.ts.map +1 -0
package/dist/services/knowledgeStorage.js +383 -0
package/dist/services/knowledgeStorage.js.map +1 -0
package/dist/services/maintenanceScheduler.d.ts +89 -0
package/dist/services/maintenanceScheduler.d.ts.map +1 -0
package/dist/services/maintenanceScheduler.js +479 -0
package/dist/services/maintenanceScheduler.js.map +1 -0
package/dist/services/memoryMetadataStore.d.ts +62 -0
package/dist/services/memoryMetadataStore.d.ts.map +1 -0
package/dist/services/memoryMetadataStore.js +570 -0
package/dist/services/memoryMetadataStore.js.map +1 -0
package/dist/services/recursiveRetrieval.d.ts +122 -0
package/dist/services/recursiveRetrieval.d.ts.map +1 -0
package/dist/services/recursiveRetrieval.js +443 -0
package/dist/services/recursiveRetrieval.js.map +1 -0
package/dist/services/relationshipGraph.d.ts +77 -0
package/dist/services/relationshipGraph.d.ts.map +1 -0
package/dist/services/relationshipGraph.js +411 -0
package/dist/services/relationshipGraph.js.map +1 -0
package/dist/services/rlmSafeguards.d.ts +273 -0
package/dist/services/rlmSafeguards.d.ts.map +1 -0
package/dist/services/rlmSafeguards.js +705 -0
package/dist/services/rlmSafeguards.js.map +1 -0
package/dist/services/rulesAnalyzer.d.ts +119 -0
package/dist/services/rulesAnalyzer.d.ts.map +1 -0
package/dist/services/rulesAnalyzer.js +768 -0
package/dist/services/rulesAnalyzer.js.map +1 -0
package/dist/services/rulesMerger.d.ts +75 -0
package/dist/services/rulesMerger.d.ts.map +1 -0
package/dist/services/rulesMerger.js +404 -0
package/dist/services/rulesMerger.js.map +1 -0
package/dist/services/rulesParser.d.ts +127 -0
package/dist/services/rulesParser.d.ts.map +1 -0
package/dist/services/rulesParser.js +594 -0
package/dist/services/rulesParser.js.map +1 -0
package/dist/services/smartChunker.d.ts +110 -0
package/dist/services/smartChunker.d.ts.map +1 -0
package/dist/services/smartChunker.js +520 -0
package/dist/services/smartChunker.js.map +1 -0
package/dist/types/categories.d.ts +105 -0
package/dist/types/categories.d.ts.map +1 -0
package/dist/types/categories.js +108 -0
package/dist/types/categories.js.map +1 -0
package/dist/types/extractedKnowledge.d.ts +233 -0
package/dist/types/extractedKnowledge.d.ts.map +1 -0
package/dist/types/extractedKnowledge.js +56 -0
package/dist/types/extractedKnowledge.js.map +1 -0
package/dist/types/index.d.ts +9 -2
package/dist/types/index.d.ts.map +1 -1
package/dist/types/index.js +12 -1
package/dist/types/index.js.map +1 -1
package/dist/types/llmProvider.d.ts +282 -0
package/dist/types/llmProvider.d.ts.map +1 -0
package/dist/types/llmProvider.js +48 -0
package/dist/types/llmProvider.js.map +1 -0
package/dist/types/memory.d.ts +227 -0
package/dist/types/memory.d.ts.map +1 -0
package/dist/types/memory.js +76 -0
package/dist/types/memory.js.map +1 -0
package/dist/types/relationships.d.ts +167 -0
package/dist/types/relationships.d.ts.map +1 -0
package/dist/types/relationships.js +106 -0
package/dist/types/relationships.js.map +1 -0
package/dist/types/rulesOptimizer.d.ts +345 -0
package/dist/types/rulesOptimizer.d.ts.map +1 -0
package/dist/types/rulesOptimizer.js +22 -0
package/dist/types/rulesOptimizer.js.map +1 -0
package/docs/cursor-recursive-rag-memory-spec.md +4569 -0
package/docs/cursor-recursive-rag-tasks.md +1355 -0
package/package.json +6 -3
package/restart-rag.sh +16 -0

package/docs/cursor-recursive-rag-tasks.md ADDED Viewed

@@ -0,0 +1,1355 @@
+# cursor-recursive-rag Memory Features - Task Breakdown
+## Overview
+This document provides a Linear-compatible task breakdown for implementing advanced memory features in cursor-recursive-rag. Each task includes a point estimate, labels, dependencies (where applicable), the target file, and acceptance criteria.
+---
+## Epic: Phase 1 - Foundation (Enhanced Schema & Temporal Decay)
+### CRR-101: Define Enhanced Chunk Interface ✅
+**Estimate**: 1 point
+**Labels**: foundation, types
+**Status**: COMPLETED
+Create new TypeScript interfaces for enhanced chunks with temporal tracking, importance, decay scores, and entity tags.
+**File**: `src/types/memory.ts`
+**Acceptance Criteria**:
+- [x] EnhancedChunk interface defined with all new fields
+- [x] ChunkType enum with all knowledge types
+- [x] EntityTag and EntityType defined
+- [x] Types exported from main index
+- [x] Existing code continues to compile
+---
+### CRR-102: Create Memory Metadata Store ✅
+**Estimate**: 3 points
+**Labels**: foundation, database
+**Blocks**: CRR-103, CRR-104
+**Status**: COMPLETED
+Implement SQLite-based metadata store for temporal tracking, relationships, and categories.
+**File**: `src/services/memoryMetadataStore.ts`
+**Acceptance Criteria**:
+- [x] SQLite database created on first run
+- [x] Tables: chunks_metadata, relationships, access_log, categories, processed_conversations
+- [x] CRUD operations for all tables
+- [x] Indexes created for performance
+- [x] Access recording updates last_accessed_at and access_count
+---
+### CRR-103: Implement Decay Score Calculator ✅
+**Estimate**: 2 points
+**Labels**: foundation, algorithm
+**Blocked by**: CRR-102
+**Status**: COMPLETED
+Create decay calculator with configurable half-life, access factors, and importance weighting.
+**File**: `src/services/decayCalculator.ts`
+**Acceptance Criteria**:
+- [x] Decay scores range 0.0 to 1.0
+- [x] New chunks with high importance start high
+- [x] Frequently accessed chunks maintain high scores
+- [x] Old unused chunks decay toward 0
+- [x] Batch update completes in <5s for 10k chunks
+- [x] Configurable weights and half-life
+---
+### CRR-104: Integrate Metadata Store with Vector Store ✅
+**Estimate**: 3 points
+**Labels**: foundation, integration
+**Blocked by**: CRR-102, CRR-103
+**Status**: COMPLETED
+Create EnhancedVectorStore wrapper that combines existing vector store with metadata tracking.
+**File**: `src/services/enhancedVectorStore.ts`
+**Acceptance Criteria**:
+- [x] All existing tests continue to pass
+- [x] Metadata stored for new chunks
+- [x] Search results include decay scores
+- [x] Access recorded for returned results
+- [x] Re-ranking produces different order than pure similarity
+---
+## Epic: Phase 2 - Cursor Chat History Integration
+### CRR-201: Implement Cursor Database Reader ✅
+**Estimate**: 3 points
+**Labels**: chat-history, database
+**Status**: COMPLETED
+Create service to read Cursor's chat history from its SQLite database.
+**File**: `src/services/cursorChatReader.ts`
+**Acceptance Criteria**:
+- [x] Correctly locates Cursor DB on macOS, Windows, Linux
+- [x] Reads conversations without corrupting database
+- [x] Handles database being locked (read-only mode)
+- [x] Returns empty array if no conversations
+- [x] Supports filtering by date, project, code presence
+---
+### CRR-202: Create Conversation Processor ✅
+**Estimate**: 2 points
+**Labels**: chat-history, processing
+**Blocked by**: CRR-201
+**Status**: COMPLETED
+Process raw conversations into structured chunks with embeddings.
+**File**: `src/services/conversationProcessor.ts`
+**Acceptance Criteria**:
+- [x] Groups messages into logical exchanges
+- [x] Creates embeddings for each chunk
+- [x] Extracts code blocks as separate chunks
+- [x] Calculates reasonable importance scores
+- [x] Extracts basic entities (languages, files)
+---
+### CRR-203: Implement Chat History Ingestion CLI ✅
+**Estimate**: 2 points
+**Labels**: chat-history, cli
+**Blocked by**: CRR-201, CRR-202
+**Status**: COMPLETED
+Add CLI commands for chat history ingestion and watching.
+**File**: `src/cli/commands/chat.ts`
+**Acceptance Criteria**:
+- [x] `cursor-rag chat ingest` ingests new conversations
+- [x] `cursor-rag chat list` shows available conversations
+- [x] `cursor-rag chat watch` runs in background mode
+- [x] Already-processed conversations are skipped
+- [x] Progress displayed during ingestion
+- [x] `cursor-rag chat stats` shows ingestion statistics
+- [x] `cursor-rag chat reset` resets processing status
+---
+## Epic: Phase 3 - Knowledge Extraction Pipeline
+### CRR-301: Define Knowledge Extraction Schema ✅
+**Estimate**: 1 point
+**Labels**: extraction, types
+**Status**: COMPLETED
+Define TypeScript interfaces for extracted knowledge (solutions, patterns, decisions, etc.).
+**File**: `src/types/extractedKnowledge.ts`
+**Acceptance Criteria**:
+- [x] All types properly defined and exported
+- [x] Types support JSON serialisation
+- [x] Confidence scores bounded 0-1
+- [x] CodeChange interface for before/after
+---
+### CRR-302: Implement LLM Knowledge Extractor ✅
+**Estimate**: 5 points
+**Labels**: extraction, llm
+**Blocked by**: CRR-301
+**Status**: COMPLETED
+Create service that uses LLM to extract structured knowledge from conversations.
+**File**: `src/services/knowledgeExtractor.ts`
+**Acceptance Criteria**:
+- [x] Extracts solutions with problem/solution pairs
+- [x] Extracts patterns with implementation examples
+- [x] Extracts decisions with reasoning
+- [x] Handles LLM response parsing errors gracefully
+- [x] Heuristic fallback when LLM unavailable
+- [x] Low-confidence items filtered out
+- [x] Configurable extraction settings
+---
+### CRR-303: Create Knowledge Storage Service ✅
+**Estimate**: 3 points
+**Labels**: extraction, storage
+**Blocked by**: CRR-301, CRR-302, CRR-104
+**Status**: COMPLETED
+Store extracted knowledge as first-class searchable chunks with relationships.
+**File**: `src/services/knowledgeStorage.ts`
+**Acceptance Criteria**:
+- [x] Solutions stored with full problem/solution context
+- [x] Patterns include implementation examples
+- [x] Decisions include reasoning and alternatives
+- [x] Relationships created between related items
+- [x] Appropriate importance scores assigned
+- [x] Integrated with `cursor-rag chat ingest --extract`
+---
+## Epic: Phase 4 - Relationship Graph
+### CRR-401: Define Relationship Types ✅
+**Estimate**: 1 point
+**Labels**: graph, types
+**Status**: COMPLETED
+Define all relationship types and graph interfaces.
+**File**: `src/types/relationships.ts`
+**Acceptance Criteria**:
+- [x] All 19 relationship types defined (extended from spec's 13)
+- [x] RelationshipType enum exported (unified with memory.ts)
+- [x] Relationship interface with strength and metadata
+- [x] GraphTraversalOptions and GraphNode interfaces
+- [x] Helper functions: isBidirectional, getReverseType, getRelationshipsByCategory
+- [x] Relationship categories: semantic, causal, temporal, conflict, preference, structural
+---
+### CRR-402: Implement Graph Service ✅
+**Estimate**: 4 points
+**Labels**: graph, service
+**Blocked by**: CRR-401, CRR-102
+**Status**: COMPLETED
+Create graph operations for relationship-based retrieval and contradiction detection.
+**File**: `src/services/relationshipGraph.ts`
+**Acceptance Criteria**:
+- [x] All relationship types supported
+- [x] Graph traversal respects depth limits (BFS with configurable maxDepth)
+- [x] Bidirectional relationships create two edges
+- [x] Contradiction detection identifies potential conflicts
+- [x] Traversal filters by type and strength
+- [x] Additional features: cluster detection, supersession chains, graph stats
+---
+## Epic: Phase 5 - Hierarchical Memory (Categories/Summaries)
+### CRR-501: Define Category Structure ✅
+**Estimate**: 1 point
+**Labels**: categories, types
+**Status**: COMPLETED
+Define category types and default categories.
+**File**: `src/types/categories.ts`
+**Acceptance Criteria**:
+- [x] Category interface with summary and stats (ExtendedCategory, CategoryWithStats)
+- [x] CategoryItem interface for assignments (re-exported from memory.ts)
+- [x] DEFAULT_CATEGORIES array with 10 categories
+- [x] Types exported
+- [x] Helper functions: findCategoriesByTags, scoreCategoryMatch, getDefaultCategoryNames
+---
+### CRR-502: Implement Category Manager ✅
+**Estimate**: 5 points
+**Labels**: categories, service
+**Blocked by**: CRR-501, CRR-302
+**Status**: COMPLETED
+Create service for category management and summary evolution.
+**File**: `src/services/categoryManager.ts`
+**Acceptance Criteria**:
+- [x] Default categories created on first run (initialize method)
+- [x] Chunks classified with relevance scores (heuristic + LLM modes)
+- [x] Summaries evolve as new items added (evolveSummary method)
+- [x] Contradictions handled (LLM detects updates/changes)
+- [x] Category selection uses LLM for relevance (selectRelevantCategories)
+- [x] Additional: keyword matching, tag extraction, custom category creation
+---
+## Epic: Phase 6 - Background Maintenance Jobs
+### CRR-601: Implement Maintenance Scheduler ✅
+**Estimate**: 4 points
+**Labels**: maintenance, background
+**Blocked by**: CRR-103, CRR-502
+Create scheduled jobs for consolidation, summarisation, and cleanup.
+**File**: `src/services/maintenanceScheduler.ts`
+**Acceptance Criteria**:
+- [x] Jobs run on schedule (setInterval-based scheduling with proper cron-like timing)
+- [x] Jobs can be triggered manually (runJob method with 5 job types)
+- [x] Nightly consolidation completes in <5 minutes (decay, duplicates, hot items, archival)
+- [x] Weekly summarisation updates all categories (evolveSummary for each category)
+- [x] Monthly re-index handles large databases (graph analysis, old item archival, vacuum)
+- [x] Proper error handling and logging (MaintenanceJobResult with metrics and errors)
+---
+### CRR-602: Add Maintenance CLI Commands ✅
+**Estimate**: 2 points
+**Labels**: maintenance, cli
+**Blocked by**: CRR-601
+Add CLI commands for maintenance operations.
+**File**: `src/cli/commands/maintenance.ts`
+**Acceptance Criteria**:
+- [x] `cursor-rag maintenance run <job>` works (decay, consolidate, summarize, reindex, cleanup)
+- [x] `cursor-rag maintenance start` runs in the background (with proper scheduling and graceful shutdown)
+- [x] `cursor-rag maintenance stats` shows metrics (memory, graph, scheduler, categories)
+- [x] `cursor-rag maintenance cleanup` safely removes data (with --confirm flag)
+- [x] Dry run mode prevents accidental data loss (--dry-run flag for cleanup)
+---
+## Epic: Phase 7 - Enhanced Retrieval Scoring
+### CRR-701: Implement Hybrid Scorer ✅
+**Estimate**: 4 points
+**Labels**: retrieval, scoring
+**Blocked by**: CRR-402, CRR-502
+Create hybrid scoring combining similarity, decay, importance, and graph relationships.
+**File**: `src/services/hybridScorer.ts`
+**Acceptance Criteria**:
+- [x] Final scores combine all components correctly (weighted combination of 6 factors)
+- [x] Graph boost increases scores for related items (via getGraphContext traversal)
+- [x] Type boost favours solutions and patterns (configurable typeBoosts map)
+- [x] Tiered retrieval tries summaries first (tieredRetrieval method)
+- [x] Recency boost favours recently accessed items (calculateRecencyScore with half-life)
+- [x] Configurable weights (ScoringConfig with DEFAULT_SCORING_CONFIG)
+---
+### CRR-702: Add New MCP Tools ✅
+**Estimate**: 3 points
+**Labels**: mcp, tools
+**Blocked by**: CRR-701
+Add new MCP tools for memory features.
+**File**: `src/server/tools/memory.ts`
+**Acceptance Criteria**:
+- [x] search_past_solutions tool working (searches solution chunks with hybrid scoring)
+- [x] find_similar_issues tool working (includes graph traversal for related items)
+- [x] get_project_patterns tool working (filters by category, sorted by importance)
+- [x] recall_decision tool working (searches decision/standard chunks)
+- [x] get_category_summary tool working (returns category summary and metadata)
+- [x] ingest_chat_history tool working (with optional knowledge extraction)
+- [x] memory_stats tool working (comprehensive stats output)
+---
+### CRR-703: Create Memory Configuration ✅
+**Estimate**: 1 point
+**Labels**: config
+Define configuration schema and defaults for all memory features.
+**File**: `src/config/memoryConfig.ts`
+**Acceptance Criteria**:
+- [x] MemoryConfig interface complete (Zod schema with 7 config sections)
+- [x] DEFAULT_MEMORY_CONFIG with sensible defaults (all sections have defaults)
+- [x] Config validation (validateMemoryConfig function with weight sum check)
+- [x] Environment variable overrides (getMemoryConfigWithEnvOverrides function)
+---
+## Epic: Phase 8 - RLM-Style Recursive Retrieval
+*Based on the Recursive Language Models paper (Zhang et al., 2024)*
+### CRR-801: Implement Context Environment ✅
+**Estimate**: 5 points
+**Labels**: rlm, retrieval, core
+Create sandboxed environment for RLM-style context processing with cost tracking and budget enforcement.
+**File**: `src/services/contextEnvironment.ts`
+**Acceptance Criteria**:
+- [x] Context can be loaded as environment variables (loadContext method)
+- [x] State description gives LLM overview without full content (getStateDescription method)
+- [x] Peek allows selective viewing of chunks (peek method with start/end params)
+- [x] Filter supports regex patterns (filter method with RegExp)
+- [x] Sub-queries track cost and enforce budget (subQuery with CostTracker)
+- [x] Async batch queries work with concurrency limit (batchSubQuery with configurable concurrency)
+- [x] Termination conditions enforced (shouldTerminate checks iterations, cost, sub-calls, timeout)
+---
+### CRR-802: Implement Recursive Retrieval Controller ✅
+**Estimate**: 5 points
+**Labels**: rlm, retrieval, core
+**Blocked by**: CRR-801, CRR-701
+Orchestrate iterative retrieval with complexity assessment and action parsing.
+**File**: `src/services/recursiveRetrieval.ts`
+**Acceptance Criteria**:
+- [x] Simple queries use direct retrieval (returns direct when complexity === 'simple')
+- [x] Complex queries trigger recursive processing (iterativeProcess method)
+- [x] Complexity assessment considers context size and query type (assessComplexity method)
+- [x] Actions are parsed and executed correctly (parseAction and executeAction methods)
+- [x] Cost tracked across iterations (via ContextEnvironment.getTotalCost)
+- [x] Early termination on budget/iteration limits (handles Budget/Limit/Timeout errors)
+---
+### CRR-803: Implement Smart Chunking Strategies ✅
+**Estimate**: 3 points
+**Labels**: rlm, chunking
+**Blocked by**: CRR-801
+Implement multiple chunking strategies based on RLM paper patterns.
+**File**: `src/services/smartChunker.ts`
+**Acceptance Criteria**:
+- [x] Uniform chunking by count/size (uniformChunk, uniformChunkEnhanced methods)
+- [x] Semantic chunking groups similar content (semanticChunk with k-means clustering)
+- [x] Keyword-based chunking filters by patterns (keywordChunk with regex)
+- [x] Structural chunking groups by source file (structuralChunk method)
+- [x] Adaptive chunking chooses strategy based on content/query (adaptiveChunk method)
+---
+### CRR-804: Implement Anti-Pattern Mitigations ✅
+**Estimate**: 3 points
+**Labels**: rlm, safety
+**Blocked by**: CRR-801, CRR-802
+Implement safeguards from RLM paper's Negative Results section.
+**File**: `src/services/rlmSafeguards.ts`
+**Acceptance Criteria**:
+- [x] Model-specific configurations (MODEL_CONFIGS map with per-model settings)
+- [x] Capability detection for code execution ability (ModelCapabilities, getModelCapabilities)
+- [x] Token budget management (TokenBudgetManager with reserve ratio)
+- [x] Multi-signal termination detection (TerminationDetector with 4 signals)
+- [x] Sub-call throttling and caching (SubCallThrottler with MD5 cache keys)
+- [x] Circuit breaker for runaway trajectories (CircuitBreaker class)
+- [x] Model prior-based pre-filtering (PriorBasedFilter with keyword extraction)
+---
+## Epic: Phase 9 - Dashboard Tools UI
+*Interactive tool execution from the web dashboard*
+### CRR-901: Define Tool Registry Interface
+**Estimate**: 2 points
+**Labels**: dashboard, tools, types
+**Status**: ✅ COMPLETED
+Create a registry system for exposing RAG tools to the dashboard UI.
+**File**: `src/dashboard/toolRegistry.ts`
+**Acceptance Criteria**:
+- [x] ToolDefinition interface with name, description, parameters schema
+- [x] ToolParameter interface with type, required, default, validation
+- [x] ToolResult interface with success/error states
+- [x] Registry supports dynamic tool registration
+- [x] JSON Schema generation for parameter forms
+---
+### CRR-902: Implement Dashboard Tools API
+**Estimate**: 3 points
+**Labels**: dashboard, api
+**Blocked by**: CRR-901
+**Status**: ✅ COMPLETED (rate limiting deferred; see the unchecked criterion below)
+Add API endpoints for tool discovery and execution.
+**File**: `src/dashboard/server.ts`
+**Acceptance Criteria**:
+- [x] `GET /api/tools` returns list of available tools with schemas
+- [x] `POST /api/tools/:name/execute` runs a tool with parameters
+- [x] `GET /api/tools/:name/status/:jobId` for long-running tools
+- [x] Proper error handling and validation
+- [ ] Rate limiting to prevent abuse (deferred - not critical for MVP)
+---
+### CRR-903: Create Tools UI Panel
+**Estimate**: 4 points
+**Labels**: dashboard, ui
+**Blocked by**: CRR-902
+**Status**: ✅ COMPLETED
+Build interactive tools section in the dashboard.
+**File**: `src/dashboard/public/index.html`
+**Acceptance Criteria**:
+- [x] New "Tools" tab in dashboard navigation
+- [x] Tool cards with name, description, and "Run" button
+- [x] Dynamic form generation from parameter schemas
+- [x] Real-time execution status and progress
+- [x] Result display with syntax highlighting for code/JSON
+- [x] Execution history with re-run capability
+- [x] Tool categories/filtering (search, ingest, maintenance, etc.)
+---
+### CRR-904: Register Core Tools
+**Estimate**: 2 points
+**Labels**: dashboard, tools
+**Blocked by**: CRR-901, CRR-902
+**Status**: ✅ COMPLETED
+Register existing RAG tools with the dashboard registry.
+**File**: `src/dashboard/coreTools.ts`
+**Acceptance Criteria**:
+- [x] `search` - Search knowledge base with query
+- [x] `ingest_document` - Ingest text/URL into RAG
+- [x] `crawl_and_ingest` - Crawl website and ingest
+- [x] `chat_ingest` - Ingest Cursor chat history
+- [x] `chat_list` - List available conversations
+- [x] `memory_stats` - Show memory statistics
+- [x] `list_sources` - List ingested sources
+- [x] All tools have proper parameter validation
+---
+## Epic: Phase 10 - Cursor Rules Optimizer
+*Intelligent cleanup and optimization of Cursor rules and AGENTS.md files*
+### CRR-1000: Implement LLM Provider System (Strategy Pattern) ✅
+**Estimate**: 5 points
+**Labels**: llm, infrastructure, core
+**Priority**: HIGH - Required by CRR-1005 and other LLM-dependent features
+**Status**: COMPLETED
+Create a flexible LLM provider system using the strategy pattern that supports multiple backends.
+**Files**: `src/types/llmProvider.ts`, `src/adapters/llm/index.ts`, `src/adapters/llm/*.ts`
+**Acceptance Criteria**:
+- [x] LLMProvider interface with chat/complete methods
+- [x] LLMProviderConfig type with provider-specific options
+- [x] LLMResponse type with content, usage stats, model info
+**Provider Implementations**:
+- [x] **CursorProvider**: Placeholder (falls back to Ollama)
+- [x] **OpenAIProvider**: OpenAI API (GPT-4o, GPT-4o-mini, o1, etc.)
+- [x] **AnthropicProvider**: Claude API (claude-3.5-sonnet, opus, haiku)
+- [x] **DeepSeekProvider**: DeepSeek API (deepseek-chat, deepseek-coder)
+- [x] **GroqProvider**: Groq API (llama, mixtral models)
+- [x] **OllamaProvider**: Local Ollama models
+- [x] **OpenRouterProvider**: OpenRouter for unified API access
+**Configuration**:
+- [x] Environment variable support (OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.)
+- [x] Config file support (~/.cursor-rag/llm-config.json)
+- [x] Auto-detection: env vars → config file → fallback to Ollama
+**Features**:
+- [x] Retry logic with exponential backoff
+- [x] Rate limiting per provider
+- [x] Cost tracking per request
+- [x] Streaming support where available
+- [x] Model capability detection (context length, vision, JSON mode)
+- [x] Fallback chain (if primary fails, try next)
+---
+### CRR-1001: Define Rules Analysis Types ✅
+**Estimate**: 2 points
+**Labels**: rules, types
+**Status**: COMPLETED
+Define types for rule parsing, analysis, and optimization.
+**File**: `src/types/rulesOptimizer.ts`
+**Acceptance Criteria**:
+- [x] RuleFile interface (path, content, format: mdc/md/txt)
+- [x] ParsedRule interface (id, title, content, tags, dependencies)
+- [x] RuleCluster interface for grouping related rules
+- [x] OptimizationReport interface with before/after metrics
+- [x] DuplicateMatch interface with similarity score
+- [x] MergeCandidate interface for rule combinations
+---
+### CRR-1002: Implement Rules Parser ✅
+**Estimate**: 3 points
+**Labels**: rules, parser
+**Blocked by**: CRR-1001
+**Status**: COMPLETED
+Parse various rule file formats into structured representation.
+**File**: `src/services/rulesParser.ts`
+**Acceptance Criteria**:
+- [x] Parse `.mdc` files (Cursor rules format)
+- [x] Parse `AGENTS.md` files
+- [x] Parse `.cursorrules` legacy format
+- [x] Extract rule metadata (globs, descriptions, always-apply flags)
+- [x] Handle nested rule structures
+- [x] Preserve original formatting for unmodified rules
+---
+### CRR-1003: Implement Rules Analyzer ✅
+**Estimate**: 4 points
+**Labels**: rules, analysis
+**Blocked by**: CRR-1002
+**Status**: COMPLETED
+Detect duplicate and near-duplicate rules using semantic similarity.
+**File**: `src/services/rulesAnalyzer.ts`
+**Acceptance Criteria**:
+- [x] Exact duplicate detection (content hash)
+- [x] Semantic similarity using embeddings (configurable threshold)
+- [x] Detect rules that are subsets of others
+- [x] Identify contradicting rules
+- [x] Group related rules by topic/technology
+- [x] Find outdated rules
+- [x] Generate optimization report
+---
+### CRR-1004: Implement Rules Optimizer CLI ✅
+**Estimate**: 3 points
+**Labels**: rules, cli
+**Blocked by**: CRR-1003
+**Status**: COMPLETED
+CLI commands for rules optimization.
+**File**: `src/cli/commands/rules.ts`
+**Acceptance Criteria**:
+- [x] `cursor-rag rules analyze <folder>` - Analyze rules without changes
+- [x] `cursor-rag rules list <folder>` - List all rules
+- [x] `cursor-rag rules duplicates <folder>` - Show duplicates only
+- [x] `cursor-rag rules conflicts <folder>` - Show conflicts only
+- [x] `cursor-rag rules outdated <folder>` - Show outdated rules
+- [x] `cursor-rag rules optimize <folder>` - Run full optimization
+- [x] `--dry-run` flag for preview mode
+- [x] `--json` flag for JSON output
+- [x] Progress display and summary statistics
+---
+### CRR-1005: Implement Rules Merger ✅
+**Estimate**: 5 points
+**Labels**: rules, llm
+**Blocked by**: CRR-1000, CRR-1003
+**Status**: COMPLETED
+Use LLM to intelligently merge and consolidate related rules.
+**File**: `src/services/rulesMerger.ts`
+**Acceptance Criteria**:
+- [x] Uses LLMProvider system (CRR-1000) for AI operations
+- [x] Merge duplicate rules preserving all unique information
+- [x] Combine related rules into comprehensive single rules
+- [x] Rewrite verbose rules to be more concise
+- [x] Preserve critical details while reducing token count
+- [x] Maintain rule intent and effectiveness
+- [x] Support dry-run mode with preview
+- [x] Configurable aggressiveness (conservative/balanced/aggressive)
+**CLI Commands Added**:
+- `cursor-rag rules merge <folder>` - Merge duplicates using LLM
+- `cursor-rag rules rewrite <folder>` - Rewrite verbose rules to be more concise
+---
+### CRR-1006: Add Rules Optimizer to Dashboard ✅
+**Estimate**: 3 points
+**Labels**: rules, dashboard
+**Blocked by**: CRR-1003, CRR-903
+**Status**: COMPLETED
+Add rules optimization UI to dashboard tools.
+**Acceptance Criteria**:
+- [x] "Rules Optimizer" tool card in Tools section (purple gradient panel)
+- [x] Folder path input with validation (+ server-side folder browser)
+- [x] Analysis results display with duplicate highlighting
+- [x] One-click optimize with confirmation (dry run + apply modes)
+- [x] LLM provider configuration (via Settings tab, not dropdown)
+- [x] Natural language rules support (LLM-interpreted custom rules)
+- [x] In-app modal/toast system (replaced browser alerts)
+- [ ] Before/after comparison view (future enhancement)
+- [ ] Token count savings visualization (future enhancement)
+- [ ] Download optimized rules as zip (future enhancement)
+**Dashboard Features Added**:
+- Rules Optimizer panel in Tools tab with folder browser
+- Server-side folder navigation API (`/api/system/browse`)
+- Auto-optimize API (`/api/rules/auto-optimize`)
+- Works with or without LLM (pattern matching always available)
+- Automatic backup creation before applying changes
+---
+### CRR-1007: Implement Duplicate Detector (Merged into CRR-1003)
+**Estimate**: 4 points
+**Labels**: rules, analysis
+**Blocked by**: CRR-1002
+**Status**: MERGED into CRR-1003
+Detect duplicate and near-duplicate rules using semantic similarity.
+**File**: `src/services/rulesDuplicateDetector.ts`
+**Acceptance Criteria** (merged into rulesAnalyzer.ts):
+- [x] Exact duplicate detection (content hash)
+- [x] Semantic similarity using embeddings (configurable threshold)
+- [x] Detect rules that are subsets of others
+- [x] Identify contradicting rules
+- [x] Group related rules by topic/technology
+- [x] Generate similarity matrix for rule set
+---
+### CRR-1008: Add Configurable Rules Analyzer Settings ✅
+**Estimate**: 2 points
+**Labels**: rules, config, dashboard
+**Blocked by**: CRR-1003
+**Status**: COMPLETED
+Add user-configurable settings for the rules analyzer via config file and dashboard API.
+**Files**: `src/config/rulesConfig.ts`, `src/dashboard/server.ts`
+**Acceptance Criteria**:
+- [x] RulesAnalyzerConfig schema with Zod validation
+- [x] Analysis settings (duplicateThreshold, maxAgeDays, oldYearThreshold)
+- [x] Custom version check patterns (user-defined technology versions)
+- [x] Custom deprecation patterns (user-defined deprecated code detectors)
+- [x] Custom tag extraction patterns
+- [x] File include/exclude patterns
+- [x] Dashboard API: GET/PUT /api/rules/config
+- [x] Dashboard API: POST /api/rules/validate-pattern
+- [x] Dashboard API: POST /api/rules/test-pattern
+- [x] Dashboard API: CRUD for version-checks and deprecation-patterns
+- [x] Example templates for common technologies (disabled by default)
+---
+## Epic: Phase 11 - Comprehensive Test Suite
+*Unit, integration, and E2E tests for the entire system*
+### CRR-1101: Test Infrastructure Setup
+**Estimate**: 3 points
+**Labels**: testing, infrastructure
+Set up testing framework, configuration, and CI integration.
+**Files**: `vitest.config.ts`, `package.json`, `tests/setup.ts`
+**Acceptance Criteria**:
+- [ ] Vitest configured with TypeScript support
+- [ ] Test scripts in package.json (`test`, `test:unit`, `test:integration`, `test:e2e`, `test:coverage`)
+- [ ] Coverage thresholds configured (minimum 70%)
+- [ ] Test setup file with common mocks and utilities
+- [ ] SQLite in-memory database for test isolation
+- [ ] Mock embeddings adapter for fast tests
+- [ ] GitHub Actions CI workflow for automated testing
+---
+### CRR-1102: Unit Tests - Core Types & Utilities
+**Estimate**: 2 points
+**Labels**: testing, unit
+**Blocked by**: CRR-1101
+Test type guards, enums, and utility functions.
+**Files**: `tests/unit/types/*.test.ts`, `tests/unit/utils/*.test.ts`
+**Acceptance Criteria**:
+- [ ] `memory.ts` types and enums tested
+- [ ] `relationships.ts` helper functions tested (isBidirectional, getReverseType, getRelationshipsByCategory)
+- [ ] `extractedKnowledge.ts` type validation tested
+- [ ] Chunker utility functions tested
+- [ ] Config parsing and validation tested
+---
+### CRR-1103: Unit Tests - Services (Phase 1-2)
+**Estimate**: 4 points
+**Labels**: testing, unit
+**Blocked by**: CRR-1101
+Test foundation and chat history services.
+**Files**: `tests/unit/services/*.test.ts`
+**Acceptance Criteria**:
+- [ ] `MemoryMetadataStore` CRUD operations tested
+- [ ] `MemoryMetadataStore` relationship operations tested
+- [ ] `MemoryMetadataStore` category operations tested
+- [ ] `DecayCalculator` scoring logic tested
+- [ ] `DecayCalculator` edge cases (new chunks, old chunks, high access)
+- [ ] `EnhancedVectorStore` wrapper tested with mock vector store
+- [ ] `CursorChatReader` path detection tested (mock filesystem)
+- [ ] `ConversationProcessor` chunking and entity extraction tested
+---
+### CRR-1104: Unit Tests - Services (Phase 3-4)
+**Estimate**: 4 points
+**Labels**: testing, unit
+**Blocked by**: CRR-1101
+Test knowledge extraction and relationship graph services.
+**Files**: `tests/unit/services/*.test.ts`
+**Acceptance Criteria**:
+- [ ] `KnowledgeExtractor` heuristic extraction tested
+- [ ] `KnowledgeExtractor` LLM extraction tested (mocked LLM)
+- [ ] `KnowledgeExtractor` confidence filtering tested
+- [ ] `KnowledgeStorage` storage operations tested
+- [ ] `KnowledgeStorage` relationship creation tested
+- [ ] `RelationshipGraph` traversal tested (depth limits, type filtering)
+- [ ] `RelationshipGraph` bidirectional relationships tested
+- [ ] `RelationshipGraph` contradiction detection tested
+- [ ] `RelationshipGraph` cluster finding tested
+---
+### CRR-1105: Unit Tests - Adapters
+**Estimate**: 3 points
+**Labels**: testing, unit
+**Blocked by**: CRR-1101
+Test vector store and embedding adapters.
+**Files**: `tests/unit/adapters/*.test.ts`
+**Acceptance Criteria**:
+- [ ] Memory vector store tested (add, search, delete)
+- [ ] Xenova embeddings tested (mocked transformer)
+- [ ] OpenAI embeddings tested (mocked API)
+- [ ] Ollama embeddings tested (mocked API)
+- [ ] Adapter factory functions tested
+- [ ] Error handling for adapter failures tested
+---
+### CRR-1106: Integration Tests - Vector Store + Metadata
+**Estimate**: 4 points
+**Labels**: testing, integration
+**Blocked by**: CRR-1103, CRR-1105
+Test interactions between vector store and metadata store.
+**Files**: `tests/integration/vector-metadata.test.ts`
+**Acceptance Criteria**:
+- [ ] EnhancedVectorStore upsert stores in both stores
+- [ ] Search results enriched with metadata correctly
+- [ ] Access recording updates decay scores
+- [ ] Re-ranking with decay scores produces different order
+- [ ] Archived chunks filtered from search results
+- [ ] Relationship data included in search results
+---
+### CRR-1107: Integration Tests - Knowledge Pipeline
+**Estimate**: 4 points
+**Labels**: testing, integration
+**Blocked by**: CRR-1104
+Test the full knowledge extraction and storage pipeline.
+**Files**: `tests/integration/knowledge-pipeline.test.ts`
+**Acceptance Criteria**:
+- [ ] Conversation → extraction → storage flow tested
+- [ ] Solutions stored with correct chunk types
+- [ ] Patterns stored with implementation examples
+- [ ] Decisions stored with reasoning
+- [ ] Relationships created between related knowledge
+- [ ] Duplicate conversations not re-processed
+- [ ] Entity extraction from conversations tested
+---
+### CRR-1108: Integration Tests - CLI Commands
+**Estimate**: 3 points
+**Labels**: testing, integration
+**Blocked by**: CRR-1103, CRR-1104
+Test CLI commands end-to-end.
+**Files**: `tests/integration/cli/*.test.ts`
+**Acceptance Criteria**:
+- [ ] `cursor-rag ingest` command tested
+- [ ] `cursor-rag search` command tested
+- [ ] `cursor-rag status` command tested
+- [ ] `cursor-rag chat list` command tested
+- [ ] `cursor-rag chat ingest` command tested
+- [ ] `cursor-rag chat stats` command tested
+- [ ] Error handling for invalid inputs tested
+- [ ] Help output validated
+---
+### CRR-1109: Integration Tests - MCP Server & Tools
+**Estimate**: 4 points
+**Labels**: testing, integration
+**Blocked by**: CRR-1106
+Test MCP server protocol and tool execution.
+**Files**: `tests/integration/mcp/*.test.ts`
+**Acceptance Criteria**:
+- [ ] MCP server initializes correctly
+- [ ] Tool listing returns all available tools
+- [ ] `search` tool returns relevant results
+- [ ] `ingest` tool processes documents
+- [ ] `crawl` tool handles URLs
+- [ ] `recursive_query` tool performs multi-hop retrieval
+- [ ] `list_sources` tool returns ingested sources
+- [ ] Error responses follow MCP protocol
+---
+### CRR-1110: E2E Tests - Dashboard UI
+**Estimate**: 5 points
+**Labels**: testing, e2e
+**Blocked by**: CRR-1101
+Test dashboard web interface with Playwright.
+**Files**: `tests/e2e/dashboard/*.test.ts`
+**Acceptance Criteria**:
+- [ ] Dashboard loads without errors
+- [ ] Search form submits and displays results
+- [ ] Activity log displays recent operations
+- [ ] Statistics cards show correct data
+- [ ] Sources list displays ingested documents
+- [ ] Navigation between tabs works
+- [ ] Dark/light mode toggle works (if implemented)
+- [ ] Responsive layout on mobile viewport
+- [ ] Error states displayed correctly
+---
+### CRR-1111: E2E Tests - Full User Flows
+**Estimate**: 5 points
+**Labels**: testing, e2e
+**Blocked by**: CRR-1108, CRR-1109, CRR-1110
+Test complete user workflows from ingestion to retrieval.
+**Files**: `tests/e2e/flows/*.test.ts`
+**Acceptance Criteria**:
+- [ ] Flow: Ingest URL → Search → View results
+- [ ] Flow: Ingest file → Search → Verify content
+- [ ] Flow: Chat ingest → Search past solutions
+- [ ] Flow: MCP search from simulated Cursor request
+- [ ] Flow: Dashboard search → Click result → View details
+- [ ] Performance: Search returns in <500ms for 1000 chunks
+- [ ] Performance: Ingest 100 documents in <30s
+---
+### CRR-1112: Test Fixtures & Factories
+**Estimate**: 2 points
+**Labels**: testing, infrastructure
+**Blocked by**: CRR-1101
+Create reusable test fixtures and data factories.
+**Files**: `tests/fixtures/*.ts`, `tests/factories/*.ts`
+**Acceptance Criteria**:
+- [ ] Sample conversations fixture (various formats)
+- [ ] Sample documents fixture (markdown, code, mixed)
+- [ ] EnhancedChunk factory with sensible defaults
+- [ ] Conversation factory with customizable messages
+- [ ] Relationship factory for graph tests
+- [ ] Category factory for hierarchy tests
+- [ ] Mock vector store with predictable search results
+- [ ] Mock LLM with configurable responses
+---
+### CRR-1113: Test Documentation & Coverage Report
+**Estimate**: 1 point
+**Labels**: testing, documentation
+**Blocked by**: CRR-1102 through CRR-1112
+Document testing strategy and generate coverage reports.
+**Files**: `docs/TESTING.md`, `coverage/`
+**Acceptance Criteria**:
+- [ ] TESTING.md with testing strategy overview
+- [ ] Instructions for running different test suites
+- [ ] Coverage report generation configured
+- [ ] Coverage badges in README
+- [ ] Test naming conventions documented
+- [ ] Mock usage guidelines documented
+---
+## Epic: Phase 12 - PageIndex Integration (Vectorless RAG)
+*Complementary retrieval using hierarchical tree indexes based on [PageIndex](https://github.com/VectifyAI/PageIndex)*
+**Key Insight**: Vector RAG and PageIndex solve retrieval differently and work together:
+- **Vector RAG excels at**: Semantic similarity, cross-document search, finding related concepts
+- **PageIndex excels at**: Structured documents (PDFs, reports), preserving document hierarchy, explainable retrieval with page references
+### CRR-1201: Define PageIndex Types
+**Estimate**: 2 points
+**Labels**: pageindex, types
+Define TypeScript types for PageIndex tree structures and operations.
+**File**: `src/adapters/pageindex/types.ts`
+**Acceptance Criteria**:
+- [ ] TreeNode interface (title, node_id, start_index, end_index, summary, nodes)
+- [ ] TreeIndex interface (document metadata + root nodes)
+- [ ] PageIndexResult interface (node IDs, content, page references)
+- [ ] PageIndexConfig interface (model, maxPagesPerNode, maxTokensPerNode)
+- [ ] PageIndexAdapter interface (buildIndex, search, getNodeContent)
+---
+### CRR-1202: Implement Tree Builder Adapter
+**Estimate**: 4 points
+**Labels**: pageindex, adapter
+**Blocked by**: CRR-1201, CRR-1000
+Create adapter that wraps PageIndex Python via child_process.
+**File**: `src/adapters/pageindex/tree-builder.ts`
+**Acceptance Criteria**:
+- [ ] Spawns Python process with pageindex package
+- [ ] Passes PDF/markdown path and config options
+- [ ] Parses JSON tree output
+- [ ] Handles Python errors gracefully
+- [ ] Caches built trees to avoid re-processing
+- [ ] Supports both PDF and markdown input
+---
+### CRR-1203: Implement Tree Storage
+**Estimate**: 2 points
+**Labels**: pageindex, storage
+**Blocked by**: CRR-1201
+Store and retrieve PageIndex tree structures.
+**File**: `src/adapters/pageindex/tree-store.ts`
+**Acceptance Criteria**:
+- [ ] Save tree JSON to ~/.cursor-rag/pageindex/
+- [ ] Retrieve tree by document ID/path
+- [ ] List all indexed documents with tree metadata
+- [ ] Delete tree when source document removed
+- [ ] Verify tree freshness against source modification time
+---
+### CRR-1204: Implement Tree Searcher
+**Estimate**: 4 points
+**Labels**: pageindex, retrieval
+**Blocked by**: CRR-1201, CRR-1203, CRR-1000
+LLM-based tree traversal for retrieval.
+**File**: `src/adapters/pageindex/tree-searcher.ts`
+**Acceptance Criteria**:
+- [ ] Uses LLM to navigate tree hierarchy
+- [ ] Returns relevant node IDs with reasoning
+- [ ] Fetches full content for selected nodes
+- [ ] Includes page references in results
+- [ ] Supports multi-step tree navigation
+- [ ] Tracks reasoning chain for explainability
+---
+### CRR-1205: Implement Hybrid Search Merger
+**Estimate**: 3 points
+**Labels**: pageindex, retrieval
+**Blocked by**: CRR-1204
+Merge results from vector search and PageIndex.
+**File**: `src/services/hybridSearchMerger.ts`
+**Acceptance Criteria**:
+- [ ] Run vector and PageIndex searches in parallel
+- [ ] Configurable weighting between sources
+- [ ] Deduplicate overlapping results
+- [ ] Preserve source attribution (vector vs PageIndex)
+- [ ] Include page references for PageIndex results
+- [ ] Score normalization across retrieval methods
+---
+### CRR-1206: Add PageIndex MCP Tools
+**Estimate**: 3 points
+**Labels**: pageindex, mcp
+**Blocked by**: CRR-1202, CRR-1204, CRR-1205
+New MCP tools for PageIndex operations.
+**File**: `src/server/tools/pageindex.ts`
+**Acceptance Criteria**:
+- [ ] `pageindex_ingest` - Build tree for PDF/markdown
+- [ ] `pageindex_search` - Query using tree navigation
+- [ ] `pageindex_list` - List indexed documents with tree info
+- [ ] `hybrid_search` - Combined vector + PageIndex search
+- [ ] All tools have proper input validation
+- [ ] Results include source and page references
+---
+### CRR-1207: Add PageIndex CLI Commands
+**Estimate**: 2 points
+**Labels**: pageindex, cli
+**Blocked by**: CRR-1202, CRR-1203, CRR-1204
+CLI commands for PageIndex management.
+**File**: `src/cli/commands/pageindex.ts`
+**Acceptance Criteria**:
+- [ ] `cursor-rag pageindex build <path>` - Build tree index
+- [ ] `cursor-rag pageindex list` - List indexed documents
+- [ ] `cursor-rag pageindex search <query>` - Search with tree navigation
+- [ ] `cursor-rag pageindex info <document>` - Show tree structure
+- [ ] `cursor-rag pageindex remove <document>` - Remove tree index
+- [ ] Progress display during tree building
+---
+### CRR-1208: Dashboard PageIndex Integration
+**Estimate**: 3 points
+**Labels**: pageindex, dashboard
+**Blocked by**: CRR-1206, CRR-903
+Add PageIndex features to web dashboard.
+**Files**: `src/dashboard/public/index.html`, `src/dashboard/server.ts`
+**Acceptance Criteria**:
+- [ ] PageIndex tab showing indexed documents
+- [ ] Tree structure visualization
+- [ ] Search mode toggle (Vector / PageIndex / Hybrid)
+- [ ] Results show source type and page references
+- [ ] Build tree from uploaded PDF
+- [ ] Tree node expansion/collapse
+---
+### CRR-1209: Auto-Detection and Routing
+**Estimate**: 2 points
+**Labels**: pageindex, ingest
+**Blocked by**: CRR-1202, CRR-1205
+Automatically route documents to appropriate pipeline.
+**File**: `src/server/tools/ingest.ts` (modification)
+**Acceptance Criteria**:
+- [ ] Detect document type (PDF, markdown, text, URL)
+- [ ] PDFs automatically get PageIndex tree + vector embeddings
+- [ ] Structured markdown gets PageIndex tree + vector embeddings
+- [ ] Plain text/URLs only use vector pipeline
+- [ ] Configuration to override auto-detection
+- [ ] Progress reporting for dual-pipeline ingestion
+---
+## Summary
+| Epic | Tasks | Total Points |
+|------|-------|--------------|
+| Phase 1: Foundation | 4 | 9 |
+| Phase 2: Chat History | 3 | 7 |
+| Phase 3: Knowledge Extraction | 3 | 9 |
+| Phase 4: Relationship Graph | 2 | 5 |
+| Phase 5: Hierarchical Memory | 2 | 6 |
+| Phase 6: Maintenance | 2 | 6 |
+| Phase 7: Enhanced Retrieval | 3 | 8 |
+| Phase 8: RLM Recursive Retrieval | 4 | 16 |
+| Phase 9: Dashboard Tools UI | 4 | 11 |
+| Phase 10: Rules Optimizer | 9 | 30 |
+| Phase 11: Test Suite | 13 | 44 |
+| Phase 12: PageIndex Integration | 9 | 25 |
+| **Total** | **58** | **176** |
+---
+## Suggested Sprint Planning
+### Sprint 1 (Week 1-2): Foundation + Chat History
+- CRR-101, CRR-102, CRR-103, CRR-104
+- CRR-201, CRR-202, CRR-203
+- **Points**: 16
+### Sprint 2 (Week 3-4): Knowledge Extraction + Graph
+- CRR-301, CRR-302, CRR-303
+- CRR-401, CRR-402
+- **Points**: 14
+### Sprint 3 (Week 5-6): Categories + Maintenance + Retrieval
+- CRR-501, CRR-502
+- CRR-601, CRR-602
+- CRR-701, CRR-702, CRR-703
+- **Points**: 20
+### Sprint 4 (Week 7-8): RLM Recursive Retrieval
+- CRR-801, CRR-802, CRR-803, CRR-804
+- **Points**: 16
+### Sprint 5 (Week 9-10): Dashboard Tools + LLM Provider + Rules Start
+- CRR-901, CRR-902, CRR-903, CRR-904
+- CRR-1000 (LLM Provider System - enables Phase 10 LLM features)
+- CRR-1001, CRR-1002
+- **Points**: 23
+### Sprint 6 (Week 11-12): Rules Optimizer Completion
+- CRR-1003, CRR-1004, CRR-1005, CRR-1006, CRR-1007
+- **Points**: 19
+**Total estimated time: 11-12 weeks**
+---
+## Dependencies Graph
+```
+CRR-101 ──┬── CRR-102 ──┬── CRR-103 ──┬── CRR-104
+          │             │             │
+          │             └── CRR-402 ──┘
+          │
+          └── CRR-301 ──── CRR-302 ──── CRR-303 ──┐
+                                                   │
+CRR-201 ──── CRR-202 ──── CRR-203                 │
+                                                   │
+CRR-401 ──── CRR-402 ─────────────────────────────┤
+                                                   │
+CRR-501 ──── CRR-502 ─────────────────────────────┤
+                                                   │
+CRR-601 ──── CRR-602                              │
+                                                   │
+          ┌───────────────────────────────────────┘
+          │
+CRR-701 ──┼── CRR-702
+          │
+CRR-703 ──┴── CRR-801 ──┬── CRR-802 ──┬── CRR-803
+                        │             │
+                        └─────────────┴── CRR-804
+Phase 9: Dashboard Tools (can run in parallel)
+CRR-901 ──── CRR-902 ──── CRR-903
+                    └──── CRR-904
+Phase 10: Rules Optimizer (can run in parallel)
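+CRR-1000 (LLM Provider) ──┐
+                          ├──── CRR-1005 (Rules Merger - needs LLM)
+CRR-1001 ──── CRR-1002 ──── CRR-1003 ──┘
+                                  ├──── CRR-1004 (Rules Optimizer CLI)
+                                  ├──── CRR-1006 (Dashboard - requires CRR-903)
+                                  └──── CRR-1008 (Analyzer Settings)
+(CRR-1007 Duplicate Detector was merged into CRR-1003)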
+LLM Provider Priority Order (CRR-1000):
+1. Cursor AI (via MCP) → if running in Cursor IDE
+2. Environment vars → OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.
+3. Config file → ~/.cursor-rag/llm-config.json
+4. Ollama → local fallback (free, no API key)
+Phase 11: Test Suite (can run in parallel, tests existing features)
+CRR-1101 (Infrastructure) ──┬── CRR-1102 (Types/Utils)
+                            ├── CRR-1103 (Services 1-2) ──┬── CRR-1106 (Integration: Vector+Meta)
+                            ├── CRR-1104 (Services 3-4) ──┴── CRR-1107 (Integration: Knowledge)
+                            ├── CRR-1105 (Adapters) ──────┘
+                            ├── CRR-1108 (Integration: CLI)
+                            ├── CRR-1109 (Integration: MCP)
+                            ├── CRR-1110 (E2E: Dashboard)
+                            └── CRR-1112 (Fixtures)
+                                    │
+CRR-1106 + CRR-1107 + CRR-1108 + CRR-1109 + CRR-1110 ──── CRR-1111 (E2E: Flows)
+                                    │
+All tests (CRR-1102 through CRR-1112) ──── CRR-1113 (Documentation)
+Phase 12: PageIndex Integration (Vectorless RAG complement)
+CRR-1201 (Types) ──── CRR-1202 (Tree Builder) ──── CRR-1203 (Tree Storage)
+                                │
+                                └── CRR-1204 (Tree Searcher) ──── CRR-1205 (Hybrid Merger)
+                                                                        │
+                                                                        └── CRR-1206 (MCP Tools)
+                                                                                │
+CRR-1000 (LLM Provider) ──────────────────────────────────────────────────────────┘
+                                                                                │
+                                                                CRR-1207 (CLI Commands)
+                                                                        │
+                                                        CRR-1208 (Dashboard Integration)
+                                                                        │
+                                                                CRR-1209 (Auto-Detection)
+Notes:
+- Phase 8 (RLM) depends on CRR-701 (Hybrid Scorer)
+- Phase 9 (Dashboard Tools) is independent, can start anytime
+- Phase 10 (Rules Optimizer) is independent, can start anytime
+- Phase 11 (Test Suite) is independent, can start anytime - tests existing code
+- Phase 12 (PageIndex) is independent, can start anytime - complements vector RAG
+- CRR-1000 (LLM Provider) enables all LLM-dependent features across the system
+- CRR-1005 depends on CRR-1000 + CRR-1003 for LLM-powered merging
+- CRR-1006 depends on CRR-903 (Tools UI Panel) for dashboard integration
+- CRR-1101 (Test Infrastructure) should be done first in Phase 11
+- CRR-1202 and CRR-1204 depend on CRR-1000 (LLM Provider) for tree building and tree traversal; CRR-1206 inherits that dependency through them
+- CRR-1208 depends on CRR-903 (Dashboard Tools UI) for integration
+```