cursor-recursive-rag 0.2.0-alpha.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210)
  1. package/README.md +179 -203
  2. package/dist/adapters/llm/anthropic.d.ts +27 -0
  3. package/dist/adapters/llm/anthropic.d.ts.map +1 -0
  4. package/dist/adapters/llm/anthropic.js +287 -0
  5. package/dist/adapters/llm/anthropic.js.map +1 -0
  6. package/dist/adapters/llm/base.d.ts +62 -0
  7. package/dist/adapters/llm/base.d.ts.map +1 -0
  8. package/dist/adapters/llm/base.js +140 -0
  9. package/dist/adapters/llm/base.js.map +1 -0
  10. package/dist/adapters/llm/deepseek.d.ts +24 -0
  11. package/dist/adapters/llm/deepseek.d.ts.map +1 -0
  12. package/dist/adapters/llm/deepseek.js +228 -0
  13. package/dist/adapters/llm/deepseek.js.map +1 -0
  14. package/dist/adapters/llm/groq.d.ts +25 -0
  15. package/dist/adapters/llm/groq.d.ts.map +1 -0
  16. package/dist/adapters/llm/groq.js +265 -0
  17. package/dist/adapters/llm/groq.js.map +1 -0
  18. package/dist/adapters/llm/index.d.ts +62 -0
  19. package/dist/adapters/llm/index.d.ts.map +1 -0
  20. package/dist/adapters/llm/index.js +380 -0
  21. package/dist/adapters/llm/index.js.map +1 -0
  22. package/dist/adapters/llm/ollama.d.ts +23 -0
  23. package/dist/adapters/llm/ollama.d.ts.map +1 -0
  24. package/dist/adapters/llm/ollama.js +261 -0
  25. package/dist/adapters/llm/ollama.js.map +1 -0
  26. package/dist/adapters/llm/openai.d.ts +22 -0
  27. package/dist/adapters/llm/openai.d.ts.map +1 -0
  28. package/dist/adapters/llm/openai.js +232 -0
  29. package/dist/adapters/llm/openai.js.map +1 -0
  30. package/dist/adapters/llm/openrouter.d.ts +27 -0
  31. package/dist/adapters/llm/openrouter.d.ts.map +1 -0
  32. package/dist/adapters/llm/openrouter.js +305 -0
  33. package/dist/adapters/llm/openrouter.js.map +1 -0
  34. package/dist/adapters/vector/index.d.ts.map +1 -1
  35. package/dist/adapters/vector/index.js +8 -0
  36. package/dist/adapters/vector/index.js.map +1 -1
  37. package/dist/adapters/vector/redis-native.d.ts +35 -0
  38. package/dist/adapters/vector/redis-native.d.ts.map +1 -0
  39. package/dist/adapters/vector/redis-native.js +170 -0
  40. package/dist/adapters/vector/redis-native.js.map +1 -0
  41. package/dist/cli/commands/chat.d.ts +4 -0
  42. package/dist/cli/commands/chat.d.ts.map +1 -0
  43. package/dist/cli/commands/chat.js +374 -0
  44. package/dist/cli/commands/chat.js.map +1 -0
  45. package/dist/cli/commands/maintenance.d.ts +4 -0
  46. package/dist/cli/commands/maintenance.d.ts.map +1 -0
  47. package/dist/cli/commands/maintenance.js +237 -0
  48. package/dist/cli/commands/maintenance.js.map +1 -0
  49. package/dist/cli/commands/rules.d.ts +9 -0
  50. package/dist/cli/commands/rules.d.ts.map +1 -0
  51. package/dist/cli/commands/rules.js +639 -0
  52. package/dist/cli/commands/rules.js.map +1 -0
  53. package/dist/cli/commands/setup.js +5 -4
  54. package/dist/cli/commands/setup.js.map +1 -1
  55. package/dist/cli/index.js +6 -0
  56. package/dist/cli/index.js.map +1 -1
  57. package/dist/config/memoryConfig.d.ts +427 -0
  58. package/dist/config/memoryConfig.d.ts.map +1 -0
  59. package/dist/config/memoryConfig.js +258 -0
  60. package/dist/config/memoryConfig.js.map +1 -0
  61. package/dist/config/rulesConfig.d.ts +486 -0
  62. package/dist/config/rulesConfig.d.ts.map +1 -0
  63. package/dist/config/rulesConfig.js +345 -0
  64. package/dist/config/rulesConfig.js.map +1 -0
  65. package/dist/dashboard/coreTools.d.ts +14 -0
  66. package/dist/dashboard/coreTools.d.ts.map +1 -0
  67. package/dist/dashboard/coreTools.js +413 -0
  68. package/dist/dashboard/coreTools.js.map +1 -0
  69. package/dist/dashboard/public/index.html +1982 -13
  70. package/dist/dashboard/server.d.ts +1 -8
  71. package/dist/dashboard/server.d.ts.map +1 -1
  72. package/dist/dashboard/server.js +846 -13
  73. package/dist/dashboard/server.js.map +1 -1
  74. package/dist/dashboard/toolRegistry.d.ts +192 -0
  75. package/dist/dashboard/toolRegistry.d.ts.map +1 -0
  76. package/dist/dashboard/toolRegistry.js +322 -0
  77. package/dist/dashboard/toolRegistry.js.map +1 -0
  78. package/dist/proxy/index.d.ts +1 -1
  79. package/dist/proxy/index.d.ts.map +1 -1
  80. package/dist/proxy/index.js +9 -6
  81. package/dist/proxy/index.js.map +1 -1
  82. package/dist/server/index.js +21 -0
  83. package/dist/server/index.js.map +1 -1
  84. package/dist/server/tools/crawl.d.ts.map +1 -1
  85. package/dist/server/tools/crawl.js +8 -0
  86. package/dist/server/tools/crawl.js.map +1 -1
  87. package/dist/server/tools/index.d.ts.map +1 -1
  88. package/dist/server/tools/index.js +19 -1
  89. package/dist/server/tools/index.js.map +1 -1
  90. package/dist/server/tools/ingest.d.ts.map +1 -1
  91. package/dist/server/tools/ingest.js +5 -0
  92. package/dist/server/tools/ingest.js.map +1 -1
  93. package/dist/server/tools/memory.d.ts +250 -0
  94. package/dist/server/tools/memory.d.ts.map +1 -0
  95. package/dist/server/tools/memory.js +472 -0
  96. package/dist/server/tools/memory.js.map +1 -0
  97. package/dist/server/tools/recursive-query.d.ts.map +1 -1
  98. package/dist/server/tools/recursive-query.js +6 -0
  99. package/dist/server/tools/recursive-query.js.map +1 -1
  100. package/dist/server/tools/search.d.ts.map +1 -1
  101. package/dist/server/tools/search.js +6 -0
  102. package/dist/server/tools/search.js.map +1 -1
  103. package/dist/services/activity-log.d.ts +10 -0
  104. package/dist/services/activity-log.d.ts.map +1 -0
  105. package/dist/services/activity-log.js +53 -0
  106. package/dist/services/activity-log.js.map +1 -0
  107. package/dist/services/categoryManager.d.ts +110 -0
  108. package/dist/services/categoryManager.d.ts.map +1 -0
  109. package/dist/services/categoryManager.js +549 -0
  110. package/dist/services/categoryManager.js.map +1 -0
  111. package/dist/services/contextEnvironment.d.ts +206 -0
  112. package/dist/services/contextEnvironment.d.ts.map +1 -0
  113. package/dist/services/contextEnvironment.js +481 -0
  114. package/dist/services/contextEnvironment.js.map +1 -0
  115. package/dist/services/conversationProcessor.d.ts +99 -0
  116. package/dist/services/conversationProcessor.d.ts.map +1 -0
  117. package/dist/services/conversationProcessor.js +311 -0
  118. package/dist/services/conversationProcessor.js.map +1 -0
  119. package/dist/services/cursorChatReader.d.ts +129 -0
  120. package/dist/services/cursorChatReader.d.ts.map +1 -0
  121. package/dist/services/cursorChatReader.js +419 -0
  122. package/dist/services/cursorChatReader.js.map +1 -0
  123. package/dist/services/decayCalculator.d.ts +85 -0
  124. package/dist/services/decayCalculator.d.ts.map +1 -0
  125. package/dist/services/decayCalculator.js +182 -0
  126. package/dist/services/decayCalculator.js.map +1 -0
  127. package/dist/services/enhancedVectorStore.d.ts +102 -0
  128. package/dist/services/enhancedVectorStore.d.ts.map +1 -0
  129. package/dist/services/enhancedVectorStore.js +245 -0
  130. package/dist/services/enhancedVectorStore.js.map +1 -0
  131. package/dist/services/hybridScorer.d.ts +120 -0
  132. package/dist/services/hybridScorer.d.ts.map +1 -0
  133. package/dist/services/hybridScorer.js +334 -0
  134. package/dist/services/hybridScorer.js.map +1 -0
  135. package/dist/services/knowledgeExtractor.d.ts +45 -0
  136. package/dist/services/knowledgeExtractor.d.ts.map +1 -0
  137. package/dist/services/knowledgeExtractor.js +436 -0
  138. package/dist/services/knowledgeExtractor.js.map +1 -0
  139. package/dist/services/knowledgeStorage.d.ts +102 -0
  140. package/dist/services/knowledgeStorage.d.ts.map +1 -0
  141. package/dist/services/knowledgeStorage.js +383 -0
  142. package/dist/services/knowledgeStorage.js.map +1 -0
  143. package/dist/services/maintenanceScheduler.d.ts +89 -0
  144. package/dist/services/maintenanceScheduler.d.ts.map +1 -0
  145. package/dist/services/maintenanceScheduler.js +479 -0
  146. package/dist/services/maintenanceScheduler.js.map +1 -0
  147. package/dist/services/memoryMetadataStore.d.ts +62 -0
  148. package/dist/services/memoryMetadataStore.d.ts.map +1 -0
  149. package/dist/services/memoryMetadataStore.js +570 -0
  150. package/dist/services/memoryMetadataStore.js.map +1 -0
  151. package/dist/services/recursiveRetrieval.d.ts +122 -0
  152. package/dist/services/recursiveRetrieval.d.ts.map +1 -0
  153. package/dist/services/recursiveRetrieval.js +443 -0
  154. package/dist/services/recursiveRetrieval.js.map +1 -0
  155. package/dist/services/relationshipGraph.d.ts +77 -0
  156. package/dist/services/relationshipGraph.d.ts.map +1 -0
  157. package/dist/services/relationshipGraph.js +411 -0
  158. package/dist/services/relationshipGraph.js.map +1 -0
  159. package/dist/services/rlmSafeguards.d.ts +273 -0
  160. package/dist/services/rlmSafeguards.d.ts.map +1 -0
  161. package/dist/services/rlmSafeguards.js +705 -0
  162. package/dist/services/rlmSafeguards.js.map +1 -0
  163. package/dist/services/rulesAnalyzer.d.ts +119 -0
  164. package/dist/services/rulesAnalyzer.d.ts.map +1 -0
  165. package/dist/services/rulesAnalyzer.js +768 -0
  166. package/dist/services/rulesAnalyzer.js.map +1 -0
  167. package/dist/services/rulesMerger.d.ts +75 -0
  168. package/dist/services/rulesMerger.d.ts.map +1 -0
  169. package/dist/services/rulesMerger.js +404 -0
  170. package/dist/services/rulesMerger.js.map +1 -0
  171. package/dist/services/rulesParser.d.ts +127 -0
  172. package/dist/services/rulesParser.d.ts.map +1 -0
  173. package/dist/services/rulesParser.js +594 -0
  174. package/dist/services/rulesParser.js.map +1 -0
  175. package/dist/services/smartChunker.d.ts +110 -0
  176. package/dist/services/smartChunker.d.ts.map +1 -0
  177. package/dist/services/smartChunker.js +520 -0
  178. package/dist/services/smartChunker.js.map +1 -0
  179. package/dist/types/categories.d.ts +105 -0
  180. package/dist/types/categories.d.ts.map +1 -0
  181. package/dist/types/categories.js +108 -0
  182. package/dist/types/categories.js.map +1 -0
  183. package/dist/types/extractedKnowledge.d.ts +233 -0
  184. package/dist/types/extractedKnowledge.d.ts.map +1 -0
  185. package/dist/types/extractedKnowledge.js +56 -0
  186. package/dist/types/extractedKnowledge.js.map +1 -0
  187. package/dist/types/index.d.ts +9 -2
  188. package/dist/types/index.d.ts.map +1 -1
  189. package/dist/types/index.js +12 -1
  190. package/dist/types/index.js.map +1 -1
  191. package/dist/types/llmProvider.d.ts +282 -0
  192. package/dist/types/llmProvider.d.ts.map +1 -0
  193. package/dist/types/llmProvider.js +48 -0
  194. package/dist/types/llmProvider.js.map +1 -0
  195. package/dist/types/memory.d.ts +227 -0
  196. package/dist/types/memory.d.ts.map +1 -0
  197. package/dist/types/memory.js +76 -0
  198. package/dist/types/memory.js.map +1 -0
  199. package/dist/types/relationships.d.ts +167 -0
  200. package/dist/types/relationships.d.ts.map +1 -0
  201. package/dist/types/relationships.js +106 -0
  202. package/dist/types/relationships.js.map +1 -0
  203. package/dist/types/rulesOptimizer.d.ts +345 -0
  204. package/dist/types/rulesOptimizer.d.ts.map +1 -0
  205. package/dist/types/rulesOptimizer.js +22 -0
  206. package/dist/types/rulesOptimizer.js.map +1 -0
  207. package/docs/cursor-recursive-rag-memory-spec.md +4569 -0
  208. package/docs/cursor-recursive-rag-tasks.md +1355 -0
  209. package/package.json +6 -3
  210. package/restart-rag.sh +16 -0
@@ -0,0 +1,4569 @@
1
+ # cursor-recursive-rag: Advanced Memory System Specification
2
+
3
+ ## Executive Summary
4
+
5
+ This specification defines the architecture and implementation tasks for building an advanced memory system directly into cursor-recursive-rag. The goal is to transform the existing multi-hop RAG system into an intelligent, evolving knowledge base that:
6
+
7
+ 1. **Learns from Cursor chat history** - Extracts solutions, patterns, decisions, and standards from past development sessions
8
+ 2. **Implements temporal decay** - Prioritises recent, frequently-accessed knowledge while gracefully retiring stale content
9
+ 3. **Supports relationship graphs** - Enables multi-hop reasoning through typed relationships between knowledge entities
10
+ 4. **Provides hierarchical memory** - Organises knowledge into Resources → Items → Categories for efficient retrieval
11
+ 5. **Handles contradictions** - Detects and resolves conflicting information automatically
12
+
13
+ ---
14
+
15
+ ## Research Foundation: Recursive Language Models
16
+
17
+ This specification incorporates key insights from the Recursive Language Models (RLM) paper (Zhang et al., 2024), which demonstrates that LLMs can process inputs **two orders of magnitude beyond their context windows** by treating prompts as external environment objects rather than feeding them directly into the neural network.
18
+
19
+ ### Core RLM Insight
20
+
21
+ > "The key insight is that long prompts should not be fed into the neural network directly but should instead be treated as part of the environment that the LLM can symbolically interact with."
22
+
23
+ The RLM approach exposes the prompt as a variable in a REPL environment, allowing the model to:
24
+ - **Peek into** and **decompose** the prompt programmatically
25
+ - **Recursively invoke itself** over snippets
26
+ - **Build up answers** through variable storage
27
+ - **Filter context** using code (regex, keyword searches) based on model priors
28
+
29
+ ### Key Findings Relevant to This Project
30
+
31
+ | Finding | Implication for cursor-recursive-rag |
32
+ |---------|-------------------------------------|
33
+ | RLMs scale to 10M+ tokens | Our chat history + docs can exceed context windows |
34
+ | REPL environment enables long input handling | We should treat retrieved context as environment variables |
35
+ | Recursive sub-calling helps information-dense tasks | Multi-hop retrieval should support recursive decomposition |
36
+ | Performance degrades with complexity AND length | Need complexity-aware chunking strategies |
37
+ | Costs are high-variance due to trajectory length | Need early termination and cost budgets |
38
+
39
+ ### Negative Results to Avoid (From RLM Paper)
40
+
41
+ The paper's Appendix A provides critical anti-patterns we must avoid:
42
+
43
+ 1. **Same prompt across all models is problematic** - Different models need tuned prompts; Qwen3-Coder needed extra warnings about excessive sub-calls
44
+ 2. **Models without coding capabilities struggle** - Our recursive retrieval must work with non-coding models too
45
+ 3. **Thinking models run out of output tokens** - Budget output tokens carefully for reasoning models
46
+ 4. **Synchronous calls are slow** - Implement async sub-calls from the start
47
+ 5. **Final answer detection is brittle** - Need robust termination conditions, not just tag-based detection
48
+
49
+ ---
50
+
51
+ ## Table of Contents
52
+
53
+ 1. [Architecture Overview](#architecture-overview)
54
+ 2. [Phase 1: Foundation - Enhanced Schema & Temporal Decay](#phase-1-foundation)
55
+ 3. [Phase 2: Cursor Chat History Integration](#phase-2-chat-history)
56
+ 4. [Phase 3: Knowledge Extraction Pipeline](#phase-3-knowledge-extraction)
57
+ 5. [Phase 4: Relationship Graph](#phase-4-relationship-graph)
58
+ 6. [Phase 5: Hierarchical Memory (Categories/Summaries)](#phase-5-hierarchical-memory)
59
+ 7. [Phase 6: Background Maintenance Jobs](#phase-6-maintenance)
60
+ 8. [Phase 7: Enhanced Retrieval Scoring](#phase-7-retrieval)
61
+ 9. [Phase 8: RLM-Style Recursive Retrieval](#phase-8-rlm-retrieval)
62
+ 10. [Phase 12: PageIndex Integration (Vectorless RAG)](#phase-12-pageindex)
63
+ 11. [Database Schema](#database-schema)
64
+ 12. [MCP Tool Definitions](#mcp-tools)
65
+ 13. [Configuration Schema](#configuration)
66
+ 14. [Anti-Patterns and Negative Results](#anti-patterns)
67
+ 15. [Testing Strategy](#testing)
68
+
69
+ ---
70
+
71
+ ## Architecture Overview
72
+
73
+ ### Current State
74
+
75
+ ```
76
+ ┌─────────────────────────────────────────────────────────────┐
77
+ │ cursor-recursive-rag (current) │
78
+ ├─────────────────────────────────────────────────────────────┤
79
+ │ Ingestion: URLs, files, directories │
80
+ │ Storage: Vector store (Redis/Qdrant/Chroma/Cloudflare) │
81
+ │ Retrieval: Multi-hop with query decomposition │
82
+ │ Interface: MCP server for Cursor IDE │
83
+ └─────────────────────────────────────────────────────────────┘
84
+ ```
85
+
86
+ ### Target State
87
+
88
+ ```
89
+ ┌─────────────────────────────────────────────────────────────────────────────┐
90
+ │ cursor-recursive-rag (enhanced) │
91
+ ├─────────────────────────────────────────────────────────────────────────────┤
92
+ │ │
93
+ │ INGESTION LAYER │
94
+ │ ├── URLs/Files/Directories (existing) │
95
+ │ ├── Cursor Chat History (NEW) │
96
+ │ │ └── SQLite reader → Conversation processor → Knowledge extractor │
97
+ │ └── Manual knowledge entries (NEW) │
98
+ │ │
99
+ │ STORAGE LAYER │
100
+ │ ├── Vector Store (existing, enhanced schema) │
101
+ │ │ └── + timestamps, access tracking, importance, decay scores │
102
+ │ ├── Relationship Graph (NEW) │
103
+ │ │ └── SQLite/Redis graph with typed edges │
104
+ │ └── Category Summaries (NEW) │
105
+ │ └── Evolving markdown summaries per topic │
106
+ │ │
107
+ │ RETRIEVAL LAYER │
108
+ │ ├── Multi-hop retrieval (existing) │
109
+ │ ├── Hybrid scoring: similarity + time decay + importance (NEW) │
110
+ │ ├── Graph traversal for related knowledge (NEW) │
111
+ │ └── Tiered retrieval: summaries → items → raw (NEW) │
112
+ │ │
113
+ │ MAINTENANCE LAYER (NEW) │
114
+ │ ├── Nightly consolidation │
115
+ │ ├── Weekly summarisation │
116
+ │ ├── Monthly re-indexing │
117
+ │ └── Contradiction detection & resolution │
118
+ │ │
119
+ │ MCP INTERFACE │
120
+ │ ├── Existing tools (search, recall, etc.) │
121
+ │ └── NEW: search_past_solutions, find_similar_issues, get_project_patterns │
122
+ │ │
123
+ └─────────────────────────────────────────────────────────────────────────────┘
124
+ ```
125
+
126
+ ### Technology Stack
127
+
128
+ | Component | Technology | Rationale |
129
+ |-----------|------------|-----------|
130
+ | Vector Store | Existing (Redis Stack/Qdrant/Chroma) | Already implemented |
131
+ | Relationship Graph | SQLite (embedded) | No external dependencies, ACID compliant |
132
+ | Chat History Access | SQLite reader | Direct access to Cursor's state.vscdb |
133
+ | Knowledge Extraction | LLM (configurable) | Structured extraction from conversations |
134
+ | Background Jobs | node-cron or custom scheduler | Lightweight, no external deps |
135
+ | Category Summaries | Markdown files or SQLite | Simple, human-readable |
136
+
137
+ ---
138
+
139
+ ## Phase 1: Foundation - Enhanced Schema & Temporal Decay {#phase-1-foundation}
140
+
141
+ ### Overview
142
+
143
+ Enhance the existing chunk schema to support temporal tracking, access patterns, and decay scoring. This is the foundation for all subsequent features.
144
+
145
+ ### Tasks
146
+
147
+ #### Task 1.1: Define Enhanced Chunk Interface
148
+
149
+ **File**: `src/types/memory.ts` (new file)
150
+
151
+ ```typescript
152
+ /**
153
+ * Enhanced chunk interface with temporal and importance tracking
154
+ */
155
+ export interface EnhancedChunk {
156
+ // Existing fields
157
+ id: string;
158
+ content: string;
159
+ embedding: number[];
160
+ source: string;
161
+ metadata: Record<string, unknown>;
162
+
163
+ // NEW: Temporal tracking
164
+ createdAt: Date;
165
+ updatedAt: Date;
166
+ lastAccessedAt: Date | null;
167
+ accessCount: number;
168
+
169
+ // NEW: Importance & decay
170
+ importance: number; // 0.0 - 1.0, default 0.5
171
+ decayScore: number; // Calculated: combines age, access, importance
172
+ isArchived: boolean; // Soft delete for low-relevance items
173
+
174
+ // NEW: Type classification
175
+ chunkType: ChunkType;
176
+
177
+ // NEW: Relationships (IDs of related chunks)
178
+ relatedChunkIds: string[];
179
+
180
+ // NEW: Entity tags
181
+ entities: EntityTag[];
182
+
183
+ // NEW: Source tracking for chat-derived knowledge
184
+ sourceConversationId?: string;
185
+ sourceMessageIndex?: number;
186
+ }
187
+
188
+ export enum ChunkType {
189
+ DOCUMENTATION = 'documentation',
190
+ CODE = 'code',
191
+ SOLUTION = 'solution', // Problem + solution from chat
192
+ PATTERN = 'pattern', // Reusable code pattern
193
+ DECISION = 'decision', // Architectural decision
194
+ STANDARD = 'standard', // Coding standard/guideline
195
+ PREFERENCE = 'preference', // User preference
196
+ CATEGORY_SUMMARY = 'category_summary', // High-level summary
197
+ }
198
+
199
+ export interface EntityTag {
200
+ type: EntityType;
201
+ value: string;
202
+ confidence: number;
203
+ }
204
+
205
+ export enum EntityType {
206
+ TOOL = 'tool', // e.g., "postgresql", "redis"
207
+ LANGUAGE = 'language', // e.g., "typescript", "php"
208
+ FRAMEWORK = 'framework', // e.g., "laravel", "vue"
209
+ CONCEPT = 'concept', // e.g., "authentication", "caching"
210
+ PROJECT = 'project', // e.g., "tvd-platform"
211
+ PERSON = 'person', // e.g., team member names
212
+ FILE = 'file', // e.g., specific file paths
213
+ COMPONENT = 'component', // e.g., "auth-service"
214
+ }
215
+ ```
216
+
217
+ **Acceptance Criteria**:
218
+ - [ ] Type definitions compile without errors
219
+ - [ ] All existing code continues to work (backward compatible)
220
+ - [ ] Types are exported from main index
221
+
222
+ ---
223
+
224
+ #### Task 1.2: Create Memory Metadata Store
225
+
226
+ **File**: `src/services/memoryMetadataStore.ts` (new file)
227
+
228
+ Create a SQLite-based metadata store that tracks temporal information separately from the vector store. This allows any vector store backend to gain memory capabilities.
229
+
230
+ ```typescript
231
+ /**
232
+ * SQLite-based metadata store for memory tracking
233
+ *
234
+ * This store maintains temporal metadata, access patterns, and relationships
235
+ * independently of the vector store, allowing any vector backend to gain
236
+ * memory capabilities.
237
+ */
238
+ export class MemoryMetadataStore {
239
+ private db: Database;
240
+
241
+ constructor(dbPath: string) {
242
+ // Initialize SQLite database
243
+ }
244
+
245
+ async initialize(): Promise<void> {
246
+ // Create tables: chunks_metadata, relationships, access_log, categories
247
+ }
248
+
249
+ // Chunk metadata operations
250
+ async upsertChunkMetadata(chunk: EnhancedChunk): Promise<void>;
251
+ async getChunkMetadata(chunkId: string): Promise<ChunkMetadata | null>;
252
+ async recordAccess(chunkId: string): Promise<void>;
253
+ async updateDecayScores(): Promise<void>;
254
+ async archiveStaleChunks(threshold: number): Promise<string[]>;
255
+
256
+ // Relationship operations
257
+ async addRelationship(from: string, to: string, type: RelationshipType, strength?: number): Promise<void>;
258
+ async getRelatedChunks(chunkId: string, type?: RelationshipType): Promise<RelatedChunk[]>;
259
+ async findContradictions(chunkId: string): Promise<Contradiction[]>;
260
+
261
+ // Category operations
262
+ async upsertCategory(category: Category): Promise<void>;
263
+ async getCategory(name: string): Promise<Category | null>;
264
+ async listCategories(): Promise<Category[]>;
265
+
266
+ // Analytics
267
+ async getAccessStats(since?: Date): Promise<AccessStats>;
268
+ async getDecayReport(): Promise<DecayReport>;
269
+ }
270
+ ```
271
+
272
+ **SQL Schema** (to be created in `initialize()`):
273
+
274
+ ```sql
275
+ -- Chunk metadata (extends vector store data)
276
+ CREATE TABLE IF NOT EXISTS chunks_metadata (
277
+ chunk_id TEXT PRIMARY KEY,
278
+ source TEXT NOT NULL,
279
+ chunk_type TEXT NOT NULL DEFAULT 'documentation',
280
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
281
+ updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
282
+ last_accessed_at DATETIME,
283
+ access_count INTEGER DEFAULT 0,
284
+ importance REAL DEFAULT 0.5,
285
+ decay_score REAL DEFAULT 1.0,
286
+ is_archived BOOLEAN DEFAULT FALSE,
287
+ source_conversation_id TEXT,
288
+ source_message_index INTEGER,
289
+ entities_json TEXT -- JSON array of EntityTag
290
+ );
291
+
292
+ -- Relationships between chunks
293
+ CREATE TABLE IF NOT EXISTS relationships (
294
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
295
+ from_chunk_id TEXT NOT NULL,
296
+ to_chunk_id TEXT NOT NULL,
297
+ relationship_type TEXT NOT NULL,
298
+ strength REAL DEFAULT 0.5,
299
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
300
+ metadata_json TEXT,
301
+ UNIQUE(from_chunk_id, to_chunk_id, relationship_type),
302
+ FOREIGN KEY (from_chunk_id) REFERENCES chunks_metadata(chunk_id),
303
+ FOREIGN KEY (to_chunk_id) REFERENCES chunks_metadata(chunk_id)
304
+ );
305
+
306
+ -- Access log for analytics
307
+ CREATE TABLE IF NOT EXISTS access_log (
308
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
309
+ chunk_id TEXT NOT NULL,
310
+ accessed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
311
+ query_context TEXT,
312
+ FOREIGN KEY (chunk_id) REFERENCES chunks_metadata(chunk_id)
313
+ );
314
+
315
+ -- Category summaries
316
+ CREATE TABLE IF NOT EXISTS categories (
317
+ name TEXT PRIMARY KEY,
318
+ description TEXT,
319
+ summary_markdown TEXT,
320
+ chunk_count INTEGER DEFAULT 0,
321
+ last_updated DATETIME DEFAULT CURRENT_TIMESTAMP,
322
+ parent_category TEXT,
323
+ FOREIGN KEY (parent_category) REFERENCES categories(name)
324
+ );
325
+
326
+ -- Processed conversations (to avoid re-processing)
327
+ CREATE TABLE IF NOT EXISTS processed_conversations (
328
+ conversation_id TEXT PRIMARY KEY,
329
+ processed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
330
+ chunks_created INTEGER DEFAULT 0,
331
+ knowledge_extracted_json TEXT
332
+ );
333
+
334
+ -- Indexes for performance
335
+ CREATE INDEX IF NOT EXISTS idx_chunks_decay ON chunks_metadata(decay_score);
336
+ CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks_metadata(chunk_type);
337
+ CREATE INDEX IF NOT EXISTS idx_chunks_archived ON chunks_metadata(is_archived);
338
+ CREATE INDEX IF NOT EXISTS idx_relationships_from ON relationships(from_chunk_id);
339
+ CREATE INDEX IF NOT EXISTS idx_relationships_to ON relationships(to_chunk_id);
340
+ CREATE INDEX IF NOT EXISTS idx_access_log_chunk ON access_log(chunk_id);
341
+ ```
342
+
343
+ **Acceptance Criteria**:
344
+ - [ ] SQLite database is created on first run
345
+ - [ ] All CRUD operations work correctly
346
+ - [ ] Access logging updates last_accessed_at and access_count
347
+ - [ ] Indexes are created for performance-critical queries
348
+
349
+ ---
350
+
351
+ #### Task 1.3: Implement Decay Score Calculator
352
+
353
+ **File**: `src/services/decayCalculator.ts` (new file)
354
+
355
+ ```typescript
356
+ /**
357
+ * Calculates decay scores for chunks based on multiple factors
358
+ *
359
+ * Formula: decayScore = (ageFactor * 0.3) + (accessFactor * 0.3) + (importanceFactor * 0.4)
360
+ *
361
+ * Where:
362
+ * - ageFactor = 1.0 / (1.0 + (ageDays / halfLifeDays))
363
+ * - accessFactor = min(1.0, accessCount / expectedAccesses) * recencyBoost
364
+ * - importanceFactor = chunk.importance
365
+ */
366
+ export class DecayCalculator {
367
+ private config: DecayConfig;
368
+
369
+ constructor(config?: Partial<DecayConfig>) {
370
+ this.config = {
371
+ halfLifeDays: 60, // Age at which decay is 50%
372
+ expectedAccessesPerMonth: 5, // Expected access frequency
373
+ recencyBoostDays: 7, // Boost for recently accessed
374
+ recencyBoostMultiplier: 1.5,
375
+ weights: {
376
+ age: 0.3,
377
+ access: 0.3,
378
+ importance: 0.4,
379
+ },
380
+ ...config,
381
+ };
382
+ }
383
+
384
+ calculateDecayScore(chunk: ChunkMetadata, now: Date = new Date()): number {
385
+ const ageFactor = this.calculateAgeFactor(chunk.createdAt, now);
386
+ const accessFactor = this.calculateAccessFactor(chunk, now);
387
+ const importanceFactor = chunk.importance;
388
+
389
+ return (
390
+ ageFactor * this.config.weights.age +
391
+ accessFactor * this.config.weights.access +
392
+ importanceFactor * this.config.weights.importance
393
+ );
394
+ }
395
+
396
+ private calculateAgeFactor(createdAt: Date, now: Date): number {
397
+ const ageDays = (now.getTime() - createdAt.getTime()) / (1000 * 60 * 60 * 24);
398
+ return 1.0 / (1.0 + (ageDays / this.config.halfLifeDays));
399
+ }
400
+
401
+ private calculateAccessFactor(chunk: ChunkMetadata, now: Date): number {
402
+ const baseAccessScore = Math.min(1.0, chunk.accessCount / this.config.expectedAccessesPerMonth);
403
+
404
+ // Apply recency boost if accessed recently
405
+ if (chunk.lastAccessedAt) {
406
+ const daysSinceAccess = (now.getTime() - chunk.lastAccessedAt.getTime()) / (1000 * 60 * 60 * 24);
407
+ if (daysSinceAccess <= this.config.recencyBoostDays) {
408
+ return Math.min(1.0, baseAccessScore * this.config.recencyBoostMultiplier);
409
+ }
410
+ }
411
+
412
+ return baseAccessScore;
413
+ }
414
+
415
+ // Batch update all decay scores
416
+ async updateAllDecayScores(store: MemoryMetadataStore): Promise<UpdateResult> {
417
+ // Implementation: fetch all non-archived chunks, recalculate scores, batch update
418
+ }
419
+
420
+ // Get chunks below threshold for potential archival
421
+ getArchivalCandidates(chunks: ChunkMetadata[], threshold: number = 0.2): ChunkMetadata[] {
422
+ return chunks.filter(c => c.decayScore < threshold && !c.isArchived);
423
+ }
424
+ }
425
+
426
+ export interface DecayConfig {
427
+ halfLifeDays: number;
428
+ expectedAccessesPerMonth: number;
429
+ recencyBoostDays: number;
430
+ recencyBoostMultiplier: number;
431
+ weights: {
432
+ age: number;
433
+ access: number;
434
+ importance: number;
435
+ };
436
+ }
437
+ ```
438
+
439
+ **Acceptance Criteria**:
440
+ - [ ] Decay scores range from 0.0 to 1.0
441
+ - [ ] New chunks with high importance start with high scores
442
+ - [ ] Frequently accessed chunks maintain high scores
443
+ - [ ] Old, unused chunks decay toward 0
444
+ - [ ] Batch update completes in reasonable time (<5s for 10k chunks)
445
+
446
+ ---
447
+
448
+ #### Task 1.4: Integrate Metadata Store with Existing Vector Store
449
+
450
+ **File**: `src/services/enhancedVectorStore.ts` (new file)
451
+
452
+ Create a wrapper that combines the existing vector store with the new metadata store:
453
+
454
+ ```typescript
455
+ /**
456
+ * Enhanced vector store that wraps existing implementation with memory capabilities
457
+ */
458
+ export class EnhancedVectorStore {
459
+ private vectorStore: VectorStore; // Existing implementation
460
+ private metadataStore: MemoryMetadataStore;
461
+ private decayCalculator: DecayCalculator;
462
+
463
+ constructor(
464
+ vectorStore: VectorStore,
465
+ metadataStore: MemoryMetadataStore,
466
+ decayConfig?: Partial<DecayConfig>
467
+ ) {
468
+ this.vectorStore = vectorStore;
469
+ this.metadataStore = metadataStore;
470
+ this.decayCalculator = new DecayCalculator(decayConfig);
471
+ }
472
+
473
+ // Enhanced upsert: stores in both vector store and metadata store
474
+ async upsert(chunks: EnhancedChunk[]): Promise<void> {
475
+ // 1. Upsert to vector store (existing behavior)
476
+ await this.vectorStore.upsert(chunks);
477
+
478
+ // 2. Upsert metadata
479
+ for (const chunk of chunks) {
480
+ await this.metadataStore.upsertChunkMetadata(chunk);
481
+ }
482
+ }
483
+
484
+ // Enhanced search: combines vector similarity with decay scoring
485
+ async search(query: string, options: EnhancedSearchOptions): Promise<EnhancedSearchResult[]> {
486
+ // 1. Get candidates from vector store
487
+ const candidates = await this.vectorStore.search(query, {
488
+ topK: options.topK * 2, // Over-fetch to allow for re-ranking
489
+ ...options,
490
+ });
491
+
492
+ // 2. Enrich with metadata
493
+ const enriched = await this.enrichWithMetadata(candidates);
494
+
495
+ // 3. Re-rank with enhanced scoring
496
+ const ranked = this.rerank(enriched, options);
497
+
498
+ // 4. Record access for returned results
499
+ await this.recordAccess(ranked.slice(0, options.topK), query);
500
+
501
+ return ranked.slice(0, options.topK);
502
+ }
503
+
504
+ private rerank(results: EnhancedSearchResult[], options: EnhancedSearchOptions): EnhancedSearchResult[] {
505
+ return results
506
+ .map(r => ({
507
+ ...r,
508
+ finalScore: this.calculateFinalScore(r, options),
509
+ }))
510
+ .sort((a, b) => b.finalScore - a.finalScore);
511
+ }
512
+
513
+ private calculateFinalScore(result: EnhancedSearchResult, options: EnhancedSearchOptions): number {
514
+ const weights = options.scoreWeights ?? {
515
+ similarity: 0.5,
516
+ decay: 0.3,
517
+ importance: 0.2,
518
+ };
519
+
520
+ return (
521
+ result.similarityScore * weights.similarity +
522
+ result.metadata.decayScore * weights.decay +
523
+ result.metadata.importance * weights.importance
524
+ );
525
+ }
526
+
527
+ private async recordAccess(results: EnhancedSearchResult[], queryContext: string): Promise<void> {
528
+ for (const result of results) {
529
+ await this.metadataStore.recordAccess(result.id);
530
+ }
531
+ }
532
+ }
533
+
534
+ export interface EnhancedSearchOptions {
535
+ topK: number;
536
+ filter?: Record<string, unknown>;
537
+ includeArchived?: boolean;
538
+ chunkTypes?: ChunkType[];
539
+ minDecayScore?: number;
540
+ scoreWeights?: {
541
+ similarity: number;
542
+ decay: number;
543
+ importance: number;
544
+ };
545
+ }
546
+ ```
547
+
548
+ **Acceptance Criteria**:
549
+ - [ ] All existing tests continue to pass
550
+ - [ ] Metadata is correctly stored for new chunks
551
+ - [ ] Search results include decay scores
552
+ - [ ] Access is recorded for returned results
553
+ - [ ] Re-ranking produces a different order than pure similarity ranking
554
+
555
+ ---
556
+
557
+ ## Phase 2: Cursor Chat History Integration {#phase-2-chat-history}
558
+
559
+ ### Overview
560
+
561
+ Implement the ability to read and process Cursor IDE chat history directly from its SQLite database.
562
+
563
+ ### Tasks
564
+
565
+ #### Task 2.1: Implement Cursor Database Reader
566
+
567
+ **File**: `src/services/cursorChatReader.ts` (new file)
568
+
569
+ ```typescript
570
+ import Database from 'better-sqlite3';
571
+ import * as os from 'os';
572
+ import * as path from 'path';
573
+
574
+ /**
575
+ * Reads chat history directly from Cursor's SQLite database
576
+ */
577
+ export class CursorChatReader {
578
+ private dbPath: string;
579
+
580
+ constructor(customPath?: string) {
581
+ this.dbPath = customPath ?? this.getDefaultDbPath();
582
+ }
583
+
584
+ private getDefaultDbPath(): string {
585
+ const platform = os.platform();
586
+ const home = os.homedir();
587
+
588
+ switch (platform) {
589
+ case 'darwin':
590
+ return path.join(home, 'Library/Application Support/Cursor/User/globalStorage/state.vscdb');
591
+ case 'win32':
592
+ return path.join(process.env.APPDATA ?? home, 'Cursor/User/globalStorage/state.vscdb');
593
+ case 'linux':
594
+ return path.join(home, '.config/Cursor/User/globalStorage/state.vscdb');
595
+ default:
596
+ throw new Error(`Unsupported platform: ${platform}`);
597
+ }
598
+ }
599
+
600
+ /**
601
+ * List all conversations with metadata
602
+ */
603
+ async listConversations(options?: ListConversationsOptions): Promise<ConversationSummary[]> {
604
+ const db = new Database(this.dbPath, { readonly: true });
605
+
606
+ try {
607
+ // Cursor stores chat data in ItemTable with specific keys
608
+ const row = db.prepare(`
609
+ SELECT value FROM ItemTable
610
+ WHERE [key] = 'workbench.panel.aichat.view.aichat.chatdata'
611
+ `).get() as { value: string } | undefined;
612
+
613
+ if (!row) return [];
614
+
615
+ const chatData = JSON.parse(row.value);
616
+ return this.parseConversations(chatData, options);
617
+ } finally {
618
+ db.close();
619
+ }
620
+ }
621
+
622
+ /**
623
+ * Get full conversation content
624
+ */
625
+ async getConversation(conversationId: string): Promise<Conversation | null> {
626
+ const conversations = await this.listConversations();
627
+ return conversations.find(c => c.id === conversationId) ?? null;
628
+ }
629
+
630
+ /**
631
+ * Search conversations by content
632
+ */
633
+ async searchConversations(query: string, options?: SearchOptions): Promise<ConversationSummary[]> {
634
+ const conversations = await this.listConversations();
635
+ const queryLower = query.toLowerCase();
636
+
637
+ return conversations.filter(c => {
638
+ const contentMatch = c.messages.some(m =>
639
+ m.content.toLowerCase().includes(queryLower)
640
+ );
641
+ const hasCode = options?.hasCode ? c.messages.some(m => m.codeBlocks.length > 0) : true;
642
+
643
+ return contentMatch && hasCode;
644
+ });
645
+ }
646
+
647
+ private parseConversations(chatData: any, options?: ListConversationsOptions): ConversationSummary[] {
648
+ // Parse Cursor's chat data format
649
+ // This will need adjustment based on actual Cursor data structure
650
+ const conversations: ConversationSummary[] = [];
651
+
652
+ // Cursor stores conversations in a specific format
653
+ // Implementation depends on actual structure
654
+
655
+ return conversations;
656
+ }
657
+ }
658
+
659
+ export interface ConversationSummary {
660
+ id: string;
661
+ title: string;
662
+ createdAt: Date;
663
+ updatedAt: Date;
664
+ messageCount: number;
665
+ hasCodeBlocks: boolean;
666
+ project?: string;
667
+ languages: string[];
668
+ preview: string;
669
+ messages: Message[];
670
+ }
671
+
672
+ export interface Message {
673
+ role: 'user' | 'assistant';
674
+ content: string;
675
+ timestamp: Date;
676
+ codeBlocks: CodeBlock[];
677
+ filesReferenced: string[];
678
+ }
679
+
680
+ export interface CodeBlock {
681
+ language: string;
682
+ code: string;
683
+ filename?: string;
684
+ }
685
+
686
+ export interface ListConversationsOptions {
687
+ limit?: number;
688
+ since?: Date;
689
+ hasCode?: boolean;
690
+ project?: string;
691
+ }
692
+
693
+ export interface SearchOptions {
694
+ maxResults?: number;
695
+ hasCode?: boolean;
696
+ }
697
+
698
+ export interface Conversation extends ConversationSummary {
699
+ fullContent: string;
700
+ }
701
+ ```
702
+
703
+ **Acceptance Criteria**:
704
+ - [ ] Correctly locates Cursor database on macOS, Windows, Linux
705
+ - [ ] Reads conversation data without corrupting database
706
+ - [ ] Handles database being locked by Cursor (read-only mode)
707
+ - [ ] Returns empty array if no conversations found
708
+ - [ ] Supports filtering by date, project, code presence
709
+
710
+ ---
711
+
712
+ #### Task 2.2: Create Conversation Processor
713
+
714
+ **File**: `src/services/conversationProcessor.ts` (new file)
715
+
716
+ ```typescript
717
+ /**
718
+ * Processes raw conversations into structured chunks
719
+ */
720
+ export class ConversationProcessor {
721
+ private embeddings: EmbeddingsService;
722
+
723
+ constructor(embeddings: EmbeddingsService) {
724
+ this.embeddings = embeddings;
725
+ }
726
+
727
+ /**
728
+ * Process a conversation into chunks for storage
729
+ */
730
+ async processConversation(conversation: Conversation): Promise<ProcessedConversation> {
731
+ const chunks: EnhancedChunk[] = [];
732
+
733
+ // 1. Create chunks for each message exchange (user question + assistant response)
734
+ const exchanges = this.groupIntoExchanges(conversation.messages);
735
+
736
+ for (const exchange of exchanges) {
737
+ const chunk = await this.createExchangeChunk(exchange, conversation);
738
+ chunks.push(chunk);
739
+ }
740
+
741
+ // 2. Create separate chunks for code blocks with context
742
+ for (const message of conversation.messages) {
743
+ for (const codeBlock of message.codeBlocks) {
744
+ const chunk = await this.createCodeChunk(codeBlock, message, conversation);
745
+ chunks.push(chunk);
746
+ }
747
+ }
748
+
749
+ // 3. Extract entities from the conversation
750
+ const entities = await this.extractEntities(conversation);
751
+
752
+ return {
753
+ conversationId: conversation.id,
754
+ chunks,
755
+ entities,
756
+ metadata: {
757
+ project: conversation.project,
758
+ languages: conversation.languages,
759
+ filesReferenced: this.getAllFilesReferenced(conversation),
760
+ },
761
+ };
762
+ }
763
+
764
+ private groupIntoExchanges(messages: Message[]): Exchange[] {
765
+ const exchanges: Exchange[] = [];
766
+ let currentExchange: Exchange | null = null;
767
+
768
+ for (const message of messages) {
769
+ if (message.role === 'user') {
770
+ if (currentExchange) {
771
+ exchanges.push(currentExchange);
772
+ }
773
+ currentExchange = { userMessage: message, assistantMessages: [] };
774
+ } else if (currentExchange) {
775
+ currentExchange.assistantMessages.push(message);
776
+ }
777
+ }
778
+
779
+ if (currentExchange) {
780
+ exchanges.push(currentExchange);
781
+ }
782
+
783
+ return exchanges;
784
+ }
785
+
786
+ private async createExchangeChunk(exchange: Exchange, conversation: Conversation): Promise<EnhancedChunk> {
787
+ const content = this.formatExchange(exchange);
788
+ const embedding = await this.embeddings.embed(content);
789
+
790
+ return {
791
+ id: `chat-${conversation.id}-${exchange.userMessage.timestamp.getTime()}`,
792
+ content,
793
+ embedding,
794
+ source: `cursor-chat:${conversation.id}`,
795
+ metadata: {
796
+ exchangeTimestamp: exchange.userMessage.timestamp,
797
+ hasCode: exchange.assistantMessages.some(m => m.codeBlocks.length > 0),
798
+ },
799
+ createdAt: exchange.userMessage.timestamp,
800
+ updatedAt: exchange.userMessage.timestamp,
801
+ lastAccessedAt: null,
802
+ accessCount: 0,
803
+ importance: this.calculateExchangeImportance(exchange),
804
+ decayScore: 1.0,
805
+ isArchived: false,
806
+ chunkType: ChunkType.SOLUTION,
807
+ relatedChunkIds: [],
808
+ entities: [],
809
+ sourceConversationId: conversation.id,
810
+ };
811
+ }
812
+
813
+ private formatExchange(exchange: Exchange): string {
814
+ const parts = [
815
+ `## User Question\n${exchange.userMessage.content}`,
816
+ `## Assistant Response\n${exchange.assistantMessages.map(m => m.content).join('\n\n')}`,
817
+ ];
818
+
819
+ if (exchange.assistantMessages.some(m => m.codeBlocks.length > 0)) {
820
+ const codeBlocks = exchange.assistantMessages.flatMap(m => m.codeBlocks);
821
+ parts.push(`## Code\n${codeBlocks.map(cb => `\`\`\`${cb.language}\n${cb.code}\n\`\`\``).join('\n\n')}`);
822
+ }
823
+
824
+ return parts.join('\n\n');
825
+ }
826
+
827
+ private calculateExchangeImportance(exchange: Exchange): number {
828
+ let importance = 0.5;
829
+
830
+ // Boost for code-containing responses
831
+ if (exchange.assistantMessages.some(m => m.codeBlocks.length > 0)) {
832
+ importance += 0.2;
833
+ }
834
+
835
+ // Boost for longer, detailed responses
836
+ const totalLength = exchange.assistantMessages.reduce((sum, m) => sum + m.content.length, 0);
837
+ if (totalLength > 2000) importance += 0.1;
838
+
839
+ // Boost for file modifications
840
+ if (exchange.assistantMessages.some(m => m.filesReferenced.length > 0)) {
841
+ importance += 0.1;
842
+ }
843
+
844
+ return Math.min(1.0, importance);
845
+ }
846
+
847
+ private async extractEntities(conversation: Conversation): Promise<EntityTag[]> {
848
+ // Basic entity extraction - can be enhanced with LLM
849
+ const entities: EntityTag[] = [];
850
+ const content = conversation.messages.map(m => m.content).join(' ');
851
+
852
+ // Extract languages from code blocks
853
+ for (const lang of conversation.languages) {
854
+ entities.push({
855
+ type: EntityType.LANGUAGE,
856
+ value: lang,
857
+ confidence: 1.0,
858
+ });
859
+ }
860
+
861
+ // Extract file references
862
+ const filePattern = /(?:file|path|\.(?:ts|js|php|vue|css|html|json|md))[\s:]+([^\s,]+)/gi;
863
+ let match;
864
+ while ((match = filePattern.exec(content)) !== null) {
865
+ entities.push({
866
+ type: EntityType.FILE,
867
+ value: match[1],
868
+ confidence: 0.7,
869
+ });
870
+ }
871
+
872
+ return entities;
873
+ }
874
+ }
875
+
876
+ interface Exchange {
877
+ userMessage: Message;
878
+ assistantMessages: Message[];
879
+ }
880
+
881
+ interface ProcessedConversation {
882
+ conversationId: string;
883
+ chunks: EnhancedChunk[];
884
+ entities: EntityTag[];
885
+ metadata: {
886
+ project?: string;
887
+ languages: string[];
888
+ filesReferenced: string[];
889
+ };
890
+ }
891
+ ```
892
+
893
+ **Acceptance Criteria**:
894
+ - [ ] Groups messages into logical exchanges
895
+ - [ ] Creates embeddings for each chunk
896
+ - [ ] Extracts code blocks as separate chunks
897
+ - [ ] Calculates reasonable importance scores
898
+ - [ ] Extracts basic entities (languages, files)
899
+
900
+ ---
901
+
902
+ #### Task 2.3: Implement Chat History Ingestion CLI
903
+
904
+ **File**: `src/cli/ingestChats.ts` (new file)
905
+
906
+ Add new CLI commands for chat history ingestion:
907
+
908
+ ```typescript
909
+ /**
910
+ * CLI commands for ingesting Cursor chat history
911
+ */
912
+ export function registerChatCommands(program: Command) {
913
+ const chatCommand = program
914
+ .command('chat')
915
+ .description('Manage Cursor chat history ingestion');
916
+
917
+ // Ingest all chats
918
+ chatCommand
919
+ .command('ingest')
920
+ .description('Ingest Cursor chat history into the knowledge base')
921
+ .option('-s, --since <date>', 'Only ingest chats since this date (ISO format)')
922
+ .option('-p, --project <name>', 'Only ingest chats for a specific project')
923
+ .option('--has-code', 'Only ingest chats containing code blocks')
924
+ .option('-l, --limit <number>', 'Maximum number of conversations to ingest', parseInt)
925
+ .option('--dry-run', 'Show what would be ingested without actually ingesting')
926
+ .action(async (options) => {
927
+ const reader = new CursorChatReader();
928
+ const processor = new ConversationProcessor(getEmbeddings());
929
+ const store = getEnhancedVectorStore();
930
+ const metadataStore = getMetadataStore();
931
+
932
+ console.log('Reading Cursor chat history...');
933
+ const conversations = await reader.listConversations({
934
+ since: options.since ? new Date(options.since) : undefined,
935
+ hasCode: options.hasCode,
936
+ project: options.project,
937
+ limit: options.limit,
938
+ });
939
+
940
+ console.log(`Found ${conversations.length} conversations`);
941
+
942
+ // Filter out already processed
943
+ const unprocessed = await filterUnprocessed(conversations, metadataStore);
944
+ console.log(`${unprocessed.length} new conversations to process`);
945
+
946
+ if (options.dryRun) {
947
+ for (const conv of unprocessed) {
948
+ console.log(` - ${conv.title} (${conv.messageCount} messages)`);
949
+ }
950
+ return;
951
+ }
952
+
953
+ // Process and ingest
954
+ let totalChunks = 0;
955
+ for (const conv of unprocessed) {
956
+ const processed = await processor.processConversation(conv);
957
+ await store.upsert(processed.chunks);
958
+ await metadataStore.markConversationProcessed(conv.id, processed.chunks.length);
959
+ totalChunks += processed.chunks.length;
960
+ console.log(` ✓ ${conv.title}: ${processed.chunks.length} chunks`);
961
+ }
962
+
963
+ console.log(`\nIngested ${totalChunks} chunks from ${unprocessed.length} conversations`);
964
+ });
965
+
966
+ // List chats
967
+ chatCommand
968
+ .command('list')
969
+ .description('List available Cursor conversations')
970
+ .option('-l, --limit <number>', 'Maximum number to show', parseInt, 20)
971
+ .option('--processed', 'Only show processed conversations')
972
+ .option('--unprocessed', 'Only show unprocessed conversations')
973
+ .action(async (options) => {
974
+ // Implementation
975
+ });
976
+
977
+ // Watch for new chats
978
+ chatCommand
979
+ .command('watch')
980
+ .description('Watch for new conversations and ingest automatically')
981
+ .option('-i, --interval <minutes>', 'Check interval in minutes', parseInt, 5)
982
+ .action(async (options) => {
983
+ console.log(`Watching for new conversations every ${options.interval} minutes...`);
984
+
985
+ const ingestNew = async () => {
986
+ // Run ingest for conversations since last check
987
+ };
988
+
989
+ setInterval(ingestNew, options.interval * 60 * 1000);
990
+ await ingestNew(); // Run immediately
991
+ });
992
+ }
993
+ ```
994
+
995
+ **Acceptance Criteria**:
996
+ - [ ] `cursor-rag chat ingest` ingests all new conversations
997
+ - [ ] `cursor-rag chat list` shows available conversations
998
+ - [ ] `cursor-rag chat watch` runs in background mode
999
+ - [ ] Already-processed conversations are skipped
1000
+ - [ ] Progress is displayed during ingestion
1001
+
1002
+ ---
1003
+
1004
+ ## Phase 3: Knowledge Extraction Pipeline {#phase-3-knowledge-extraction}
1005
+
1006
+ ### Overview
1007
+
1008
+ Use an LLM to extract structured knowledge (solutions, patterns, decisions, standards) from conversations.
1009
+
1010
+ ### Tasks
1011
+
1012
+ #### Task 3.1: Define Knowledge Extraction Schema
1013
+
1014
+ **File**: `src/types/extractedKnowledge.ts` (new file)
1015
+
1016
+ ```typescript
1017
+ /**
1018
+ * Structured knowledge extracted from conversations
1019
+ */
1020
+ export interface ExtractedKnowledge {
1021
+ conversationId: string;
1022
+ extractedAt: Date;
1023
+
1024
+ // Problem/solution pairs
1025
+ solutions: Solution[];
1026
+
1027
+ // Reusable code patterns
1028
+ patterns: Pattern[];
1029
+
1030
+ // Architectural/technical decisions
1031
+ decisions: Decision[];
1032
+
1033
+ // Coding standards/guidelines
1034
+ standards: Standard[];
1035
+
1036
+ // User preferences/corrections
1037
+ preferences: Preference[];
1038
+
1039
+ // Entities mentioned
1040
+ entities: ExtractedEntity[];
1041
+ }
1042
+
1043
+ export interface Solution {
1044
+ id: string;
1045
+ problem: string; // Description of the problem
1046
+ errorMessage?: string; // Specific error if applicable
1047
+ solution: string; // How it was solved
1048
+ codeChanges: CodeChange[]; // Code that was added/modified
1049
+ filesAffected: string[]; // Files that were changed
1050
+ tags: string[]; // Auto-generated tags
1051
+ confidence: number; // How confident the extraction is
1052
+ }
1053
+
1054
+ export interface Pattern {
1055
+ id: string;
1056
+ name: string; // Pattern name (e.g., "Repository Pattern")
1057
+ description: string; // What the pattern does
1058
+ useCase: string; // When to use it
1059
+ implementation: string; // Code example
1060
+ language: string;
1061
+ relatedPatterns: string[]; // IDs of related patterns
1062
+ tags: string[];
1063
+ confidence: number;
1064
+ }
1065
+
1066
+ export interface Decision {
1067
+ id: string;
1068
+ topic: string; // What was decided about
1069
+ decision: string; // The actual decision
1070
+ reasoning: string; // Why this decision was made
1071
+ alternatives?: string[]; // Other options considered
1072
+ tradeoffs?: string[]; // Known tradeoffs
1073
+ context: string; // Surrounding context
1074
+ tags: string[];
1075
+ confidence: number;
1076
+ }
1077
+
1078
+ export interface Standard {
1079
+ id: string;
1080
+ category: string; // e.g., "naming", "structure", "testing"
1081
+ rule: string; // The standard/guideline
1082
+ examples: string[]; // Examples of following the standard
1083
+ counterExamples?: string[];// Examples of violations
1084
+ rationale?: string; // Why this standard exists
1085
+ tags: string[];
1086
+ confidence: number;
1087
+ }
1088
+
1089
+ export interface Preference {
1090
+ id: string;
1091
+ aspect: string; // What aspect (e.g., "indentation", "approach")
1092
+ preference: string; // The preferred way
1093
+ correction?: string; // What was corrected
1094
+ context: string; // When this applies
1095
+ confidence: number;
1096
+ }
1097
+
1098
+ export interface ExtractedEntity {
1099
+ type: EntityType;
1100
+ name: string;
1101
+ description?: string;
1102
+ relationships: EntityRelationship[];
1103
+ }
1104
+
1105
+ export interface EntityRelationship {
1106
+ targetEntity: string;
1107
+ relationshipType: string;
1108
+ strength: number;
1109
+ }
1110
+
1111
+ export interface CodeChange {
1112
+ filename?: string;
1113
+ language: string;
1114
+ before?: string;
1115
+ after: string;
1116
+ explanation?: string;
1117
+ }
1118
+ ```
1119
+
1120
+ **Acceptance Criteria**:
1121
+ - [ ] All types are properly defined and exported
1122
+ - [ ] Types support serialisation to JSON
1123
+ - [ ] Confidence scores are bounded 0-1
1124
+
1125
+ ---
1126
+
1127
+ #### Task 3.2: Implement LLM Knowledge Extractor
1128
+
1129
+ **File**: `src/services/knowledgeExtractor.ts` (new file)
1130
+
1131
+ ```typescript
1132
+ /**
1133
+ * Uses LLM to extract structured knowledge from conversations
1134
+ */
1135
+ export class KnowledgeExtractor {
1136
+ private llm: LLMService;
1137
+ private config: ExtractionConfig;
1138
+
1139
+ constructor(llm: LLMService, config?: Partial<ExtractionConfig>) {
1140
+ this.llm = llm;
1141
+ this.config = {
1142
+ extractSolutions: true,
1143
+ extractPatterns: true,
1144
+ extractDecisions: true,
1145
+ extractStandards: true,
1146
+ extractPreferences: true,
1147
+ minConfidence: 0.6,
1148
+ ...config,
1149
+ };
1150
+ }
1151
+
1152
+ /**
1153
+ * Extract all knowledge from a conversation
1154
+ */
1155
+ async extract(conversation: Conversation): Promise<ExtractedKnowledge> {
1156
+ const fullContent = this.formatConversationForLLM(conversation);
1157
+
1158
+ const prompt = `
1159
+ You are a knowledge extraction specialist. Analyse this Cursor IDE conversation and extract structured knowledge.
1160
+
1161
+ ## Conversation
1162
+ ${fullContent}
1163
+
1164
+ ## Instructions
1165
+ Extract the following types of knowledge. Only extract items you are confident about (confidence > 0.6).
1166
+
1167
+ ### 1. Solutions
1168
+ Problems that were solved. Include:
1169
+ - Clear description of the problem
1170
+ - Error messages if any
1171
+ - The solution that worked
1172
+ - Code changes made
1173
+ - Files affected
1174
+
1175
+ ### 2. Patterns
1176
+ Reusable code patterns or approaches used. Include:
1177
+ - Pattern name
1178
+ - What it does
1179
+ - When to use it
1180
+ - Example implementation
1181
+
1182
+ ### 3. Decisions
1183
+ Technical or architectural decisions made. Include:
1184
+ - What was decided
1185
+ - The reasoning
1186
+ - Alternatives considered
1187
+ - Tradeoffs
1188
+
1189
+ ### 4. Standards
1190
+ Coding standards or guidelines established. Include:
1191
+ - The rule/guideline
1192
+ - Examples
1193
+ - Rationale
1194
+
1195
+ ### 5. Preferences
1196
+ User preferences or corrections. Include:
1197
+ - What aspect
1198
+ - The preferred approach
1199
+ - What was corrected
1200
+
1201
+ ## Output Format
1202
+ Return a JSON object matching this schema:
1203
+ {
1204
+ "solutions": [...],
1205
+ "patterns": [...],
1206
+ "decisions": [...],
1207
+ "standards": [...],
1208
+ "preferences": [...],
1209
+ "entities": [...]
1210
+ }
1211
+
1212
+ Only include items where you have enough information to be useful. Empty arrays are fine.
1213
+ `;
1214
+
1215
+ const response = await this.llm.invoke(prompt, {
1216
+ responseFormat: 'json',
1217
+ temperature: 0.3, // Low temperature for consistent extraction
1218
+ });
1219
+
1220
+ const extracted = this.parseResponse(response);
1221
+ return this.filterByConfidence(extracted);
1222
+ }
1223
+
1224
+ /**
1225
+ * Extract only solutions (for quick processing)
1226
+ */
1227
+ async extractSolutions(conversation: Conversation): Promise<Solution[]> {
1228
+ // Focused extraction prompt for just solutions
1229
+ }
1230
+
1231
+ /**
1232
+ * Batch extract from multiple conversations
1233
+ */
1234
+ async batchExtract(conversations: Conversation[], options?: BatchOptions): Promise<ExtractedKnowledge[]> {
1235
+ const results: ExtractedKnowledge[] = [];
1236
+
1237
+ for (const conv of conversations) {
1238
+ try {
1239
+ const extracted = await this.extract(conv);
1240
+ results.push(extracted);
1241
+
1242
+ if (options?.onProgress) {
1243
+ options.onProgress(results.length, conversations.length);
1244
+ }
1245
+ } catch (error) {
1246
+ console.error(`Failed to extract from ${conv.id}:`, error);
1247
+ if (!options?.continueOnError) throw error;
1248
+ }
1249
+ }
1250
+
1251
+ return results;
1252
+ }
1253
+
1254
+ private formatConversationForLLM(conversation: Conversation): string {
1255
+ return conversation.messages
1256
+ .map(m => `**${m.role.toUpperCase()}**:\n${m.content}`)
1257
+ .join('\n\n---\n\n');
1258
+ }
1259
+
1260
+ private parseResponse(response: string): ExtractedKnowledge {
1261
+ // Parse JSON response, handling common issues
1262
+ try {
1263
+ return JSON.parse(response);
1264
+ } catch {
1265
+ // Try to extract JSON from markdown code blocks
1266
+ const jsonMatch = response.match(/```json\n?([\s\S]*?)\n?```/);
1267
+ if (jsonMatch) {
1268
+ return JSON.parse(jsonMatch[1]);
1269
+ }
1270
+ throw new Error('Failed to parse LLM response as JSON');
1271
+ }
1272
+ }
1273
+
1274
+ private filterByConfidence(knowledge: ExtractedKnowledge): ExtractedKnowledge {
1275
+ const minConf = this.config.minConfidence;
1276
+
1277
+ return {
1278
+ ...knowledge,
1279
+ solutions: knowledge.solutions.filter(s => s.confidence >= minConf),
1280
+ patterns: knowledge.patterns.filter(p => p.confidence >= minConf),
1281
+ decisions: knowledge.decisions.filter(d => d.confidence >= minConf),
1282
+ standards: knowledge.standards.filter(s => s.confidence >= minConf),
1283
+ preferences: knowledge.preferences.filter(p => p.confidence >= minConf),
1284
+ };
1285
+ }
1286
+ }
1287
+
1288
+ interface ExtractionConfig {
1289
+ extractSolutions: boolean;
1290
+ extractPatterns: boolean;
1291
+ extractDecisions: boolean;
1292
+ extractStandards: boolean;
1293
+ extractPreferences: boolean;
1294
+ minConfidence: number;
1295
+ }
1296
+
1297
+ interface BatchOptions {
1298
+ continueOnError?: boolean;
1299
+ onProgress?: (completed: number, total: number) => void;
1300
+ }
1301
+ ```
1302
+
1303
+ **Acceptance Criteria**:
1304
+ - [ ] Extracts solutions with problem/solution pairs
1305
+ - [ ] Extracts patterns with implementation examples
1306
+ - [ ] Extracts decisions with reasoning
1307
+ - [ ] Handles LLM response parsing errors gracefully
1308
+ - [ ] Batch extraction shows progress
1309
+ - [ ] Low-confidence items are filtered out
1310
+
1311
+ ---
1312
+
1313
+ #### Task 3.3: Create Knowledge Storage Service
1314
+
1315
+ **File**: `src/services/knowledgeStorage.ts` (new file)
1316
+
1317
+ Store extracted knowledge as first-class chunks:
1318
+
1319
+ ```typescript
1320
+ /**
1321
+ * Stores extracted knowledge as searchable chunks
1322
+ */
1323
+ export class KnowledgeStorage {
1324
+ private vectorStore: EnhancedVectorStore;
1325
+ private metadataStore: MemoryMetadataStore;
1326
+ private embeddings: EmbeddingsService;
1327
+
1328
+ /**
1329
+ * Store extracted knowledge from a conversation
1330
+ */
1331
+ async storeKnowledge(knowledge: ExtractedKnowledge): Promise<StoreResult> {
1332
+ const chunks: EnhancedChunk[] = [];
1333
+
1334
+ // Store solutions
1335
+ for (const solution of knowledge.solutions) {
1336
+ const chunk = await this.createSolutionChunk(solution, knowledge.conversationId);
1337
+ chunks.push(chunk);
1338
+ }
1339
+
1340
+ // Store patterns
1341
+ for (const pattern of knowledge.patterns) {
1342
+ const chunk = await this.createPatternChunk(pattern, knowledge.conversationId);
1343
+ chunks.push(chunk);
1344
+ }
1345
+
1346
+ // Store decisions
1347
+ for (const decision of knowledge.decisions) {
1348
+ const chunk = await this.createDecisionChunk(decision, knowledge.conversationId);
1349
+ chunks.push(chunk);
1350
+ }
1351
+
1352
+ // Store standards
1353
+ for (const standard of knowledge.standards) {
1354
+ const chunk = await this.createStandardChunk(standard, knowledge.conversationId);
1355
+ chunks.push(chunk);
1356
+ }
1357
+
1358
+ // Upsert all chunks
1359
+ await this.vectorStore.upsert(chunks);
1360
+
1361
+ // Create relationships between related items
1362
+ await this.createRelationships(knowledge);
1363
+
1364
+ return {
1365
+ chunksCreated: chunks.length,
1366
+ solutionsStored: knowledge.solutions.length,
1367
+ patternsStored: knowledge.patterns.length,
1368
+ decisionsStored: knowledge.decisions.length,
1369
+ standardsStored: knowledge.standards.length,
1370
+ };
1371
+ }
1372
+
1373
+ private async createSolutionChunk(solution: Solution, conversationId: string): Promise<EnhancedChunk> {
1374
+ const content = this.formatSolution(solution);
1375
+ const embedding = await this.embeddings.embed(content);
1376
+
1377
+ return {
1378
+ id: `solution-${solution.id}`,
1379
+ content,
1380
+ embedding,
1381
+ source: `extracted:${conversationId}`,
1382
+ metadata: {
1383
+ solutionId: solution.id,
1384
+ errorMessage: solution.errorMessage,
1385
+ filesAffected: solution.filesAffected,
1386
+ tags: solution.tags,
1387
+ },
1388
+ createdAt: new Date(),
1389
+ updatedAt: new Date(),
1390
+ lastAccessedAt: null,
1391
+ accessCount: 0,
1392
+ importance: 0.8, // High importance for solutions
1393
+ decayScore: 1.0,
1394
+ isArchived: false,
1395
+ chunkType: ChunkType.SOLUTION,
1396
+ relatedChunkIds: [],
1397
+ entities: solution.tags.map(t => ({
1398
+ type: EntityType.CONCEPT,
1399
+ value: t,
1400
+ confidence: 0.7,
1401
+ })),
1402
+ sourceConversationId: conversationId,
1403
+ };
1404
+ }
1405
+
1406
+ private formatSolution(solution: Solution): string {
1407
+ const parts = [
1408
+ `# Problem\n${solution.problem}`,
1409
+ ];
1410
+
1411
+ if (solution.errorMessage) {
1412
+ parts.push(`## Error\n\`\`\`\n${solution.errorMessage}\n\`\`\``);
1413
+ }
1414
+
1415
+ parts.push(`# Solution\n${solution.solution}`);
1416
+
1417
+ if (solution.codeChanges.length > 0) {
1418
+ const codeSection = solution.codeChanges
1419
+ .map(cc => {
1420
+ let code = `### ${cc.filename ?? 'Code'}\n`;
1421
+ if (cc.before) {
1422
+ code += `Before:\n\`\`\`${cc.language}\n${cc.before}\n\`\`\`\n`;
1423
+ }
1424
+ code += `After:\n\`\`\`${cc.language}\n${cc.after}\n\`\`\``;
1425
+ if (cc.explanation) {
1426
+ code += `\n${cc.explanation}`;
1427
+ }
1428
+ return code;
1429
+ })
1430
+ .join('\n\n');
1431
+ parts.push(`# Code Changes\n${codeSection}`);
1432
+ }
1433
+
1434
+ if (solution.filesAffected.length > 0) {
1435
+ parts.push(`# Files\n${solution.filesAffected.join(', ')}`);
1436
+ }
1437
+
1438
+ return parts.join('\n\n');
1439
+ }
1440
+
1441
+ // Similar methods for patterns, decisions, standards...
1442
+
1443
+ private async createRelationships(knowledge: ExtractedKnowledge): Promise<void> {
1444
+ // Link patterns that reference each other
1445
+ for (const pattern of knowledge.patterns) {
1446
+ for (const relatedId of pattern.relatedPatterns) {
1447
+ await this.metadataStore.addRelationship(
1448
+ `pattern-${pattern.id}`,
1449
+ `pattern-${relatedId}`,
1450
+ RelationshipType.RELATES_TO,
1451
+ 0.7
1452
+ );
1453
+ }
1454
+ }
1455
+
1456
+ // Link solutions to patterns they use
1457
+ // Link decisions to their outcomes
1458
+ // etc.
1459
+ }
1460
+ }
1461
+
1462
+ interface StoreResult {
1463
+ chunksCreated: number;
1464
+ solutionsStored: number;
1465
+ patternsStored: number;
1466
+ decisionsStored: number;
1467
+ standardsStored: number;
1468
+ }
1469
+ ```
1470
+
1471
+ **Acceptance Criteria**:
1472
+ - [ ] Solutions are stored with full problem/solution context
1473
+ - [ ] Patterns include implementation examples
1474
+ - [ ] Decisions include reasoning and alternatives
1475
+ - [ ] Relationships are created between related items
1476
+ - [ ] All chunks have appropriate importance scores
1477
+
1478
+ ---
1479
+
1480
+ ## Phase 4: Relationship Graph {#phase-4-relationship-graph}
1481
+
1482
+ ### Overview
1483
+
1484
+ Implement typed relationships between chunks for multi-hop reasoning.
1485
+
1486
+ ### Tasks
1487
+
1488
+ #### Task 4.1: Define Relationship Types
1489
+
1490
+ **File**: `src/types/relationships.ts` (new file)
1491
+
1492
+ ```typescript
1493
+ /**
1494
+ * Typed relationships between knowledge chunks
1495
+ */
1496
+ export enum RelationshipType {
1497
+ // Semantic relationships
1498
+ RELATES_TO = 'RELATES_TO', // General relationship
1499
+ SIMILAR_TO = 'SIMILAR_TO', // Semantically similar
1500
+
1501
+ // Causal relationships
1502
+ LEADS_TO = 'LEADS_TO', // A causes/leads to B
1503
+ DERIVED_FROM = 'DERIVED_FROM', // B is derived from A
1504
+ SOLVES = 'SOLVES', // Solution solves problem
1505
+
1506
+ // Temporal relationships
1507
+ SUPERSEDES = 'SUPERSEDES', // A replaces B (B is outdated)
1508
+ OCCURRED_BEFORE = 'OCCURRED_BEFORE', // Temporal ordering
1509
+ EVOLVED_INTO = 'EVOLVED_INTO', // A evolved into B
1510
+
1511
+ // Conflict relationships
1512
+ CONTRADICTS = 'CONTRADICTS', // A and B conflict
1513
+ INVALIDATED_BY = 'INVALIDATED_BY', // A is invalidated by B
1514
+
1515
+ // Preference relationships
1516
+ PREFERS_OVER = 'PREFERS_OVER', // User prefers A over B
1517
+
1518
+ // Structural relationships
1519
+ PART_OF = 'PART_OF', // A is part of B
1520
+ DEPENDS_ON = 'DEPENDS_ON', // A depends on B
1521
+ IMPLEMENTS = 'IMPLEMENTS', // A implements B (e.g., pattern)
1522
+ EXEMPLIFIES = 'EXEMPLIFIES', // A is an example of B
1523
+ }
1524
+
1525
+ export interface Relationship {
1526
+ id: string;
1527
+ fromChunkId: string;
1528
+ toChunkId: string;
1529
+ type: RelationshipType;
1530
+ strength: number; // 0.0 - 1.0
1531
+ createdAt: Date;
1532
+ metadata: Record<string, unknown>;
1533
+
1534
+ // Bidirectional flag - if true, relationship works both ways
1535
+ bidirectional: boolean;
1536
+ }
1537
+
1538
+ export interface GraphTraversalOptions {
1539
+ maxDepth: number;
1540
+ relationshipTypes?: RelationshipType[];
1541
+ minStrength?: number;
1542
+ excludeArchived?: boolean;
1543
+ }
1544
+
1545
+ export interface GraphNode {
1546
+ chunkId: string;
1547
+ depth: number;
1548
+ path: string[]; // IDs of chunks in path from start
1549
+ relationshipType: RelationshipType;
1550
+ strength: number;
1551
+ }
1552
+ ```
1553
+
1554
+ ---
1555
+
1556
+ #### Task 4.2: Implement Graph Service
1557
+
1558
+ **File**: `src/services/relationshipGraph.ts` (new file)
1559
+
1560
+ ```typescript
1561
+ /**
1562
+ * Graph operations for relationship-based retrieval
1563
+ */
1564
+ export class RelationshipGraph {
1565
+ private metadataStore: MemoryMetadataStore;
1566
+
1567
+ constructor(metadataStore: MemoryMetadataStore) {
1568
+ this.metadataStore = metadataStore;
1569
+ }
1570
+
1571
+ /**
1572
+ * Add a relationship between two chunks
1573
+ */
1574
+ async addRelationship(
1575
+ fromId: string,
1576
+ toId: string,
1577
+ type: RelationshipType,
1578
+ options?: {
1579
+ strength?: number;
1580
+ bidirectional?: boolean;
1581
+ metadata?: Record<string, unknown>;
1582
+ }
1583
+ ): Promise<void> {
1584
+ await this.metadataStore.addRelationship(fromId, toId, type, options?.strength ?? 0.5);
1585
+
1586
+ if (options?.bidirectional) {
1587
+ // Add reverse relationship
1588
+ const reverseType = this.getReverseRelationshipType(type);
1589
+ await this.metadataStore.addRelationship(toId, fromId, reverseType, options?.strength ?? 0.5);
1590
+ }
1591
+ }
1592
+
1593
+ /**
1594
+ * Traverse graph from a starting chunk
1595
+ */
1596
+ async traverse(startChunkId: string, options: GraphTraversalOptions): Promise<GraphNode[]> {
1597
+ const visited = new Set<string>();
1598
+ const results: GraphNode[] = [];
1599
+
1600
+ const queue: Array<{ chunkId: string; depth: number; path: string[] }> = [
1601
+ { chunkId: startChunkId, depth: 0, path: [] }
1602
+ ];
1603
+
1604
+ while (queue.length > 0) {
1605
+ const current = queue.shift()!;
1606
+
1607
+ if (visited.has(current.chunkId) || current.depth > options.maxDepth) {
1608
+ continue;
1609
+ }
1610
+
1611
+ visited.add(current.chunkId);
1612
+
1613
+ // Get relationships from this node
1614
+ const relationships = await this.metadataStore.getRelatedChunks(
1615
+ current.chunkId,
1616
+ options.relationshipTypes?.[0] // TODO: support multiple types
1617
+ );
1618
+
1619
+ for (const rel of relationships) {
1620
+ if (options.minStrength && rel.strength < options.minStrength) {
1621
+ continue;
1622
+ }
1623
+
1624
+ if (options.excludeArchived) {
1625
+ const metadata = await this.metadataStore.getChunkMetadata(rel.toChunkId);
1626
+ if (metadata?.isArchived) continue;
1627
+ }
1628
+
1629
+ results.push({
1630
+ chunkId: rel.toChunkId,
1631
+ depth: current.depth + 1,
1632
+ path: [...current.path, current.chunkId],
1633
+ relationshipType: rel.type,
1634
+ strength: rel.strength,
1635
+ });
1636
+
1637
+ queue.push({
1638
+ chunkId: rel.toChunkId,
1639
+ depth: current.depth + 1,
1640
+ path: [...current.path, current.chunkId],
1641
+ });
1642
+ }
1643
+ }
1644
+
1645
+ return results;
1646
+ }
1647
+
1648
+ /**
1649
+ * Find contradictions for a chunk
1650
+ */
1651
+ async findContradictions(chunkId: string): Promise<Contradiction[]> {
1652
+ const contradicting = await this.metadataStore.getRelatedChunks(
1653
+ chunkId,
1654
+ RelationshipType.CONTRADICTS
1655
+ );
1656
+
1657
+ const invalidatedBy = await this.metadataStore.getRelatedChunks(
1658
+ chunkId,
1659
+ RelationshipType.INVALIDATED_BY
1660
+ );
1661
+
1662
+ return [
1663
+ ...contradicting.map(r => ({
1664
+ chunkId: r.toChunkId,
1665
+ type: 'contradiction' as const,
1666
+ strength: r.strength,
1667
+ })),
1668
+ ...invalidatedBy.map(r => ({
1669
+ chunkId: r.toChunkId,
1670
+ type: 'invalidation' as const,
1671
+ strength: r.strength,
1672
+ })),
1673
+ ];
1674
+ }
1675
+
1676
+ /**
1677
+ * Automatically detect potential contradictions
1678
+ */
1679
+ async detectContradictions(newChunk: EnhancedChunk): Promise<PotentialContradiction[]> {
1680
+ // Find semantically similar chunks
1681
+ const similar = await this.metadataStore.findSimilarByContent(
1682
+ newChunk.id,
1683
+ 0.85 // High similarity threshold
1684
+ );
1685
+
1686
+ const potentialContradictions: PotentialContradiction[] = [];
1687
+
1688
+ for (const candidate of similar) {
1689
+ // Check if they have conflicting timestamps (newer vs older)
1690
+ // Check if they have different conclusions about the same topic
1691
+ // This could use LLM for sophisticated contradiction detection
1692
+
1693
+ if (this.mightContradict(newChunk, candidate)) {
1694
+ potentialContradictions.push({
1695
+ existingChunkId: candidate.id,
1696
+ newChunkId: newChunk.id,
1697
+ similarity: candidate.similarity,
1698
+ reason: 'High semantic similarity with different content',
1699
+ });
1700
+ }
1701
+ }
1702
+
1703
+ return potentialContradictions;
1704
+ }
1705
+
1706
+ private getReverseRelationshipType(type: RelationshipType): RelationshipType {
1707
+ const reverseMap: Record<RelationshipType, RelationshipType> = {
1708
+ [RelationshipType.LEADS_TO]: RelationshipType.DERIVED_FROM,
1709
+ [RelationshipType.DERIVED_FROM]: RelationshipType.LEADS_TO,
1710
+ [RelationshipType.SUPERSEDES]: RelationshipType.SUPERSEDES, // Inverse doesn't make sense
1711
+ [RelationshipType.PART_OF]: RelationshipType.PART_OF, // Inverse is "contains"
1712
+ // ... etc
1713
+ [RelationshipType.RELATES_TO]: RelationshipType.RELATES_TO,
1714
+ [RelationshipType.SIMILAR_TO]: RelationshipType.SIMILAR_TO,
1715
+ [RelationshipType.SOLVES]: RelationshipType.SOLVES,
1716
+ [RelationshipType.OCCURRED_BEFORE]: RelationshipType.OCCURRED_BEFORE,
1717
+ [RelationshipType.EVOLVED_INTO]: RelationshipType.DERIVED_FROM,
1718
+ [RelationshipType.CONTRADICTS]: RelationshipType.CONTRADICTS,
1719
+ [RelationshipType.INVALIDATED_BY]: RelationshipType.INVALIDATED_BY,
1720
+ [RelationshipType.PREFERS_OVER]: RelationshipType.PREFERS_OVER,
1721
+ [RelationshipType.DEPENDS_ON]: RelationshipType.DEPENDS_ON,
1722
+ [RelationshipType.IMPLEMENTS]: RelationshipType.EXEMPLIFIES,
1723
+ [RelationshipType.EXEMPLIFIES]: RelationshipType.IMPLEMENTS,
1724
+ };
1725
+
1726
+ return reverseMap[type] ?? type;
1727
+ }
1728
+
1729
+ private mightContradict(chunk1: EnhancedChunk, chunk2: any): boolean {
1730
+ // Simple heuristic - can be enhanced with LLM
1731
+ // If both are solutions/decisions and one is much newer, might supersede
1732
+ if (
1733
+ (chunk1.chunkType === ChunkType.SOLUTION || chunk1.chunkType === ChunkType.DECISION) &&
1734
+ (chunk2.chunkType === ChunkType.SOLUTION || chunk2.chunkType === ChunkType.DECISION)
1735
+ ) {
1736
+ const timeDiff = Math.abs(
1737
+ chunk1.createdAt.getTime() - new Date(chunk2.createdAt).getTime()
1738
+ );
1739
+ const daysDiff = timeDiff / (1000 * 60 * 60 * 24);
1740
+
1741
+ return daysDiff > 7; // If more than a week apart, might conflict
1742
+ }
1743
+
1744
+ return false;
1745
+ }
1746
+ }
1747
+
1748
+ interface Contradiction {
1749
+ chunkId: string;
1750
+ type: 'contradiction' | 'invalidation';
1751
+ strength: number;
1752
+ }
1753
+
1754
+ interface PotentialContradiction {
1755
+ existingChunkId: string;
1756
+ newChunkId: string;
1757
+ similarity: number;
1758
+ reason: string;
1759
+ }
1760
+ ```
1761
+
1762
+ **Acceptance Criteria**:
1763
+ - [ ] All 15 relationship types are supported
1764
+ - [ ] Graph traversal respects depth limits
1765
+ - [ ] Bidirectional relationships create two edges
1766
+ - [ ] Contradiction detection identifies potential conflicts
1767
+ - [ ] Traversal filters by relationship type and strength
1768
+
1769
+ ---
1770
+
1771
+ ## Phase 5: Hierarchical Memory (Categories/Summaries) {#phase-5-hierarchical-memory}
1772
+
1773
+ ### Overview
1774
+
1775
+ Implement the 3-layer memory hierarchy: Resources → Items → Categories.
1776
+
1777
+ ### Tasks
1778
+
1779
+ #### Task 5.1: Define Category Structure
1780
+
1781
+ **File**: `src/types/categories.ts` (new file)
1782
+
1783
+ ```typescript
1784
+ /**
1785
+ * Category for grouping related knowledge
1786
+ */
1787
+ export interface Category {
1788
+ name: string; // Unique identifier (e.g., "authentication")
1789
+ displayName: string; // Human-readable name
1790
+ description: string; // What this category covers
1791
+ parentCategory?: string; // For hierarchical categories
1792
+
1793
+ // Evolving summary
1794
+ summary: string; // Markdown summary of all items in category
1795
+ summaryUpdatedAt: Date;
1796
+
1797
+ // Statistics
1798
+ itemCount: number;
1799
+ lastItemAddedAt: Date;
1800
+
1801
+ // Auto-generated tags for this category
1802
+ tags: string[];
1803
+ }
1804
+
1805
+ export interface CategoryItem {
1806
+ chunkId: string;
1807
+ category: string;
1808
+ addedAt: Date;
1809
+ relevanceScore: number; // How well it fits the category
1810
+ }
1811
+
1812
+ /**
1813
+ * Predefined categories for common knowledge types
1814
+ */
1815
+ export const DEFAULT_CATEGORIES: Partial<Category>[] = [
1816
+ {
1817
+ name: 'authentication',
1818
+ displayName: 'Authentication',
1819
+ description: 'Login, sessions, JWT, OAuth, security',
1820
+ tags: ['auth', 'security', 'login', 'jwt', 'oauth'],
1821
+ },
1822
+ {
1823
+ name: 'database',
1824
+ displayName: 'Database',
1825
+ description: 'Queries, migrations, models, relationships',
1826
+ tags: ['sql', 'database', 'query', 'migration', 'model'],
1827
+ },
1828
+ {
1829
+ name: 'api',
1830
+ displayName: 'API',
1831
+ description: 'REST, GraphQL, endpoints, requests, responses',
1832
+ tags: ['api', 'rest', 'graphql', 'endpoint', 'http'],
1833
+ },
1834
+ {
1835
+ name: 'testing',
1836
+ displayName: 'Testing',
1837
+ description: 'Unit tests, integration tests, mocking, fixtures',
1838
+ tags: ['test', 'testing', 'mock', 'fixture', 'assertion'],
1839
+ },
1840
+ {
1841
+ name: 'frontend',
1842
+ displayName: 'Frontend',
1843
+ description: 'UI components, styling, state management',
1844
+ tags: ['ui', 'component', 'style', 'css', 'state'],
1845
+ },
1846
+ {
1847
+ name: 'devops',
1848
+ displayName: 'DevOps',
1849
+ description: 'Deployment, CI/CD, Docker, infrastructure',
1850
+ tags: ['deploy', 'docker', 'ci', 'cd', 'infrastructure'],
1851
+ },
1852
+ {
1853
+ name: 'architecture',
1854
+ displayName: 'Architecture',
1855
+ description: 'Design patterns, system design, decisions',
1856
+ tags: ['pattern', 'architecture', 'design', 'structure'],
1857
+ },
1858
+ {
1859
+ name: 'performance',
1860
+ displayName: 'Performance',
1861
+ description: 'Optimisation, caching, profiling',
1862
+ tags: ['performance', 'optimisation', 'cache', 'speed'],
1863
+ },
1864
+ {
1865
+ name: 'debugging',
1866
+ displayName: 'Debugging',
1867
+ description: 'Error resolution, troubleshooting, fixes',
1868
+ tags: ['bug', 'error', 'fix', 'debug', 'issue'],
1869
+ },
1870
+ {
1871
+ name: 'standards',
1872
+ displayName: 'Standards',
1873
+ description: 'Coding standards, conventions, best practices',
1874
+ tags: ['standard', 'convention', 'practice', 'guideline'],
1875
+ },
1876
+ ];
1877
+ ```
1878
+
1879
+ ---
1880
+
1881
+ #### Task 5.2: Implement Category Manager
1882
+
1883
+ **File**: `src/services/categoryManager.ts` (new file)
1884
+
1885
+ ```typescript
1886
+ /**
1887
+ * Manages category organisation and summary evolution
1888
+ */
1889
+ export class CategoryManager {
1890
+ private metadataStore: MemoryMetadataStore;
1891
+ private llm: LLMService;
1892
+
1893
+ constructor(metadataStore: MemoryMetadataStore, llm: LLMService) {
1894
+ this.metadataStore = metadataStore;
1895
+ this.llm = llm;
1896
+ }
1897
+
1898
+ /**
1899
+ * Initialise default categories
1900
+ */
1901
+ async initialise(): Promise<void> {
1902
+ for (const cat of DEFAULT_CATEGORIES) {
1903
+ const existing = await this.metadataStore.getCategory(cat.name!);
1904
+ if (!existing) {
1905
+ await this.metadataStore.upsertCategory({
1906
+ ...cat,
1907
+ summary: '',
1908
+ summaryUpdatedAt: new Date(),
1909
+ itemCount: 0,
1910
+ lastItemAddedAt: new Date(),
1911
+ } as Category);
1912
+ }
1913
+ }
1914
+ }
1915
+
1916
+ /**
1917
+ * Classify a chunk into categories
1918
+ */
1919
+ async classifyChunk(chunk: EnhancedChunk): Promise<CategoryClassification[]> {
1920
+ const categories = await this.metadataStore.listCategories();
1921
+
1922
+ // Use LLM to classify
1923
+ const prompt = `
1924
+ Classify this knowledge item into one or more categories.
1925
+
1926
+ ## Item
1927
+ ${chunk.content}
1928
+
1929
+ ## Available Categories
1930
+ ${categories.map(c => `- ${c.name}: ${c.description}`).join('\n')}
1931
+
1932
+ ## Instructions
1933
+ Return a JSON array of classifications:
1934
+ [
1935
+ { "category": "category_name", "relevanceScore": 0.0-1.0, "reason": "why" },
1936
+ ...
1937
+ ]
1938
+
1939
+ Only include categories with relevanceScore > 0.5.
1940
+ `;
1941
+
1942
+ const response = await this.llm.invoke(prompt, { responseFormat: 'json' });
1943
+ return JSON.parse(response);
1944
+ }
1945
+
1946
+ /**
1947
+ * Add a chunk to a category and update summary
1948
+ */
1949
+ async addToCategory(chunkId: string, categoryName: string, relevanceScore: number): Promise<void> {
1950
+ // Add to category_items table
1951
+ await this.metadataStore.addChunkToCategory(chunkId, categoryName, relevanceScore);
1952
+
1953
+ // Update category statistics
1954
+ const category = await this.metadataStore.getCategory(categoryName);
1955
+ if (category) {
1956
+ await this.metadataStore.upsertCategory({
1957
+ ...category,
1958
+ itemCount: category.itemCount + 1,
1959
+ lastItemAddedAt: new Date(),
1960
+ });
1961
+ }
1962
+ }
1963
+
1964
+ /**
1965
+ * Evolve a category's summary with new items
1966
+ */
1967
+ async evolveSummary(categoryName: string): Promise<void> {
1968
+ const category = await this.metadataStore.getCategory(categoryName);
1969
+ if (!category) return;
1970
+
1971
+ // Get recent items in this category
1972
+ const recentItems = await this.metadataStore.getCategoryItems(categoryName, {
1973
+ limit: 20,
1974
+ since: category.summaryUpdatedAt,
1975
+ });
1976
+
1977
+ if (recentItems.length === 0) return;
1978
+
1979
+ // Get chunk content for new items
1980
+ const itemContents = await Promise.all(
1981
+ recentItems.map(item => this.metadataStore.getChunkContent(item.chunkId))
1982
+ );
1983
+
1984
+ const prompt = `
1985
+ You are a Memory Synchronisation Specialist.
1986
+
1987
+ ## Category: ${category.displayName}
1988
+ ${category.description}
1989
+
1990
+ ## Current Summary
1991
+ ${category.summary || 'No existing summary.'}
1992
+
1993
+ ## New Items to Integrate
1994
+ ${itemContents.map((content, i) => `### Item ${i + 1}\n${content}`).join('\n\n')}
1995
+
1996
+ ## Instructions
1997
+ 1. Update the summary to incorporate new information
1998
+ 2. If new items conflict with existing summary, update to reflect the latest state
1999
+ 3. Keep the summary concise but comprehensive
2000
+ 4. Use markdown formatting
2001
+ 5. Focus on actionable knowledge, patterns, and decisions
2002
+
2003
+ Return ONLY the updated summary markdown.
2004
+ `;
2005
+
2006
+ const newSummary = await this.llm.invoke(prompt);
2007
+
2008
+ await this.metadataStore.upsertCategory({
2009
+ ...category,
2010
+ summary: newSummary,
2011
+ summaryUpdatedAt: new Date(),
2012
+ });
2013
+ }
2014
+
2015
+ /**
2016
+ * Get category summary for retrieval
2017
+ */
2018
+ async getCategorySummary(categoryName: string): Promise<string | null> {
2019
+ const category = await this.metadataStore.getCategory(categoryName);
2020
+ return category?.summary ?? null;
2021
+ }
2022
+
2023
+ /**
2024
+ * Determine which categories might answer a query
2025
+ */
2026
+ async selectRelevantCategories(query: string): Promise<string[]> {
2027
+ const categories = await this.metadataStore.listCategories();
2028
+
2029
+ const prompt = `
2030
+ Query: ${query}
2031
+
2032
+ Available Categories:
2033
+ ${categories.map(c => `- ${c.name}: ${c.description} (${c.itemCount} items)`).join('\n')}
2034
+
2035
+ Return a JSON array of category names most likely to contain the answer.
2036
+ Only include categories that are clearly relevant.
2037
+ `;
2038
+
2039
+ const response = await this.llm.invoke(prompt, { responseFormat: 'json' });
2040
+ return JSON.parse(response);
2041
+ }
2042
+ }
2043
+
2044
+ interface CategoryClassification {
2045
+ category: string;
2046
+ relevanceScore: number;
2047
+ reason: string;
2048
+ }
2049
+ ```
2050
+
2051
+ **Acceptance Criteria**:
2052
+ - [ ] Default categories are created on first run
2053
+ - [ ] Chunks are classified with relevance scores
2054
+ - [ ] Summaries evolve as new items are added
2055
+ - [ ] Contradictions are handled (new info updates summary)
2056
+ - [ ] Category selection uses LLM for relevance
2057
+
2058
+ ---
2059
+
2060
+ ## Phase 6: Background Maintenance Jobs {#phase-6-maintenance}
2061
+
2062
+ ### Overview
2063
+
2064
+ Implement scheduled jobs for consolidation, summarisation, and cleanup.
2065
+
2066
+ ### Tasks
2067
+
2068
+ #### Task 6.1: Implement Maintenance Scheduler
2069
+
2070
+ **File**: `src/services/maintenanceScheduler.ts` (new file)
2071
+
2072
+ ```typescript
2073
+ import * as cron from 'node-cron';
2074
+
2075
+ /**
2076
+ * Schedules and runs background maintenance jobs
2077
+ */
2078
+ export class MaintenanceScheduler {
2079
+ private metadataStore: MemoryMetadataStore;
2080
+ private decayCalculator: DecayCalculator;
2081
+ private categoryManager: CategoryManager;
2082
+ private jobs: Map<string, cron.ScheduledTask> = new Map();
2083
+
2084
+ constructor(
2085
+ metadataStore: MemoryMetadataStore,
2086
+ decayCalculator: DecayCalculator,
2087
+ categoryManager: CategoryManager
2088
+ ) {
2089
+ this.metadataStore = metadataStore;
2090
+ this.decayCalculator = decayCalculator;
2091
+ this.categoryManager = categoryManager;
2092
+ }
2093
+
2094
+ /**
2095
+ * Start all scheduled jobs
2096
+ */
2097
+ start(): void {
2098
+ // Nightly consolidation - 3 AM
2099
+ this.jobs.set('nightly', cron.schedule('0 3 * * *', () => {
2100
+ this.runNightlyConsolidation().catch(console.error);
2101
+ }));
2102
+
2103
+ // Weekly summarisation - Sunday 4 AM
2104
+ this.jobs.set('weekly', cron.schedule('0 4 * * 0', () => {
2105
+ this.runWeeklySummarisation().catch(console.error);
2106
+ }));
2107
+
2108
+ // Monthly re-indexing - 1st of month, 5 AM
2109
+ this.jobs.set('monthly', cron.schedule('0 5 1 * *', () => {
2110
+ this.runMonthlyReindex().catch(console.error);
2111
+ }));
2112
+
2113
+ // Hourly decay score update
2114
+ this.jobs.set('hourly', cron.schedule('0 * * * *', () => {
2115
+ this.decayCalculator.updateAllDecayScores(this.metadataStore).catch(console.error);
2116
+ }));
2117
+
2118
+ console.log('Maintenance scheduler started');
2119
+ }
2120
+
2121
+ /**
2122
+ * Stop all scheduled jobs
2123
+ */
2124
+ stop(): void {
2125
+ for (const [name, job] of this.jobs) {
2126
+ job.stop();
2127
+ console.log(`Stopped job: ${name}`);
2128
+ }
2129
+ this.jobs.clear();
2130
+ }
2131
+
2132
+ /**
2133
+ * Run a specific job immediately (for testing/manual trigger)
2134
+ */
2135
+ async runJob(jobName: 'nightly' | 'weekly' | 'monthly'): Promise<void> {
2136
+ switch (jobName) {
2137
+ case 'nightly':
2138
+ await this.runNightlyConsolidation();
2139
+ break;
2140
+ case 'weekly':
2141
+ await this.runWeeklySummarisation();
2142
+ break;
2143
+ case 'monthly':
2144
+ await this.runMonthlyReindex();
2145
+ break;
2146
+ }
2147
+ }
2148
+
2149
+ /**
2150
+ * Nightly: Consolidate duplicates, update decay scores, promote hot items
2151
+ */
2152
+ private async runNightlyConsolidation(): Promise<void> {
2153
+ console.log('Running nightly consolidation...');
2154
+ const startTime = Date.now();
2155
+
2156
+ // 1. Update all decay scores
2157
+ const decayResult = await this.decayCalculator.updateAllDecayScores(this.metadataStore);
2158
+ console.log(` Updated ${decayResult.updated} decay scores`);
2159
+
2160
+ // 2. Find and merge duplicates
2161
+ const duplicates = await this.findDuplicates();
2162
+ for (const group of duplicates) {
2163
+ await this.mergeDuplicates(group);
2164
+ }
2165
+ console.log(` Merged ${duplicates.length} duplicate groups`);
2166
+
2167
+ // 3. Promote frequently accessed items
2168
+ const hotItems = await this.metadataStore.getHotItems(24); // Last 24 hours
2169
+ for (const item of hotItems) {
2170
+ await this.metadataStore.increaseImportance(item.chunkId, 0.05);
2171
+ }
2172
+ console.log(` Promoted ${hotItems.length} hot items`);
2173
+
2174
+ // 4. Archive stale items
2175
+ const archiveCandidates = await this.metadataStore.getArchiveCandidates(0.2);
2176
+ for (const chunk of archiveCandidates) {
2177
+ await this.metadataStore.archiveChunk(chunk.chunkId);
2178
+ }
2179
+ console.log(` Archived ${archiveCandidates.length} stale items`);
2180
+
2181
+ const duration = (Date.now() - startTime) / 1000;
2182
+ console.log(`Nightly consolidation completed in ${duration.toFixed(1)}s`);
2183
+ }
2184
+
2185
+ /**
2186
+ * Weekly: Update category summaries, compress old items
2187
+ */
2188
+ private async runWeeklySummarisation(): Promise<void> {
2189
+ console.log('Running weekly summarisation...');
2190
+ const startTime = Date.now();
2191
+
2192
+ // 1. Update all category summaries
2193
+ const categories = await this.metadataStore.listCategories();
2194
+ for (const category of categories) {
2195
+ await this.categoryManager.evolveSummary(category.name);
2196
+ }
2197
+ console.log(` Updated ${categories.length} category summaries`);
2198
+
2199
+ // 2. Compress old, rarely accessed items
2200
+ const oldItems = await this.metadataStore.getOldItems(30); // > 30 days
2201
+ for (const item of oldItems) {
2202
+ if (item.accessCount < 3) {
2203
+ // Could compress content or move to cold storage
2204
+ await this.metadataStore.markForCompression(item.chunkId);
2205
+ }
2206
+ }
2207
+ console.log(` Marked items for compression`);
2208
+
2209
+ const duration = (Date.now() - startTime) / 1000;
2210
+ console.log(`Weekly summarisation completed in ${duration.toFixed(1)}s`);
2211
+ }
2212
+
2213
+ /**
2214
+ * Monthly: Re-index embeddings, rebuild graph edges, deep cleanup
2215
+ */
2216
+ private async runMonthlyReindex(): Promise<void> {
2217
+ console.log('Running monthly re-index...');
2218
+ const startTime = Date.now();
2219
+
2220
+ // 1. Identify stale embeddings (from old embedding model)
2221
+ // This would be relevant if embedding model changes
2222
+
2223
+ // 2. Re-weight graph edges based on actual usage
2224
+ const edgeStats = await this.metadataStore.getRelationshipUsageStats();
2225
+ for (const stat of edgeStats) {
2226
+ if (stat.timesTraversed === 0) {
2227
+ // Weaken unused edges
2228
+ await this.metadataStore.updateRelationshipStrength(
2229
+ stat.relationshipId,
2230
+ stat.strength * 0.8
2231
+ );
2232
+ } else {
2233
+ // Strengthen frequently used edges
2234
+ await this.metadataStore.updateRelationshipStrength(
2235
+ stat.relationshipId,
2236
+ Math.min(1.0, stat.strength * 1.1)
2237
+ );
2238
+ }
2239
+ }
2240
+ console.log(` Re-weighted ${edgeStats.length} relationship edges`);
2241
+
2242
+ // 3. Archive very old, never-accessed items
2243
+ const veryOld = await this.metadataStore.getOldItems(180); // > 6 months
2244
+ for (const item of veryOld) {
2245
+ if (item.accessCount === 0) {
2246
+ await this.metadataStore.archiveChunk(item.chunkId);
2247
+ }
2248
+ }
2249
+ console.log(` Archived very old items`);
2250
+
2251
+ // 4. Vacuum database
2252
+ await this.metadataStore.vacuum();
2253
+ console.log(` Database vacuumed`);
2254
+
2255
+ const duration = (Date.now() - startTime) / 1000;
2256
+ console.log(`Monthly re-index completed in ${duration.toFixed(1)}s`);
2257
+ }
2258
+
2259
+ private async findDuplicates(): Promise<DuplicateGroup[]> {
2260
+ // Use vector similarity to find near-duplicates
2261
+ // Group chunks with > 0.95 similarity
2262
+ return [];
2263
+ }
2264
+
2265
+ private async mergeDuplicates(group: DuplicateGroup): Promise<void> {
2266
+ // Keep the most recent/highest importance chunk
2267
+ // Merge metadata from others
2268
+ // Create SUPERSEDES relationships
2269
+ // Archive the duplicates
2270
+ }
2271
+ }
2272
+
2273
+ interface DuplicateGroup {
2274
+ primaryChunkId: string;
2275
+ duplicateChunkIds: string[];
2276
+ similarity: number;
2277
+ }
2278
+ ```
2279
+
2280
+ **Acceptance Criteria**:
2281
+ - [ ] Jobs run on schedule (cron syntax works)
2282
+ - [ ] Jobs can be triggered manually
2283
+ - [ ] Nightly consolidation completes in < 5 minutes
2284
+ - [ ] Weekly summarisation updates all categories
2285
+ - [ ] Monthly re-index handles large databases
2286
+ - [ ] All jobs have proper error handling and logging
2287
+
2288
+ ---
2289
+
2290
+ #### Task 6.2: Add CLI Commands for Maintenance
2291
+
2292
+ **File**: `src/cli/maintenance.ts` (new file)
2293
+
2294
+ ```typescript
2295
+ /**
2296
+ * CLI commands for maintenance operations
2297
+ */
2298
+ export function registerMaintenanceCommands(program: Command) {
2299
+ const maintenanceCommand = program
2300
+ .command('maintenance')
2301
+ .description('Memory system maintenance operations');
2302
+
2303
+ // Run specific job
2304
+ maintenanceCommand
2305
+ .command('run <job>')
2306
+ .description('Run a maintenance job (nightly, weekly, monthly)')
2307
+ .action(async (job: string) => {
2308
+ const scheduler = getMaintenanceScheduler();
2309
+
2310
+ if (!['nightly', 'weekly', 'monthly'].includes(job)) {
2311
+ console.error(`Unknown job: ${job}. Use: nightly, weekly, or monthly`);
2312
+ process.exit(1);
2313
+ }
2314
+
2315
+ console.log(`Running ${job} maintenance...`);
2316
+ await scheduler.runJob(job as any);
2317
+ console.log('Done!');
2318
+ });
2319
+
2320
+ // Start background scheduler
2321
+ maintenanceCommand
2322
+ .command('start')
2323
+ .description('Start the background maintenance scheduler')
2324
+ .action(async () => {
2325
+ const scheduler = getMaintenanceScheduler();
2326
+ scheduler.start();
2327
+ console.log('Maintenance scheduler running. Press Ctrl+C to stop.');
2328
+
2329
+ // Keep process alive
2330
+ process.on('SIGINT', () => {
2331
+ scheduler.stop();
2332
+ process.exit(0);
2333
+ });
2334
+ });
2335
+
2336
+ // Show statistics
2337
+ maintenanceCommand
2338
+ .command('stats')
2339
+ .description('Show memory system statistics')
2340
+ .action(async () => {
2341
+ const store = getMetadataStore();
2342
+ const stats = await store.getStats();
2343
+
2344
+ console.log('\n📊 Memory System Statistics\n');
2345
+ console.log(`Total chunks: ${stats.totalChunks}`);
2346
+ console.log(`Active chunks: ${stats.activeChunks}`);
2347
+ console.log(`Archived chunks: ${stats.archivedChunks}`);
2348
+ console.log(`Categories: ${stats.categoryCount}`);
2349
+ console.log(`Relationships: ${stats.relationshipCount}`);
2350
+ console.log(`\nBy type:`);
2351
+ for (const [type, count] of Object.entries(stats.byType)) {
2352
+ console.log(` ${type}: ${count}`);
2353
+ }
2354
+ console.log(`\nDecay distribution:`);
2355
+ console.log(` High (>0.7): ${stats.decayDistribution.high}`);
2356
+ console.log(` Medium (0.3-0.7): ${stats.decayDistribution.medium}`);
2357
+ console.log(` Low (<0.3): ${stats.decayDistribution.low}`);
2358
+ });
2359
+
2360
+ // Cleanup
2361
+ maintenanceCommand
2362
+ .command('cleanup')
2363
+ .description('Remove archived items and compact database')
2364
+ .option('--dry-run', 'Show what would be removed without removing')
2365
+ .option('--older-than <days>', 'Only remove items archived more than N days ago', parseInt)
2366
+ .action(async (options) => {
2367
+ const store = getMetadataStore();
2368
+
2369
+ const candidates = await store.getCleanupCandidates({
2370
+ olderThan: options.olderThan ?? 30,
2371
+ });
2372
+
2373
+ console.log(`Found ${candidates.length} items to clean up`);
2374
+
2375
+ if (options.dryRun) {
2376
+ for (const item of candidates.slice(0, 20)) {
2377
+ console.log(` - ${item.chunkId} (archived ${item.archivedAt})`);
2378
+ }
2379
+ if (candidates.length > 20) {
2380
+ console.log(` ... and ${candidates.length - 20} more`);
2381
+ }
2382
+ return;
2383
+ }
2384
+
2385
+ await store.cleanupArchived(candidates.map(c => c.chunkId));
2386
+ await store.vacuum();
2387
+ console.log('Cleanup complete!');
2388
+ });
2389
+ }
2390
+ ```
2391
+
2392
+ **Acceptance Criteria**:
2393
+ - [ ] `cursor-rag maintenance run nightly` works
2394
+ - [ ] `cursor-rag maintenance start` runs in background
2395
+ - [ ] `cursor-rag maintenance stats` shows useful metrics
2396
+ - [ ] `cursor-rag maintenance cleanup` safely removes old data
2397
+ - [ ] Dry run mode prevents accidental data loss
2398
+
2399
+ ---
2400
+
2401
+ ## Phase 7: Enhanced Retrieval Scoring {#phase-7-retrieval}
2402
+
2403
+ ### Overview
2404
+
2405
+ Implement hybrid scoring that combines vector similarity with temporal decay, importance, and graph relationships.
2406
+
2407
+ ### Tasks
2408
+
2409
+ #### Task 7.1: Implement Hybrid Scorer
2410
+
2411
+ **File**: `src/services/hybridScorer.ts` (new file)
2412
+
2413
+ ```typescript
2414
+ /**
2415
+ * Combines multiple signals for final retrieval ranking
2416
+ */
2417
+ export class HybridScorer {
2418
+ private config: ScoringConfig;
2419
+ private graph: RelationshipGraph;
2420
+
2421
+ constructor(graph: RelationshipGraph, config?: Partial<ScoringConfig>) {
2422
+ this.graph = graph;
2423
+ this.config = {
2424
+ weights: {
2425
+ similarity: 0.35,
2426
+ decay: 0.20,
2427
+ importance: 0.15,
2428
+ recency: 0.10,
2429
+ graphBoost: 0.10,
2430
+ typeBoost: 0.10,
2431
+ },
2432
+ typeBoosts: {
2433
+ [ChunkType.SOLUTION]: 1.2,
2434
+ [ChunkType.PATTERN]: 1.15,
2435
+ [ChunkType.DECISION]: 1.1,
2436
+ [ChunkType.STANDARD]: 1.05,
2437
+ [ChunkType.DOCUMENTATION]: 1.0,
2438
+ [ChunkType.CODE]: 1.0,
2439
+ [ChunkType.PREFERENCE]: 0.9,
2440
+ [ChunkType.CATEGORY_SUMMARY]: 1.3, // Summaries are very useful
2441
+ },
2442
+ ...config,
2443
+ };
2444
+ }
2445
+
2446
+ /**
2447
+ * Score and rank search results
2448
+ */
2449
+ async scoreResults(
2450
+ results: SearchResult[],
2451
+ query: string,
2452
+ context?: ScoringContext
2453
+ ): Promise<ScoredResult[]> {
2454
+ const scored: ScoredResult[] = [];
2455
+
2456
+ // Get graph context if we have seed chunks
2457
+ const graphContext = context?.seedChunkIds
2458
+ ? await this.getGraphContext(context.seedChunkIds)
2459
+ : new Map<string, number>();
2460
+
2461
+ for (const result of results) {
2462
+ const scores = {
2463
+ similarity: result.score,
2464
+ decay: result.metadata.decayScore,
2465
+ importance: result.metadata.importance,
2466
+ recency: this.calculateRecencyScore(result.metadata.lastAccessedAt),
2467
+ graphBoost: graphContext.get(result.id) ?? 0,
2468
+ typeBoost: this.config.typeBoosts[result.metadata.chunkType] ?? 1.0,
2469
+ };
2470
+
2471
+ const finalScore = this.calculateFinalScore(scores);
2472
+
2473
+ scored.push({
2474
+ ...result,
2475
+ scores,
2476
+ finalScore,
2477
+ });
2478
+ }
2479
+
2480
+ // Sort by final score
2481
+ scored.sort((a, b) => b.finalScore - a.finalScore);
2482
+
2483
+ return scored;
2484
+ }
2485
+
2486
+ /**
2487
+ * Tiered retrieval: summaries first, then items if needed
2488
+ */
2489
+ async tieredRetrieval(
2490
+ query: string,
2491
+ vectorStore: EnhancedVectorStore,
2492
+ categoryManager: CategoryManager
2493
+ ): Promise<TieredResult> {
2494
+ // Stage 1: Select relevant categories
2495
+ const relevantCategories = await categoryManager.selectRelevantCategories(query);
2496
+
2497
+ // Stage 2: Get category summaries
2498
+ const summaries: CategorySummary[] = [];
2499
+ for (const catName of relevantCategories) {
2500
+ const summary = await categoryManager.getCategorySummary(catName);
2501
+ if (summary) {
2502
+ summaries.push({ category: catName, summary });
2503
+ }
2504
+ }
2505
+
2506
+ // Stage 3: Check if summaries are sufficient
2507
+ const sufficientFromSummaries = await this.checkSufficiency(query, summaries);
2508
+
2509
+ if (sufficientFromSummaries.sufficient) {
2510
+ return {
2511
+ tier: 'summary',
2512
+ results: summaries,
2513
+ message: 'Answered from category summaries',
2514
+ };
2515
+ }
2516
+
2517
+ // Stage 4: Drill down into specific items
2518
+ const items = await vectorStore.search(query, {
2519
+ topK: 10,
2520
+ chunkTypes: [ChunkType.SOLUTION, ChunkType.PATTERN, ChunkType.DECISION],
2521
+ filter: {
2522
+ category: { $in: relevantCategories },
2523
+ },
2524
+ });
2525
+
2526
+ const scored = await this.scoreResults(items, query);
2527
+
2528
+ return {
2529
+ tier: 'item',
2530
+ results: scored,
2531
+ categorySummaries: summaries,
2532
+ message: 'Retrieved specific items',
2533
+ };
2534
+ }
2535
+
2536
+ private calculateFinalScore(scores: ScoreComponents): number {
2537
+ const w = this.config.weights;
2538
+
2539
+ return (
2540
+ scores.similarity * w.similarity +
2541
+ scores.decay * w.decay +
2542
+ scores.importance * w.importance +
2543
+ scores.recency * w.recency +
2544
+ scores.graphBoost * w.graphBoost +
2545
+ (scores.typeBoost - 1.0) * w.typeBoost // Normalise type boost around 0
2546
+ );
2547
+ }
2548
+
2549
+ private calculateRecencyScore(lastAccessed: Date | null): number {
2550
+ if (!lastAccessed) return 0;
2551
+
2552
+ const daysSinceAccess = (Date.now() - lastAccessed.getTime()) / (1000 * 60 * 60 * 24);
2553
+ return 1.0 / (1.0 + (daysSinceAccess / 7)); // 7-day half-life
2554
+ }
2555
+
2556
+ private async getGraphContext(seedChunkIds: string[]): Promise<Map<string, number>> {
2557
+ const boost = new Map<string, number>();
2558
+
2559
+ for (const seedId of seedChunkIds) {
2560
+ const related = await this.graph.traverse(seedId, {
2561
+ maxDepth: 2,
2562
+ minStrength: 0.3,
2563
+ });
2564
+
2565
+ for (const node of related) {
2566
+ const existingBoost = boost.get(node.chunkId) ?? 0;
2567
+ // Boost decreases with depth
2568
+ const depthFactor = 1.0 / (1.0 + node.depth);
2569
+ const newBoost = node.strength * depthFactor;
2570
+ boost.set(node.chunkId, Math.max(existingBoost, newBoost));
2571
+ }
2572
+ }
2573
+
2574
+ return boost;
2575
+ }
2576
+
2577
+ private async checkSufficiency(
2578
+ query: string,
2579
+ summaries: CategorySummary[]
2580
+ ): Promise<{ sufficient: boolean; reason: string }> {
2581
+ // Use LLM to determine if summaries answer the query
2582
+ // For now, simple heuristic: if summaries exist and query is general
2583
+
2584
+ if (summaries.length === 0) {
2585
+ return { sufficient: false, reason: 'No relevant summaries' };
2586
+ }
2587
+
2588
+ // Could use LLM here for sophisticated check
2589
+ const queryLower = query.toLowerCase();
2590
+ const isSpecific = queryLower.includes('how') ||
2591
+ queryLower.includes('error') ||
2592
+ queryLower.includes('fix') ||
2593
+ queryLower.includes('specific');
2594
+
2595
+ if (isSpecific) {
2596
+ return { sufficient: false, reason: 'Query requires specific details' };
2597
+ }
2598
+
2599
+ return { sufficient: true, reason: 'Summaries appear sufficient' };
2600
+ }
2601
+ }
2602
+
2603
+ interface ScoringConfig {
2604
+ weights: {
2605
+ similarity: number;
2606
+ decay: number;
2607
+ importance: number;
2608
+ recency: number;
2609
+ graphBoost: number;
2610
+ typeBoost: number;
2611
+ };
2612
+ typeBoosts: Record<ChunkType, number>;
2613
+ }
2614
+
2615
+ interface ScoringContext {
2616
+ seedChunkIds?: string[]; // Chunks to boost related items
2617
+ preferredTypes?: ChunkType[]; // Boost certain types
2618
+ project?: string; // Boost project-specific items
2619
+ }
2620
+
2621
+ interface ScoreComponents {
2622
+ similarity: number;
2623
+ decay: number;
2624
+ importance: number;
2625
+ recency: number;
2626
+ graphBoost: number;
2627
+ typeBoost: number;
2628
+ }
2629
+
2630
+ interface ScoredResult extends SearchResult {
2631
+ scores: ScoreComponents;
2632
+ finalScore: number;
2633
+ }
2634
+
2635
+ interface TieredResult {
2636
+ tier: 'summary' | 'item';
2637
+ results: any[];
2638
+ categorySummaries?: CategorySummary[];
2639
+ message: string;
2640
+ }
2641
+
2642
+ interface CategorySummary {
2643
+ category: string;
2644
+ summary: string;
2645
+ }
2646
+ ```
2647
+
2648
+ **Acceptance Criteria**:
2649
+ - [ ] Final scores combine all components correctly
2650
+ - [ ] Graph boost increases scores for related items
2651
+ - [ ] Type boost favours solutions and patterns
2652
+ - [ ] Tiered retrieval tries summaries first
2653
+ - [ ] Recency boost favours recently accessed items
2654
+
2655
+ ---
2656
+
2657
+ ## Phase 8: RLM-Style Recursive Retrieval {#phase-8-rlm-retrieval}
2658
+
2659
+ ### Overview
2660
+
2661
+ Implement Recursive Language Model patterns for handling queries that require processing large amounts of context. Based on the RLM paper findings, this phase treats retrieved context as environment variables that the LLM can programmatically interact with, decompose, and recursively process.
2662
+
2663
+ ### Key Concepts from RLM Research
2664
+
2665
+ 1. **Context as Environment Variable**: Instead of stuffing all retrieved chunks into the prompt, load them as variables the model can selectively examine
2666
+ 2. **Programmatic Filtering**: Allow the model to filter context using code (regex, keyword searches) before semantic analysis
2667
+ 3. **Recursive Sub-calls**: Enable the model to invoke itself on smaller chunks when processing information-dense content
2668
+ 4. **Variable-based Answer Building**: Build up answers in variables across multiple iterations
2669
+ 5. **Cost Budgets**: Prevent runaway costs with explicit budgets and early termination
2670
+
2671
+ ### Tasks
2672
+
2673
+ #### Task 8.1: Implement Context Environment
2674
+
2675
+ **File**: `src/services/contextEnvironment.ts` (new file)
2676
+
2677
+ Create a sandboxed environment where retrieved context is stored as variables:
2678
+
2679
+ ```typescript
2680
+ /**
2681
+ * Context Environment for RLM-style retrieval
2682
+ *
2683
+ * Treats retrieved context as environment variables that can be
2684
+ * programmatically examined, filtered, and decomposed.
2685
+ */
2686
+ export class ContextEnvironment {
2687
+ private variables: Map<string, ContextVariable> = new Map();
2688
+ private executionLog: ExecutionStep[] = [];
2689
+ private costTracker: CostTracker;
2690
+ private config: EnvironmentConfig;
2691
+
2692
+ constructor(config?: Partial<EnvironmentConfig>) {
2693
+ this.config = {
2694
+ maxIterations: 20,
2695
+ maxSubCalls: 50,
2696
+ costBudget: 1.0, // USD
2697
+ timeoutMs: 120000, // 2 minutes
2698
+ enableAsyncSubCalls: true,
2699
+ ...config,
2700
+ };
2701
+ this.costTracker = new CostTracker(this.config.costBudget);
2702
+ }
2703
+
2704
+ /**
2705
+ * Load context chunks as environment variables
2706
+ */
2707
+ loadContext(chunks: EnhancedChunk[], variableName: string = 'context'): void {
2708
+ const contextVar: ContextVariable = {
2709
+ name: variableName,
2710
+ type: 'chunks',
2711
+ value: chunks,
2712
+ metadata: {
2713
+ totalLength: chunks.reduce((sum, c) => sum + c.content.length, 0),
2714
+ chunkCount: chunks.length,
2715
+ chunkLengths: chunks.map(c => c.content.length),
2716
+ },
2717
+ };
2718
+
2719
+ this.variables.set(variableName, contextVar);
2720
+
2721
+ this.log({
2722
+ type: 'load_context',
2723
+ variableName,
2724
+ metadata: contextVar.metadata,
2725
+ });
2726
+ }
2727
+
2728
+ /**
2729
+ * Get environment state description for LLM
2730
+ * (Tells the model what's available without showing all content)
2731
+ */
2732
+ getStateDescription(): string {
2733
+ const vars = Array.from(this.variables.entries()).map(([name, v]) => {
2734
+ if (v.type === 'chunks') {
2735
+ return `- \`${name}\`: ${v.metadata.chunkCount} chunks, ${v.metadata.totalLength} total chars`;
2736
+ }
2737
+ return `- \`${name}\`: ${typeof v.value}`;
2738
+ });
2739
+
2740
+ return `## Environment State
2741
+ Variables:
2742
+ ${vars.join('\n')}
2743
+
2744
+ Available operations:
2745
+ - \`peek(varName, start?, end?)\` - View portion of a variable
2746
+ - \`filter(varName, pattern)\` - Filter chunks by regex pattern
2747
+ - \`chunk(varName, size)\` - Split into smaller chunks
2748
+ - \`subQuery(query, context)\` - Invoke sub-LLM on context
2749
+ - \`store(varName, value)\` - Store intermediate result
2750
+ - \`getAnswer()\` - Return final answer from environment
2751
+
2752
+ Remaining budget: $${this.costTracker.remaining.toFixed(2)}
2753
+ Iterations: ${this.executionLog.filter(s => s.type === 'iteration').length}/${this.config.maxIterations}
2754
+ Sub-calls: ${this.executionLog.filter(s => s.type === 'sub_call').length}/${this.config.maxSubCalls}
2755
+ `;
2756
+ }
2757
+
2758
+ /**
2759
+ * Peek at portion of context (without loading full content into LLM)
2760
+ */
2761
+ peek(variableName: string, start?: number, end?: number): string {
2762
+ const variable = this.variables.get(variableName);
2763
+ if (!variable) return `Error: Variable '${variableName}' not found`;
2764
+
2765
+ if (variable.type === 'chunks') {
2766
+ const chunks = variable.value as EnhancedChunk[];
2767
+ const selected = chunks.slice(start ?? 0, end ?? 3);
2768
+ return selected.map((c, i) =>
2769
+ `[Chunk ${(start ?? 0) + i}] (${c.content.length} chars):\n${c.content.substring(0, 500)}${c.content.length > 500 ? '...' : ''}`
2770
+ ).join('\n\n');
2771
+ }
2772
+
2773
+ return String(variable.value).substring(start ?? 0, end ?? 1000);
2774
+ }
2775
+
2776
+ /**
2777
+ * Filter chunks by regex pattern
2778
+ */
2779
+ filter(variableName: string, pattern: string): EnhancedChunk[] {
2780
+ const variable = this.variables.get(variableName);
2781
+ if (!variable || variable.type !== 'chunks') return [];
2782
+
2783
+ const regex = new RegExp(pattern, 'i');
2784
+ const chunks = variable.value as EnhancedChunk[];
2785
+ const filtered = chunks.filter(c => regex.test(c.content));
2786
+
2787
+ this.log({
2788
+ type: 'filter',
2789
+ variableName,
2790
+ pattern,
2791
+ resultCount: filtered.length,
2792
+ });
2793
+
2794
+ return filtered;
2795
+ }
2796
+
2797
+ /**
2798
+ * Split variable into smaller chunks for processing
2799
+ */
2800
+ chunk(variableName: string, size: number): EnhancedChunk[][] {
2801
+ const variable = this.variables.get(variableName);
2802
+ if (!variable || variable.type !== 'chunks') return [];
2803
+
2804
+ const chunks = variable.value as EnhancedChunk[];
2805
+ const batches: EnhancedChunk[][] = [];
2806
+
2807
+ for (let i = 0; i < chunks.length; i += size) {
2808
+ batches.push(chunks.slice(i, i + size));
2809
+ }
2810
+
2811
+ this.log({
2812
+ type: 'chunk',
2813
+ variableName,
2814
+ size,
2815
+ batchCount: batches.length,
2816
+ });
2817
+
2818
+ return batches;
2819
+ }
2820
+
2821
+ /**
2822
+ * Execute a sub-LLM call on context
2823
+ * Implements async sub-calls as recommended by RLM paper
2824
+ */
2825
+ async subQuery(
2826
+ llm: LLMService,
2827
+ query: string,
2828
+ context: string | EnhancedChunk[],
2829
+ options?: SubQueryOptions
2830
+ ): Promise<string> {
2831
+ // Check budget before calling
2832
+ if (!this.costTracker.canAfford(options?.estimatedCost ?? 0.01)) {
2833
+ throw new BudgetExceededError('Cost budget exceeded');
2834
+ }
2835
+
2836
+ const subCallCount = this.executionLog.filter(s => s.type === 'sub_call').length;
2837
+ if (subCallCount >= this.config.maxSubCalls) {
2838
+ throw new LimitExceededError('Maximum sub-calls exceeded');
2839
+ }
2840
+
2841
+ const contextStr = Array.isArray(context)
2842
+ ? context.map(c => c.content).join('\n\n---\n\n')
2843
+ : context;
2844
+
2845
+ const startTime = Date.now();
2846
+
2847
+ const response = await llm.invoke(
2848
+ `${query}\n\nContext:\n${contextStr}`,
2849
+ {
2850
+ maxTokens: options?.maxTokens ?? 2000,
2851
+ temperature: options?.temperature ?? 0.3,
2852
+ }
2853
+ );
2854
+
2855
+ const cost = this.estimateCost(contextStr.length, response.length);
2856
+ this.costTracker.record(cost);
2857
+
2858
+ this.log({
2859
+ type: 'sub_call',
2860
+ query: query.substring(0, 100),
2861
+ contextLength: contextStr.length,
2862
+ responseLength: response.length,
2863
+ cost,
2864
+ durationMs: Date.now() - startTime,
2865
+ });
2866
+
2867
+ return response;
2868
+ }
2869
+
2870
+ /**
2871
+ * Batch sub-queries with async execution (RLM paper recommendation)
2872
+ */
2873
+ async batchSubQuery(
2874
+ llm: LLMService,
2875
+ queries: Array<{ query: string; context: string | EnhancedChunk[] }>
2876
+ ): Promise<string[]> {
2877
+ if (!this.config.enableAsyncSubCalls) {
2878
+ // Sequential fallback
2879
+ const results: string[] = [];
2880
+ for (const q of queries) {
2881
+ results.push(await this.subQuery(llm, q.query, q.context));
2882
+ }
2883
+ return results;
2884
+ }
2885
+
2886
+ // Parallel execution with concurrency limit
2887
+ const CONCURRENCY = 5;
2888
+ const results: string[] = new Array(queries.length);
2889
+
2890
+ for (let i = 0; i < queries.length; i += CONCURRENCY) {
2891
+ const batch = queries.slice(i, i + CONCURRENCY);
2892
+ const batchResults = await Promise.all(
2893
+ batch.map((q, j) =>
2894
+ this.subQuery(llm, q.query, q.context)
2895
+ .then(r => ({ index: i + j, result: r }))
2896
+ )
2897
+ );
2898
+
2899
+ for (const { index, result } of batchResults) {
2900
+ results[index] = result;
2901
+ }
2902
+ }
2903
+
2904
+ return results;
2905
+ }
2906
+
2907
+ /**
2908
+ * Store intermediate result
2909
+ */
2910
+ store(variableName: string, value: any): void {
2911
+ this.variables.set(variableName, {
2912
+ name: variableName,
2913
+ type: typeof value === 'object' ? 'object' : 'primitive',
2914
+ value,
2915
+ metadata: {},
2916
+ });
2917
+
2918
+ this.log({
2919
+ type: 'store',
2920
+ variableName,
2921
+ valueType: typeof value,
2922
+ });
2923
+ }
2924
+
2925
+ /**
2926
+ * Check if we should terminate
2927
+ */
2928
+ shouldTerminate(): { terminate: boolean; reason?: string } {
2929
+ if (this.costTracker.exceeded) {
2930
+ return { terminate: true, reason: 'Cost budget exceeded' };
2931
+ }
2932
+
2933
+ const iterations = this.executionLog.filter(s => s.type === 'iteration').length;
2934
+ if (iterations >= this.config.maxIterations) {
2935
+ return { terminate: true, reason: 'Maximum iterations reached' };
2936
+ }
2937
+
2938
+ const subCalls = this.executionLog.filter(s => s.type === 'sub_call').length;
2939
+ if (subCalls >= this.config.maxSubCalls) {
2940
+ return { terminate: true, reason: 'Maximum sub-calls reached' };
2941
+ }
2942
+
2943
+ return { terminate: false };
2944
+ }
2945
+
2946
+ private log(step: Omit<ExecutionStep, 'timestamp'>): void {
2947
+ this.executionLog.push({
2948
+ ...step,
2949
+ timestamp: new Date(),
2950
+ });
2951
+ }
2952
+
2953
+ private estimateCost(inputChars: number, outputChars: number): number {
2954
+ // Rough estimate: $0.01 per 1000 chars input, $0.03 per 1000 chars output
2955
+ return (inputChars / 1000) * 0.01 + (outputChars / 1000) * 0.03;
2956
+ }
2957
+
2958
+ getExecutionLog(): ExecutionStep[] {
2959
+ return [...this.executionLog];
2960
+ }
2961
+
2962
+ getTotalCost(): number {
2963
+ return this.costTracker.total;
2964
+ }
2965
+ }
2966
+
2967
+ interface ContextVariable {
2968
+ name: string;
2969
+ type: 'chunks' | 'primitive' | 'object';
2970
+ value: any;
2971
+ metadata: Record<string, any>;
2972
+ }
2973
+
2974
+ interface ExecutionStep {
2975
+ type: 'load_context' | 'iteration' | 'filter' | 'chunk' | 'sub_call' | 'store';
2976
+ timestamp: Date;
2977
+ [key: string]: any;
2978
+ }
2979
+
2980
+ interface EnvironmentConfig {
2981
+ maxIterations: number;
2982
+ maxSubCalls: number;
2983
+ costBudget: number;
2984
+ timeoutMs: number;
2985
+ enableAsyncSubCalls: boolean;
2986
+ }
2987
+
2988
+ interface SubQueryOptions {
2989
+ maxTokens?: number;
2990
+ temperature?: number;
2991
+ estimatedCost?: number;
2992
+ }
2993
+
2994
+ class CostTracker {
2995
+ total = 0;
2996
+ constructor(private budget: number) {}
2997
+
2998
+ get remaining(): number { return this.budget - this.total; }
2999
+ get exceeded(): boolean { return this.total >= this.budget; }
3000
+
3001
+ canAfford(amount: number): boolean {
3002
+ return this.total + amount <= this.budget;
3003
+ }
3004
+
3005
+ record(amount: number): void {
3006
+ this.total += amount;
3007
+ }
3008
+ }
3009
+
3010
+ class BudgetExceededError extends Error {
3011
+ constructor(message: string) {
3012
+ super(message);
3013
+ this.name = 'BudgetExceededError';
3014
+ }
3015
+ }
3016
+
3017
+ class LimitExceededError extends Error {
3018
+ constructor(message: string) {
3019
+ super(message);
3020
+ this.name = 'LimitExceededError';
3021
+ }
3022
+ }
3023
+ ```
3024
+
3025
+ **Acceptance Criteria**:
3026
+ - [ ] Context can be loaded as environment variables
3027
+ - [ ] State description gives LLM overview without full content
3028
+ - [ ] Peek allows selective viewing
3029
+ - [ ] Filter supports regex patterns
3030
+ - [ ] Sub-queries track cost and enforce budget
3031
+ - [ ] Async batch queries work with concurrency limit
3032
+ - [ ] Termination conditions are enforced
3033
+
3034
+ ---
3035
+
3036
+ #### Task 8.2: Implement Recursive Retrieval Controller
3037
+
3038
+ **File**: `src/services/recursiveRetrieval.ts` (new file)
3039
+
3040
+ Orchestrate the iterative retrieval loop:
3041
+
3042
+ ```typescript
3043
+ /**
3044
+ * Recursive Retrieval Controller
3045
+ *
3046
+ * Implements RLM-style iterative retrieval where the model can
3047
+ * examine, filter, and recursively process context.
3048
+ */
3049
+ export class RecursiveRetrievalController {
3050
+ private vectorStore: EnhancedVectorStore;
3051
+ private llm: LLMService;
3052
+ private subLlm: LLMService; // Smaller/cheaper model for sub-calls
3053
+ private config: RetrievalConfig;
3054
+
3055
+ constructor(
3056
+ vectorStore: EnhancedVectorStore,
3057
+ llm: LLMService,
3058
+ subLlm?: LLMService,
3059
+ config?: Partial<RetrievalConfig>
3060
+ ) {
3061
+ this.vectorStore = vectorStore;
3062
+ this.llm = llm;
3063
+ this.subLlm = subLlm ?? llm;
3064
+ this.config = {
3065
+ initialRetrievalK: 20,
3066
+ maxIterations: 10,
3067
+ enableRecursiveSubCalls: true,
3068
+ costBudget: 0.50,
3069
+ ...config,
3070
+ };
3071
+ }
3072
+
3073
+ /**
3074
+ * Execute recursive retrieval for a query
3075
+ */
3076
+ async retrieve(query: string, options?: RetrieveOptions): Promise<RetrievalResult> {
3077
+ // Step 1: Initial retrieval
3078
+ const initialChunks = await this.vectorStore.search(query, {
3079
+ topK: this.config.initialRetrievalK,
3080
+ ...options?.searchOptions,
3081
+ });
3082
+
3083
+ // Step 2: Assess complexity - do we need recursive processing?
3084
+ const complexity = await this.assessComplexity(query, initialChunks);
3085
+
3086
+ if (complexity === 'simple') {
3087
+ // Simple query - return initial results
3088
+ return {
3089
+ chunks: initialChunks,
3090
+ strategy: 'direct',
3091
+ iterations: 1,
3092
+ cost: 0,
3093
+ };
3094
+ }
3095
+
3096
+ // Step 3: Complex query - use RLM-style processing
3097
+ const env = new ContextEnvironment({
3098
+ maxIterations: this.config.maxIterations,
3099
+ costBudget: this.config.costBudget,
3100
+ });
3101
+
3102
+ env.loadContext(initialChunks);
3103
+
3104
+ return await this.iterativeProcess(query, env);
3105
+ }
3106
+
3107
+ /**
3108
+ * Assess query complexity to decide strategy
3109
+ * (RLM paper: "more complex problems exhibit degradation at shorter lengths")
3110
+ */
3111
+ private async assessComplexity(
3112
+ query: string,
3113
+ chunks: EnhancedChunk[]
3114
+ ): Promise<'simple' | 'moderate' | 'complex'> {
3115
+ // Heuristics based on RLM paper findings
3116
+ const totalContext = chunks.reduce((sum, c) => sum + c.content.length, 0);
3117
+
3118
+ // If context fits comfortably, might be simple
3119
+ if (totalContext < 50000) {
3120
+ // Check if query requires aggregation or multi-hop reasoning
3121
+ const aggregationKeywords = /how many|count|list all|compare|summarize|aggregate/i;
3122
+ const multiHopKeywords = /because|therefore|which.*then|after.*when/i;
3123
+
3124
+ if (aggregationKeywords.test(query) || multiHopKeywords.test(query)) {
3125
+ return 'moderate';
3126
+ }
3127
+ return 'simple';
3128
+ }
3129
+
3130
+ // Large context or complex query
3131
+ if (totalContext > 200000) {
3132
+ return 'complex';
3133
+ }
3134
+
3135
+ return 'moderate';
3136
+ }
3137
+
3138
+ /**
3139
+ * Iterative RLM-style processing
3140
+ */
3141
+ private async iterativeProcess(
3142
+ query: string,
3143
+ env: ContextEnvironment
3144
+ ): Promise<RetrievalResult> {
3145
+ let iteration = 0;
3146
+ let answer: string | null = null;
3147
+
3148
+ while (iteration < this.config.maxIterations) {
3149
+ iteration++;
3150
+
3151
+ // Check termination conditions
3152
+ const { terminate, reason } = env.shouldTerminate();
3153
+ if (terminate) {
3154
+ console.log(`Terminating: ${reason}`);
3155
+ break;
3156
+ }
3157
+
3158
+ // Get next action from LLM
3159
+ const action = await this.getNextAction(query, env, iteration);
3160
+
3161
+ if (action.type === 'answer') {
3162
+ answer = action.value;
3163
+ break;
3164
+ }
3165
+
3166
+ // Execute the action
3167
+ await this.executeAction(action, env);
3168
+ }
3169
+
3170
+ // Collect final results
3171
+ const relevantChunks = this.collectRelevantChunks(env);
3172
+
3173
+ return {
3174
+ chunks: relevantChunks,
3175
+ strategy: 'recursive',
3176
+ iterations: iteration,
3177
+ cost: env.getTotalCost(),
3178
+ answer,
3179
+ executionLog: env.getExecutionLog(),
3180
+ };
3181
+ }
3182
+
3183
+ /**
3184
+ * Ask LLM what to do next
3185
+ */
3186
+ private async getNextAction(
3187
+ query: string,
3188
+ env: ContextEnvironment,
3189
+ iteration: number
3190
+ ): Promise<RetrievalAction> {
3191
+ const prompt = `You are processing a query using a context environment. Your goal is to find relevant information efficiently.
3192
+
3193
+ ## Query
3194
+ ${query}
3195
+
3196
+ ${env.getStateDescription()}
3197
+
3198
+ ## Iteration ${iteration}
3199
+
3200
+ Based on the query and current state, decide your next action. You can:
3201
+
3202
+ 1. **peek** - Look at specific chunks to understand content
3203
+ 2. **filter** - Filter chunks by keyword/pattern to narrow down
3204
+ 3. **chunk** - Split context into batches for parallel processing
3205
+ 4. **subQuery** - Ask a question about a subset of context
3206
+ 5. **store** - Store intermediate findings
3207
+ 6. **answer** - Provide final answer if you have enough information
3208
+
3209
+ Respond with a JSON action:
3210
+ \`\`\`json
3211
+ {
3212
+ "type": "peek|filter|chunk|subQuery|store|answer",
3213
+ "params": { ... },
3214
+ "reasoning": "why this action"
3215
+ }
3216
+ \`\`\`
3217
+
3218
+ Be efficient - don't examine everything if you can filter first. Use subQuery for semantic understanding.`;
3219
+
3220
+ const response = await this.llm.invoke(prompt, {
3221
+ maxTokens: 1000,
3222
+ temperature: 0.2,
3223
+ });
3224
+
3225
+ return this.parseAction(response);
3226
+ }
3227
+
3228
+ /**
3229
+ * Execute a retrieval action
3230
+ */
3231
+ private async executeAction(action: RetrievalAction, env: ContextEnvironment): Promise<void> {
3232
+ switch (action.type) {
3233
+ case 'peek':
3234
+ const peekResult = env.peek(
3235
+ action.params.variable ?? 'context',
3236
+ action.params.start,
3237
+ action.params.end
3238
+ );
3239
+ env.store('_lastPeek', peekResult);
3240
+ break;
3241
+
3242
+ case 'filter':
3243
+ const filtered = env.filter(
3244
+ action.params.variable ?? 'context',
3245
+ action.params.pattern
3246
+ );
3247
+ env.store(action.params.outputVariable ?? 'filtered', filtered);
3248
+ break;
3249
+
3250
+ case 'chunk':
3251
+ const batches = env.chunk(
3252
+ action.params.variable ?? 'context',
3253
+ action.params.size ?? 5
3254
+ );
3255
+ env.store(action.params.outputVariable ?? 'batches', batches);
3256
+ break;
3257
+
3258
+ case 'subQuery':
3259
+ if (this.config.enableRecursiveSubCalls && action.params.batch) {
3260
+ // Batch sub-queries for efficiency
3261
+ const batchQueries = action.params.contexts.map((ctx: any) => ({
3262
+ query: action.params.query,
3263
+ context: ctx,
3264
+ }));
3265
+ const results = await env.batchSubQuery(this.subLlm, batchQueries);
3266
+ env.store(action.params.outputVariable ?? 'subResults', results);
3267
+ } else {
3268
+ const result = await env.subQuery(
3269
+ this.subLlm,
3270
+ action.params.query,
3271
+ action.params.context
3272
+ );
3273
+ env.store(action.params.outputVariable ?? 'subResult', result);
3274
+ }
3275
+ break;
3276
+
3277
+ case 'store':
3278
+ env.store(action.params.variable, action.params.value);
3279
+ break;
3280
+ }
3281
+ }
3282
+
3283
+ private parseAction(response: string): RetrievalAction {
3284
+ // Extract JSON from response
3285
+ const jsonMatch = response.match(/```json\n?([\s\S]*?)\n?```/);
3286
+ if (jsonMatch) {
3287
+ try {
3288
+ return JSON.parse(jsonMatch[1]);
3289
+ } catch {
3290
+ // Fall through to default
3291
+ }
3292
+ }
3293
+
3294
+ // Try parsing entire response as JSON
3295
+ try {
3296
+ return JSON.parse(response);
3297
+ } catch {
3298
+ // Default to answer if parsing fails
3299
+ return { type: 'answer', params: { value: response }, reasoning: 'Parse failed' };
3300
+ }
3301
+ }
3302
+
3303
+ private collectRelevantChunks(env: ContextEnvironment): EnhancedChunk[] {
3304
+ // Collect chunks that were accessed/filtered during processing
3305
+ const log = env.getExecutionLog();
3306
+ const accessedChunkIds = new Set<string>();
3307
+
3308
+ // Implementation would track which chunks were actually used
3309
+ // For now, return the filtered/relevant set
3310
+ return [];
3311
+ }
3312
+ }
3313
+
3314
+ interface RetrievalConfig {
3315
+ initialRetrievalK: number;
3316
+ maxIterations: number;
3317
+ enableRecursiveSubCalls: boolean;
3318
+ costBudget: number;
3319
+ }
3320
+
3321
+ interface RetrieveOptions {
3322
+ searchOptions?: EnhancedSearchOptions;
3323
+ }
3324
+
3325
+ interface RetrievalAction {
3326
+ type: 'peek' | 'filter' | 'chunk' | 'subQuery' | 'store' | 'answer';
3327
+ params: Record<string, any>;
3328
+ reasoning?: string;
3329
+ }
3330
+
3331
+ interface RetrievalResult {
3332
+ chunks: EnhancedChunk[];
3333
+ strategy: 'direct' | 'recursive';
3334
+ iterations: number;
3335
+ cost: number;
3336
+ answer?: string | null;
3337
+ executionLog?: ExecutionStep[];
3338
+ }
3339
+ ```
3340
+
3341
+ **Acceptance Criteria**:
3342
+ - [ ] Simple queries use direct retrieval
3343
+ - [ ] Complex queries trigger recursive processing
3344
+ - [ ] Actions are parsed and executed correctly
3345
+ - [ ] Cost is tracked across iterations
3346
+ - [ ] Termination conditions are enforced
3347
+ - [ ] Batch sub-queries use async execution
3348
+
3349
+ ---
3350
+
3351
+ #### Task 8.3: Implement Smart Chunking Strategies
3352
+
3353
+ **File**: `src/services/smartChunker.ts` (new file)
3354
+
3355
+ Implement intelligent chunking based on RLM paper patterns:
3356
+
3357
+ ```typescript
3358
+ /**
3359
+ * Smart Chunking Strategies
3360
+ *
3361
+ * Based on RLM paper observations about how models chunk context:
3362
+ * - Uniform chunking by size
3363
+ * - Semantic chunking by topic
3364
+ * - Keyword-based chunking
3365
+ * - Structural chunking (by file, section, etc.)
3366
+ */
3367
+ export class SmartChunker {
3368
+ private llm: LLMService;
3369
+
3370
+ constructor(llm: LLMService) {
3371
+ this.llm = llm;
3372
+ }
3373
+
3374
/**
 * Uniform chunking - split by count
 *
 * Partitions `items` into consecutive batches of at most `batchSize` elements.
 */
uniformChunk<T>(items: T[], batchSize: number): T[][] {
  const batches: T[][] = [];
  let cursor = 0;
  while (cursor < items.length) {
    batches.push(items.slice(cursor, cursor + batchSize));
    cursor += batchSize;
  }
  return batches;
}
3384
+
3385
/**
 * Character-based chunking with overlap
 *
 * Splits `text` into windows of `chunkSize` characters where consecutive
 * windows share `overlap` characters.
 *
 * BUG FIX: the previous implementation never terminated for non-empty input
 * whenever `overlap > 0` — after emitting the final chunk it set
 * `start = end - overlap`, which stays below `text.length` forever (the
 * `start < 0 → 0` clamp looped the same way). The loop below guarantees
 * forward progress and stops once the last window reaches the end.
 */
charChunk(text: string, chunkSize: number, overlap: number = 100): string[] {
  const chunks: string[] = [];
  // Advance by at least 1 char per window, even when overlap >= chunkSize.
  const step = Math.max(chunkSize - overlap, 1);

  for (let start = 0; start < text.length; start += step) {
    const end = Math.min(start + chunkSize, text.length);
    chunks.push(text.substring(start, end));
    if (end >= text.length) break; // final window emitted — stop
  }

  return chunks;
}
3401
+
3402
+ /**
3403
+ * Semantic chunking - group by topic similarity
3404
+ * (RLM pattern: models benefit from semantically coherent chunks)
3405
+ */
3406
+ async semanticChunk(
3407
+ chunks: EnhancedChunk[],
3408
+ targetGroups: number
3409
+ ): Promise<EnhancedChunk[][]> {
3410
+ // Use embeddings to cluster
3411
+ const embeddings = chunks.map(c => c.embedding);
3412
+ const clusters = this.kMeansClustering(embeddings, targetGroups);
3413
+
3414
+ const groups: EnhancedChunk[][] = Array.from(
3415
+ { length: targetGroups },
3416
+ () => []
3417
+ );
3418
+
3419
+ clusters.forEach((clusterIdx, chunkIdx) => {
3420
+ groups[clusterIdx].push(chunks[chunkIdx]);
3421
+ });
3422
+
3423
+ return groups.filter(g => g.length > 0);
3424
+ }
3425
+
3426
+ /**
3427
+ * Keyword-based chunking - group by shared keywords
3428
+ * (RLM pattern: filter by regex, then process matching chunks together)
3429
+ */
3430
+ keywordChunk(
3431
+ chunks: EnhancedChunk[],
3432
+ keywords: string[]
3433
+ ): Map<string, EnhancedChunk[]> {
3434
+ const groups = new Map<string, EnhancedChunk[]>();
3435
+
3436
+ for (const keyword of keywords) {
3437
+ const pattern = new RegExp(keyword, 'i');
3438
+ const matching = chunks.filter(c => pattern.test(c.content));
3439
+ if (matching.length > 0) {
3440
+ groups.set(keyword, matching);
3441
+ }
3442
+ }
3443
+
3444
+ // Add unmatched chunks to 'other'
3445
+ const matched = new Set(
3446
+ Array.from(groups.values()).flatMap(g => g.map(c => c.id))
3447
+ );
3448
+ const unmatched = chunks.filter(c => !matched.has(c.id));
3449
+ if (unmatched.length > 0) {
3450
+ groups.set('_other', unmatched);
3451
+ }
3452
+
3453
+ return groups;
3454
+ }
3455
+
3456
/**
 * Structural chunking - group by source file or section
 *
 * Buckets chunks by their `source` field; sourceless chunks go under
 * '_unknown'.
 */
structuralChunk(chunks: EnhancedChunk[]): Map<string, EnhancedChunk[]> {
  const groups = new Map<string, EnhancedChunk[]>();

  for (const chunk of chunks) {
    const key = chunk.source || '_unknown';
    const bucket = groups.get(key);
    if (bucket) {
      bucket.push(chunk);
    } else {
      groups.set(key, [chunk]);
    }
  }

  return groups;
}
3472
+
3473
+ /**
3474
+ * Adaptive chunking - choose strategy based on content
3475
+ */
3476
+ async adaptiveChunk(
3477
+ chunks: EnhancedChunk[],
3478
+ query: string
3479
+ ): Promise<ChunkingResult> {
3480
+ // Analyze content and query to choose strategy
3481
+ const hasCodeContent = chunks.some(c =>
3482
+ c.chunkType === ChunkType.CODE || /```[\s\S]*```/.test(c.content)
3483
+ );
3484
+
3485
+ const hasStructuredSources = new Set(chunks.map(c => c.source)).size > 1;
3486
+
3487
+ const queryNeedsAggregation = /how many|count|list|all|every/i.test(query);
3488
+
3489
+ if (hasCodeContent && hasStructuredSources) {
3490
+ // Code from multiple files - use structural chunking
3491
+ return {
3492
+ strategy: 'structural',
3493
+ groups: this.structuralChunk(chunks),
3494
+ };
3495
+ }
3496
+
3497
+ if (queryNeedsAggregation) {
3498
+ // Aggregation query - uniform chunks for parallel processing
3499
+ return {
3500
+ strategy: 'uniform',
3501
+ groups: new Map([
3502
+ ['batch', this.uniformChunk(chunks, 10)],
3503
+ ].flatMap(([k, batches]) =>
3504
+ batches.map((b, i) => [`${k}_${i}`, b] as [string, EnhancedChunk[]])
3505
+ )),
3506
+ };
3507
+ }
3508
+
3509
+ // Default to semantic chunking
3510
+ const semanticGroups = await this.semanticChunk(chunks, 5);
3511
+ return {
3512
+ strategy: 'semantic',
3513
+ groups: new Map(semanticGroups.map((g, i) => [`topic_${i}`, g])),
3514
+ };
3515
+ }
3516
+
3517
+ /**
3518
+ * Simple k-means clustering for semantic grouping
3519
+ */
3520
+ private kMeansClustering(
3521
+ embeddings: number[][],
3522
+ k: number,
3523
+ maxIterations: number = 50
3524
+ ): number[] {
3525
+ if (embeddings.length === 0) return [];
3526
+ if (embeddings.length <= k) {
3527
+ return embeddings.map((_, i) => i);
3528
+ }
3529
+
3530
+ const dim = embeddings[0].length;
3531
+
3532
+ // Initialize centroids randomly
3533
+ const centroids: number[][] = [];
3534
+ const indices = new Set<number>();
3535
+ while (centroids.length < k) {
3536
+ const idx = Math.floor(Math.random() * embeddings.length);
3537
+ if (!indices.has(idx)) {
3538
+ indices.add(idx);
3539
+ centroids.push([...embeddings[idx]]);
3540
+ }
3541
+ }
3542
+
3543
+ let assignments = new Array(embeddings.length).fill(0);
3544
+
3545
+ for (let iter = 0; iter < maxIterations; iter++) {
3546
+ // Assign points to nearest centroid
3547
+ const newAssignments = embeddings.map(emb => {
3548
+ let minDist = Infinity;
3549
+ let closest = 0;
3550
+ for (let c = 0; c < centroids.length; c++) {
3551
+ const dist = this.euclideanDistance(emb, centroids[c]);
3552
+ if (dist < minDist) {
3553
+ minDist = dist;
3554
+ closest = c;
3555
+ }
3556
+ }
3557
+ return closest;
3558
+ });
3559
+
3560
+ // Check convergence
3561
+ if (newAssignments.every((a, i) => a === assignments[i])) {
3562
+ break;
3563
+ }
3564
+ assignments = newAssignments;
3565
+
3566
+ // Update centroids
3567
+ for (let c = 0; c < k; c++) {
3568
+ const members = embeddings.filter((_, i) => assignments[i] === c);
3569
+ if (members.length > 0) {
3570
+ for (let d = 0; d < dim; d++) {
3571
+ centroids[c][d] = members.reduce((sum, m) => sum + m[d], 0) / members.length;
3572
+ }
3573
+ }
3574
+ }
3575
+ }
3576
+
3577
+ return assignments;
3578
+ }
3579
+
3580
+ private euclideanDistance(a: number[], b: number[]): number {
3581
+ return Math.sqrt(a.reduce((sum, val, i) => sum + (val - b[i]) ** 2, 0));
3582
+ }
3583
+ }
3584
+
3585
+ interface ChunkingResult {
3586
+ strategy: 'uniform' | 'semantic' | 'keyword' | 'structural';
3587
+ groups: Map<string, EnhancedChunk[]>;
3588
+ }
3589
+ ```
3590
+
3591
+ **Acceptance Criteria**:
3592
+ - [ ] Uniform chunking works correctly
3593
+ - [ ] Semantic chunking groups similar content
3594
+ - [ ] Keyword chunking filters by patterns
3595
+ - [ ] Structural chunking groups by source
3596
+ - [ ] Adaptive chunking chooses appropriate strategy
3597
+
3598
+ ---
3599
+
3600
+ ## Phase 12: PageIndex Integration (Vectorless RAG) {#phase-12-pageindex}
3601
+
3602
+ ### Overview
3603
+
3604
+ This phase integrates [PageIndex](https://github.com/VectifyAI/PageIndex) - a vectorless, reasoning-based RAG system that builds hierarchical tree indexes from documents and uses LLM reasoning for retrieval. This complements the existing vector-based retrieval system.
3605
+
3606
+ ### Key Insight: Complementary Approaches
3607
+
3608
+ Vector RAG and PageIndex solve retrieval differently and work together:
3609
+
3610
+ | Aspect | Vector RAG | PageIndex |
3611
+ |--------|-----------|-----------|
3612
+ | **Excels at** | Semantic similarity, cross-document search | Structured documents, preserving hierarchy |
3613
+ | **Retrieval method** | Embed query → Top-K similarity | LLM navigates tree → Returns node IDs |
3614
+ | **Storage** | Vector DB (Redis/Qdrant/Chroma) | JSON file (tree structure) |
3615
+ | **Index type** | Flat vector embeddings | Hierarchical tree with node IDs |
3616
+ | **Explainability** | Similarity scores | Page references, reasoning chain |
3617
+
3618
+ ### Integration Architecture
3619
+
3620
+ ```
3621
+ ┌─────────────────────────────────────────────────────────────┐
3622
+ │ Cursor Recursive RAG │
3623
+ ├─────────────────────────────────────────────────────────────┤
3624
+ │ │
3625
+ │ ┌─────────────────┐ ┌──────────────────────────────┐ │
3626
+ │ │ Ingest Router │────▶│ Document Type Detector │ │
3627
+ │ └─────────────────┘ └──────────────────────────────┘ │
3628
+ │ │ │ │
3629
+ │ ▼ ▼ │
3630
+ │ ┌─────────────────┐ ┌──────────────────────────────┐ │
3631
+ │ │ Vector Pipeline │ │ PageIndex Pipeline (NEW) │ │
3632
+ │ │ (existing) │ │ │ │
3633
+ │ │ ┌─────────────┐ │ │ ┌──────────────────────────┐ │ │
3634
+ │ │ │ Chunker │ │ │ │ PageIndex Tree Builder │ │ │
3635
+ │ │ ├─────────────┤ │ │ ├──────────────────────────┤ │ │
3636
+ │ │ │ Embedder │ │ │ │ Tree Storage (JSON/DB) │ │ │
3637
+ │ │ ├─────────────┤ │ │ ├──────────────────────────┤ │ │
3638
+ │ │ │ Vector Store│ │ │ │ LLM Tree Traversal │ │ │
3639
+ │ │ └─────────────┘ │ │ └──────────────────────────┘ │ │
3640
+ │ └─────────────────┘ └──────────────────────────────┘ │
3641
+ │ │ │ │
3642
+ │ └────────────┬───────────┘ │
3643
+ │ ▼ │
3644
+ │ ┌──────────────────┐ │
3645
+ │ │ Hybrid Merger │ │
3646
+ │ │ (combines both) │ │
3647
+ │ └──────────────────┘ │
3648
+ │ │ │
3649
+ │ ▼ │
3650
+ │ ┌──────────────────┐ │
3651
+ │ │ RecursiveQuery │ │
3652
+ │ │ Tool │ │
3653
+ │ └──────────────────┘ │
3654
+ └─────────────────────────────────────────────────────────────┘
3655
+ ```
3656
+
3657
+ ### New Components
3658
+
3659
+ #### PageIndex Adapter (`src/adapters/pageindex/`)
3660
+
3661
+ ```
3662
+ src/adapters/pageindex/
3663
+ ├── index.ts # Factory and exports
3664
+ ├── types.ts # PageIndex tree node types
3665
+ ├── tree-builder.ts # Wraps PageIndex Python via child_process
3666
+ ├── tree-store.ts # Stores/retrieves tree JSON
3667
+ └── tree-searcher.ts # LLM-based tree traversal for retrieval
3668
+ ```
3669
+
3670
+ #### Key Interfaces
3671
+
3672
+ ```typescript
3673
+ /**
3674
+ * PageIndex tree node structure (mirrors Python output)
3675
+ */
3676
+ export interface TreeNode {
3677
+ title: string;
3678
+ node_id: string;
3679
+ start_index: number; // Start page
3680
+ end_index: number; // End page
3681
+ summary: string;
3682
+ nodes?: TreeNode[]; // Child nodes
3683
+ }
3684
+
3685
+ /**
3686
+ * Complete tree index for a document
3687
+ */
3688
+ export interface TreeIndex {
3689
+ documentId: string;
3690
+ sourcePath: string;
3691
+ documentDescription?: string;
3692
+ createdAt: Date;
3693
+ model: string;
3694
+ nodes: TreeNode[];
3695
+ }
3696
+
3697
+ /**
3698
+ * PageIndex search result
3699
+ */
3700
+ export interface PageIndexResult {
3701
+ nodeId: string;
3702
+ title: string;
3703
+ content: string;
3704
+ startPage: number;
3705
+ endPage: number;
3706
+ summary: string;
3707
+ reasoningChain: string[]; // How LLM navigated to this node
3708
+ confidence: number;
3709
+ }
3710
+
3711
+ /**
3712
+ * PageIndex adapter interface
3713
+ */
3714
+ export interface PageIndexAdapter {
3715
+ // Build tree from document
3716
+ buildIndex(
3717
+ sourcePath: string,
3718
+ options?: PageIndexOptions
3719
+ ): Promise<TreeIndex>;
3720
+
3721
+ // Search using LLM reasoning
3722
+ search(
3723
+ query: string,
3724
+ treeIndex: TreeIndex
3725
+ ): Promise<PageIndexResult[]>;
3726
+
3727
+ // Get full content for nodes
3728
+ getNodeContent(
3729
+ treeIndex: TreeIndex,
3730
+ nodeIds: string[]
3731
+ ): Promise<string[]>;
3732
+ }
3733
+ ```
3734
+
3735
+ ### Configuration Extension
3736
+
3737
+ Add to `RAGConfig` in `src/types/index.ts`:
3738
+
3739
+ ```typescript
3740
+ export interface RAGConfig {
3741
+ // ... existing fields ...
3742
+
3743
+ pageIndex?: {
3744
+ enabled: boolean;
3745
+ pythonPath?: string; // Path to Python with pageindex installed
3746
+ model?: string; // OpenAI model for tree building (default: gpt-4o-mini)
3747
+ maxPagesPerNode?: number; // Default: 10
3748
+ maxTokensPerNode?: number; // Default: 20000
3749
+ useCachedTrees?: boolean; // Cache built trees (default: true)
3750
+ autoIndexPDFs?: boolean; // Auto-build tree for PDFs (default: true)
3751
+ hybridSearchWeight?: number; // Weight for PageIndex in hybrid (0-1, default: 0.5)
3752
+ };
3753
+ }
3754
+ ```
3755
+
3756
+ ### New MCP Tools
3757
+
3758
+ | Tool | Description |
3759
+ |------|-------------|
3760
+ | `pageindex_ingest` | Build a PageIndex tree for a PDF/markdown document |
3761
+ | `pageindex_search` | Query using hierarchical tree navigation |
3762
+ | `pageindex_list` | List all indexed documents with tree structure |
3763
+ | `hybrid_search` | Combine vector + PageIndex results |
3764
+
3765
+ ### Hybrid Search Service
3766
+
3767
+ ```typescript
3768
+ /**
3769
+ * Hybrid Search Merger
3770
+ * Combines results from vector search and PageIndex
3771
+ */
3772
+ export async function hybridSearch(
3773
+ query: string,
3774
+ vectorStore: VectorStore,
3775
+ pageIndexAdapter: PageIndexAdapter,
3776
+ options: HybridSearchOptions
3777
+ ): Promise<HybridSearchResult> {
3778
+ // Run both searches in parallel
3779
+ const [vectorResults, pageIndexResults] = await Promise.all([
3780
+ vectorStore.search(embedding, { topK: options.vectorTopK }),
3781
+ pageIndexAdapter.search(query, treeIndex)
3782
+ ]);
3783
+
3784
+ // Merge with configurable weighting
3785
+ return mergeResults(vectorResults, pageIndexResults, options.weights);
3786
+ }
3787
+
3788
+ interface HybridSearchResult {
3789
+ results: Array<{
3790
+ id: string;
3791
+ content: string;
3792
+ score: number;
3793
+ source: 'vector' | 'pageindex';
3794
+ pageReference?: string; // For PageIndex results
3795
+ }>;
3796
+ vectorResultCount: number;
3797
+ pageIndexResultCount: number;
3798
+ mergedCount: number;
3799
+ }
3800
+ ```
3801
+
3802
+ ### Usage Example
3803
+
3804
+ **Ingesting a PDF:**
3805
+ ```bash
3806
+ cursor-rag ingest --source ./report.pdf
3807
+
3808
+ [Vector Pipeline] Created 247 chunks → stored in Redis
3809
+ [PageIndex Pipeline] Built tree with 23 nodes → stored in ~/.cursor-rag/pageindex/report.json
3810
+ ```
3811
+
3812
+ **Querying:**
3813
+ ```
3814
+ User: "What were Q3 revenue figures?"
3815
+
3816
+ [Hybrid Search]
3817
+ ├── Vector: Found 5 chunks with 0.82 avg similarity
3818
+ └── PageIndex: Tree traversal found nodes 0012, 0015 (Financial Results section)
3819
+
3820
+ [Merged Result]
3821
+ Sources:
3822
+ - report.pdf, pages 21-23 (PageIndex)
3823
+ - report.pdf, chunk 145 (Vector)
3824
+ ```
3825
+
3826
+ ### Python Integration
3827
+
3828
+ Since PageIndex is Python-based, the recommended integration approach is via child process:
3829
+
3830
+ ```typescript
3831
+ import { spawn } from 'child_process';
3832
+
3833
+ async function buildPageIndexTree(
3834
+ pdfPath: string,
3835
+ options: PageIndexOptions
3836
+ ): Promise<TreeIndex> {
3837
+ return new Promise((resolve, reject) => {
3838
+ const args = [
3839
+ 'run_pageindex.py',
3840
+ '--pdf_path', pdfPath,
3841
+ '--model', options.model || 'gpt-4o-mini',
3842
+ '--max-pages-per-node', String(options.maxPagesPerNode || 10),
3843
+ '--max-tokens-per-node', String(options.maxTokensPerNode || 20000),
3844
+ ];
3845
+
3846
+ const proc = spawn(options.pythonPath || 'python3', args, {
3847
+ cwd: options.pageindexPath,
3848
+ env: { ...process.env, CHATGPT_API_KEY: options.apiKey },
3849
+ });
3850
+
3851
+ let stdout = '';
3852
+ let stderr = '';
3853
+
3854
+ proc.stdout.on('data', (data) => { stdout += data; });
3855
+ proc.stderr.on('data', (data) => { stderr += data; });
3856
+
3857
+ proc.on('close', (code) => {
3858
+ if (code === 0) {
3859
+ resolve(JSON.parse(stdout));
3860
+ } else {
3861
+ reject(new Error(`PageIndex failed: ${stderr}`));
3862
+ }
3863
+ });
3864
+ });
3865
+ }
3866
+ ```
3867
+
3868
+ ### Benefits of This Integration
3869
+
3870
+ 1. **Additive, not replacement** - existing vector functionality unchanged
3871
+ 2. **Best of both worlds** - semantic search + structural navigation
3872
+ 3. **Explainable results** - PageIndex provides exact page references
3873
+ 4. **Configurable** - users can enable/disable PageIndex per their needs
3874
+ 5. **Document-type aware** - automatically uses best strategy per document
3875
+
3876
+ ### Considerations
3877
+
3878
+ | Factor | Notes |
3879
+ |--------|-------|
3880
+ | **API Cost** | PageIndex uses LLM calls for both indexing and retrieval |
3881
+ | **Latency** | Tree building is slower than chunking (but only at ingest time) |
3882
+ | **Python dependency** | Requires Python 3.x with pageindex package installed |
3883
+ | **Storage** | Tree indexes are JSON files (~10-100KB per document) |
3884
+
3885
+ ### Implementation Phases
3886
+
3887
+ 1. **Phase 1: Core Integration** - Adapter, storage, basic search
3888
+ 2. **Phase 2: Hybrid Search** - Merge with vector results, attribution
3889
+ 3. **Phase 3: Enhanced Features** - CLI, dashboard, auto-detection
3890
+
3891
+ ---
3892
+
3893
+ ## MCP Tool Definitions {#mcp-tools}
3894
+
3895
+ ### New Tools to Add
3896
+
3897
+ ```typescript
3898
+ /**
3899
+ * New MCP tools for memory features
3900
+ */
3901
+ export const MEMORY_TOOLS: Tool[] = [
3902
+ {
3903
+ name: 'search_past_solutions',
3904
+ description: 'Search for solutions from previous development sessions and Cursor chats',
3905
+ inputSchema: {
3906
+ type: 'object',
3907
+ properties: {
3908
+ query: {
3909
+ type: 'string',
3910
+ description: 'Description of the problem or error',
3911
+ },
3912
+ errorMessage: {
3913
+ type: 'string',
3914
+ description: 'Specific error message if applicable',
3915
+ },
3916
+ project: {
3917
+ type: 'string',
3918
+ description: 'Filter to a specific project',
3919
+ },
3920
+ limit: {
3921
+ type: 'number',
3922
+ description: 'Maximum results to return',
3923
+ default: 5,
3924
+ },
3925
+ },
3926
+ required: ['query'],
3927
+ },
3928
+ },
3929
+ {
3930
+ name: 'find_similar_issues',
3931
+ description: 'Find past issues similar to the current problem',
3932
+ inputSchema: {
3933
+ type: 'object',
3934
+ properties: {
3935
+ context: {
3936
+ type: 'string',
3937
+ description: 'Current context or code snippet',
3938
+ },
3939
+ errorMessage: {
3940
+ type: 'string',
3941
+ description: 'Error message if any',
3942
+ },
3943
+ },
3944
+ required: ['context'],
3945
+ },
3946
+ },
3947
+ {
3948
+ name: 'get_project_patterns',
3949
+ description: 'Get established patterns and standards for this project',
3950
+ inputSchema: {
3951
+ type: 'object',
3952
+ properties: {
3953
+ project: {
3954
+ type: 'string',
3955
+ description: 'Project name',
3956
+ },
3957
+ category: {
3958
+ type: 'string',
3959
+ description: 'Category filter (e.g., authentication, database)',
3960
+ },
3961
+ },
3962
+ required: ['project'],
3963
+ },
3964
+ },
3965
+ {
3966
+ name: 'recall_decision',
3967
+ description: 'Recall why a technical decision was made',
3968
+ inputSchema: {
3969
+ type: 'object',
3970
+ properties: {
3971
+ topic: {
3972
+ type: 'string',
3973
+ description: 'What the decision was about',
3974
+ },
3975
+ project: {
3976
+ type: 'string',
3977
+ description: 'Project context',
3978
+ },
3979
+ },
3980
+ required: ['topic'],
3981
+ },
3982
+ },
3983
+ {
3984
+ name: 'get_category_summary',
3985
+ description: 'Get a high-level summary for a knowledge category',
3986
+ inputSchema: {
3987
+ type: 'object',
3988
+ properties: {
3989
+ category: {
3990
+ type: 'string',
3991
+ description: 'Category name (e.g., authentication, testing)',
3992
+ },
3993
+ },
3994
+ required: ['category'],
3995
+ },
3996
+ },
3997
+ {
3998
+ name: 'ingest_chat_history',
3999
+ description: 'Manually trigger chat history ingestion',
4000
+ inputSchema: {
4001
+ type: 'object',
4002
+ properties: {
4003
+ since: {
4004
+ type: 'string',
4005
+ description: 'Only ingest chats since this date (ISO format)',
4006
+ },
4007
+ hasCode: {
4008
+ type: 'boolean',
4009
+ description: 'Only ingest chats with code blocks',
4010
+ },
4011
+ },
4012
+ },
4013
+ },
4014
+ {
4015
+ name: 'memory_stats',
4016
+ description: 'Get statistics about the memory system',
4017
+ inputSchema: {
4018
+ type: 'object',
4019
+ properties: {},
4020
+ },
4021
+ },
4022
+ ];
4023
+ ```
4024
+
4025
+ ---
4026
+
4027
+ ## Configuration Schema {#configuration}
4028
+
4029
+ ### Enhanced Configuration
4030
+
4031
+ **File**: `src/config/memoryConfig.ts` (new file)
4032
+
4033
+ ```typescript
4034
+ /**
4035
+ * Configuration for memory features
4036
+ */
4037
+ export interface MemoryConfig {
4038
+ // Metadata store
4039
+ metadataDbPath: string; // Default: ~/.cursor-rag/memory.db
4040
+
4041
+ // Cursor chat integration
4042
+ cursorDbPath?: string; // Auto-detected by default
4043
+ autoIngestChats: boolean; // Default: false
4044
+ chatIngestInterval: number; // Minutes, default: 30
4045
+
4046
+ // Decay configuration
4047
+ decay: {
4048
+ halfLifeDays: number; // Default: 60
4049
+ archiveThreshold: number; // Default: 0.2
4050
+ recencyBoostDays: number; // Default: 7
4051
+ };
4052
+
4053
+ // Knowledge extraction
4054
+ extraction: {
4055
+ enabled: boolean; // Default: true
4056
+ minConfidence: number; // Default: 0.6
4057
+ extractSolutions: boolean; // Default: true
4058
+ extractPatterns: boolean; // Default: true
4059
+ extractDecisions: boolean; // Default: true
4060
+ extractStandards: boolean; // Default: true
4061
+ };
4062
+
4063
+ // Categories
4064
+ categories: {
4065
+ autoClassify: boolean; // Default: true
4066
+ autoEvolveSummaries: boolean; // Default: true
4067
+ summaryEvolutionThreshold: number; // New items before re-summarising
4068
+ };
4069
+
4070
+ // Maintenance
4071
+ maintenance: {
4072
+ enabled: boolean; // Default: true
4073
+ nightlyTime: string; // Cron expression, default: "0 3 * * *"
4074
+ weeklyTime: string; // Default: "0 4 * * 0"
4075
+ monthlyTime: string; // Default: "0 5 1 * *"
4076
+ };
4077
+
4078
+ // Scoring
4079
+ scoring: {
4080
+ weights: {
4081
+ similarity: number;
4082
+ decay: number;
4083
+ importance: number;
4084
+ recency: number;
4085
+ graphBoost: number;
4086
+ typeBoost: number;
4087
+ };
4088
+ };
4089
+ }
4090
+
4091
+ export const DEFAULT_MEMORY_CONFIG: MemoryConfig = {
4092
+ metadataDbPath: '~/.cursor-rag/memory.db',
4093
+ autoIngestChats: false,
4094
+ chatIngestInterval: 30,
4095
+
4096
+ decay: {
4097
+ halfLifeDays: 60,
4098
+ archiveThreshold: 0.2,
4099
+ recencyBoostDays: 7,
4100
+ },
4101
+
4102
+ extraction: {
4103
+ enabled: true,
4104
+ minConfidence: 0.6,
4105
+ extractSolutions: true,
4106
+ extractPatterns: true,
4107
+ extractDecisions: true,
4108
+ extractStandards: true,
4109
+ },
4110
+
4111
+ categories: {
4112
+ autoClassify: true,
4113
+ autoEvolveSummaries: true,
4114
+ summaryEvolutionThreshold: 5,
4115
+ },
4116
+
4117
+ maintenance: {
4118
+ enabled: true,
4119
+ nightlyTime: '0 3 * * *',
4120
+ weeklyTime: '0 4 * * 0',
4121
+ monthlyTime: '0 5 1 * *',
4122
+ },
4123
+
4124
+ scoring: {
4125
+ weights: {
4126
+ similarity: 0.35,
4127
+ decay: 0.20,
4128
+ importance: 0.15,
4129
+ recency: 0.10,
4130
+ graphBoost: 0.10,
4131
+ typeBoost: 0.10,
4132
+ },
4133
+ },
4134
+ };
4135
+ ```
4136
+
4137
+ ---
4138
+
4139
+ ## Anti-Patterns and Negative Results {#anti-patterns}
4140
+
4141
+ Based on the Recursive Language Models paper's Negative Results (Appendix A) and our own testing, these are critical anti-patterns to avoid:
4142
+
4143
+ ### 1. One-Size-Fits-All Prompts
4144
+
4145
+ **Problem**: Using the exact same prompts/configurations across all models.
4146
+
4147
+ > "We originally wrote the RLM system prompt with in context examples for GPT-5, and tried to use the same system prompt for Qwen3-Coder, but found that it led to different, undesirable behavior."
4148
+
4149
+ **Solution in This Project**:
4150
+ ```typescript
4151
+ // BAD: Same config for all models
4152
+ const config = { maxSubCalls: 100 };
4153
+
4154
+ // GOOD: Model-specific configurations
4155
+ const MODEL_CONFIGS: Record<string, ModelConfig> = {
4156
+ 'gpt-4': { maxSubCalls: 100, warnOnExcessiveCalls: false },
4157
+ 'claude': { maxSubCalls: 100, warnOnExcessiveCalls: false },
4158
+ 'qwen': { maxSubCalls: 50, warnOnExcessiveCalls: true }, // Needs warning
4159
+ 'local': { maxSubCalls: 20, warnOnExcessiveCalls: true },
4160
+ };
4161
+ ```
4162
+
4163
+ ### 2. Assuming All Models Can Execute Code
4164
+
4165
+ **Problem**: Models without sufficient coding capabilities struggle with REPL-based approaches.
4166
+
4167
+ > "We found from small scale experiments that smaller models like Qwen3-8B struggled without sufficient coding abilities."
4168
+
4169
+ **Solution**: Implement capability detection and fallback paths:
4170
+ ```typescript
4171
+ interface ModelCapabilities {
4172
+ codeExecution: 'excellent' | 'good' | 'limited' | 'none';
4173
+ contextWindow: number;
4174
+ outputTokens: number;
4175
+ }
4176
+
4177
+ async function chooseRetrievalStrategy(
4178
+ capabilities: ModelCapabilities,
4179
+ queryComplexity: 'simple' | 'moderate' | 'complex'
4180
+ ): Promise<'direct' | 'iterative' | 'recursive'> {
4181
+ // Models without coding ability can't use recursive REPL approach
4182
+ if (capabilities.codeExecution === 'none' || capabilities.codeExecution === 'limited') {
4183
+ return queryComplexity === 'simple' ? 'direct' : 'iterative';
4184
+ }
4185
+
4186
+ return queryComplexity === 'complex' ? 'recursive' : 'iterative';
4187
+ }
4188
+ ```
4189
+
4190
+ ### 3. Unlimited Thinking/Reasoning Tokens
4191
+
4192
+ **Problem**: Thinking models can exhaust output tokens with reasoning before producing results.
4193
+
4194
+ > "The smaller gap compared to the evaluated models... are due to multiple trajectories running out of output tokens while producing outputs due to thinking tokens exceeding the maximum output token length."
4195
+
4196
+ **Solution**: Reserve output tokens and enforce budgets:
4197
+ ```typescript
4198
+ interface TokenBudget {
4199
+ totalOutputTokens: number;
4200
+ reservedForAnswer: number;
4201
+ maxThinkingTokens: number;
4202
+ }
4203
+
4204
+ function getTokenBudget(model: string, taskType: string): TokenBudget {
4205
+ const modelLimits = MODEL_LIMITS[model];
4206
+
4207
+ // Reserve 20-30% for final answer
4208
+ const reservedForAnswer = Math.floor(modelLimits.outputTokens * 0.25);
4209
+
4210
+ return {
4211
+ totalOutputTokens: modelLimits.outputTokens,
4212
+ reservedForAnswer,
4213
+ maxThinkingTokens: modelLimits.outputTokens - reservedForAnswer,
4214
+ };
4215
+ }
4216
+ ```
4217
+
4218
+ ### 4. Synchronous-Only Sub-Calls
4219
+
4220
+ **Problem**: Sequential LLM calls create significant latency.
4221
+
4222
+ > "We implemented all sub-LM queries naively as blocking / sequential calls, which caused our RLM experiments to be slow."
4223
+
4224
+ **Solution**: Implement async sub-calls from the start (already included in Phase 8):
4225
+ ```typescript
4226
+ // BAD: Sequential processing
4227
+ for (const chunk of chunks) {
4228
+ const result = await llm.invoke(query, chunk); // Blocks!
4229
+ results.push(result);
4230
+ }
4231
+
4232
+ // GOOD: Parallel with concurrency limit
4233
+ const CONCURRENCY = 5;
4234
+ const results = await pMap(
4235
+ chunks,
4236
+ chunk => llm.invoke(query, chunk),
4237
+ { concurrency: CONCURRENCY }
4238
+ );
4239
+ ```
4240
+
4241
+ ### 5. Tag-Based Answer Detection
4242
+
4243
+ **Problem**: Relying on the model to wrap answers in specific tags is brittle.
4244
+
4245
+ > "The current strategy for distinguishing between a 'next turn' and a final answer for the RLM is to have it wrap its answer in FINAL() or FINAL\_VAR() tags... we also found the model to make strange decisions (e.g. it outputs its plan as a final answer)."
4246
+
4247
+ **Solution**: Use multiple termination signals and validation:
4248
+ ```typescript
4249
+ class TerminationDetection {
4250
+ // Multiple signals - don't rely on just one
4251
+ explicitTag: boolean; // Model used FINAL() tag
4252
+ confidenceStatement: boolean; // Model expressed confidence
4253
+ noMoreActions: boolean; // Model didn't request more operations
4254
+ answerValidation: boolean; // Answer passes format validation
4255
+
4256
+ // Require multiple signals for termination
4257
+ shouldTerminate(): boolean {
4258
+ const signals = [
4259
+ this.explicitTag,
4260
+ this.confidenceStatement,
4261
+ this.noMoreActions,
4262
+ this.answerValidation,
4263
+ ].filter(Boolean).length;
4264
+
4265
+ return signals >= 2; // Require at least 2 signals
4266
+ }
4267
+ }
4268
+
4269
+ // Also add safeguards against premature termination
4270
+ function validateAnswer(answer: string, query: string): ValidationResult {
4271
+ // Check answer isn't just a plan/thought
4272
+ if (answer.toLowerCase().includes('i will') ||
4273
+ answer.toLowerCase().includes('let me')) {
4274
+ return { valid: false, reason: 'Answer appears to be a plan, not a result' };
4275
+ }
4276
+
4277
+ // Check answer addresses the query
4278
+ // ... additional validation
4279
+
4280
+ return { valid: true };
4281
+ }
4282
+ ```
4283
+
4284
+ ### 6. No Cost/Iteration Limits
4285
+
4286
+ **Problem**: Without limits, runaway trajectories can be expensive.
4287
+
4288
+ > "RLMs iteratively interact with their context until they find a suitable answer, leading to large differences in iteration length... many outlier RLM runs are significantly more expensive than any base model query."
4289
+
4290
+ **Solution**: Enforce strict budgets (already in Phase 8) and add circuit breakers:
4291
+ ```typescript
4292
+ class CircuitBreaker {
4293
+ private failures = 0;
4294
+ private lastFailure: Date | null = null;
4295
+ private state: 'closed' | 'open' | 'half-open' = 'closed';
4296
+
4297
+ constructor(
4298
+ private threshold: number = 3,
4299
+ private resetTimeMs: number = 60000
4300
+ ) {}
4301
+
4302
+ async execute<T>(operation: () => Promise<T>): Promise<T> {
4303
+ if (this.state === 'open') {
4304
+ if (Date.now() - this.lastFailure!.getTime() > this.resetTimeMs) {
4305
+ this.state = 'half-open';
4306
+ } else {
4307
+ throw new Error('Circuit breaker is open');
4308
+ }
4309
+ }
4310
+
4311
+ try {
4312
+ const result = await operation();
4313
+ this.onSuccess();
4314
+ return result;
4315
+ } catch (error) {
4316
+ this.onFailure();
4317
+ throw error;
4318
+ }
4319
+ }
4320
+
4321
+ private onSuccess(): void {
4322
+ this.failures = 0;
4323
+ this.state = 'closed';
4324
+ }
4325
+
4326
+ private onFailure(): void {
4327
+ this.failures++;
4328
+ this.lastFailure = new Date();
4329
+ if (this.failures >= this.threshold) {
4330
+ this.state = 'open';
4331
+ }
4332
+ }
4333
+ }
4334
+ ```
4335
+
4336
+ ### 7. Excessive Sub-Calls for Simple Operations
4337
+
4338
+ **Problem**: Some models over-use sub-calls, making thousands of calls for basic tasks.
4339
+
4340
+ > "We observed a trajectory on OOLONG where the model tries to reproduce its correct answer more than five times before choosing the incorrect answer in the end."
4341
+
4342
+ **Solution**: Add sub-call throttling and caching:
4343
+ ```typescript
4344
+ class SubCallThrottler {
4345
+ private callCounts = new Map<string, number>();
4346
+ private cache = new Map<string, string>();
4347
+
4348
+ async throttledCall(
4349
+ key: string,
4350
+ llm: LLMService,
4351
+ prompt: string,
4352
+ maxCalls: number = 3
4353
+ ): Promise<string> {
4354
+ // Check cache first
4355
+ const cacheKey = this.hashPrompt(prompt);
4356
+ if (this.cache.has(cacheKey)) {
4357
+ return this.cache.get(cacheKey)!;
4358
+ }
4359
+
4360
+ // Check call count
4361
+ const count = this.callCounts.get(key) ?? 0;
4362
+ if (count >= maxCalls) {
4363
+ throw new Error(`Maximum sub-calls (${maxCalls}) exceeded for ${key}`);
4364
+ }
4365
+
4366
+ this.callCounts.set(key, count + 1);
4367
+
4368
+ const result = await llm.invoke(prompt);
4369
+ this.cache.set(cacheKey, result);
4370
+
4371
+ return result;
4372
+ }
4373
+
4374
+ private hashPrompt(prompt: string): string {
4375
+ // Simple hash for caching
4376
+ return Buffer.from(prompt).toString('base64').substring(0, 32);
4377
+ }
4378
+ }
4379
+ ```
4380
+
4381
+ ### 8. Not Using Model Priors for Filtering
4382
+
4383
+ **Problem**: Processing all context equally instead of leveraging model knowledge.
4384
+
4385
+ > "A key intuition for why the RLM abstraction can maintain strong performance on huge inputs without exploding costs is the LM's ability to filter input context without explicitly seeing it."
4386
+
4387
+ **Solution**: Pre-filter with keywords/patterns before semantic analysis:
4388
+ ```typescript
4389
+ async function smartFilter(
4390
+ chunks: EnhancedChunk[],
4391
+ query: string,
4392
+ llm: LLMService
4393
+ ): Promise<EnhancedChunk[]> {
4394
+ // Step 1: Ask model for likely keywords (uses model priors)
4395
+ const keywordsPrompt = `Given this query, what specific keywords, names, or patterns would likely appear in relevant documents?
4396
+
4397
+ Query: ${query}
4398
+
4399
+ Return only a JSON array of 5-10 keywords/patterns.`;
4400
+
4401
+ const keywordsResponse = await llm.invoke(keywordsPrompt);
4402
+ const keywords = JSON.parse(keywordsResponse);
4403
+
4404
+ // Step 2: Filter chunks by keywords FIRST (cheap)
4405
+ const pattern = new RegExp(keywords.join('|'), 'i');
4406
+ const filtered = chunks.filter(c => pattern.test(c.content));
4407
+
4408
+ // Step 3: Only do expensive semantic analysis on filtered set
4409
+ if (filtered.length < chunks.length * 0.3) {
4410
+ return filtered; // Good filtering, use this subset
4411
+ }
4412
+
4413
+ // Filtering didn't narrow enough, fall back to semantic
4414
+ return chunks;
4415
+ }
4416
+ ```
4417
+
4418
+ ### Summary: Implementation Checklist
4419
+
4420
+ | Anti-Pattern | Mitigation | Phase |
4421
+ |-------------|------------|-------|
4422
+ | Same prompts for all models | Model-specific configs | 8 |
4423
+ | Assuming code execution | Capability detection + fallback | 8 |
4424
+ | Unlimited thinking tokens | Token budgets | 8 |
4425
+ | Synchronous sub-calls | Async with concurrency | 8 |
4426
+ | Tag-based termination | Multiple signals + validation | 8 |
4427
+ | No cost limits | Budget enforcement + circuit breakers | 8 |
4428
+ | Excessive sub-calls | Throttling + caching | 8 |
4429
+ | Not using model priors | Keyword pre-filtering | 7, 8 |
4430
+
4431
+ ---
4432
+
4433
+ ## Testing Strategy {#testing}
4434
+
4435
+ ### Test Categories
4436
+
4437
+ 1. **Unit Tests**
4438
+ - DecayCalculator scoring
4439
+ - RelationshipGraph traversal
4440
+ - KnowledgeExtractor parsing
4441
+ - CategoryManager classification
4442
+
4443
+ 2. **Integration Tests**
4444
+ - Cursor chat reading
4445
+ - Full ingestion pipeline
4446
+ - Search with decay scoring
4447
+ - Maintenance job execution
4448
+
4449
+ 3. **End-to-End Tests**
4450
+ - CLI commands
4451
+ - MCP tool responses
4452
+ - Full workflow: ingest → extract → search → retrieve
4453
+
4454
+ ### Key Test Cases
4455
+
4456
+ ```typescript
4457
+ // Example test structure
4458
+ describe('DecayCalculator', () => {
4459
+ it('should give high score to new chunks with high importance', () => {
4460
+ const chunk = createChunk({ createdAt: new Date(), importance: 0.9 });
4461
+ const score = calculator.calculateDecayScore(chunk);
4462
+ expect(score).toBeGreaterThan(0.8);
4463
+ });
4464
+
4465
+ it('should decay old unused chunks', () => {
4466
+ const chunk = createChunk({
4467
+ createdAt: subDays(new Date(), 120),
4468
+ accessCount: 0,
4469
+ importance: 0.5,
4470
+ });
4471
+ const score = calculator.calculateDecayScore(chunk);
4472
+ expect(score).toBeLessThan(0.4);
4473
+ });
4474
+
4475
+ it('should boost frequently accessed chunks', () => {
4476
+ const chunk = createChunk({
4477
+ createdAt: subDays(new Date(), 60),
4478
+ accessCount: 20,
4479
+ lastAccessedAt: subDays(new Date(), 1),
4480
+ });
4481
+ const score = calculator.calculateDecayScore(chunk);
4482
+ expect(score).toBeGreaterThan(0.6);
4483
+ });
4484
+ });
4485
+
4486
+ describe('CursorChatReader', () => {
4487
+ it('should read conversations from Cursor database', async () => {
4488
+ const reader = new CursorChatReader(TEST_DB_PATH);
4489
+ const conversations = await reader.listConversations();
4490
+ expect(conversations.length).toBeGreaterThan(0);
4491
+ });
4492
+
4493
+ it('should parse code blocks correctly', async () => {
4494
+ const conversation = await reader.getConversation(TEST_CONV_ID);
4495
+ const codeBlocks = conversation.messages.flatMap(m => m.codeBlocks);
4496
+ expect(codeBlocks.length).toBeGreaterThan(0);
4497
+ expect(codeBlocks[0].language).toBeDefined();
4498
+ });
4499
+ });
4500
+ ```
4501
+
4502
+ ---
4503
+
4504
+ ## Implementation Order
4505
+
4506
+ ### Recommended Sequence
4507
+
4508
+ | Phase | Duration | Dependencies |
4509
+ |-------|----------|--------------|
4510
+ | Phase 1: Foundation | 3-4 days | None |
4511
+ | Phase 2: Chat History | 2-3 days | Phase 1 |
4512
+ | Phase 3: Knowledge Extraction | 3-4 days | Phase 1, 2 |
4513
+ | Phase 4: Relationship Graph | 2-3 days | Phase 1 |
4514
+ | Phase 5: Hierarchical Memory | 2-3 days | Phase 1, 3 |
4515
+ | Phase 6: Maintenance | 2 days | Phase 1, 5 |
4516
+ | Phase 7: Enhanced Retrieval | 2-3 days | All previous |
4517
+
4518
+ **Total estimated time: 16-22 days**
4519
+
4520
+ ### Quick Wins (Can be done first)
4521
+
4522
+ 1. Task 1.1: Enhanced types (1 hour)
4523
+ 2. Task 1.3: Decay calculator (2 hours)
4524
+ 3. Task 2.1: Cursor DB reader (4 hours)
4525
+ 4. Task 6.2: Maintenance CLI (2 hours)
4526
+
4527
+ ### High-Impact Features
4528
+
4529
+ 1. **Cursor chat ingestion** - Immediate value from existing conversations
4530
+ 2. **Temporal decay** - Keeps retrieval relevant without manual curation
4531
+ 3. **Category summaries** - Reduces token usage, improves overview answers
4532
+
4533
+ ---
4534
+
4535
+ ## Appendix: File Structure
4536
+
4537
+ ```
4538
+ src/
4539
+ ├── types/
4540
+ │ ├── memory.ts # Enhanced chunk types
4541
+ │ ├── extractedKnowledge.ts # Knowledge extraction types
4542
+ │ ├── relationships.ts # Relationship types
4543
+ │ └── categories.ts # Category types
4544
+ ├── services/
4545
+ │ ├── memoryMetadataStore.ts # SQLite metadata store
4546
+ │ ├── decayCalculator.ts # Decay scoring
4547
+ │ ├── enhancedVectorStore.ts # Wrapper with memory features
4548
+ │ ├── cursorChatReader.ts # Cursor DB access
4549
+ │ ├── conversationProcessor.ts # Chat processing
4550
+ │ ├── knowledgeExtractor.ts # LLM extraction
4551
+ │ ├── knowledgeStorage.ts # Store extracted knowledge
4552
+ │ ├── relationshipGraph.ts # Graph operations
4553
+ │ ├── categoryManager.ts # Category management
4554
+ │ ├── maintenanceScheduler.ts # Background jobs
4555
+ │ └── hybridScorer.ts # Enhanced scoring
4556
+ ├── cli/
4557
+ │ ├── ingestChats.ts # Chat ingestion commands
4558
+ │ └── maintenance.ts # Maintenance commands
4559
+ ├── config/
4560
+ │ └── memoryConfig.ts # Configuration
4561
+ └── mcp/
4562
+ └── memoryTools.ts # New MCP tools
4563
+ ```
4564
+
4565
+ ---
4566
+
4567
+ *Document Version: 1.0*
4568
+ *Created: January 2025*
4569
+ *For: cursor-recursive-rag memory enhancement project*