claude-self-reflect 2.4.5 → 2.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -52
- package/installer/setup-wizard-docker.js +4 -1
- package/mcp-server/run-mcp-docker.sh +1 -1
- package/package.json +1 -1
- package/mcp-server/src/server_v2.py +0 -254
package/README.md
CHANGED
|
@@ -123,21 +123,27 @@ Once installed, just talk naturally:
|
|
|
123
123
|
|
|
124
124
|
The reflection specialist automatically activates. No special commands needed.
|
|
125
125
|
|
|
126
|
-
## Performance & Usage Guide
|
|
126
|
+
## Performance & Usage Guide
|
|
127
127
|
|
|
128
|
-
### ๐
|
|
129
|
-
|
|
130
|
-
- Compressed XML response format (40% smaller)
|
|
131
|
-
- Optimized excerpts (350 chars for context, 100 chars in brief mode)
|
|
132
|
-
- Smart defaults (5 results to avoid missing relevant conversations)
|
|
128
|
+
### 🚀 Lightning Fast Search
|
|
129
|
+
Optimized to deliver results in **200-350ms** (10-40x faster than v2.4.4)
|
|
133
130
|
|
|
134
131
|
### 🎯 Recommended Usage: Through Reflection-Specialist Agent
|
|
135
132
|
|
|
136
133
|
**Why use the agent instead of direct MCP tools?**
|
|
137
|
-
-
|
|
138
|
-
-
|
|
139
|
-
- Better user experience
|
|
140
|
-
-
|
|
134
|
+
- **Preserves your main conversation context** - Search results don't clutter your working memory
|
|
135
|
+
- **Rich formatted responses** - Clean markdown instead of raw XML in your conversation
|
|
136
|
+
- **Better user experience** - Real-time streaming feedback and progress indicators
|
|
137
|
+
- **Proper tool counting** - Shows actual tool usage instead of "0 tool uses"
|
|
138
|
+
- **Automatic cross-project search** - Agent suggests searching across projects when relevant
|
|
139
|
+
- **Specialized search tools** - Access to quick_search, search_summary, and pagination
|
|
140
|
+
|
|
141
|
+
**Context Preservation Benefit:**
|
|
142
|
+
When you use the reflection-specialist agent, all the search results and processing happen in an isolated context. This means:
|
|
143
|
+
- Your main conversation stays clean and focused
|
|
144
|
+
- No XML dumps or raw data in your chat history
|
|
145
|
+
- Multiple searches won't exhaust your context window
|
|
146
|
+
- You get just the insights, not the implementation details
|
|
141
147
|
|
|
142
148
|
**Example:**
|
|
143
149
|
```
|
|
@@ -147,61 +153,40 @@ You: "What Docker issues did we solve?"
|
|
|
147
153
|
⎿ Searching 57 collections...
|
|
148
154
|
⎿ Found 5 relevant conversations
|
|
149
155
|
⎿ Done (1 tool use · 12k tokens · 2.3s)
|
|
156
|
+
[Returns clean, formatted insights without cluttering your context]
|
|
150
157
|
```
|
|
151
158
|
|
|
152
159
|
### ⚡ Performance Baselines
|
|
153
160
|
|
|
154
|
-
| Method | Search Time | Total Time | Best For |
|
|
155
|
-
|
|
156
|
-
| Direct MCP | 200-350ms | 200-350ms | Programmatic use,
|
|
157
|
-
| Via Agent | 200-350ms |
|
|
161
|
+
| Method | Search Time | Total Time | Context Impact | Best For |
|
|
162
|
+
|--------|------------|------------|----------------|----------|
|
|
163
|
+
| Direct MCP | 200-350ms | 200-350ms | Uses main context | Programmatic use, when context space matters |
|
|
164
|
+
| Via Agent | 200-350ms | 24-30s* | Isolated context | Interactive use, exploration, multiple searches |
|
|
158
165
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
## Project-Scoped Search (New in v2.4.3)
|
|
166
|
+
*Note: The 24-30s includes context preservation overhead, which keeps your main conversation clean
|
|
162
167
|
|
|
163
|
-
|
|
168
|
+
**Note**: The specialized tools (`quick_search`, `search_summary`, `get_more_results`) only work through the reflection-specialist agent due to MCP protocol limitations.
|
|
164
169
|
|
|
165
|
-
|
|
170
|
+
## Key Features
|
|
166
171
|
|
|
167
|
-
###
|
|
172
|
+
### 🎯 Project-Scoped Search
|
|
173
|
+
Searches are **project-aware by default** (v2.4.3+). Claude automatically searches within your current project:
|
|
168
174
|
|
|
169
175
|
```
|
|
170
|
-
#
|
|
171
|
-
You: "What authentication method did we
|
|
172
|
-
Claude: [Searches ONLY
|
|
173
|
-
"Found 3 conversations about JWT authentication..."
|
|
176
|
+
# In ~/projects/MyApp
|
|
177
|
+
You: "What authentication method did we use?"
|
|
178
|
+
Claude: [Searches ONLY MyApp conversations]
|
|
174
179
|
|
|
175
|
-
# To search everywhere
|
|
180
|
+
# To search everywhere
|
|
176
181
|
You: "Search all projects for WebSocket implementations"
|
|
177
182
|
Claude: [Searches across ALL your projects]
|
|
178
|
-
"Found implementations in 5 projects: ..."
|
|
179
|
-
|
|
180
|
-
# To search a specific project
|
|
181
|
-
You: "Find Docker setup in claude-self-reflect project"
|
|
182
|
-
Claude: [Searches only claude-self-reflect conversations]
|
|
183
183
|
```
|
|
184
184
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
| Search Type | How to Trigger | Example |
|
|
185
|
+
| Search Scope | How to Trigger | Example |
|
|
188
186
|
|------------|----------------|---------|
|
|
189
|
-
|
|
|
190
|
-
|
|
|
191
|
-
|
|
|
192
|
-
|
|
193
|
-
### Why This Change?
|
|
194
|
-
|
|
195
|
-
- **Focused Results**: No more sifting through unrelated conversations
|
|
196
|
-
- **Better Performance**: Single-project search is ~100ms faster
|
|
197
|
-
- **Natural Workflow**: Results match your current working context
|
|
198
|
-
- **Privacy**: Work and personal projects stay isolated
|
|
199
|
-
|
|
200
|
-
### Upgrading from Earlier Versions?
|
|
201
|
-
|
|
202
|
-
Your existing conversations remain searchable. The only change is that searches now default to your current project. To get the old behavior, simply ask to "search all projects".
|
|
203
|
-
|
|
204
|
-
See [Project-Scoped Search Guide](docs/project-scoped-search.md) for detailed examples and advanced usage.
|
|
187
|
+
| Current Project (default) | Just ask normally | "What did we discuss about caching?" |
|
|
188
|
+
| All Projects | Say "all projects" | "Search all projects for error handling" |
|
|
189
|
+
| Specific Project | Name the project | "Find auth code in MyApp project" |
|
|
205
190
|
|
|
206
191
|
## Memory Decay
|
|
207
192
|
|
|
@@ -272,10 +257,14 @@ Both embedding options work well. Local mode uses FastEmbed for privacy and offl
|
|
|
272
257
|
- [GitHub Issues](https://github.com/ramakay/claude-self-reflect/issues)
|
|
273
258
|
- [Discussions](https://github.com/ramakay/claude-self-reflect/discussions)
|
|
274
259
|
|
|
275
|
-
##
|
|
260
|
+
## What's New
|
|
261
|
+
|
|
262
|
+
### Recent Updates
|
|
263
|
+
- **v2.4.5** - 10-40x performance boost, context preservation
|
|
264
|
+
- **v2.4.3** - Project-scoped search (breaking change)
|
|
265
|
+
- **v2.3.7** - Local embeddings by default for privacy
|
|
276
266
|
|
|
277
|
-
-
|
|
278
|
-
- 💬 [Project-Scoped Search Feedback](https://github.com/ramakay/claude-self-reflect/discussions/17) - Share your experience with the breaking change
|
|
267
|
+
📚 [Full Release History](docs/release-history.md) | 💬 [Discussions](https://github.com/ramakay/claude-self-reflect/discussions)
|
|
279
268
|
|
|
280
269
|
## Contributing
|
|
281
270
|
|
|
@@ -262,7 +262,10 @@ async function configureClaude() {
|
|
|
262
262
|
|
|
263
263
|
// Create a script that runs the MCP server in Docker
|
|
264
264
|
const scriptContent = `#!/bin/bash
|
|
265
|
-
|
|
265
|
+
# Run the MCP server in the Docker container with stdin attached
|
|
266
|
+
# Using python -u for unbuffered output
|
|
267
|
+
# Using the main module which properly supports local embeddings
|
|
268
|
+
docker exec -i claude-reflection-mcp python -u -m src
|
|
266
269
|
`;
|
|
267
270
|
|
|
268
271
|
await fs.writeFile(mcpScript, scriptContent, { mode: 0o755 });
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
# Run the MCP server in the Docker container with stdin attached
|
|
3
3
|
# Using python -u for unbuffered output
|
|
4
|
-
# Using
|
|
4
|
+
# Using the main module which properly supports local embeddings
|
|
5
5
|
docker exec -i claude-reflection-mcp python -u -m src
|
package/package.json
CHANGED
|
@@ -1,254 +0,0 @@
|
|
|
1
|
-
"""Claude Reflect MCP Server with Native Qdrant Memory Decay (v2.0.0)."""
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import Any, Optional, List, Dict, Union
|
|
6
|
-
from datetime import datetime
|
|
7
|
-
import json
|
|
8
|
-
|
|
9
|
-
from fastmcp import FastMCP, Context
|
|
10
|
-
from pydantic import BaseModel, Field
|
|
11
|
-
from qdrant_client import AsyncQdrantClient, models
|
|
12
|
-
from qdrant_client.models import (
|
|
13
|
-
PointStruct, VectorParams, Distance
|
|
14
|
-
)
|
|
15
|
-
try:
|
|
16
|
-
from qdrant_client.models import (
|
|
17
|
-
Query, Formula, Expression, MultExpression,
|
|
18
|
-
ExpDecayExpression, DecayParamsExpression,
|
|
19
|
-
SearchRequest, NamedQuery
|
|
20
|
-
)
|
|
21
|
-
NATIVE_DECAY_AVAILABLE = True
|
|
22
|
-
except ImportError:
|
|
23
|
-
# Fallback for older qdrant-client versions
|
|
24
|
-
NATIVE_DECAY_AVAILABLE = False
|
|
25
|
-
Query = Formula = Expression = MultExpression = None
|
|
26
|
-
ExpDecayExpression = DecayParamsExpression = None
|
|
27
|
-
SearchRequest = NamedQuery = None
|
|
28
|
-
import voyageai
|
|
29
|
-
from dotenv import load_dotenv
|
|
30
|
-
|
|
31
|
-
# Load environment variables
|
|
32
|
-
env_path = Path(__file__).parent.parent.parent / '.env'
|
|
33
|
-
load_dotenv(env_path)
|
|
34
|
-
|
|
35
|
-
# Configuration
|
|
36
|
-
QDRANT_URL = os.getenv('QDRANT_URL', 'http://localhost:6333')
|
|
37
|
-
VOYAGE_API_KEY = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2')
|
|
38
|
-
ENABLE_MEMORY_DECAY = os.getenv('ENABLE_MEMORY_DECAY', 'false').lower() == 'true'
|
|
39
|
-
DECAY_WEIGHT = float(os.getenv('DECAY_WEIGHT', '0.3'))
|
|
40
|
-
DECAY_SCALE_DAYS = float(os.getenv('DECAY_SCALE_DAYS', '90'))
|
|
41
|
-
|
|
42
|
-
# Initialize Voyage AI client
|
|
43
|
-
voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY) if VOYAGE_API_KEY else None
|
|
44
|
-
|
|
45
|
-
# Debug environment loading (disabled for production)
|
|
46
|
-
# print(f"[DEBUG] Qdrant Native Decay Server v2.0.0")
|
|
47
|
-
# print(f"[DEBUG] ENABLE_MEMORY_DECAY: {ENABLE_MEMORY_DECAY}")
|
|
48
|
-
# print(f"[DEBUG] DECAY_WEIGHT: {DECAY_WEIGHT}")
|
|
49
|
-
# print(f"[DEBUG] DECAY_SCALE_DAYS: {DECAY_SCALE_DAYS}")
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
class SearchResult(BaseModel):
|
|
53
|
-
"""A single search result."""
|
|
54
|
-
id: str
|
|
55
|
-
score: float
|
|
56
|
-
timestamp: str
|
|
57
|
-
role: str
|
|
58
|
-
excerpt: str
|
|
59
|
-
project_name: str
|
|
60
|
-
conversation_id: Optional[str] = None
|
|
61
|
-
collection_name: str
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
# Initialize FastMCP instance
|
|
65
|
-
mcp = FastMCP(
|
|
66
|
-
name="claude-reflect",
|
|
67
|
-
instructions="Search past conversations and store reflections with time-based memory decay (v2.0.0 - Native Qdrant)"
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
# Create Qdrant client
|
|
71
|
-
qdrant_client = AsyncQdrantClient(url=QDRANT_URL)
|
|
72
|
-
|
|
73
|
-
async def get_voyage_collections() -> List[str]:
|
|
74
|
-
"""Get all Voyage collections."""
|
|
75
|
-
collections = await qdrant_client.get_collections()
|
|
76
|
-
return [c.name for c in collections.collections if c.name.endswith('_voyage')]
|
|
77
|
-
|
|
78
|
-
async def generate_embedding(text: str) -> List[float]:
|
|
79
|
-
"""Generate embedding using Voyage AI."""
|
|
80
|
-
if not voyage_client:
|
|
81
|
-
raise ValueError("Voyage AI API key not configured")
|
|
82
|
-
|
|
83
|
-
result = voyage_client.embed(
|
|
84
|
-
texts=[text],
|
|
85
|
-
model="voyage-3-large",
|
|
86
|
-
input_type="query"
|
|
87
|
-
)
|
|
88
|
-
return result.embeddings[0]
|
|
89
|
-
|
|
90
|
-
# Register tools
|
|
91
|
-
@mcp.tool()
|
|
92
|
-
async def reflect_on_past(
|
|
93
|
-
ctx: Context,
|
|
94
|
-
query: str = Field(description="The search query to find semantically similar conversations"),
|
|
95
|
-
limit: int = Field(default=5, description="Maximum number of results to return"),
|
|
96
|
-
min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
|
|
97
|
-
use_decay: Union[int, str] = Field(default=-1, description="Apply time-based decay: 1=enable, 0=disable, -1=use environment default (accepts int or str)")
|
|
98
|
-
) -> str:
|
|
99
|
-
"""Search for relevant past conversations using semantic search with optional time decay."""
|
|
100
|
-
|
|
101
|
-
# Normalize use_decay to integer
|
|
102
|
-
if isinstance(use_decay, str):
|
|
103
|
-
try:
|
|
104
|
-
use_decay = int(use_decay)
|
|
105
|
-
except ValueError:
|
|
106
|
-
raise ValueError("use_decay must be '1', '0', or '-1'")
|
|
107
|
-
|
|
108
|
-
# Parse decay parameter using integer approach
|
|
109
|
-
should_use_decay = (
|
|
110
|
-
True if use_decay == 1
|
|
111
|
-
else False if use_decay == 0
|
|
112
|
-
else ENABLE_MEMORY_DECAY # -1 or any other value
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
await ctx.debug(f"Searching for: {query}")
|
|
116
|
-
await ctx.debug(f"Decay enabled: {should_use_decay}")
|
|
117
|
-
await ctx.debug(f"Using Qdrant Native Decay (v2.0.0)")
|
|
118
|
-
|
|
119
|
-
try:
|
|
120
|
-
# Generate embedding
|
|
121
|
-
query_embedding = await generate_embedding(query)
|
|
122
|
-
|
|
123
|
-
# Get all Voyage collections
|
|
124
|
-
voyage_collections = await get_voyage_collections()
|
|
125
|
-
if not voyage_collections:
|
|
126
|
-
return "No conversation collections found. Please import conversations first."
|
|
127
|
-
|
|
128
|
-
await ctx.debug(f"Searching across {len(voyage_collections)} collections")
|
|
129
|
-
|
|
130
|
-
all_results = []
|
|
131
|
-
|
|
132
|
-
# Search each collection with native Qdrant decay
|
|
133
|
-
for collection_name in voyage_collections:
|
|
134
|
-
try:
|
|
135
|
-
if should_use_decay and NATIVE_DECAY_AVAILABLE:
|
|
136
|
-
# Build the query with native Qdrant decay formula
|
|
137
|
-
query_obj = Query(
|
|
138
|
-
nearest=query_embedding,
|
|
139
|
-
formula=Formula(
|
|
140
|
-
sum=[
|
|
141
|
-
# Original similarity score
|
|
142
|
-
Expression(variable="score"),
|
|
143
|
-
# Decay boost term
|
|
144
|
-
Expression(
|
|
145
|
-
mult=MultExpression(
|
|
146
|
-
mult=[
|
|
147
|
-
# Decay weight
|
|
148
|
-
Expression(constant=DECAY_WEIGHT),
|
|
149
|
-
# Exponential decay function
|
|
150
|
-
Expression(
|
|
151
|
-
exp_decay=DecayParamsExpression(
|
|
152
|
-
# Use timestamp field for decay
|
|
153
|
-
x=Expression(datetime_key="timestamp"),
|
|
154
|
-
# Decay from current time (server-side)
|
|
155
|
-
target=Expression(datetime="now"),
|
|
156
|
-
# Scale in milliseconds
|
|
157
|
-
scale=DECAY_SCALE_DAYS * 24 * 60 * 60 * 1000,
|
|
158
|
-
# Standard exponential decay midpoint
|
|
159
|
-
midpoint=0.5
|
|
160
|
-
)
|
|
161
|
-
)
|
|
162
|
-
]
|
|
163
|
-
)
|
|
164
|
-
)
|
|
165
|
-
]
|
|
166
|
-
)
|
|
167
|
-
)
|
|
168
|
-
|
|
169
|
-
# Execute query with native decay
|
|
170
|
-
results = await qdrant_client.query_points(
|
|
171
|
-
collection_name=collection_name,
|
|
172
|
-
query=query_obj,
|
|
173
|
-
limit=limit,
|
|
174
|
-
score_threshold=min_score,
|
|
175
|
-
with_payload=True
|
|
176
|
-
)
|
|
177
|
-
|
|
178
|
-
await ctx.debug(f"Native decay search in {collection_name} returned {len(results.points)} results")
|
|
179
|
-
else:
|
|
180
|
-
# Standard search without decay
|
|
181
|
-
results = await qdrant_client.search(
|
|
182
|
-
collection_name=collection_name,
|
|
183
|
-
query_vector=query_embedding,
|
|
184
|
-
limit=limit,
|
|
185
|
-
score_threshold=min_score,
|
|
186
|
-
with_payload=True
|
|
187
|
-
)
|
|
188
|
-
results = models.QueryResponse(points=results)
|
|
189
|
-
|
|
190
|
-
# Process results
|
|
191
|
-
for point in results.points:
|
|
192
|
-
all_results.append(SearchResult(
|
|
193
|
-
id=str(point.id),
|
|
194
|
-
score=point.score,
|
|
195
|
-
timestamp=point.payload.get('timestamp', datetime.now().isoformat()),
|
|
196
|
-
role=point.payload.get('start_role', point.payload.get('role', 'unknown')),
|
|
197
|
-
excerpt=(point.payload.get('text', '')[:500] + '...'),
|
|
198
|
-
project_name=point.payload.get('project', collection_name.replace('conv_', '').replace('_voyage', '')),
|
|
199
|
-
conversation_id=point.payload.get('conversation_id'),
|
|
200
|
-
collection_name=collection_name
|
|
201
|
-
))
|
|
202
|
-
|
|
203
|
-
except Exception as e:
|
|
204
|
-
await ctx.debug(f"Error searching {collection_name}: {str(e)}")
|
|
205
|
-
continue
|
|
206
|
-
|
|
207
|
-
# Sort by score and limit
|
|
208
|
-
all_results.sort(key=lambda x: x.score, reverse=True)
|
|
209
|
-
all_results = all_results[:limit]
|
|
210
|
-
|
|
211
|
-
if not all_results:
|
|
212
|
-
return f"No conversations found matching '{query}'. Try different keywords or check if conversations have been imported."
|
|
213
|
-
|
|
214
|
-
# Format results
|
|
215
|
-
result_text = f"Found {len(all_results)} relevant conversation(s) for '{query}':\n\n"
|
|
216
|
-
for i, result in enumerate(all_results):
|
|
217
|
-
result_text += f"**Result {i+1}** (Score: {result.score:.3f})\n"
|
|
218
|
-
result_text += f"Time: {datetime.fromisoformat(result.timestamp).strftime('%Y-%m-%d %H:%M:%S')}\n"
|
|
219
|
-
result_text += f"Project: {result.project_name}\n"
|
|
220
|
-
result_text += f"Role: {result.role}\n"
|
|
221
|
-
result_text += f"Excerpt: {result.excerpt}\n"
|
|
222
|
-
result_text += "---\n\n"
|
|
223
|
-
|
|
224
|
-
return result_text
|
|
225
|
-
|
|
226
|
-
except Exception as e:
|
|
227
|
-
await ctx.error(f"Search failed: {str(e)}")
|
|
228
|
-
return f"Failed to search conversations: {str(e)}"
|
|
229
|
-
|
|
230
|
-
@mcp.tool()
|
|
231
|
-
async def store_reflection(
|
|
232
|
-
ctx: Context,
|
|
233
|
-
content: str = Field(description="The insight or reflection to store"),
|
|
234
|
-
tags: List[str] = Field(default=[], description="Tags to categorize this reflection")
|
|
235
|
-
) -> str:
|
|
236
|
-
"""Store an important insight or reflection for future reference."""
|
|
237
|
-
|
|
238
|
-
try:
|
|
239
|
-
# TODO: Implement actual storage in a dedicated reflections collection
|
|
240
|
-
# For now, just acknowledge the storage
|
|
241
|
-
tags_str = ', '.join(tags) if tags else 'none'
|
|
242
|
-
return f"Reflection stored successfully with tags: {tags_str}"
|
|
243
|
-
|
|
244
|
-
except Exception as e:
|
|
245
|
-
await ctx.error(f"Store failed: {str(e)}")
|
|
246
|
-
return f"Failed to store reflection: {str(e)}"
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
# Debug output (disabled for production)
|
|
250
|
-
# print(f"[DEBUG] FastMCP server v2.0.0 created with native Qdrant decay")
|
|
251
|
-
|
|
252
|
-
# Run the server when executed as main module
|
|
253
|
-
if __name__ == "__main__":
|
|
254
|
-
mcp.run(transport="stdio", show_banner=False)
|