claude-self-reflect 2.5.10 → 2.5.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -150,13 +150,14 @@ Recent conversations matter more. Old ones fade. Like your brain, but reliable.
 
 ## What's New
 
+- **v2.5.11** - Critical cloud mode fix - Environment variables now properly passed to MCP server
+- **v2.5.10** - Emergency hotfix for MCP server startup failure (dead code removal)
 - **v2.5.6** - Tool Output Extraction - Captures git changes & tool outputs for cross-agent discovery
 - **v2.5.5** - Critical dependency fix & streaming importer enhancements
 - **v2.5.4** - Documentation & bug fixes (import path & state file compatibility)
 - **v2.5.3** - Streamlined README & import architecture diagram
 - **v2.5.2** - State file compatibility fix
 - **v2.4.5** - 10-40x performance boost
-- **v2.4.3** - Project-scoped search
 
 [Full changelog](docs/release-history.md)
 
@@ -21,5 +21,53 @@ else
     source venv/bin/activate
 fi
 
+# CRITICAL FIX: Pass through environment variables from Claude Code
+# These environment variables are set by `claude mcp add -e KEY=value`
+# Export them so the Python process can access them
+if [ ! -z "$VOYAGE_KEY" ]; then
+    export VOYAGE_KEY="$VOYAGE_KEY"
+fi
+
+if [ ! -z "$VOYAGE_KEY_2" ]; then
+    export VOYAGE_KEY_2="$VOYAGE_KEY_2"
+fi
+
+if [ ! -z "$PREFER_LOCAL_EMBEDDINGS" ]; then
+    export PREFER_LOCAL_EMBEDDINGS="$PREFER_LOCAL_EMBEDDINGS"
+fi
+
+if [ ! -z "$QDRANT_URL" ]; then
+    export QDRANT_URL="$QDRANT_URL"
+fi
+
+if [ ! -z "$ENABLE_MEMORY_DECAY" ]; then
+    export ENABLE_MEMORY_DECAY="$ENABLE_MEMORY_DECAY"
+fi
+
+if [ ! -z "$DECAY_WEIGHT" ]; then
+    export DECAY_WEIGHT="$DECAY_WEIGHT"
+fi
+
+if [ ! -z "$DECAY_SCALE_DAYS" ]; then
+    export DECAY_SCALE_DAYS="$DECAY_SCALE_DAYS"
+fi
+
+if [ ! -z "$EMBEDDING_MODEL" ]; then
+    export EMBEDDING_MODEL="$EMBEDDING_MODEL"
+fi
+
+# The embedding manager now handles cache properly in a controlled directory
+# Set to 'false' if you want to use HuggingFace instead of Qdrant CDN
+if [ -z "$FASTEMBED_SKIP_HUGGINGFACE" ]; then
+    export FASTEMBED_SKIP_HUGGINGFACE=true
+fi
+
+# Debug: Show what environment variables are being passed
+echo "[DEBUG] Environment variables for MCP server:"
+echo "[DEBUG] VOYAGE_KEY: ${VOYAGE_KEY:+set}"
+echo "[DEBUG] PREFER_LOCAL_EMBEDDINGS: ${PREFER_LOCAL_EMBEDDINGS:-not set}"
+echo "[DEBUG] QDRANT_URL: ${QDRANT_URL:-not set}"
+echo "[DEBUG] ENABLE_MEMORY_DECAY: ${ENABLE_MEMORY_DECAY:-not set}"
+
 # Run the MCP server
 exec python -m src
@@ -36,37 +36,48 @@ except ImportError:
 import voyageai
 from dotenv import load_dotenv
 
-# Load environment variables
+# Load environment variables from .env file (fallback only)
 env_path = Path(__file__).parent.parent.parent / '.env'
-load_dotenv(env_path)
+load_dotenv(env_path, override=False)  # Don't override process environment
 
-# Configuration
+# Configuration - prioritize process environment variables over .env file
 QDRANT_URL = os.getenv('QDRANT_URL', 'http://localhost:6333')
-VOYAGE_API_KEY = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2')
+VOYAGE_API_KEY = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2') or os.getenv('VOYAGE_KEY_2')
 ENABLE_MEMORY_DECAY = os.getenv('ENABLE_MEMORY_DECAY', 'false').lower() == 'true'
 DECAY_WEIGHT = float(os.getenv('DECAY_WEIGHT', '0.3'))
 DECAY_SCALE_DAYS = float(os.getenv('DECAY_SCALE_DAYS', '90'))
 USE_NATIVE_DECAY = os.getenv('USE_NATIVE_DECAY', 'false').lower() == 'true'
 
-# Embedding configuration
-PREFER_LOCAL_EMBEDDINGS = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'false').lower() == 'true'
+# Embedding configuration - now using lazy initialization
+# CRITICAL: Default changed to 'true' for local embeddings for privacy
+PREFER_LOCAL_EMBEDDINGS = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'true').lower() == 'true'
 EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
 
-# Initialize Voyage AI client (only if not using local embeddings)
-voyage_client = None
-if not PREFER_LOCAL_EMBEDDINGS and VOYAGE_API_KEY:
-    voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY)
+# Import the robust embedding manager
+from .embedding_manager import get_embedding_manager
 
-# Initialize local embedding model if needed
-local_embedding_model = None
-if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
+# Lazy initialization - models will be loaded on first use
+embedding_manager = None
+voyage_client = None  # Keep for backward compatibility
+local_embedding_model = None  # Keep for backward compatibility
+
+def initialize_embeddings():
+    """Initialize embedding models with robust fallback."""
+    global embedding_manager, voyage_client, local_embedding_model
     try:
-        from fastembed import TextEmbedding
-        local_embedding_model = TextEmbedding(model_name=EMBEDDING_MODEL)
-        print(f"[DEBUG] Initialized local embedding model: {EMBEDDING_MODEL}")
-    except ImportError:
-        print("[ERROR] FastEmbed not available. Install with: pip install fastembed")
-        raise
+        embedding_manager = get_embedding_manager()
+        print(f"[INFO] Embedding manager initialized: {embedding_manager.get_model_info()}")
+
+        # Set backward compatibility references
+        if embedding_manager.model_type == 'voyage':
+            voyage_client = embedding_manager.voyage_client
+        elif embedding_manager.model_type == 'local':
+            local_embedding_model = embedding_manager.model
+
+        return True
+    except Exception as e:
+        print(f"[ERROR] Failed to initialize embeddings: {e}")
+        return False
 
 # Debug environment loading
 print(f"[DEBUG] Environment variables loaded:")
@@ -88,6 +99,7 @@ class SearchResult(BaseModel):
     excerpt: str
     project_name: str
     conversation_id: Optional[str] = None
+    base_conversation_id: Optional[str] = None
     collection_name: str
     raw_payload: Optional[Dict[str, Any]] = None  # Full Qdrant payload when debug mode enabled
 
@@ -100,6 +112,99 @@ mcp = FastMCP(
 
 # Create Qdrant client
 qdrant_client = AsyncQdrantClient(url=QDRANT_URL)
+
+# Track indexing status (updated periodically)
+indexing_status = {
+    "last_check": 0,
+    "indexed_conversations": 0,
+    "total_conversations": 0,
+    "percentage": 100.0,
+    "backlog_count": 0,
+    "is_checking": False
+}
+
+async def update_indexing_status():
+    """Update indexing status by checking JSONL files vs Qdrant collections.
+    This is a lightweight check that compares file counts, not full content."""
+    global indexing_status
+
+    # Don't run concurrent checks
+    if indexing_status["is_checking"]:
+        return
+
+    # Only check every 5 minutes to avoid overhead
+    current_time = time.time()
+    if current_time - indexing_status["last_check"] < 300:  # 5 minutes
+        return
+
+    indexing_status["is_checking"] = True
+
+    try:
+        # Count total JSONL files
+        projects_dir = Path.home() / ".claude" / "projects"
+        total_files = 0
+        indexed_files = 0
+
+        if projects_dir.exists():
+            # Get all JSONL files
+            jsonl_files = list(projects_dir.glob("**/*.jsonl"))
+            total_files = len(jsonl_files)
+
+            # Check imported-files.json to see what's been imported
+            # The streaming importer uses imported-files.json with nested structure
+            # Try multiple possible locations for the config file
+            possible_paths = [
+                Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
+                Path(__file__).parent.parent.parent / "config" / "imported-files.json",
+                Path("/config/imported-files.json")  # Docker path if running in container
+            ]
+
+            imported_files_path = None
+            for path in possible_paths:
+                if path.exists():
+                    imported_files_path = path
+                    break
+
+            if imported_files_path and imported_files_path.exists():
+                with open(imported_files_path, 'r') as f:
+                    imported_data = json.load(f)
+                    # The file has nested structure: {stream_position: {file: position}, imported_files: {file: lines}}
+                    # Handle new nested structure
+                    stream_position = imported_data.get("stream_position", {})
+                    imported_files_list = stream_position.get("imported_files", [])
+                    file_metadata = stream_position.get("file_metadata", {})
+
+                    # Count files that have been imported
+                    for file_path in jsonl_files:
+                        # Try multiple path formats to match Docker's state file
+                        file_str = str(file_path).replace(str(Path.home()), "/logs").replace("\\", "/")
+                        # Also try without .claude/projects prefix (Docker mounts directly)
+                        file_str_alt = file_str.replace("/.claude/projects", "")
+
+                        # Check if file is in imported_files list (fully imported)
+                        if file_str in imported_files_list or file_str_alt in imported_files_list:
+                            indexed_files += 1
+                        # Or if it has metadata with position > 0 (partially imported)
+                        elif file_str in file_metadata and file_metadata[file_str].get("position", 0) > 0:
+                            indexed_files += 1
+                        elif file_str_alt in file_metadata and file_metadata[file_str_alt].get("position", 0) > 0:
+                            indexed_files += 1
+
+        # Update status
+        indexing_status["last_check"] = current_time
+        indexing_status["total_conversations"] = total_files
+        indexing_status["indexed_conversations"] = indexed_files
+        indexing_status["backlog_count"] = total_files - indexed_files
+
+        if total_files > 0:
+            indexing_status["percentage"] = (indexed_files / total_files) * 100
+        else:
+            indexing_status["percentage"] = 100.0
+
+    except Exception as e:
+        print(f"[WARNING] Failed to update indexing status: {e}")
+    finally:
+        indexing_status["is_checking"] = False
 
 async def get_all_collections() -> List[str]:
     """Get all collections (both Voyage and local)."""
@@ -115,12 +220,23 @@ async def generate_embedding(text: str, force_type: Optional[str] = None) -> Lis
         text: Text to embed
         force_type: Force specific embedding type ('local' or 'voyage')
     """
-    use_local = force_type == 'local' if force_type else (PREFER_LOCAL_EMBEDDINGS or not voyage_client)
+    global embedding_manager, voyage_client, local_embedding_model
+
+    # Initialize on first use
+    if embedding_manager is None:
+        if not initialize_embeddings():
+            raise RuntimeError("Failed to initialize any embedding model. Check logs for details.")
+
+    # Determine which type to use
+    if force_type:
+        use_local = force_type == 'local'
+    else:
+        use_local = embedding_manager.model_type == 'local'
 
     if use_local:
         # Use local embeddings
         if not local_embedding_model:
-            raise ValueError("Local embedding model not initialized")
+            raise ValueError("Local embedding model not available")
 
         # Run in executor since fastembed is synchronous
         loop = asyncio.get_event_loop()
@@ -131,7 +247,7 @@ async def generate_embedding(text: str, force_type: Optional[str] = None) -> Lis
     else:
         # Use Voyage AI
         if not voyage_client:
-            raise ValueError("Voyage client not initialized")
+            raise ValueError("Voyage client not available")
         result = voyage_client.embed(
             texts=[text],
             model="voyage-3-large",
@@ -417,6 +533,7 @@ async def reflect_on_past(
                 excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
                 project_name=point_project,
                 conversation_id=point.payload.get('conversation_id'),
+                base_conversation_id=point.payload.get('base_conversation_id'),
                 collection_name=collection_name,
                 raw_payload=point.payload if include_raw else None
             ))
@@ -496,6 +613,7 @@ async def reflect_on_past(
                 excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
                 project_name=point_project,
                 conversation_id=point.payload.get('conversation_id'),
+                base_conversation_id=point.payload.get('base_conversation_id'),
                 collection_name=collection_name,
                 raw_payload=point.payload if include_raw else None
             ))
@@ -532,6 +650,7 @@ async def reflect_on_past(
                 excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
                 project_name=point_project,
                 conversation_id=point.payload.get('conversation_id'),
+                base_conversation_id=point.payload.get('base_conversation_id'),
                 collection_name=collection_name,
                 raw_payload=point.payload if include_raw else None
             ))
@@ -552,6 +671,30 @@ async def reflect_on_past(
             message="Search complete, processing results"
         )
 
+        # Apply base_conversation_id boosting before sorting
+        timing_info['boost_start'] = time.time()
+
+        # Group results by base_conversation_id to identify related chunks
+        base_conversation_groups = {}
+        for result in all_results:
+            base_id = result.base_conversation_id
+            if base_id:
+                if base_id not in base_conversation_groups:
+                    base_conversation_groups[base_id] = []
+                base_conversation_groups[base_id].append(result)
+
+        # Apply boost to results from base conversations with multiple high-scoring chunks
+        base_conversation_boost = 0.1  # Boost factor for base conversation matching
+        for base_id, group_results in base_conversation_groups.items():
+            if len(group_results) > 1:  # Multiple chunks from same base conversation
+                avg_score = sum(r.score for r in group_results) / len(group_results)
+                if avg_score > 0.8:  # Only boost high-quality base conversations
+                    for result in group_results:
+                        result.score += base_conversation_boost
+                        await ctx.debug(f"Boosted result from base_conversation_id {base_id}: {result.score:.3f}")
+
+        timing_info['boost_end'] = time.time()
+
         # Sort by score and limit
         timing_info['sort_start'] = time.time()
         all_results.sort(key=lambda x: x.score, reverse=True)
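
The boost itself is simple arithmetic: a group only qualifies when at least two chunks share a `base_conversation_id` and their mean score exceeds 0.8, and then every chunk in the group gains a flat +0.1 before the global sort. A toy illustration with made-up scores:

```python
# Toy numbers, not real search results.
scores = [0.84, 0.82]                     # two chunks from the same base conversation
avg_score = sum(scores) / len(scores)     # 0.83 -> above the 0.8 threshold
base_conversation_boost = 0.1
if len(scores) > 1 and avg_score > 0.8:
    scores = [s + base_conversation_boost for s in scores]   # -> roughly [0.94, 0.92]
```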
@@ -561,12 +704,89 @@ async def reflect_on_past(
         if not all_results:
             return f"No conversations found matching '{query}'. Try different keywords or check if conversations have been imported."
 
+        # Update indexing status before returning results
+        await update_indexing_status()
+
         # Format results based on response_format
         timing_info['format_start'] = time.time()
 
         if response_format == "xml":
+            # Add upfront summary for immediate visibility (before collapsible XML)
+            upfront_summary = ""
+
+            # Show indexing status prominently
+            if indexing_status["percentage"] < 95.0:
+                upfront_summary += f"📊 INDEXING: {indexing_status['indexed_conversations']}/{indexing_status['total_conversations']} conversations ({indexing_status['percentage']:.1f}% complete, {indexing_status['backlog_count']} pending)\n"
+
+            # Show result summary
+            if all_results:
+                score_info = "high" if all_results[0].score >= 0.85 else "good" if all_results[0].score >= 0.75 else "partial"
+                upfront_summary += f"🎯 RESULTS: {len(all_results)} matches ({score_info} relevance, top score: {all_results[0].score:.3f})\n"
+
+                # Show performance
+                total_time = time.time() - start_time
+                upfront_summary += f"⚡ PERFORMANCE: {int(total_time * 1000)}ms total ({len(collections_to_search)} collections searched)\n"
+            else:
+                upfront_summary += f"❌ NO RESULTS: No conversations found matching '{query}'\n"
+
             # XML format (compact tags for performance)
-            result_text = "<search>\n"
+            result_text = upfront_summary + "\n<search>\n"
+
+            # Add indexing status if not fully baselined - put key stats in opening tag for immediate visibility
+            if indexing_status["percentage"] < 95.0:
+                result_text += f' <info status="indexing" progress="{indexing_status["percentage"]:.1f}%" backlog="{indexing_status["backlog_count"]}">\n'
+                result_text += f' <message>📊 Indexing: {indexing_status["indexed_conversations"]}/{indexing_status["total_conversations"]} conversations ({indexing_status["percentage"]:.1f}% complete, {indexing_status["backlog_count"]} pending)</message>\n'
+                result_text += f" </info>\n"
+
+            # Add high-level result summary
+            if all_results:
+                # Count today's results
+                now = datetime.now(timezone.utc)
+                today_count = 0
+                yesterday_count = 0
+                week_count = 0
+
+                for result in all_results:
+                    timestamp_clean = result.timestamp.replace('Z', '+00:00') if result.timestamp.endswith('Z') else result.timestamp
+                    timestamp_dt = datetime.fromisoformat(timestamp_clean)
+                    if timestamp_dt.tzinfo is None:
+                        timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
+
+                    days_ago = (now - timestamp_dt).days
+                    if days_ago == 0:
+                        today_count += 1
+                    elif days_ago == 1:
+                        yesterday_count += 1
+                    if days_ago <= 7:
+                        week_count += 1
+
+                # Compact summary with key info in opening tag
+                time_info = ""
+                if today_count > 0:
+                    time_info = f"{today_count} today"
+                elif yesterday_count > 0:
+                    time_info = f"{yesterday_count} yesterday"
+                elif week_count > 0:
+                    time_info = f"{week_count} this week"
+                else:
+                    time_info = "older results"
+
+                score_info = "high" if all_results[0].score >= 0.85 else "good" if all_results[0].score >= 0.75 else "partial"
+
+                result_text += f' <summary count="{len(all_results)}" relevance="{score_info}" recency="{time_info}" top-score="{all_results[0].score:.3f}">\n'
+
+                # Short preview of top result
+                top_excerpt = all_results[0].excerpt[:100].strip()
+                if '...' not in top_excerpt:
+                    top_excerpt += "..."
+                result_text += f' <preview>{top_excerpt}</preview>\n'
+                result_text += f" </summary>\n"
+            else:
+                result_text += f" <result-summary>\n"
+                result_text += f" <headline>No matches found</headline>\n"
+                result_text += f" <relevance>No conversations matched your query</relevance>\n"
+                result_text += f" </result-summary>\n"
+
             result_text += f" <meta>\n"
             result_text += f" <q>{query}</q>\n"
             result_text += f" <scope>{target_project if target_project != 'all' else 'all'}</scope>\n"
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "claude-self-reflect",
-  "version": "2.5.10",
+  "version": "2.5.11",
   "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
   "keywords": [
     "claude",