claude-self-reflect 3.3.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,10 +44,24 @@ class ReflectionTools:
         await ctx.debug(f"Storing reflection with {len(tags)} tags")
 
         try:
-            # Determine collection name based on active model type, not prefer_local
+            # Check runtime preference from environment
+            import os
+            prefer_local = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'true').lower() == 'true'
+
             embedding_manager = self.get_embedding_manager()
-            # Use actual model_type to ensure consistency
-            embedding_type = embedding_manager.model_type or ("voyage" if embedding_manager.voyage_client else "local")
+
+            # Use embedding_manager's model_type which already respects preferences
+            embedding_type = embedding_manager.model_type
+
+            if embedding_type == "local":
+                await ctx.debug("Using LOCAL mode (FastEmbed, 384 dimensions)")
+            elif embedding_type == "voyage":
+                await ctx.debug("Using VOYAGE mode (Voyage AI, 1024 dimensions)")
+            else:
+                # Shouldn't happen but handle gracefully
+                embedding_type = "local" if embedding_manager.local_model else "voyage"
+                await ctx.debug(f"Using {embedding_type} mode (fallback)")
+
             collection_name = f"reflections_{embedding_type}"
 
             # Ensure reflections collection exists
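
Note: the new code resolves the collection purely from the embedding manager's model_type, falling back on whichever backend is loaded; the prefer_local flag read from PREFER_LOCAL_EMBEDDINGS is not referenced again within this hunk. A minimal sketch of the resolution order (the helper name is illustrative, not part of the package):

    def resolve_reflection_collection(model_type, has_local_model):
        # Trust the manager's model_type; otherwise fall back on the loaded backend
        if model_type in ("local", "voyage"):
            embedding_type = model_type
        else:
            embedding_type = "local" if has_local_model else "voyage"
        return f"reflections_{embedding_type}"

    assert resolve_reflection_collection("voyage", has_local_model=False) == "reflections_voyage"
    assert resolve_reflection_collection(None, has_local_model=True) == "reflections_local"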
@@ -77,8 +91,9 @@ class ReflectionTools:
                 await ctx.debug("Failed to generate embedding for reflection")
                 return "Failed to store reflection: embedding generation failed"
 
-            # Create unique ID
-            reflection_id = hashlib.md5(f"{content}{datetime.now().isoformat()}".encode()).hexdigest()
+            # SECURITY FIX: Use SHA-256 instead of MD5
+            from .security_patches import SecureHashGenerator
+            reflection_id = SecureHashGenerator.generate_id(f"{content}{datetime.now().isoformat()}")
 
             # Prepare metadata
             metadata = {
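
Note: SecureHashGenerator is imported from .security_patches, which this diff does not show. A minimal sketch of what generate_id presumably does, assuming it simply swaps MD5 for SHA-256 over the same input string:

    import hashlib

    class SecureHashGenerator:
        # Hypothetical reconstruction; the shipped class lives in .security_patches
        @staticmethod
        def generate_id(data: str) -> str:
            # SHA-256 hex digest replaces the old hashlib.md5(...).hexdigest()
            return hashlib.sha256(data.encode('utf-8')).hexdigest()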
@@ -104,6 +119,7 @@ class ReflectionTools:
 
             return f"""Reflection stored successfully.
 ID: {reflection_id}
+Collection: {collection_name}
 Tags: {', '.join(tags) if tags else 'none'}
 Timestamp: {metadata['timestamp']}"""
 
@@ -125,17 +141,34 @@ Timestamp: {metadata['timestamp']}"""
         try:
             # Base path for conversations
             base_path = Path.home() / '.claude' / 'projects'
-
+
+            # SECURITY FIX: Validate paths to prevent traversal
+            from .security_patches import PathValidator
+            if not PathValidator.is_safe_path(base_path):
+                logger.error(f"Unsafe base path detected: {base_path}")
+                return "<conversation_file><error>Security validation failed</error></conversation_file>"
+
             # If project is specified, try to find it in that project
             if project:
                 # Normalize project name for path matching
-                project_normalized = self.normalize_project_name(project)
-
+                from .security_patches import InputValidator
+                project_normalized = InputValidator.validate_project_name(
+                    self.normalize_project_name(project)
+                )
+
                 # Look for project directories that match
                 for project_dir in base_path.glob('*'):
+                    # Validate each path before accessing
+                    if not PathValidator.is_safe_path(project_dir):
+                        continue
+
                     if project_normalized in project_dir.name.lower():
                         # Look for JSONL files in this project
                         for jsonl_file in project_dir.glob('*.jsonl'):
+                            # Validate file path
+                            if not PathValidator.is_safe_path(jsonl_file):
+                                continue
+
                             # Check if filename matches conversation_id (with or without .jsonl)
                             if conversation_id in jsonl_file.stem or conversation_id == jsonl_file.stem:
                                 await ctx.debug(f"Found conversation by filename in {jsonl_file}")
@@ -148,8 +181,17 @@ Timestamp: {metadata['timestamp']}"""
 
             # If not found in specific project or no project specified, search all
             await ctx.debug("Searching all projects for conversation")
+            from .security_patches import PathValidator
             for project_dir in base_path.glob('*'):
+                # SECURITY FIX: Validate each path before accessing
+                if not PathValidator.is_safe_path(project_dir):
+                    continue
+
                 for jsonl_file in project_dir.glob('*.jsonl'):
+                    # Validate file path
+                    if not PathValidator.is_safe_path(jsonl_file):
+                        continue
+
                     # Check if filename matches conversation_id (with or without .jsonl)
                     if conversation_id in jsonl_file.stem or conversation_id == jsonl_file.stem:
                         await ctx.debug(f"Found conversation by filename in {jsonl_file}")
@@ -103,6 +103,109 @@ def format_search_results_rich(
     result_text += f" <relevance>No conversations matched your query</relevance>\n"
     result_text += f" </result-summary>\n"
 
+    # Add aggregated insights section (NEW FEATURE)
+    if results and len(results) > 1:
+        result_text += " <insights>\n"
+        result_text += f" <!-- Processing {len(results)} results for pattern analysis -->\n"
+
+        # Aggregate file modification patterns
+        file_frequency = {}
+        tool_frequency = {}
+        concept_frequency = {}
+
+        for result in results:
+            # Count file modifications
+            for file in result.get('files_analyzed', []):
+                file_frequency[file] = file_frequency.get(file, 0) + 1
+
+            # Count tool usage
+            for tool in result.get('tools_used', []):
+                tool_frequency[tool] = tool_frequency.get(tool, 0) + 1
+
+            # Count concepts
+            for concept in result.get('concepts', []):
+                concept_frequency[concept] = concept_frequency.get(concept, 0) + 1
+
+        # Show most frequently modified files
+        if file_frequency:
+            top_files = sorted(file_frequency.items(), key=lambda x: x[1], reverse=True)[:3]
+            if top_files:
+                result_text += ' <pattern type="files">\n'
+                result_text += f' <title>📁 Frequently Modified Files</title>\n'
+                for file, count in top_files:
+                    percentage = (count / len(results)) * 100
+                    result_text += f' <item count="{count}" pct="{percentage:.0f}%">{file}</item>\n'
+                result_text += ' </pattern>\n'
+
+        # Show common tools used
+        if tool_frequency:
+            top_tools = sorted(tool_frequency.items(), key=lambda x: x[1], reverse=True)[:3]
+            if top_tools:
+                result_text += ' <pattern type="tools">\n'
+                result_text += f' <title>🔧 Common Tools Used</title>\n'
+                for tool, count in top_tools:
+                    percentage = (count / len(results)) * 100
+                    result_text += f' <item count="{count}" pct="{percentage:.0f}%">{tool}</item>\n'
+                result_text += ' </pattern>\n'
+
+        # Show related concepts
+        if concept_frequency:
+            top_concepts = sorted(concept_frequency.items(), key=lambda x: x[1], reverse=True)[:3]
+            if top_concepts:
+                result_text += ' <pattern type="concepts">\n'
+                result_text += f' <title>💡 Related Concepts</title>\n'
+                for concept, count in top_concepts:
+                    percentage = (count / len(results)) * 100
+                    result_text += f' <item count="{count}" pct="{percentage:.0f}%">{concept}</item>\n'
+                result_text += ' </pattern>\n'
+
+        # Add workflow suggestion based on patterns
+        if file_frequency and tool_frequency:
+            most_common_file = list(file_frequency.keys())[0] if file_frequency else None
+            most_common_tool = list(tool_frequency.keys())[0] if tool_frequency else None
+            if most_common_file and most_common_tool:
+                result_text += ' <suggestion>\n'
+                result_text += f' <title>💭 Pattern Detection</title>\n'
+                result_text += f' <text>Similar conversations often involve {most_common_tool} on {most_common_file}</text>\n'
+                result_text += ' </suggestion>\n'
+
+        # Always show a summary even if no clear patterns
+        if not file_frequency and not tool_frequency and not concept_frequency:
+            result_text += ' <summary>\n'
+            result_text += f' <title>📊 Analysis Summary</title>\n'
+            result_text += f' <text>Analyzed {len(results)} conversations for patterns</text>\n'
+
+            # Show temporal distribution
+            now = datetime.now(timezone.utc)
+            time_dist = {"today": 0, "week": 0, "month": 0, "older": 0}
+            for result in results:
+                timestamp_str = result.get('timestamp', '')
+                if timestamp_str:
+                    try:
+                        timestamp_clean = timestamp_str.replace('Z', '+00:00') if timestamp_str.endswith('Z') else timestamp_str
+                        timestamp_dt = datetime.fromisoformat(timestamp_clean)
+                        if timestamp_dt.tzinfo is None:
+                            timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
+                        days_ago = (now - timestamp_dt).days
+                        if days_ago == 0:
+                            time_dist["today"] += 1
+                        elif days_ago <= 7:
+                            time_dist["week"] += 1
+                        elif days_ago <= 30:
+                            time_dist["month"] += 1
+                        else:
+                            time_dist["older"] += 1
+                    except:
+                        pass
+
+            if any(time_dist.values()):
+                dist_str = ", ".join([f"{v} {k}" for k, v in time_dist.items() if v > 0])
+                result_text += f' <temporal>Time distribution: {dist_str}</temporal>\n'
+
+            result_text += ' </summary>\n'
+
+        result_text += " </insights>\n\n"
+
     # Add metadata
     result_text += f" <meta>\n"
     result_text += f" <q>{query}</q>\n"
@@ -83,10 +83,20 @@ class SearchTools:
         # Generate embedding for query
         embedding_manager = self.get_embedding_manager()
 
-        # Determine embedding type based on collection name
-        embedding_type = 'voyage' if collection_name.endswith('_voyage') else 'local'
+        # Determine embedding type based on collection name (v3 and v4 compatible)
+        # v4 format: csr_project_mode_dims (e.g., csr_project_cloud_1024d)
+        # v3 format: project_suffix (e.g., project_voyage)
+        if '_cloud_' in collection_name or collection_name.endswith('_1024d') or collection_name.endswith('_voyage'):
+            embedding_type = 'voyage'
+        else:
+            embedding_type = 'local'
         query_embedding = await embedding_manager.generate_embedding(query, force_type=embedding_type)
-
+
+        # FIX: Validate embedding before search
+        if query_embedding is None:
+            logger.warning(f"Embedding generation failed for query in {collection_name}")
+            return []
+
         # Search the collection
         search_results = await self.qdrant_client.search(
             collection_name=collection_name,
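
Note: for reference, how this predicate classifies a few example names (illustrative names only):

    def embedding_type_for(collection_name: str) -> str:
        if ('_cloud_' in collection_name
                or collection_name.endswith('_1024d')
                or collection_name.endswith('_voyage')):
            return 'voyage'
        return 'local'

    assert embedding_type_for('csr_myproj_cloud_1024d') == 'voyage'  # v4 cloud
    assert embedding_type_for('myproj_voyage') == 'voyage'           # v3 cloud
    assert embedding_type_for('myproj_local') == 'local'             # v3 local
    assert embedding_type_for('reflections_local') == 'local'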
@@ -132,9 +142,9 @@ class SearchTools:
             # Apply exponential decay
             decay_factor = pow(2, -age / self.decay_scale_days)
 
-            # Adjust score
+            # Adjust score - FIX: Maintain comparable scale
             original_score = result['score']
-            result['score'] = original_score * (1 - self.decay_weight) + decay_factor * self.decay_weight
+            result['score'] = original_score * ((1 - self.decay_weight) + self.decay_weight * decay_factor)
             result['original_score'] = original_score
             result['decay_factor'] = decay_factor
 
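Note: the fix changes the blend from additive to multiplicative. The old form mixed the similarity score with the raw decay factor, so a weak but fresh match could be pushed above its own relevance; the new form scales the score into the range [original * (1 - w), original]. A worked example with decay_weight w = 0.3:

    original_score = 0.2   # weak semantic match
    decay_factor = 1.0     # conversation from today

    old = original_score * (1 - 0.3) + decay_factor * 0.3    # 0.14 + 0.30 = 0.44, inflated past the match quality
    new = original_score * ((1 - 0.3) + 0.3 * decay_factor)  # 0.2 * 1.0 = 0.20, never exceeds original_score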
@@ -242,12 +252,14 @@ class SearchTools:
             ]
             await ctx.debug(f"Filtered to {len(filtered_collections)} collections from {len(all_collections)} total")
         else:
-            # Use all collections except reflections
+            # Use all collections INCLUDING reflections (with decay)
             collections_response = await self.qdrant_client.get_collections()
             collections = collections_response.collections
+            # Include both conversation collections and reflection collections
             filtered_collections = [
-                c for c in collections
-                if not c.name.startswith('reflections')
+                c for c in collections
+                if (c.name.endswith('_local') or c.name.endswith('_voyage') or
+                    c.name.startswith('reflections'))
             ]
         await ctx.debug(f"Searching across {len(filtered_collections)} collections")
 
@@ -362,12 +374,14 @@ class SearchTools:
                 if c.name in collection_names
             ]
         else:
-            # Use all collections except reflections
+            # Use all collections INCLUDING reflections (with decay)
             collections_response = await self.qdrant_client.get_collections()
             collections = collections_response.collections
+            # Include both conversation collections and reflection collections
             filtered_collections = [
-                c for c in collections
-                if not c.name.startswith('reflections')
+                c for c in collections
+                if (c.name.endswith('_local') or c.name.endswith('_voyage') or
+                    c.name.startswith('reflections'))
             ]
 
         # Quick PARALLEL count across collections
@@ -450,12 +464,14 @@ class SearchTools:
                 if c.name in collection_names
             ]
         else:
-            # Use all collections except reflections
+            # Use all collections INCLUDING reflections (with decay)
             collections_response = await self.qdrant_client.get_collections()
             collections = collections_response.collections
+            # Include both conversation collections and reflection collections
             filtered_collections = [
-                c for c in collections
-                if not c.name.startswith('reflections')
+                c for c in collections
+                if (c.name.endswith('_local') or c.name.endswith('_voyage') or
+                    c.name.startswith('reflections'))
             ]
 
         # Gather results for summary using PARALLEL search
@@ -545,12 +561,14 @@ class SearchTools:
                 if c.name in collection_names
             ]
         else:
-            # Use all collections except reflections
+            # Use all collections INCLUDING reflections (with decay)
             collections_response = await self.qdrant_client.get_collections()
             collections = collections_response.collections
+            # Include both conversation collections and reflection collections
             filtered_collections = [
-                c for c in collections
-                if not c.name.startswith('reflections')
+                c for c in collections
+                if (c.name.endswith('_local') or c.name.endswith('_voyage') or
+                    c.name.startswith('reflections'))
             ]
 
         # Gather all results using PARALLEL search
@@ -698,17 +716,21 @@ class SearchTools:
                 await ctx.debug(f"Error searching {collection_name}: {e}")
                 return []
 
-        # Use asyncio.gather for PARALLEL search across all collections
+        # SECURITY FIX: Use proper concurrency limiting
         import asyncio
+        from .security_patches import ConcurrencyLimiter
+
         search_tasks = [search_collection(c.name) for c in collections]
-
-        # Limit concurrent searches to avoid overload
-        batch_size = 20
+
+        # Use semaphore-based limiting instead of batching
         all_results = []
-        for i in range(0, len(search_tasks), batch_size):
-            batch = search_tasks[i:i+batch_size]
-            batch_results = await asyncio.gather(*batch)
-            for results in batch_results:
+        batch_results = await ConcurrencyLimiter.limited_gather(search_tasks, limit=10)
+        for results in batch_results:
+            if isinstance(results, Exception):
+                logger.error(f"Search task failed: {type(results).__name__}: {results}")
+                await ctx.debug(f"Search task error: {results}")
+                continue
+            if results:
                 all_results.extend(results)
 
         # Format results
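
Note: ConcurrencyLimiter.limited_gather is another import from the unshown .security_patches module. Given that the caller checks isinstance(results, Exception), it presumably gathers with return_exceptions=True under a semaphore. A minimal sketch under those assumptions:

    import asyncio
    from typing import Any, Awaitable, List

    class ConcurrencyLimiter:
        # Hypothetical reconstruction of the imported helper
        @staticmethod
        async def limited_gather(tasks: List[Awaitable[Any]], limit: int = 10) -> List[Any]:
            semaphore = asyncio.Semaphore(limit)

            async def run(task: Awaitable[Any]) -> Any:
                async with semaphore:  # at most `limit` coroutines run at once
                    return await task

            # return_exceptions=True so one failed search cannot take down the batch;
            # callers filter failures with isinstance(result, Exception)
            return await asyncio.gather(*(run(t) for t in tasks), return_exceptions=True)

Unlike the old fixed batches of 20, a semaphore keeps exactly `limit` searches in flight, so one slow collection no longer stalls an entire batch.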
@@ -791,7 +813,7 @@ def register_search_tools(
         project_resolver  # Pass the resolver
     )
 
-    @mcp.tool()
+    @mcp.tool(name="csr_reflect_on_past")
     async def reflect_on_past(
         ctx: Context,
         query: str = Field(description="The search query to find semantically similar conversations"),
@@ -804,29 +826,45 @@ def register_search_tools(
         include_raw: bool = Field(default=False, description="Include raw Qdrant payload data for debugging (increases response size)"),
         response_format: str = Field(default="xml", description="Response format: 'xml' or 'markdown'")
     ) -> str:
-        """Search for relevant past conversations using semantic search with optional time decay."""
+        """Search past Claude conversations semantically to find relevant context.
+
+        WHEN TO USE: User asks 'what did we discuss about X?', 'find conversations about Y',
+        mentions 'remember when' or 'last time', debugging issues that may have been solved before,
+        or finding implementation patterns used in the project.
+
+        This is the PRIMARY tool for conversation memory - use it liberally!"""
         return await tools.reflect_on_past(ctx, query, limit, min_score, use_decay, project, mode, brief, include_raw, response_format)
 
-    @mcp.tool()
+    @mcp.tool(name="csr_quick_check")
     async def quick_search(
         ctx: Context,
         query: str = Field(description="The search query to find semantically similar conversations"),
         min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
         project: Optional[str] = Field(default=None, description="Search specific project only. If not provided, searches current project based on working directory. Use 'all' to search across all projects.")
     ) -> str:
-        """Quick search that returns only the count and top result for fast overview."""
+        """Quick check if a topic was discussed before (returns count + top match only).
+
+        WHEN TO USE: User asks 'have we discussed X?' or 'is there anything about Y?',
+        need a yes/no answer about topic existence, checking if a problem was encountered before.
+
+        Much faster than full search - use for existence checks!"""
         return await tools.quick_search(ctx, query, min_score, project)
 
-    @mcp.tool()
+    @mcp.tool(name="csr_search_insights")
     async def search_summary(
         ctx: Context,
         query: str = Field(description="The search query to find semantically similar conversations"),
         project: Optional[str] = Field(default=None, description="Search specific project only. If not provided, searches current project based on working directory. Use 'all' to search across all projects.")
     ) -> str:
-        """Get aggregated insights from search results without individual result details."""
+        """Get aggregated insights and patterns from search results.
+
+        WHEN TO USE: User wants patterns or trends, analyzing topic evolution,
+        understanding common themes, getting high-level view without details.
+
+        Provides analysis, not just search results!"""
         return await tools.search_summary(ctx, query, project)
 
-    @mcp.tool()
+    @mcp.tool(name="csr_get_more")
     async def get_more_results(
         ctx: Context,
         query: str = Field(description="The original search query"),
@@ -835,20 +873,30 @@ def register_search_tools(
         min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
         project: Optional[str] = Field(default=None, description="Search specific project only")
     ) -> str:
-        """Get additional search results after an initial search (pagination support)."""
+        """Get additional search results for paginated exploration.
+
+        WHEN TO USE: User says 'show me more' after a search, initial results weren't sufficient,
+        deep diving into a topic, user wants comprehensive coverage.
+
+        Use after initial search when more context is needed!"""
         return await tools.get_more_results(ctx, query, offset, limit, min_score, project)
 
-    @mcp.tool()
+    @mcp.tool(name="csr_search_by_file")
     async def search_by_file(
         ctx: Context,
         file_path: str = Field(description="The file path to search for in conversations"),
         limit: int = Field(default=10, description="Maximum number of results to return"),
         project: Optional[str] = Field(default=None, description="Search specific project only. Use 'all' to search across all projects.")
     ) -> str:
-        """Search for conversations that analyzed a specific file."""
+        """Find all conversations that analyzed or modified a specific file.
+
+        WHEN TO USE: User asks 'when did we modify X file?', investigating file history,
+        understanding why changes were made, finding discussions about specific code files.
+
+        Perfect for code archaeology and understanding file evolution!"""
         return await tools.search_by_file(ctx, file_path, limit, project)
 
-    @mcp.tool()
+    @mcp.tool(name="csr_search_by_concept")
     async def search_by_concept(
         ctx: Context,
         concept: str = Field(description="The concept to search for (e.g., 'security', 'docker', 'testing')"),
@@ -856,7 +904,12 @@ def register_search_tools(
         project: Optional[str] = Field(default=None, description="Search specific project only. Use 'all' to search across all projects."),
         include_files: bool = Field(default=True, description="Include file information in results")
     ) -> str:
-        """Search for conversations about a specific development concept."""
+        """Search for conversations about specific development concepts or themes.
+
+        WHEN TO USE: User asks about broad topics like 'security', 'testing', 'performance',
+        looking for all discussions on a technical theme, gathering knowledge about a concept.
+
+        Ideal for thematic analysis and knowledge gathering!"""
         return await tools.search_by_concept(ctx, concept, limit, project, include_files)
 
     @mcp.tool()
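
Note: the name= argument registers each tool under a namespaced identifier while leaving the Python function name unchanged, so MCP clients now list csr_reflect_on_past, csr_quick_check, and so on instead of the generic names. A toy illustration of the mechanism (a demo server, not the package's actual setup):

    from mcp.server.fastmcp import FastMCP

    mcp = FastMCP("demo")

    @mcp.tool(name="csr_quick_check")
    async def quick_search(query: str) -> str:
        # Clients discover and call this tool as "csr_quick_check", not "quick_search"
        return f"checked: {query}"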