claude-self-reflect 3.3.1 → 4.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -44,10 +44,24 @@ class ReflectionTools:
44
44
  await ctx.debug(f"Storing reflection with {len(tags)} tags")
45
45
 
46
46
  try:
47
- # Determine collection name based on active model type, not prefer_local
47
+ # Check runtime preference from environment
48
+ import os
49
+ prefer_local = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'true').lower() == 'true'
50
+
48
51
  embedding_manager = self.get_embedding_manager()
49
- # Use actual model_type to ensure consistency
50
- embedding_type = embedding_manager.model_type or ("voyage" if embedding_manager.voyage_client else "local")
52
+
53
+ # Use embedding_manager's model_type which already respects preferences
54
+ embedding_type = embedding_manager.model_type
55
+
56
+ if embedding_type == "local":
57
+ await ctx.debug("Using LOCAL mode (FastEmbed, 384 dimensions)")
58
+ elif embedding_type == "voyage":
59
+ await ctx.debug("Using VOYAGE mode (Voyage AI, 1024 dimensions)")
60
+ else:
61
+ # Shouldn't happen but handle gracefully
62
+ embedding_type = "local" if embedding_manager.local_model else "voyage"
63
+ await ctx.debug(f"Using {embedding_type} mode (fallback)")
64
+
51
65
  collection_name = f"reflections_{embedding_type}"
52
66
 
53
67
  # Ensure reflections collection exists
@@ -77,8 +91,9 @@ class ReflectionTools:
77
91
  await ctx.debug("Failed to generate embedding for reflection")
78
92
  return "Failed to store reflection: embedding generation failed"
79
93
 
80
- # Create unique ID
81
- reflection_id = hashlib.md5(f"{content}{datetime.now().isoformat()}".encode()).hexdigest()
94
+ # SECURITY FIX: Use SHA-256 instead of MD5
95
+ from .security_patches import SecureHashGenerator
96
+ reflection_id = SecureHashGenerator.generate_id(f"{content}{datetime.now().isoformat()}")
82
97
 
83
98
  # Prepare metadata
84
99
  metadata = {
@@ -104,6 +119,7 @@ class ReflectionTools:
104
119
 
105
120
  return f"""Reflection stored successfully.
106
121
  ID: {reflection_id}
122
+ Collection: {collection_name}
107
123
  Tags: {', '.join(tags) if tags else 'none'}
108
124
  Timestamp: {metadata['timestamp']}"""
109
125
 
@@ -125,17 +141,34 @@ Timestamp: {metadata['timestamp']}"""
125
141
  try:
126
142
  # Base path for conversations
127
143
  base_path = Path.home() / '.claude' / 'projects'
128
-
144
+
145
+ # SECURITY FIX: Validate paths to prevent traversal
146
+ from .security_patches import PathValidator
147
+ if not PathValidator.is_safe_path(base_path):
148
+ logger.error(f"Unsafe base path detected: {base_path}")
149
+ return "<conversation_file><error>Security validation failed</error></conversation_file>"
150
+
129
151
  # If project is specified, try to find it in that project
130
152
  if project:
131
153
  # Normalize project name for path matching
132
- project_normalized = self.normalize_project_name(project)
133
-
154
+ from .security_patches import InputValidator
155
+ project_normalized = InputValidator.validate_project_name(
156
+ self.normalize_project_name(project)
157
+ )
158
+
134
159
  # Look for project directories that match
135
160
  for project_dir in base_path.glob('*'):
161
+ # Validate each path before accessing
162
+ if not PathValidator.is_safe_path(project_dir):
163
+ continue
164
+
136
165
  if project_normalized in project_dir.name.lower():
137
166
  # Look for JSONL files in this project
138
167
  for jsonl_file in project_dir.glob('*.jsonl'):
168
+ # Validate file path
169
+ if not PathValidator.is_safe_path(jsonl_file):
170
+ continue
171
+
139
172
  # Check if filename matches conversation_id (with or without .jsonl)
140
173
  if conversation_id in jsonl_file.stem or conversation_id == jsonl_file.stem:
141
174
  await ctx.debug(f"Found conversation by filename in {jsonl_file}")
@@ -148,8 +181,17 @@ Timestamp: {metadata['timestamp']}"""
148
181
 
149
182
  # If not found in specific project or no project specified, search all
150
183
  await ctx.debug("Searching all projects for conversation")
184
+ from .security_patches import PathValidator
151
185
  for project_dir in base_path.glob('*'):
186
+ # SECURITY FIX: Validate each path before accessing
187
+ if not PathValidator.is_safe_path(project_dir):
188
+ continue
189
+
152
190
  for jsonl_file in project_dir.glob('*.jsonl'):
191
+ # Validate file path
192
+ if not PathValidator.is_safe_path(jsonl_file):
193
+ continue
194
+
153
195
  # Check if filename matches conversation_id (with or without .jsonl)
154
196
  if conversation_id in jsonl_file.stem or conversation_id == jsonl_file.stem:
155
197
  await ctx.debug(f"Found conversation by filename in {jsonl_file}")
@@ -103,6 +103,109 @@ def format_search_results_rich(
103
103
  result_text += f" <relevance>No conversations matched your query</relevance>\n"
104
104
  result_text += f" </result-summary>\n"
105
105
 
106
+ # Add aggregated insights section (NEW FEATURE)
107
+ if results and len(results) > 1:
108
+ result_text += " <insights>\n"
109
+ result_text += f" <!-- Processing {len(results)} results for pattern analysis -->\n"
110
+
111
+ # Aggregate file modification patterns
112
+ file_frequency = {}
113
+ tool_frequency = {}
114
+ concept_frequency = {}
115
+
116
+ for result in results:
117
+ # Count file modifications
118
+ for file in result.get('files_analyzed', []):
119
+ file_frequency[file] = file_frequency.get(file, 0) + 1
120
+
121
+ # Count tool usage
122
+ for tool in result.get('tools_used', []):
123
+ tool_frequency[tool] = tool_frequency.get(tool, 0) + 1
124
+
125
+ # Count concepts
126
+ for concept in result.get('concepts', []):
127
+ concept_frequency[concept] = concept_frequency.get(concept, 0) + 1
128
+
129
+ # Show most frequently modified files
130
+ if file_frequency:
131
+ top_files = sorted(file_frequency.items(), key=lambda x: x[1], reverse=True)[:3]
132
+ if top_files:
133
+ result_text += ' <pattern type="files">\n'
134
+ result_text += f' <title>📁 Frequently Modified Files</title>\n'
135
+ for file, count in top_files:
136
+ percentage = (count / len(results)) * 100
137
+ result_text += f' <item count="{count}" pct="{percentage:.0f}%">{file}</item>\n'
138
+ result_text += ' </pattern>\n'
139
+
140
+ # Show common tools used
141
+ if tool_frequency:
142
+ top_tools = sorted(tool_frequency.items(), key=lambda x: x[1], reverse=True)[:3]
143
+ if top_tools:
144
+ result_text += ' <pattern type="tools">\n'
145
+ result_text += f' <title>🔧 Common Tools Used</title>\n'
146
+ for tool, count in top_tools:
147
+ percentage = (count / len(results)) * 100
148
+ result_text += f' <item count="{count}" pct="{percentage:.0f}%">{tool}</item>\n'
149
+ result_text += ' </pattern>\n'
150
+
151
+ # Show related concepts
152
+ if concept_frequency:
153
+ top_concepts = sorted(concept_frequency.items(), key=lambda x: x[1], reverse=True)[:3]
154
+ if top_concepts:
155
+ result_text += ' <pattern type="concepts">\n'
156
+ result_text += f' <title>💡 Related Concepts</title>\n'
157
+ for concept, count in top_concepts:
158
+ percentage = (count / len(results)) * 100
159
+ result_text += f' <item count="{count}" pct="{percentage:.0f}%">{concept}</item>\n'
160
+ result_text += ' </pattern>\n'
161
+
162
+ # Add workflow suggestion based on patterns
163
+ if file_frequency and tool_frequency:
164
+ most_common_file = list(file_frequency.keys())[0] if file_frequency else None
165
+ most_common_tool = list(tool_frequency.keys())[0] if tool_frequency else None
166
+ if most_common_file and most_common_tool:
167
+ result_text += ' <suggestion>\n'
168
+ result_text += f' <title>💭 Pattern Detection</title>\n'
169
+ result_text += f' <text>Similar conversations often involve {most_common_tool} on {most_common_file}</text>\n'
170
+ result_text += ' </suggestion>\n'
171
+
172
+ # Always show a summary even if no clear patterns
173
+ if not file_frequency and not tool_frequency and not concept_frequency:
174
+ result_text += ' <summary>\n'
175
+ result_text += f' <title>📊 Analysis Summary</title>\n'
176
+ result_text += f' <text>Analyzed {len(results)} conversations for patterns</text>\n'
177
+
178
+ # Show temporal distribution
179
+ now = datetime.now(timezone.utc)
180
+ time_dist = {"today": 0, "week": 0, "month": 0, "older": 0}
181
+ for result in results:
182
+ timestamp_str = result.get('timestamp', '')
183
+ if timestamp_str:
184
+ try:
185
+ timestamp_clean = timestamp_str.replace('Z', '+00:00') if timestamp_str.endswith('Z') else timestamp_str
186
+ timestamp_dt = datetime.fromisoformat(timestamp_clean)
187
+ if timestamp_dt.tzinfo is None:
188
+ timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
189
+ days_ago = (now - timestamp_dt).days
190
+ if days_ago == 0:
191
+ time_dist["today"] += 1
192
+ elif days_ago <= 7:
193
+ time_dist["week"] += 1
194
+ elif days_ago <= 30:
195
+ time_dist["month"] += 1
196
+ else:
197
+ time_dist["older"] += 1
198
+ except:
199
+ pass
200
+
201
+ if any(time_dist.values()):
202
+ dist_str = ", ".join([f"{v} {k}" for k, v in time_dist.items() if v > 0])
203
+ result_text += f' <temporal>Time distribution: {dist_str}</temporal>\n'
204
+
205
+ result_text += ' </summary>\n'
206
+
207
+ result_text += " </insights>\n\n"
208
+
106
209
  # Add metadata
107
210
  result_text += f" <meta>\n"
108
211
  result_text += f" <q>{query}</q>\n"
@@ -83,18 +83,34 @@ class SearchTools:
83
83
  # Generate embedding for query
84
84
  embedding_manager = self.get_embedding_manager()
85
85
 
86
- # Determine embedding type based on collection name
87
- embedding_type = 'voyage' if collection_name.endswith('_voyage') else 'local'
86
+ # Determine embedding type based on collection name (v3 and v4 compatible)
87
+ # v4 format: csr_project_mode_dims (e.g., csr_project_cloud_1024d)
88
+ # v3 format: project_suffix (e.g., project_voyage)
89
+ if '_cloud_' in collection_name or collection_name.endswith('_1024d') or collection_name.endswith('_voyage'):
90
+ embedding_type = 'voyage'
91
+ else:
92
+ embedding_type = 'local'
88
93
  query_embedding = await embedding_manager.generate_embedding(query, force_type=embedding_type)
89
-
94
+
95
+ # FIX: Validate embedding before search
96
+ if query_embedding is None:
97
+ logger.warning(f"Embedding generation failed for query in {collection_name}")
98
+ return []
99
+
90
100
  # Search the collection
91
101
  search_results = await self.qdrant_client.search(
92
102
  collection_name=collection_name,
93
103
  query_vector=query_embedding,
94
104
  limit=limit,
95
- score_threshold=min_score
105
+ score_threshold=min_score,
106
+ with_payload=True # Explicitly request payloads from Qdrant
96
107
  )
97
-
108
+
109
+ # CRITICAL FIX: Handle None search results (cloud mode issue)
110
+ if search_results is None:
111
+ logger.warning(f"Search returned None for collection {collection_name}")
112
+ search_results = []
113
+
98
114
  # Convert results to dict format
99
115
  results = []
100
116
  for result in search_results:
@@ -132,9 +148,9 @@ class SearchTools:
132
148
  # Apply exponential decay
133
149
  decay_factor = pow(2, -age / self.decay_scale_days)
134
150
 
135
- # Adjust score
151
+ # Adjust score - FIX: Maintain comparable scale
136
152
  original_score = result['score']
137
- result['score'] = original_score * (1 - self.decay_weight) + decay_factor * self.decay_weight
153
+ result['score'] = original_score * ((1 - self.decay_weight) + self.decay_weight * decay_factor)
138
154
  result['original_score'] = original_score
139
155
  result['decay_factor'] = decay_factor
140
156
 
@@ -242,12 +258,14 @@ class SearchTools:
242
258
  ]
243
259
  await ctx.debug(f"Filtered to {len(filtered_collections)} collections from {len(all_collections)} total")
244
260
  else:
245
- # Use all collections except reflections
261
+ # Use all collections INCLUDING reflections (with decay)
246
262
  collections_response = await self.qdrant_client.get_collections()
247
263
  collections = collections_response.collections
264
+ # Include both conversation collections and reflection collections
248
265
  filtered_collections = [
249
- c for c in collections
250
- if not c.name.startswith('reflections')
266
+ c for c in collections
267
+ if (c.name.endswith('_local') or c.name.endswith('_voyage') or
268
+ c.name.startswith('reflections'))
251
269
  ]
252
270
  await ctx.debug(f"Searching across {len(filtered_collections)} collections")
253
271
 
@@ -362,12 +380,14 @@ class SearchTools:
362
380
  if c.name in collection_names
363
381
  ]
364
382
  else:
365
- # Use all collections except reflections
383
+ # Use all collections INCLUDING reflections (with decay)
366
384
  collections_response = await self.qdrant_client.get_collections()
367
385
  collections = collections_response.collections
386
+ # Include both conversation collections and reflection collections
368
387
  filtered_collections = [
369
- c for c in collections
370
- if not c.name.startswith('reflections')
388
+ c for c in collections
389
+ if (c.name.endswith('_local') or c.name.endswith('_voyage') or
390
+ c.name.startswith('reflections'))
371
391
  ]
372
392
 
373
393
  # Quick PARALLEL count across collections
@@ -450,12 +470,14 @@ class SearchTools:
450
470
  if c.name in collection_names
451
471
  ]
452
472
  else:
453
- # Use all collections except reflections
473
+ # Use all collections INCLUDING reflections (with decay)
454
474
  collections_response = await self.qdrant_client.get_collections()
455
475
  collections = collections_response.collections
476
+ # Include both conversation collections and reflection collections
456
477
  filtered_collections = [
457
- c for c in collections
458
- if not c.name.startswith('reflections')
478
+ c for c in collections
479
+ if (c.name.endswith('_local') or c.name.endswith('_voyage') or
480
+ c.name.startswith('reflections'))
459
481
  ]
460
482
 
461
483
  # Gather results for summary using PARALLEL search
@@ -545,12 +567,14 @@ class SearchTools:
545
567
  if c.name in collection_names
546
568
  ]
547
569
  else:
548
- # Use all collections except reflections
570
+ # Use all collections INCLUDING reflections (with decay)
549
571
  collections_response = await self.qdrant_client.get_collections()
550
572
  collections = collections_response.collections
573
+ # Include both conversation collections and reflection collections
551
574
  filtered_collections = [
552
- c for c in collections
553
- if not c.name.startswith('reflections')
575
+ c for c in collections
576
+ if (c.name.endswith('_local') or c.name.endswith('_voyage') or
577
+ c.name.startswith('reflections'))
554
578
  ]
555
579
 
556
580
  # Gather all results using PARALLEL search
@@ -698,17 +722,21 @@ class SearchTools:
698
722
  await ctx.debug(f"Error searching {collection_name}: {e}")
699
723
  return []
700
724
 
701
- # Use asyncio.gather for PARALLEL search across all collections
725
+ # SECURITY FIX: Use proper concurrency limiting
702
726
  import asyncio
727
+ from .security_patches import ConcurrencyLimiter
728
+
703
729
  search_tasks = [search_collection(c.name) for c in collections]
704
-
705
- # Limit concurrent searches to avoid overload
706
- batch_size = 20
730
+
731
+ # Use semaphore-based limiting instead of batching
707
732
  all_results = []
708
- for i in range(0, len(search_tasks), batch_size):
709
- batch = search_tasks[i:i+batch_size]
710
- batch_results = await asyncio.gather(*batch)
711
- for results in batch_results:
733
+ batch_results = await ConcurrencyLimiter.limited_gather(search_tasks, limit=10)
734
+ for results in batch_results:
735
+ if isinstance(results, Exception):
736
+ logger.error(f"Search task failed: {type(results).__name__}: {results}")
737
+ await ctx.debug(f"Search task error: {results}")
738
+ continue
739
+ if results:
712
740
  all_results.extend(results)
713
741
 
714
742
  # Format results
@@ -791,7 +819,7 @@ def register_search_tools(
791
819
  project_resolver # Pass the resolver
792
820
  )
793
821
 
794
- @mcp.tool()
822
+ @mcp.tool(name="csr_reflect_on_past")
795
823
  async def reflect_on_past(
796
824
  ctx: Context,
797
825
  query: str = Field(description="The search query to find semantically similar conversations"),
@@ -804,29 +832,45 @@ def register_search_tools(
804
832
  include_raw: bool = Field(default=False, description="Include raw Qdrant payload data for debugging (increases response size)"),
805
833
  response_format: str = Field(default="xml", description="Response format: 'xml' or 'markdown'")
806
834
  ) -> str:
807
- """Search for relevant past conversations using semantic search with optional time decay."""
835
+ """Search past Claude conversations semantically to find relevant context.
836
+
837
+ WHEN TO USE: User asks 'what did we discuss about X?', 'find conversations about Y',
838
+ mentions 'remember when' or 'last time', debugging issues that may have been solved before,
839
+ or finding implementation patterns used in the project.
840
+
841
+ This is the PRIMARY tool for conversation memory - use it liberally!"""
808
842
  return await tools.reflect_on_past(ctx, query, limit, min_score, use_decay, project, mode, brief, include_raw, response_format)
809
843
 
810
- @mcp.tool()
844
+ @mcp.tool(name="csr_quick_check")
811
845
  async def quick_search(
812
846
  ctx: Context,
813
847
  query: str = Field(description="The search query to find semantically similar conversations"),
814
848
  min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
815
849
  project: Optional[str] = Field(default=None, description="Search specific project only. If not provided, searches current project based on working directory. Use 'all' to search across all projects.")
816
850
  ) -> str:
817
- """Quick search that returns only the count and top result for fast overview."""
851
+ """Quick check if a topic was discussed before (returns count + top match only).
852
+
853
+ WHEN TO USE: User asks 'have we discussed X?' or 'is there anything about Y?',
854
+ need a yes/no answer about topic existence, checking if a problem was encountered before.
855
+
856
+ Much faster than full search - use for existence checks!"""
818
857
  return await tools.quick_search(ctx, query, min_score, project)
819
858
 
820
- @mcp.tool()
859
+ @mcp.tool(name="csr_search_insights")
821
860
  async def search_summary(
822
861
  ctx: Context,
823
862
  query: str = Field(description="The search query to find semantically similar conversations"),
824
863
  project: Optional[str] = Field(default=None, description="Search specific project only. If not provided, searches current project based on working directory. Use 'all' to search across all projects.")
825
864
  ) -> str:
826
- """Get aggregated insights from search results without individual result details."""
865
+ """Get aggregated insights and patterns from search results.
866
+
867
+ WHEN TO USE: User wants patterns or trends, analyzing topic evolution,
868
+ understanding common themes, getting high-level view without details.
869
+
870
+ Provides analysis, not just search results!"""
827
871
  return await tools.search_summary(ctx, query, project)
828
872
 
829
- @mcp.tool()
873
+ @mcp.tool(name="csr_get_more")
830
874
  async def get_more_results(
831
875
  ctx: Context,
832
876
  query: str = Field(description="The original search query"),
@@ -835,20 +879,30 @@ def register_search_tools(
835
879
  min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
836
880
  project: Optional[str] = Field(default=None, description="Search specific project only")
837
881
  ) -> str:
838
- """Get additional search results after an initial search (pagination support)."""
882
+ """Get additional search results for paginated exploration.
883
+
884
+ WHEN TO USE: User says 'show me more' after a search, initial results weren't sufficient,
885
+ deep diving into a topic, user wants comprehensive coverage.
886
+
887
+ Use after initial search when more context is needed!"""
839
888
  return await tools.get_more_results(ctx, query, offset, limit, min_score, project)
840
889
 
841
- @mcp.tool()
890
+ @mcp.tool(name="csr_search_by_file")
842
891
  async def search_by_file(
843
892
  ctx: Context,
844
893
  file_path: str = Field(description="The file path to search for in conversations"),
845
894
  limit: int = Field(default=10, description="Maximum number of results to return"),
846
895
  project: Optional[str] = Field(default=None, description="Search specific project only. Use 'all' to search across all projects.")
847
896
  ) -> str:
848
- """Search for conversations that analyzed a specific file."""
897
+ """Find all conversations that analyzed or modified a specific file.
898
+
899
+ WHEN TO USE: User asks 'when did we modify X file?', investigating file history,
900
+ understanding why changes were made, finding discussions about specific code files.
901
+
902
+ Perfect for code archaeology and understanding file evolution!"""
849
903
  return await tools.search_by_file(ctx, file_path, limit, project)
850
904
 
851
- @mcp.tool()
905
+ @mcp.tool(name="csr_search_by_concept")
852
906
  async def search_by_concept(
853
907
  ctx: Context,
854
908
  concept: str = Field(description="The concept to search for (e.g., 'security', 'docker', 'testing')"),
@@ -856,7 +910,12 @@ def register_search_tools(
856
910
  project: Optional[str] = Field(default=None, description="Search specific project only. Use 'all' to search across all projects."),
857
911
  include_files: bool = Field(default=True, description="Include file information in results")
858
912
  ) -> str:
859
- """Search for conversations about a specific development concept."""
913
+ """Search for conversations about specific development concepts or themes.
914
+
915
+ WHEN TO USE: User asks about broad topics like 'security', 'testing', 'performance',
916
+ looking for all discussions on a technical theme, gathering knowledge about a concept.
917
+
918
+ Ideal for thematic analysis and knowledge gathering!"""
860
919
  return await tools.search_by_concept(ctx, concept, limit, project, include_files)
861
920
 
862
921
  @mcp.tool()