claude-self-reflect 2.5.19 → 2.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,527 @@
1
+ """
2
+ Project name resolution for claude-self-reflect.
3
+ Handles mapping between user-friendly names and internal collection names.
4
+ """
5
+
6
+ import hashlib
7
+ import logging
8
+ import re
9
+ from pathlib import Path
10
+ from typing import List, Dict, Optional, Set
11
+ from time import time
12
+ from qdrant_client import QdrantClient
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Project discovery markers - common parent directories that indicate project roots
17
+ PROJECT_MARKERS = {'projects', 'code', 'Code', 'repos', 'repositories',
18
+ 'dev', 'Development', 'work', 'src', 'github', 'gitlab'}
19
+
20
+ # Patterns to filter out from project segments - keep minimal
21
+ # Users create Claude conversations from their actual working directories
22
+ FILTER_PATTERNS = {
23
+ r'^[a-f0-9]{32}$', # Full MD5 hashes
24
+ r'^[a-f0-9]{40}$', # Full SHA1 hashes
25
+ r'^\.$', # Single dot
26
+ r'^\.\.$', # Double dot
27
+ }
28
+
29
+
30
+ class ProjectResolver:
31
+ """Resolves user-friendly project names to collection names."""
32
+
33
+ def __init__(self, qdrant_client: QdrantClient):
34
+ self.client = qdrant_client
35
+ self._cache: Dict[str, Set[str]] = {}
36
+ self._cache_ttl: Dict[str, float] = {}
37
+ self._cache_duration = 300 # 5 minutes TTL
38
+ self._reverse_cache: Dict[str, str] = {}
39
+ # Collection names cache
40
+ self._collections_cache: List[str] = []
41
+ self._collections_cache_time: float = 0
42
+ # Compile filter patterns for efficiency (NOTE: the local 'import re' below is redundant — re is already imported at module level)
43
+ import re
44
+ self._filter_patterns = [re.compile(p) for p in FILTER_PATTERNS]
45
+
46
+ def find_collections_for_project(self, user_project_name: str) -> List[str]:
47
+ """
48
+ Find all collections that match a user-provided project name.
49
+
50
+ Tries multiple strategies, in order:
51
+ 1. Direct hash of the input (handles full paths)
52
+ 2. Normalized and case-insensitive name hashes
53
+ 3. Segment-based candidate discovery for complex paths
54
+ 4. Scan of stored project metadata / collection sampling as a last resort
55
+
56
+ Args:
57
+ user_project_name: User-provided project name (e.g., "anukruti", "Anukruti", full path)
58
+
59
+ Returns:
60
+ List of collection names that match the project
61
+ """
62
+ if user_project_name in self._cache:
63
+ # Check if cache entry is still valid
64
+ if time() - self._cache_ttl.get(user_project_name, 0) < self._cache_duration:
65
+ return list(self._cache[user_project_name])
66
+ else:
67
+ # Cache expired, remove it
68
+ del self._cache[user_project_name]
69
+ del self._cache_ttl[user_project_name]
70
+
71
+ matching_collections = set()
72
+
73
+ # Get all collections (with caching)
74
+ collection_names = self._get_collection_names()
75
+ if not collection_names:
76
+ return []
77
+
78
+ # Strategy 1: Direct hash of input (handles full paths)
79
+ direct_hash = hashlib.sha256(user_project_name.encode()).hexdigest()[:16]
80
+ # Match exact hash segment between underscores, not substring
81
+ direct_matches = [c for c in collection_names
82
+ if f"_{direct_hash}_" in c or c.endswith(f"_{direct_hash}")]
83
+ matching_collections.update(direct_matches)
84
+
85
+ # Strategy 2: Try normalized version
86
+ normalized = self._normalize_project_name(user_project_name)
87
+ if normalized != user_project_name:
88
+ norm_hash = hashlib.sha256(normalized.encode()).hexdigest()[:16]
89
+ # Match exact hash segment between underscores, not substring
90
+ norm_matches = [c for c in collection_names
91
+ if f"_{norm_hash}_" in c or c.endswith(f"_{norm_hash}")]
92
+ matching_collections.update(norm_matches)
93
+
94
+ # Strategy 3: Case-insensitive normalized version
95
+ lower_normalized = normalized.lower()
96
+ if lower_normalized != normalized:
97
+ lower_hash = hashlib.sha256(lower_normalized.encode()).hexdigest()[:16]
98
+ # Match exact hash segment between underscores, not substring
99
+ lower_matches = [c for c in collection_names
100
+ if f"_{lower_hash}_" in c or c.endswith(f"_{lower_hash}")]
101
+ matching_collections.update(lower_matches)
102
+
103
+ # Strategy 4: Use segment-based discovery for complex paths
104
+ if not matching_collections:
105
+ # Extract segments from the input
106
+ segments = self._extract_project_segments(user_project_name)
107
+ if segments:
108
+ # Score and generate candidates
109
+ scores = self._score_segments(segments, user_project_name)
110
+ candidates = self._generate_search_candidates(segments, scores)
111
+
112
+ # Try each candidate
113
+ for candidate in candidates:
114
+ candidate_hash = hashlib.sha256(candidate.encode()).hexdigest()[:16]
115
+ # Match exact hash segment between underscores, not substring
116
+ candidate_matches = [c for c in collection_names
117
+ if f"_{candidate_hash}_" in c or c.endswith(f"_{candidate_hash}")]
118
+ matching_collections.update(candidate_matches)
119
+
120
+ # Stop if we found matches
121
+ if matching_collections:
122
+ break
123
+
124
+ # Strategy 5: Scan ALL collections to build a mapping
125
+ # This finds collections where the stored project name contains our search term
126
+ if not matching_collections:
127
+ # Get all projects first
128
+ all_projects = self.get_all_projects()
129
+
130
+ # Find matching project names
131
+ search_lower = user_project_name.lower()
132
+ for project_name, project_collections in all_projects.items():
133
+ if (search_lower in project_name.lower() or
134
+ project_name.lower() in search_lower or
135
+ project_name.lower() == search_lower):
136
+ matching_collections.update(project_collections)
137
+
138
+ # Strategy 6: Direct collection scan as last resort
139
+ if not matching_collections and len(collection_names) < 200: # Only for reasonable collection counts
140
+ # Sample proportionally - 5% of collections up to 10
141
+ sample_size = min(10, max(1, len(collection_names) // 20))
142
+ for coll_name in collection_names[:sample_size]:
143
+ try:
144
+ # Get a sample point to check metadata structure
145
+ result = self.client.scroll(
146
+ collection_name=coll_name,
147
+ limit=1,
148
+ with_payload=True
149
+ )
150
+ if not result or not result[0]:
151
+ continue
152
+ if result[0]:
153
+ point = result[0][0]
154
+ project_in_payload = point.payload.get('project', '')
155
+
156
+ # Check if this project matches
157
+ if self._project_matches(project_in_payload, user_project_name):
158
+ # Get the hash from this collection name
159
+ # Format: conv_HASH_local or conv_HASH_voyage
160
+ parts = coll_name.split('_')
161
+ if len(parts) >= 2:
162
+ coll_hash = parts[1]
163
+ # Find all collections with this hash
164
+ hash_matches = [c for c in collection_names if coll_hash in c]
165
+ matching_collections.update(hash_matches)
166
+ break
167
+ except Exception as e:
168
+ logger.debug(f"Failed to scroll {coll_name}: {e}")
169
+ continue
170
+
171
+ # Cache the result with TTL
172
+ result = list(matching_collections)
173
+ self._cache[user_project_name] = matching_collections
174
+ self._cache_ttl[user_project_name] = time()
175
+
176
+ return result
177
+
178
+ def _get_collection_names(self, force_refresh: bool = False) -> List[str]:
179
+ """
180
+ Get all collection names with caching.
181
+
182
+ Args:
183
+ force_refresh: Force refresh the cache
184
+
185
+ Returns:
186
+ List of collection names starting with 'conv_'
187
+ """
188
+ # Check cache validity
189
+ if not force_refresh and self._collections_cache:
190
+ if time() - self._collections_cache_time < self._cache_duration:
191
+ return self._collections_cache
192
+
193
+ # Fetch fresh collection list
194
+ try:
195
+ all_collections = self.client.get_collections().collections
196
+ collection_names = [c.name for c in all_collections if c.name.startswith('conv_')]
197
+
198
+ # Update cache
199
+ self._collections_cache = collection_names
200
+ self._collections_cache_time = time()
201
+
202
+ return collection_names
203
+ except Exception as e:
204
+ logger.error(f"Failed to get collections: {e}")
205
+ # Return cached version if available, even if expired
206
+ return self._collections_cache if self._collections_cache else []
207
+
208
+ def _normalize_project_name(self, project_path: str) -> str:
209
+ """
210
+ Normalize project name for consistent hashing.
211
+ Extracts the actual project name from various path formats.
212
+ """
213
+ if not project_path:
214
+ return ""
215
+
216
+ # Remove trailing slashes
217
+ project_path = project_path.rstrip('/')
218
+
219
+ # Handle Claude logs format (starts with dash)
220
+ if project_path.startswith('-'):
221
+ # Split on dashes but don't convert to path separators
222
+ # This preserves project names that contain dashes
223
+ path_str = project_path[1:] # Remove leading dash
224
+ path_parts = path_str.split('-') # Split on dashes, not path separators
225
+
226
+ # Look for common project parent directories
227
+ project_parents = {'projects', 'code', 'Code', 'repos', 'repositories',
228
+ 'dev', 'Development', 'work', 'src', 'github'}
229
+
230
+ # Find the project name after a known parent directory
231
+ for i, part in enumerate(path_parts):
232
+ if part.lower() in project_parents and i + 1 < len(path_parts):
233
+ # Return everything after the parent directory
234
+ remaining = path_parts[i + 1:]
235
+
236
+ # Use segment-based approach for complex paths
237
+ # Return the most likely project name from remaining segments
238
+ if remaining:
239
+ # If it's a single segment, return it
240
+ if len(remaining) == 1:
241
+ return remaining[0]
242
+ # For multiple segments, look for project-like patterns
243
+ for r in remaining:
244
+ r_lower = r.lower()
245
+ # Prioritize segments with project indicators
246
+ if any(ind in r_lower for ind in ['app', 'service', 'project', 'api', 'client']):
247
+ return r
248
+
249
+ # Otherwise join remaining parts
250
+ return '-'.join(remaining)
251
+
252
+ # Fallback: use the last component
253
+ return path_parts[-1] if path_parts else project_path
254
+
255
+ # For regular paths or simple names
256
+ path_obj = Path(project_path)
257
+
258
+ # If it's already a simple name, return it
259
+ if '/' not in project_path and '\\' not in project_path:
260
+ return project_path
261
+
262
+ # Otherwise extract from path
263
+ return path_obj.name
264
+
265
+ def _project_matches(self, stored_project: str, target_project: str) -> bool:
266
+ """
267
+ Check if a stored project name matches the target.
268
+ Handles various naming conventions.
269
+ """
270
+ # Exact match
271
+ if stored_project == target_project:
272
+ return True
273
+
274
+ # Case-insensitive match
275
+ if stored_project.lower() == target_project.lower():
276
+ return True
277
+
278
+ # Check if target appears at the end of stored (for paths)
279
+ if stored_project.endswith(f"-{target_project}") or stored_project.endswith(f"/{target_project}"):
280
+ return True
281
+
282
+ # Check if normalized versions match
283
+ stored_norm = self._normalize_project_name(stored_project)
284
+ target_norm = self._normalize_project_name(target_project)
285
+
286
+ if stored_norm.lower() == target_norm.lower():
287
+ return True
288
+
289
+ # Check if segments match
290
+ stored_segments = self._extract_project_segments(stored_project)
291
+ target_segments = self._extract_project_segments(target_project)
292
+
293
+ # If any segments match, consider it a match
294
+ if stored_segments and target_segments:
295
+ stored_set = set(s.lower() for s in stored_segments)
296
+ target_set = set(s.lower() for s in target_segments)
297
+ if stored_set & target_set: # Intersection
298
+ return True
299
+
300
+ return False
301
+
302
+ def get_all_projects(self) -> Dict[str, List[str]]:
303
+ """
304
+ Get all available projects and their collections.
305
+ Returns a mapping of project names to collection names.
306
+ """
307
+ projects = {}
308
+
309
+ try:
310
+ # Use cached collection names
311
+ collection_names = self._get_collection_names()
312
+
313
+ # Group collections by hash
314
+ hash_groups = {}
315
+ for coll_name in collection_names:
316
+ parts = coll_name.split('_')
317
+ if len(parts) >= 2:
318
+ coll_hash = parts[1]
319
+ if coll_hash not in hash_groups:
320
+ hash_groups[coll_hash] = []
321
+ hash_groups[coll_hash].append(coll_name)
322
+
323
+ # Sample each group to find project name
324
+ for coll_hash, colls in hash_groups.items():
325
+ # Skip empty collections
326
+ sample_coll = colls[0]
327
+ try:
328
+ info = self.client.get_collection(sample_coll)
329
+ if info.points_count == 0:
330
+ continue
331
+
332
+ # Get a sample point
333
+ result = self.client.scroll(
334
+ collection_name=sample_coll,
335
+ limit=1,
336
+ with_payload=True
337
+ )
338
+
339
+ if result[0]:
340
+ point = result[0][0]
341
+ project_name = point.payload.get('project', f'unknown_{coll_hash}')
342
+
343
+ # Try to extract a friendly name
344
+ friendly_name = self._normalize_project_name(project_name)
345
+ if friendly_name:
346
+ projects[friendly_name] = colls
347
+ else:
348
+ projects[project_name] = colls
349
+
350
+ except Exception as e:
351
+ logger.debug(f"Error sampling {sample_coll}: {e}")
352
+ continue
353
+
354
+ except Exception as e:
355
+ logger.error(f"Failed to get all projects: {e}")
356
+
357
+ return projects
358
+
359
+ def _extract_project_segments(self, path: str) -> List[str]:
360
+ """
361
+ Extract meaningful segments from a dash-encoded or regular path.
362
+
363
+ Examples:
364
+ - -Users-name-projects-my-app-src -> ['my', 'app', 'src']
365
+ - -Users-name-Code-freightwise-documents -> ['freightwise', 'documents']
366
+
367
+ Args:
368
+ path: Path in any format
369
+
370
+ Returns:
371
+ List of meaningful segments that could be project names
372
+ """
373
+ segments = []
374
+
375
+ # Handle dash-encoded paths
376
+ if path.startswith('-'):
377
+ # Remove leading dash and split
378
+ parts = path[1:].split('-')
379
+
380
+ # Find marker position
381
+ marker_idx = -1
382
+ for i, part in enumerate(parts):
383
+ if part.lower() in PROJECT_MARKERS:
384
+ marker_idx = i
385
+ break
386
+
387
+ # Extract segments after marker
388
+ if marker_idx >= 0:
389
+ # Everything after the marker is a candidate
390
+ candidate_parts = parts[marker_idx + 1:]
391
+ else:
392
+ # No marker found, use last few segments
393
+ candidate_parts = parts[-3:] if len(parts) > 3 else parts
394
+
395
+ # Filter out unwanted patterns
396
+ for part in candidate_parts:
397
+ if not self._should_filter_segment(part):
398
+ segments.append(part)
399
+
400
+ # Handle regular paths
401
+ else:
402
+ path_obj = Path(path)
403
+ parts = list(path_obj.parts)
404
+
405
+ # Find marker position
406
+ marker_idx = -1
407
+ for i, part in enumerate(parts):
408
+ if part.lower() in PROJECT_MARKERS:
409
+ marker_idx = i
410
+ break
411
+
412
+ # Extract segments after marker
413
+ if marker_idx >= 0:
414
+ candidate_parts = parts[marker_idx + 1:]
415
+ else:
416
+ # Use the path name itself
417
+ candidate_parts = [path_obj.name] if path_obj.name else []
418
+
419
+ # Process segments
420
+ for part in candidate_parts:
421
+ # Split on common separators
422
+ sub_parts = part.replace('-', ' ').replace('_', ' ').split()
423
+ for sub in sub_parts:
424
+ if not self._should_filter_segment(sub):
425
+ segments.append(sub)
426
+
427
+ return segments
428
+
429
+ def _should_filter_segment(self, segment: str) -> bool:
430
+ """
431
+ Check if a segment should be filtered out.
432
+
433
+ Args:
434
+ segment: Segment to check
435
+
436
+ Returns:
437
+ True if segment should be filtered, False otherwise
438
+ """
439
+ if not segment or len(segment) < 2:
440
+ return True
441
+
442
+ # Check against filter patterns
443
+ for pattern in self._filter_patterns:
444
+ if pattern.match(segment):
445
+ return True
446
+
447
+ # Don't filter common words - users might have projects named "for", "with", etc.
448
+ # NOTE: the len(segment) < 2 guard above already rejects every single-character segment, so this allowlist check is unreachable as written.
449
+ if len(segment) == 1 and segment not in {'a', 'x', 'c', 'r', 'v'}:
450
+ return True
451
+
452
+ return False
453
+
454
+ def _score_segments(self, segments: List[str], original_path: str) -> Dict[str, float]:
455
+ """
456
+ Score segments by likelihood of being the project name.
457
+
458
+ Args:
459
+ segments: List of segments to score
460
+ original_path: Original path for context
461
+
462
+ Returns:
463
+ Dictionary of segment to score (0-1)
464
+ """
465
+ scores = {}
466
+
467
+ for i, segment in enumerate(segments):
468
+ score = 1.0
469
+
470
+ # Position scoring - earlier segments after marker are more likely
471
+ position_weight = 1.0 - (i * 0.1)
472
+ score *= max(0.3, position_weight)
473
+
474
+ # Length scoring - very short or very long segments less likely
475
+ if len(segment) < 3:
476
+ score *= 0.5
477
+ elif len(segment) > 20:
478
+ score *= 0.7
479
+
480
+ # Case scoring - proper case or lowercase more likely
481
+ if segment.isupper():
482
+ score *= 0.8
483
+
484
+ # Contains project-like patterns
485
+ if any(indicator in segment.lower() for indicator in ['app', 'project', 'service', 'client', 'server', 'api']):
486
+ score *= 1.2
487
+
488
+ scores[segment] = min(1.0, score)
489
+
490
+ return scores
491
+
492
+ def _generate_search_candidates(self, segments: List[str], scores: Dict[str, float]) -> List[str]:
493
+ """
494
+ Generate search candidates from segments.
495
+
496
+ Args:
497
+ segments: List of segments
498
+ scores: Segment scores
499
+
500
+ Returns:
501
+ List of search candidates ordered by likelihood
502
+ """
503
+ candidates = []
504
+
505
+ # Add individual segments sorted by score
506
+ sorted_segments = sorted(segments, key=lambda s: scores.get(s, 0), reverse=True)
507
+ candidates.extend(sorted_segments[:5]) # Top 5 individual segments
508
+
509
+ # Add combinations of high-scoring segments
510
+ if len(segments) >= 2:
511
+ # Adjacent pairs
512
+ for i in range(len(segments) - 1):
513
+ combined = f"{segments[i]}-{segments[i+1]}"
514
+ candidates.append(combined)
515
+
516
+ # Full combination if not too long
517
+ if len(segments) <= 4:
518
+ full_combo = '-'.join(segments)
519
+ candidates.append(full_combo)
520
+
521
+ # Add lowercase variants
522
+ for candidate in list(candidates):
523
+ lower = candidate.lower()
524
+ if lower not in candidates:
525
+ candidates.append(lower)
526
+
527
+ return candidates
@@ -12,6 +12,7 @@ import time
12
12
 
13
13
  from fastmcp import FastMCP, Context
14
14
  from .utils import normalize_project_name
15
+ from .project_resolver import ProjectResolver
15
16
  from pydantic import BaseModel, Field
16
17
  from qdrant_client import AsyncQdrantClient, models
17
18
  from qdrant_client.models import (
@@ -367,19 +368,22 @@ async def reflect_on_past(
367
368
  # Filter collections by project if not searching all
368
369
  project_collections = [] # Define at this scope for later use
369
370
  if target_project != 'all':
370
- # Generate the collection name pattern for this project using normalized name
371
- normalized_name = normalize_project_name(target_project)
372
- project_hash = hashlib.md5(normalized_name.encode()).hexdigest()[:8]
373
- # Search BOTH local and voyage collections for this project
374
- project_collections = [
375
- c for c in all_collections
376
- if c.startswith(f"conv_{project_hash}_")
377
- ]
371
+ # Use ProjectResolver to find collections for this project
372
+ resolver = ProjectResolver(qdrant_client)
373
+ project_collections = resolver.find_collections_for_project(target_project)
374
+
375
+ if not project_collections:
376
+ # Fall back to old method for backward compatibility
377
+ normalized_name = normalize_project_name(target_project)
378
+ project_hash = hashlib.md5(normalized_name.encode()).hexdigest()[:8]
379
+ project_collections = [
380
+ c for c in all_collections
381
+ if c.startswith(f"conv_{project_hash}_")
382
+ ]
378
383
 
379
384
  if not project_collections:
380
- # Try to find collections with project metadata
381
385
  # Fall back to searching all collections but filtering by project metadata
382
- await ctx.debug(f"No collections found for project hash {project_hash}, will filter by metadata")
386
+ await ctx.debug(f"No collections found for project {target_project}, will filter by metadata")
383
387
  collections_to_search = all_collections
384
388
  else:
385
389
  await ctx.debug(f"Found {len(project_collections)} collections for project {target_project}")
@@ -47,6 +47,10 @@ def normalize_project_name(project_path: str) -> str:
47
47
  if not project_path:
48
48
  return ""
49
49
 
50
+ # Project discovery markers - common parent directories that indicate project roots
51
+ PROJECT_MARKERS = {'projects', 'code', 'Code', 'repos', 'repositories',
52
+ 'dev', 'Development', 'work', 'src', 'github'}
53
+
50
54
  # Remove trailing slashes
51
55
  project_path = project_path.rstrip('/')
52
56
 
@@ -61,15 +65,28 @@ def normalize_project_name(project_path: str) -> str:
61
65
  path_parts = Path(path_str).parts
62
66
 
63
67
  # Look for common project parent directories
64
- project_parents = {'projects', 'code', 'Code', 'repos', 'repositories',
65
- 'dev', 'Development', 'work', 'src', 'github'}
68
+ project_parents = PROJECT_MARKERS
66
69
 
67
70
  # Find the project name after a known parent directory
68
71
  for i, part in enumerate(path_parts):
69
72
  if part.lower() in project_parents and i + 1 < len(path_parts):
70
73
  # Everything after the parent directory is the project name
71
- # Join remaining parts with dash if project name has multiple components
72
74
  remaining = path_parts[i + 1:]
75
+
76
+ # Use segment-based approach for complex paths
77
+ # Return the most likely project name from remaining segments
78
+ if remaining:
79
+ # If it's a single segment, return it
80
+ if len(remaining) == 1:
81
+ return remaining[0]
82
+ # For multiple segments, look for project-like patterns
83
+ for r in remaining:
84
+ r_lower = r.lower()
85
+ # Prioritize segments with project indicators
86
+ if any(ind in r_lower for ind in ['app', 'service', 'project', 'api', 'client']):
87
+ return r
88
+
89
+ # Join remaining parts with dash if project name has multiple components
73
90
  return '-'.join(remaining)
74
91
 
75
92
  # Fallback: just use the last component
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-self-reflect",
3
- "version": "2.5.19",
3
+ "version": "2.7.1",
4
4
  "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
5
5
  "keywords": [
6
6
  "claude",
@@ -13,6 +13,12 @@
13
13
  "ai-memory",
14
14
  "claude-code"
15
15
  ],
16
+ "badges": {
17
+ "npm": "https://badge.fury.io/js/claude-self-reflect.svg",
18
+ "license": "https://img.shields.io/badge/License-MIT-yellow.svg",
19
+ "docker": "https://img.shields.io/badge/Docker-Required-blue.svg",
20
+ "claude": "https://img.shields.io/badge/Claude%20Code-Compatible-green.svg"
21
+ },
16
22
  "homepage": "https://github.com/ramakay/claude-self-reflect#readme",
17
23
  "bugs": {
18
24
  "url": "https://github.com/ramakay/claude-self-reflect/issues"