claude-self-reflect 3.2.3 → 3.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/documentation-writer.md +1 -1
- package/.claude/agents/qdrant-specialist.md +2 -2
- package/.claude/agents/reflection-specialist.md +2 -2
- package/.claude/agents/search-optimizer.md +9 -7
- package/README.md +6 -8
- package/mcp-server/pyproject.toml +1 -1
- package/mcp-server/src/project_resolver.py +28 -46
- package/mcp-server/src/server.py +80 -62
- package/package.json +2 -1
- package/scripts/import-conversations-unified.py +12 -4
- package/scripts/importer/utils/project_normalizer.py +22 -9
- package/shared/__init__.py +5 -0
- package/shared/normalization.py +54 -0
|
@@ -48,7 +48,7 @@ You are a technical documentation specialist for the Claude Self Reflect project
|
|
|
48
48
|
* @param query - Natural language search query
|
|
49
49
|
* @param options - Search configuration options
|
|
50
50
|
* @param options.limit - Maximum results to return (default: 10)
|
|
51
|
-
* @param options.threshold - Minimum similarity score 0-1 (
|
|
51
|
+
* @param options.threshold - Minimum similarity score 0-1 (removed in v3.2.4 - uses natural scoring)
|
|
52
52
|
* @param options.project - Filter by specific project name
|
|
53
53
|
* @returns Promise resolving to array of search results
|
|
54
54
|
*
|
|
@@ -12,7 +12,7 @@ You are a Qdrant vector database specialist for the claude-self-reflect project.
|
|
|
12
12
|
- Collections use per-project isolation: `conv_<md5_hash>_local` or `conv_<md5_hash>_voyage` naming
|
|
13
13
|
- Project paths: ~/.claude/projects/-Users-{username}-projects-{project-name}/*.jsonl
|
|
14
14
|
- Project name is extracted from path and MD5 hashed for collection naming
|
|
15
|
-
- Cross-collection search
|
|
15
|
+
- Cross-collection search uses Qdrant's natural scoring (no artificial thresholds since v3.2.4)
|
|
16
16
|
- Streaming importer detects file growth and processes new lines incrementally
|
|
17
17
|
- MCP server expects collections to match project name MD5 hash
|
|
18
18
|
|
|
@@ -195,7 +195,7 @@ docker stats qdrant
|
|
|
195
195
|
|
|
196
196
|
## Project-Specific Rules
|
|
197
197
|
- Always use Voyage AI embeddings for consistency
|
|
198
|
-
-
|
|
198
|
+
- Use Qdrant's natural scoring (no artificial thresholds since v3.2.4)
|
|
199
199
|
- Preserve per-project collection isolation
|
|
200
200
|
- Do not grep JSONL files unless explicitly asked
|
|
201
201
|
- Always verify the MCP integration works end-to-end
|
|
@@ -128,7 +128,7 @@ Fast search that returns only the count and top result. Perfect for quick checks
|
|
|
128
128
|
// Quick overview of matches
|
|
129
129
|
{
|
|
130
130
|
query: "authentication patterns",
|
|
131
|
-
min_score: 0.5, // Optional
|
|
131
|
+
min_score: 0.5, // Optional (v3.2.4+ ignores this - uses natural scoring)
|
|
132
132
|
project: "all" // Optional, defaults to current project
|
|
133
133
|
}
|
|
134
134
|
```
|
|
@@ -165,7 +165,7 @@ Pagination support for getting additional results after an initial search.
|
|
|
165
165
|
query: "original search query", // Must match original query
|
|
166
166
|
offset: 3, // Skip first 3 results
|
|
167
167
|
limit: 3, // Get next 3 results
|
|
168
|
-
min_score: 0.7, // Optional
|
|
168
|
+
min_score: 0.7, // Optional (v3.2.4+ ignores this)
|
|
169
169
|
project: "all" // Optional
|
|
170
170
|
}
|
|
171
171
|
```
|
|
@@ -9,7 +9,7 @@ You are a search optimization specialist for the claude-self-reflect project. Yo
|
|
|
9
9
|
## Project Context
|
|
10
10
|
- Current baseline: 66.1% search accuracy with Voyage AI
|
|
11
11
|
- Gemini comparison showed 70-77% accuracy but 50% slower
|
|
12
|
-
-
|
|
12
|
+
- Search scoring: Uses Qdrant's natural scoring (no artificial thresholds as of v3.2.4)
|
|
13
13
|
- Cross-collection search adds ~100ms overhead
|
|
14
14
|
- 24+ projects with 10,165+ conversation chunks
|
|
15
15
|
|
|
@@ -71,9 +71,11 @@ python scripts/analyze-search-quality.py
|
|
|
71
71
|
### Threshold Tuning
|
|
72
72
|
```bash
|
|
73
73
|
# Test different thresholds
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
# Note: As of v3.2.4, artificial thresholds removed
|
|
75
|
+
# Focus on embedding model comparison instead
|
|
76
|
+
for model in voyage openai gemini; do
|
|
77
|
+
echo "Testing model: $model"
|
|
78
|
+
EMBEDDING_MODEL=$model npm test
|
|
77
79
|
done
|
|
78
80
|
|
|
79
81
|
# Find optimal threshold
|
|
@@ -237,7 +239,7 @@ def calculate_mrr(queries, results):
|
|
|
237
239
|
interface ABTestConfig {
|
|
238
240
|
control: {
|
|
239
241
|
model: 'voyage',
|
|
240
|
-
|
|
242
|
+
scoring: 'natural',
|
|
241
243
|
limit: 10
|
|
242
244
|
},
|
|
243
245
|
variant: {
|
|
@@ -285,7 +287,7 @@ async function abTestSearch(query: string, userId: string) {
|
|
|
285
287
|
### Recommended Settings
|
|
286
288
|
```env
|
|
287
289
|
# Search Configuration
|
|
288
|
-
SIMILARITY_THRESHOLD
|
|
290
|
+
# SIMILARITY_THRESHOLD removed in v3.2.4 - uses natural scoring
|
|
289
291
|
SEARCH_LIMIT=10
|
|
290
292
|
CROSS_COLLECTION_LIMIT=5
|
|
291
293
|
|
|
@@ -300,7 +302,7 @@ SAMPLE_RATE=0.1
|
|
|
300
302
|
```
|
|
301
303
|
|
|
302
304
|
## Project-Specific Rules
|
|
303
|
-
-
|
|
305
|
+
- Use Qdrant's natural scoring (no artificial thresholds since v3.2.4)
|
|
304
306
|
- Always compare against Voyage AI baseline (66.1%)
|
|
305
307
|
- Consider search latency alongside accuracy
|
|
306
308
|
- Test with real conversation data
|
package/README.md
CHANGED
|
@@ -116,11 +116,9 @@ Works with [Claude Code Statusline](https://github.com/sirmalloc/ccstatusline) -
|
|
|
116
116
|
<summary><b>MCP Tools Available to Claude</b></summary>
|
|
117
117
|
|
|
118
118
|
**Search & Memory Tools:**
|
|
119
|
-
- `reflect_on_past` - Search past conversations using semantic similarity with time decay
|
|
119
|
+
- `reflect_on_past` - Search past conversations using semantic similarity with time decay (supports quick/summary modes)
|
|
120
120
|
- `store_reflection` - Store important insights or learnings for future reference
|
|
121
|
-
- `
|
|
122
|
-
- `search_summary` - Get aggregated insights without individual details
|
|
123
|
-
- `get_more_results` - Paginate through additional search results
|
|
121
|
+
- `get_next_results` - Paginate through additional search results
|
|
124
122
|
- `search_by_file` - Find conversations that analyzed specific files
|
|
125
123
|
- `search_by_concept` - Search for conversations about development concepts
|
|
126
124
|
- `get_full_conversation` - Retrieve complete JSONL conversation files (v2.8.8)
|
|
@@ -288,11 +286,11 @@ npm uninstall -g claude-self-reflect
|
|
|
288
286
|
## What's New
|
|
289
287
|
|
|
290
288
|
<details>
|
|
291
|
-
<summary>
|
|
289
|
+
<summary>v3.2.4 - Latest Release</summary>
|
|
292
290
|
|
|
293
|
-
- **
|
|
294
|
-
- **
|
|
295
|
-
- **
|
|
291
|
+
- **CRITICAL: Search Threshold Removal**: Eliminated artificial 0.7+ thresholds that blocked broad searches like "docker", "MCP", "python"
|
|
292
|
+
- **Shared Normalization Module**: Created centralized project name normalization preventing search failures
|
|
293
|
+
- **Memory Decay Fixes**: Corrected mathematical errors in exponential decay calculation
|
|
296
294
|
|
|
297
295
|
</details>
|
|
298
296
|
|
|
@@ -6,11 +6,21 @@ Handles mapping between user-friendly names and internal collection names.
|
|
|
6
6
|
import hashlib
|
|
7
7
|
import logging
|
|
8
8
|
import re
|
|
9
|
+
import sys
|
|
9
10
|
from pathlib import Path
|
|
10
11
|
from typing import List, Dict, Optional, Set
|
|
11
12
|
from time import time
|
|
12
13
|
from qdrant_client import QdrantClient
|
|
13
14
|
|
|
15
|
+
# Import from shared module for consistent normalization
|
|
16
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
17
|
+
try:
|
|
18
|
+
from shared.normalization import normalize_project_name
|
|
19
|
+
except ImportError:
|
|
20
|
+
# Fall back to creating local version if shared module not found
|
|
21
|
+
logging.warning("Could not import shared normalization module")
|
|
22
|
+
normalize_project_name = None
|
|
23
|
+
|
|
14
24
|
logger = logging.getLogger(__name__)
|
|
15
25
|
|
|
16
26
|
# Project discovery markers - common parent directories that indicate project roots
|
|
@@ -244,59 +254,31 @@ class ProjectResolver:
|
|
|
244
254
|
def _normalize_project_name(self, project_path: str) -> str:
|
|
245
255
|
"""
|
|
246
256
|
Normalize project name for consistent hashing.
|
|
247
|
-
|
|
257
|
+
Uses the shared normalization module to ensure consistency
|
|
258
|
+
with import scripts.
|
|
248
259
|
"""
|
|
260
|
+
# Use the shared normalization function if available
|
|
261
|
+
if normalize_project_name:
|
|
262
|
+
return normalize_project_name(project_path)
|
|
263
|
+
|
|
264
|
+
# Fallback implementation - EXACT copy of shared module
|
|
249
265
|
if not project_path:
|
|
250
266
|
return ""
|
|
251
267
|
|
|
252
|
-
|
|
253
|
-
project_path = project_path.rstrip('/')
|
|
268
|
+
path = Path(project_path.rstrip('/'))
|
|
254
269
|
|
|
255
|
-
#
|
|
256
|
-
|
|
257
|
-
# Split on dashes but don't convert to path separators
|
|
258
|
-
# This preserves project names that contain dashes
|
|
259
|
-
path_str = project_path[1:] # Remove leading dash
|
|
260
|
-
path_parts = path_str.split('-') # Split on dashes, not path separators
|
|
261
|
-
|
|
262
|
-
# Look for common project parent directories
|
|
263
|
-
project_parents = {'projects', 'code', 'Code', 'repos', 'repositories',
|
|
264
|
-
'dev', 'Development', 'work', 'src', 'github'}
|
|
265
|
-
|
|
266
|
-
# Find the project name after a known parent directory
|
|
267
|
-
for i, part in enumerate(path_parts):
|
|
268
|
-
if part.lower() in project_parents and i + 1 < len(path_parts):
|
|
269
|
-
# Return everything after the parent directory
|
|
270
|
-
remaining = path_parts[i + 1:]
|
|
271
|
-
|
|
272
|
-
# Use segment-based approach for complex paths
|
|
273
|
-
# Return the most likely project name from remaining segments
|
|
274
|
-
if remaining:
|
|
275
|
-
# If it's a single segment, return it
|
|
276
|
-
if len(remaining) == 1:
|
|
277
|
-
return remaining[0]
|
|
278
|
-
# For multiple segments, look for project-like patterns
|
|
279
|
-
for r in remaining:
|
|
280
|
-
r_lower = r.lower()
|
|
281
|
-
# Prioritize segments with project indicators
|
|
282
|
-
if any(ind in r_lower for ind in ['app', 'service', 'project', 'api', 'client']):
|
|
283
|
-
return r
|
|
284
|
-
|
|
285
|
-
# Otherwise join remaining parts
|
|
286
|
-
return '-'.join(remaining)
|
|
287
|
-
|
|
288
|
-
# Fallback: use the last component
|
|
289
|
-
return path_parts[-1] if path_parts else project_path
|
|
270
|
+
# Extract the final directory name
|
|
271
|
+
final_component = path.name
|
|
290
272
|
|
|
291
|
-
#
|
|
292
|
-
|
|
273
|
+
# If it's Claude's dash-separated format, extract project name
|
|
274
|
+
if final_component.startswith('-') and 'projects' in final_component:
|
|
275
|
+
# Find the last occurrence of 'projects-' to handle edge cases
|
|
276
|
+
idx = final_component.rfind('projects-')
|
|
277
|
+
if idx != -1:
|
|
278
|
+
return final_component[idx + len('projects-'):]
|
|
293
279
|
|
|
294
|
-
#
|
|
295
|
-
|
|
296
|
-
return project_path
|
|
297
|
-
|
|
298
|
-
# Otherwise extract from path
|
|
299
|
-
return path_obj.name
|
|
280
|
+
# For regular paths, just return the directory name
|
|
281
|
+
return final_component if final_component else path.parent.name
|
|
300
282
|
|
|
301
283
|
def _project_matches(self, stored_project: str, target_project: str) -> bool:
|
|
302
284
|
"""
|
package/mcp-server/src/server.py
CHANGED
|
@@ -10,10 +10,22 @@ import numpy as np
|
|
|
10
10
|
import hashlib
|
|
11
11
|
import time
|
|
12
12
|
import logging
|
|
13
|
+
import math
|
|
13
14
|
from xml.sax.saxutils import escape
|
|
14
15
|
|
|
15
16
|
from fastmcp import FastMCP, Context
|
|
16
|
-
|
|
17
|
+
|
|
18
|
+
# Import from shared module for consistent normalization
|
|
19
|
+
import sys
|
|
20
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
21
|
+
try:
|
|
22
|
+
from shared.normalization import normalize_project_name
|
|
23
|
+
except ImportError:
|
|
24
|
+
# Fall back to local utils if shared module not found
|
|
25
|
+
from .utils import normalize_project_name
|
|
26
|
+
import logging
|
|
27
|
+
logging.warning("Using legacy utils.normalize_project_name - shared module not found")
|
|
28
|
+
|
|
17
29
|
from .project_resolver import ProjectResolver
|
|
18
30
|
from pydantic import BaseModel, Field
|
|
19
31
|
from qdrant_client import AsyncQdrantClient, models
|
|
@@ -571,7 +583,7 @@ async def reflect_on_past(
|
|
|
571
583
|
ctx: Context,
|
|
572
584
|
query: str = Field(description="The search query to find semantically similar conversations"),
|
|
573
585
|
limit: int = Field(default=5, description="Maximum number of results to return"),
|
|
574
|
-
min_score: float = Field(default=0.
|
|
586
|
+
min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
|
|
575
587
|
use_decay: Union[int, str] = Field(default=-1, description="Apply time-based decay: 1=enable, 0=disable, -1=use environment default (accepts int or str)"),
|
|
576
588
|
project: Optional[str] = Field(default=None, description="Search specific project only. If not provided, searches current project based on working directory. Use 'all' to search across all projects."),
|
|
577
589
|
include_raw: bool = Field(default=False, description="Include raw Qdrant payload data for debugging (increases response size)"),
|
|
@@ -669,8 +681,10 @@ async def reflect_on_past(
|
|
|
669
681
|
# Filter collections by project if not searching all
|
|
670
682
|
project_collections = [] # Define at this scope for later use
|
|
671
683
|
if target_project != 'all':
|
|
672
|
-
# Use ProjectResolver
|
|
673
|
-
|
|
684
|
+
# Use ProjectResolver with sync client (resolver expects sync operations)
|
|
685
|
+
from qdrant_client import QdrantClient as SyncQdrantClient
|
|
686
|
+
sync_client = SyncQdrantClient(url=QDRANT_URL)
|
|
687
|
+
resolver = ProjectResolver(sync_client)
|
|
674
688
|
project_collections = resolver.find_collections_for_project(target_project)
|
|
675
689
|
|
|
676
690
|
if not project_collections:
|
|
@@ -739,33 +753,32 @@ async def reflect_on_past(
|
|
|
739
753
|
await ctx.debug(f"Using NATIVE Qdrant decay (new API) for {collection_name}")
|
|
740
754
|
|
|
741
755
|
# Build the query with native Qdrant decay formula using newer API
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
756
|
+
# Convert half-life to seconds (Qdrant uses seconds for datetime)
|
|
757
|
+
half_life_seconds = DECAY_SCALE_DAYS * 24 * 60 * 60
|
|
758
|
+
|
|
759
|
+
# Build query using proper Python models as per Qdrant docs
|
|
760
|
+
from qdrant_client import models
|
|
761
|
+
|
|
762
|
+
query_obj = models.FormulaQuery(
|
|
763
|
+
formula=models.SumExpression(
|
|
745
764
|
sum=[
|
|
746
|
-
# Original similarity score
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
target=Expression(datetime="now"),
|
|
761
|
-
# Scale in milliseconds
|
|
762
|
-
scale=DECAY_SCALE_DAYS * 24 * 60 * 60 * 1000,
|
|
763
|
-
# Standard exponential decay midpoint
|
|
764
|
-
midpoint=0.5
|
|
765
|
-
)
|
|
765
|
+
"$score", # Original similarity score
|
|
766
|
+
models.MultExpression(
|
|
767
|
+
mult=[
|
|
768
|
+
DECAY_WEIGHT, # Weight multiplier
|
|
769
|
+
models.ExpDecayExpression(
|
|
770
|
+
exp_decay=models.DecayParamsExpression(
|
|
771
|
+
x=models.DatetimeKeyExpression(
|
|
772
|
+
datetime_key="timestamp" # Payload field with datetime
|
|
773
|
+
),
|
|
774
|
+
target=models.DatetimeExpression(
|
|
775
|
+
datetime="now" # Current time on server
|
|
776
|
+
),
|
|
777
|
+
scale=half_life_seconds, # Scale in seconds
|
|
778
|
+
midpoint=0.5 # Half-life semantics
|
|
766
779
|
)
|
|
767
|
-
|
|
768
|
-
|
|
780
|
+
)
|
|
781
|
+
]
|
|
769
782
|
)
|
|
770
783
|
]
|
|
771
784
|
)
|
|
@@ -776,36 +789,32 @@ async def reflect_on_past(
|
|
|
776
789
|
collection_name=collection_name,
|
|
777
790
|
query=query_obj,
|
|
778
791
|
limit=limit,
|
|
779
|
-
score_threshold=min_score,
|
|
780
792
|
with_payload=True
|
|
793
|
+
# No score_threshold - let Qdrant's decay formula handle relevance
|
|
781
794
|
)
|
|
782
795
|
elif should_use_decay and USE_NATIVE_DECAY and not NATIVE_DECAY_AVAILABLE:
|
|
783
796
|
# Use native Qdrant decay with older API
|
|
784
797
|
await ctx.debug(f"Using NATIVE Qdrant decay (legacy API) for {collection_name}")
|
|
785
798
|
|
|
799
|
+
# Convert half-life to seconds (Qdrant uses seconds for datetime)
|
|
800
|
+
half_life_seconds = DECAY_SCALE_DAYS * 24 * 60 * 60
|
|
801
|
+
|
|
786
802
|
# Build the query with native Qdrant decay formula using older API
|
|
803
|
+
# Use the same models but with FormulaQuery
|
|
787
804
|
query_obj = FormulaQuery(
|
|
788
805
|
nearest=query_embedding,
|
|
789
806
|
formula=SumExpression(
|
|
790
807
|
sum=[
|
|
791
|
-
# Original similarity score
|
|
792
|
-
'score', # Variable expression can be a string
|
|
793
|
-
# Decay boost term
|
|
808
|
+
"$score", # Original similarity score
|
|
794
809
|
{
|
|
795
|
-
|
|
796
|
-
#
|
|
797
|
-
DECAY_WEIGHT,
|
|
798
|
-
# Exponential decay function
|
|
810
|
+
"mult": [
|
|
811
|
+
DECAY_WEIGHT, # Weight multiplier
|
|
799
812
|
{
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
#
|
|
804
|
-
|
|
805
|
-
# Scale in milliseconds
|
|
806
|
-
scale=DECAY_SCALE_DAYS * 24 * 60 * 60 * 1000,
|
|
807
|
-
# Standard exponential decay midpoint
|
|
808
|
-
midpoint=0.5
|
|
813
|
+
"exp_decay": DecayParamsExpression(
|
|
814
|
+
x=DatetimeKeyExpression(datetime_key="timestamp"),
|
|
815
|
+
target=DatetimeExpression(datetime="now"),
|
|
816
|
+
scale=half_life_seconds, # Scale in seconds
|
|
817
|
+
midpoint=0.5 # Half-life semantics
|
|
809
818
|
)
|
|
810
819
|
}
|
|
811
820
|
]
|
|
@@ -819,8 +828,8 @@ async def reflect_on_past(
|
|
|
819
828
|
collection_name=collection_name,
|
|
820
829
|
query=query_obj,
|
|
821
830
|
limit=limit,
|
|
822
|
-
score_threshold=min_score,
|
|
823
831
|
with_payload=True
|
|
832
|
+
# No score_threshold - let Qdrant's decay formula handle relevance
|
|
824
833
|
)
|
|
825
834
|
|
|
826
835
|
# Process results from native decay search
|
|
@@ -916,11 +925,14 @@ async def reflect_on_past(
|
|
|
916
925
|
timestamp = timestamp.replace(tzinfo=timezone.utc)
|
|
917
926
|
age_ms = (now - timestamp).total_seconds() * 1000
|
|
918
927
|
|
|
919
|
-
# Calculate decay factor
|
|
920
|
-
|
|
928
|
+
# Calculate decay factor using proper half-life formula
|
|
929
|
+
# For half-life H: decay = exp(-ln(2) * age / H)
|
|
930
|
+
ln2 = math.log(2)
|
|
931
|
+
decay_factor = math.exp(-ln2 * age_ms / scale_ms)
|
|
921
932
|
|
|
922
|
-
# Apply decay formula
|
|
923
|
-
|
|
933
|
+
# Apply multiplicative decay formula to keep scores bounded [0, 1]
|
|
934
|
+
# adjusted = score * ((1 - weight) + weight * decay)
|
|
935
|
+
adjusted_score = point.score * ((1 - DECAY_WEIGHT) + DECAY_WEIGHT * decay_factor)
|
|
924
936
|
|
|
925
937
|
# Debug: show the calculation
|
|
926
938
|
age_days = age_ms / (24 * 60 * 60 * 1000)
|
|
@@ -1001,12 +1013,13 @@ async def reflect_on_past(
|
|
|
1001
1013
|
))
|
|
1002
1014
|
else:
|
|
1003
1015
|
# Standard search without decay
|
|
1016
|
+
# Let Qdrant handle scoring natively
|
|
1004
1017
|
results = await qdrant_client.search(
|
|
1005
1018
|
collection_name=collection_name,
|
|
1006
1019
|
query_vector=query_embedding,
|
|
1007
1020
|
limit=limit * 2, # Get more results to account for filtering
|
|
1008
|
-
score_threshold=min_score * 0.9, # Slightly lower threshold to catch v1 chunks
|
|
1009
1021
|
with_payload=True
|
|
1022
|
+
# No score_threshold - let Qdrant decide what's relevant
|
|
1010
1023
|
)
|
|
1011
1024
|
|
|
1012
1025
|
for point in results:
|
|
@@ -1691,7 +1704,7 @@ async def store_reflection(
|
|
|
1691
1704
|
async def quick_search(
|
|
1692
1705
|
ctx: Context,
|
|
1693
1706
|
query: str = Field(description="The search query to find semantically similar conversations"),
|
|
1694
|
-
min_score: float = Field(default=0.
|
|
1707
|
+
min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
|
|
1695
1708
|
project: Optional[str] = Field(default=None, description="Search specific project only. If not provided, searches current project based on working directory. Use 'all' to search across all projects.")
|
|
1696
1709
|
) -> str:
|
|
1697
1710
|
"""Quick search that returns only the count and top result for fast overview."""
|
|
@@ -1737,7 +1750,7 @@ async def get_more_results(
|
|
|
1737
1750
|
query: str = Field(description="The original search query"),
|
|
1738
1751
|
offset: int = Field(default=3, description="Number of results to skip (for pagination)"),
|
|
1739
1752
|
limit: int = Field(default=3, description="Number of additional results to return"),
|
|
1740
|
-
min_score: float = Field(default=0.
|
|
1753
|
+
min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
|
|
1741
1754
|
project: Optional[str] = Field(default=None, description="Search specific project only")
|
|
1742
1755
|
) -> str:
|
|
1743
1756
|
"""Get additional search results after an initial search (pagination support)."""
|
|
@@ -1772,8 +1785,9 @@ async def search_by_file(
|
|
|
1772
1785
|
collections = await get_all_collections() if not project else []
|
|
1773
1786
|
|
|
1774
1787
|
if project and project != 'all':
|
|
1775
|
-
# Filter collections for specific project
|
|
1776
|
-
|
|
1788
|
+
# Filter collections for specific project - normalize first!
|
|
1789
|
+
normalized_project = normalize_project_name(project)
|
|
1790
|
+
project_hash = hashlib.md5(normalized_project.encode()).hexdigest()[:8]
|
|
1777
1791
|
collection_prefix = f"conv_{project_hash}_"
|
|
1778
1792
|
collections = [c for c in await get_all_collections() if c.startswith(collection_prefix)]
|
|
1779
1793
|
elif project == 'all':
|
|
@@ -2137,7 +2151,7 @@ async def get_next_results(
|
|
|
2137
2151
|
query: str = Field(description="The original search query"),
|
|
2138
2152
|
offset: int = Field(default=3, description="Number of results to skip (for pagination)"),
|
|
2139
2153
|
limit: int = Field(default=3, description="Number of additional results to return"),
|
|
2140
|
-
min_score: float = Field(default=0.
|
|
2154
|
+
min_score: float = Field(default=0.3, description="Minimum similarity score (0-1)"),
|
|
2141
2155
|
project: Optional[str] = Field(default=None, description="Search specific project only")
|
|
2142
2156
|
) -> str:
|
|
2143
2157
|
"""Get additional search results after an initial search (pagination support)."""
|
|
@@ -2152,9 +2166,10 @@ async def get_next_results(
|
|
|
2152
2166
|
# Search all collections if project is "all" or not specified
|
|
2153
2167
|
collections = await get_all_collections()
|
|
2154
2168
|
else:
|
|
2155
|
-
# Search specific project
|
|
2169
|
+
# Search specific project - normalize first!
|
|
2156
2170
|
all_collections = await get_all_collections()
|
|
2157
|
-
|
|
2171
|
+
normalized_project = normalize_project_name(project)
|
|
2172
|
+
project_hash = hashlib.md5(normalized_project.encode()).hexdigest()[:8]
|
|
2158
2173
|
collections = [
|
|
2159
2174
|
c for c in all_collections
|
|
2160
2175
|
if c.startswith(f"conv_{project_hash}_")
|
|
@@ -2196,9 +2211,12 @@ async def get_next_results(
|
|
|
2196
2211
|
if use_decay_bool and 'timestamp' in payload:
|
|
2197
2212
|
try:
|
|
2198
2213
|
timestamp = datetime.fromisoformat(payload['timestamp'].replace('Z', '+00:00'))
|
|
2199
|
-
age_days = (datetime.now(timezone.utc) - timestamp).
|
|
2200
|
-
|
|
2201
|
-
|
|
2214
|
+
age_days = (datetime.now(timezone.utc) - timestamp).total_seconds() / (24 * 60 * 60)
|
|
2215
|
+
# Use consistent half-life formula: decay = exp(-ln(2) * age / half_life)
|
|
2216
|
+
ln2 = math.log(2)
|
|
2217
|
+
decay_factor = math.exp(-ln2 * age_days / DECAY_SCALE_DAYS)
|
|
2218
|
+
# Apply multiplicative formula: score * ((1 - weight) + weight * decay)
|
|
2219
|
+
score = score * ((1 - DECAY_WEIGHT) + DECAY_WEIGHT * decay_factor)
|
|
2202
2220
|
except (ValueError, TypeError) as e:
|
|
2203
2221
|
# Log but continue - timestamp format issue shouldn't break search
|
|
2204
2222
|
logger.debug(f"Failed to apply decay for timestamp {payload.get('timestamp')}: {e}")
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-self-reflect",
|
|
3
|
-
"version": "3.2.
|
|
3
|
+
"version": "3.2.4",
|
|
4
4
|
"description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"claude",
|
|
@@ -44,6 +44,7 @@
|
|
|
44
44
|
"scripts/importer/**/*.py",
|
|
45
45
|
"scripts/delta-metadata-update-safe.py",
|
|
46
46
|
"scripts/force-metadata-recovery.py",
|
|
47
|
+
"shared/**/*.py",
|
|
47
48
|
".claude/agents/*.md",
|
|
48
49
|
"config/qdrant-config.yaml",
|
|
49
50
|
"docker-compose.yaml",
|
|
@@ -25,12 +25,20 @@ sys.path.insert(0, str(scripts_dir))
|
|
|
25
25
|
from qdrant_client import QdrantClient
|
|
26
26
|
from qdrant_client.models import PointStruct, Distance, VectorParams
|
|
27
27
|
|
|
28
|
-
# Import
|
|
28
|
+
# Import normalize_project_name from shared module
|
|
29
|
+
# Add parent directory to path to import shared module
|
|
30
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
29
31
|
try:
|
|
30
|
-
from
|
|
32
|
+
from shared.normalization import normalize_project_name
|
|
31
33
|
except ImportError as e:
|
|
32
|
-
logging.error(f"Failed to import normalize_project_name from
|
|
33
|
-
|
|
34
|
+
logging.error(f"Failed to import normalize_project_name from shared module: {e}")
|
|
35
|
+
# Fall back to local utils if shared module not found
|
|
36
|
+
try:
|
|
37
|
+
from utils import normalize_project_name
|
|
38
|
+
logging.warning("Using legacy utils.normalize_project_name - consider updating")
|
|
39
|
+
except ImportError:
|
|
40
|
+
logging.error("Could not import normalize_project_name from any source")
|
|
41
|
+
sys.exit(1)
|
|
34
42
|
|
|
35
43
|
# Set up logging
|
|
36
44
|
logging.basicConfig(
|
|
@@ -2,8 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import logging
|
|
5
|
+
import sys
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
|
|
8
|
+
# Import from shared module for consistent normalization
|
|
9
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
|
|
10
|
+
try:
|
|
11
|
+
from shared.normalization import normalize_project_name as shared_normalize
|
|
12
|
+
except ImportError:
|
|
13
|
+
shared_normalize = None
|
|
14
|
+
logging.warning("Could not import shared normalization module")
|
|
15
|
+
|
|
7
16
|
logger = logging.getLogger(__name__)
|
|
8
17
|
|
|
9
18
|
|
|
@@ -20,32 +29,36 @@ class ProjectNormalizer:
|
|
|
20
29
|
"""
|
|
21
30
|
Normalize a project path to a consistent project name.
|
|
22
31
|
|
|
23
|
-
|
|
32
|
+
Uses the shared normalization module to ensure consistency
|
|
33
|
+
across all components.
|
|
24
34
|
|
|
25
35
|
Examples:
|
|
26
36
|
- "-Users-name-projects-claude-self-reflect" -> "claude-self-reflect"
|
|
27
37
|
- "claude-self-reflect" -> "claude-self-reflect"
|
|
28
38
|
- "/path/to/-Users-name-projects-myapp" -> "myapp"
|
|
29
39
|
"""
|
|
30
|
-
#
|
|
31
|
-
if
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
40
|
+
# Use shared normalization if available
|
|
41
|
+
if shared_normalize:
|
|
42
|
+
return shared_normalize(project_path)
|
|
43
|
+
|
|
44
|
+
# Fallback implementation (matches shared module)
|
|
45
|
+
if not project_path:
|
|
46
|
+
return ""
|
|
47
|
+
|
|
48
|
+
path = Path(project_path.rstrip('/'))
|
|
49
|
+
final_component = path.name
|
|
35
50
|
|
|
36
51
|
# Handle Claude's dash-separated format
|
|
37
52
|
if final_component.startswith('-') and 'projects' in final_component:
|
|
38
|
-
# Find the last occurrence of 'projects-'
|
|
39
53
|
idx = final_component.rfind('projects-')
|
|
40
54
|
if idx != -1:
|
|
41
|
-
# Extract everything after 'projects-'
|
|
42
55
|
project_name = final_component[idx + len('projects-'):]
|
|
43
56
|
logger.debug(f"Normalized '{project_path}' to '{project_name}'")
|
|
44
57
|
return project_name
|
|
45
58
|
|
|
46
59
|
# Already normalized or different format
|
|
47
60
|
logger.debug(f"Project path '{project_path}' already normalized")
|
|
48
|
-
return final_component
|
|
61
|
+
return final_component if final_component else path.parent.name
|
|
49
62
|
|
|
50
63
|
def get_project_name(self, file_path: Path) -> str:
|
|
51
64
|
"""
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Shared normalization utilities for claude-self-reflect.
|
|
2
|
+
|
|
3
|
+
This module provides the single source of truth for project name normalization,
|
|
4
|
+
ensuring consistent hashing across import scripts and the MCP server.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def normalize_project_name(project_path: str, _depth: int = 0) -> str:
|
|
11
|
+
"""
|
|
12
|
+
Normalize project name for consistent hashing across import/search.
|
|
13
|
+
|
|
14
|
+
This is the authoritative normalization function used by both:
|
|
15
|
+
- Import scripts (import-conversations-unified.py)
|
|
16
|
+
- MCP server (server.py)
|
|
17
|
+
|
|
18
|
+
Examples:
|
|
19
|
+
'/Users/name/.claude/projects/-Users-name-projects-myproject' -> 'myproject'
|
|
20
|
+
'-Users-name-projects-myproject' -> 'myproject'
|
|
21
|
+
'/path/to/myproject' -> 'myproject'
|
|
22
|
+
'myproject' -> 'myproject'
|
|
23
|
+
|
|
24
|
+
Special handling for Claude's dash-separated format:
|
|
25
|
+
When a path component starts with '-' and contains 'projects',
|
|
26
|
+
we extract everything after 'projects-' as the project name.
|
|
27
|
+
This handles dashes in project names correctly.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
project_path: Project path or name in any format
|
|
31
|
+
_depth: Internal recursion depth counter (for backwards compatibility)
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
Normalized project name suitable for consistent hashing
|
|
35
|
+
"""
|
|
36
|
+
if not project_path:
|
|
37
|
+
return ""
|
|
38
|
+
|
|
39
|
+
path = Path(project_path.rstrip('/'))
|
|
40
|
+
|
|
41
|
+
# Extract the final directory name
|
|
42
|
+
final_component = path.name
|
|
43
|
+
|
|
44
|
+
# If it's Claude's dash-separated format, extract project name
|
|
45
|
+
if final_component.startswith('-') and 'projects' in final_component:
|
|
46
|
+
# Find the last occurrence of 'projects-' to handle edge cases
|
|
47
|
+
# This correctly extracts 'claude-self-reflect' from:
|
|
48
|
+
# '-Users-ramakrishnanannaswamy-projects-claude-self-reflect'
|
|
49
|
+
idx = final_component.rfind('projects-')
|
|
50
|
+
if idx != -1:
|
|
51
|
+
return final_component[idx + len('projects-'):]
|
|
52
|
+
|
|
53
|
+
# For regular paths, just return the directory name
|
|
54
|
+
return final_component if final_component else path.parent.name
|