claude-self-reflect 3.2.3 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/claude-self-reflect-test.md +595 -528
- package/.claude/agents/documentation-writer.md +1 -1
- package/.claude/agents/qdrant-specialist.md +2 -2
- package/.claude/agents/reflection-specialist.md +61 -5
- package/.claude/agents/search-optimizer.md +9 -7
- package/README.md +16 -9
- package/mcp-server/pyproject.toml +1 -1
- package/mcp-server/run-mcp.sh +49 -5
- package/mcp-server/src/app_context.py +64 -0
- package/mcp-server/src/config.py +57 -0
- package/mcp-server/src/connection_pool.py +286 -0
- package/mcp-server/src/decay_manager.py +106 -0
- package/mcp-server/src/embedding_manager.py +64 -40
- package/mcp-server/src/embeddings_old.py +141 -0
- package/mcp-server/src/models.py +64 -0
- package/mcp-server/src/parallel_search.py +371 -0
- package/mcp-server/src/project_resolver.py +33 -46
- package/mcp-server/src/reflection_tools.py +206 -0
- package/mcp-server/src/rich_formatting.py +196 -0
- package/mcp-server/src/search_tools.py +826 -0
- package/mcp-server/src/server.py +140 -1715
- package/mcp-server/src/temporal_design.py +132 -0
- package/mcp-server/src/temporal_tools.py +597 -0
- package/mcp-server/src/temporal_utils.py +384 -0
- package/mcp-server/src/utils.py +150 -67
- package/package.json +11 -1
- package/scripts/add-timestamp-indexes.py +134 -0
- package/scripts/check-collections.py +29 -0
- package/scripts/debug-august-parsing.py +76 -0
- package/scripts/debug-import-single.py +91 -0
- package/scripts/debug-project-resolver.py +82 -0
- package/scripts/debug-temporal-tools.py +135 -0
- package/scripts/delta-metadata-update.py +547 -0
- package/scripts/import-conversations-unified.py +65 -6
- package/scripts/importer/utils/project_normalizer.py +22 -9
- package/scripts/precompact-hook.sh +33 -0
- package/scripts/streaming-watcher.py +1443 -0
- package/scripts/utils.py +39 -0
- package/shared/__init__.py +5 -0
- package/shared/normalization.py +54 -0

package/mcp-server/src/project_resolver.py

@@ -6,11 +6,21 @@ Handles mapping between user-friendly names and internal collection names.
 import hashlib
 import logging
 import re
+import sys
 from pathlib import Path
 from typing import List, Dict, Optional, Set
 from time import time
 from qdrant_client import QdrantClient
 
+# Import from shared module for consistent normalization
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+try:
+    from shared.normalization import normalize_project_name
+except ImportError:
+    # Fall back to creating local version if shared module not found
+    logging.warning("Could not import shared normalization module")
+    normalize_project_name = None
+
 logger = logging.getLogger(__name__)
 
 # Project discovery markers - common parent directories that indicate project roots
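
This import block is the point of the change: the MCP server and the import scripts must normalize project names identically for consistent hashing, so both sides now load normalize_project_name from the new shared/normalization.py. A minimal standalone sketch of the pattern, with an illustrative caller-side guard (the normalize wrapper below is not part of the package):

# Sketch of the shared-module-with-fallback import used above.
import logging
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent.parent))
try:
    from shared.normalization import normalize_project_name
except ImportError:
    logging.warning("Could not import shared normalization module")
    normalize_project_name = None  # callers check for None and degrade


def normalize(project_path: str) -> str:
    # Illustrative guard: prefer the shared implementation when it
    # imported, otherwise fall back to a local approximation.
    if normalize_project_name:
        return normalize_project_name(project_path)
    return Path(project_path.rstrip('/')).name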

@@ -59,6 +69,11 @@ class ProjectResolver:
         Returns:
             List of collection names that match the project
         """
+        # Special case: 'all' returns all conversation collections
+        if user_project_name == 'all':
+            collection_names = self._get_collection_names()
+            return collection_names  # Return all conv_ collections
+
         if user_project_name in self._cache:
             # Check if cache entry is still valid
             if time() - self._cache_ttl.get(user_project_name, 0) < self._cache_duration:
@@ -244,59 +259,31 @@ class ProjectResolver:
     def _normalize_project_name(self, project_path: str) -> str:
         """
         Normalize project name for consistent hashing.
-
+        Uses the shared normalization module to ensure consistency
+        with import scripts.
         """
+        # Use the shared normalization function if available
+        if normalize_project_name:
+            return normalize_project_name(project_path)
+
+        # Fallback implementation - EXACT copy of shared module
         if not project_path:
             return ""
 
-
-        project_path = project_path.rstrip('/')
+        path = Path(project_path.rstrip('/'))
 
-        #
-
-        # Split on dashes but don't convert to path separators
-        # This preserves project names that contain dashes
-        path_str = project_path[1:]  # Remove leading dash
-        path_parts = path_str.split('-')  # Split on dashes, not path separators
-
-        # Look for common project parent directories
-        project_parents = {'projects', 'code', 'Code', 'repos', 'repositories',
-                           'dev', 'Development', 'work', 'src', 'github'}
-
-        # Find the project name after a known parent directory
-        for i, part in enumerate(path_parts):
-            if part.lower() in project_parents and i + 1 < len(path_parts):
-                # Return everything after the parent directory
-                remaining = path_parts[i + 1:]
-
-                # Use segment-based approach for complex paths
-                # Return the most likely project name from remaining segments
-                if remaining:
-                    # If it's a single segment, return it
-                    if len(remaining) == 1:
-                        return remaining[0]
-                    # For multiple segments, look for project-like patterns
-                    for r in remaining:
-                        r_lower = r.lower()
-                        # Prioritize segments with project indicators
-                        if any(ind in r_lower for ind in ['app', 'service', 'project', 'api', 'client']):
-                            return r
-
-                # Otherwise join remaining parts
-                return '-'.join(remaining)
-
-        # Fallback: use the last component
-        return path_parts[-1] if path_parts else project_path
+        # Extract the final directory name
+        final_component = path.name
 
-        #
-
+        # If it's Claude's dash-separated format, extract project name
+        if final_component.startswith('-') and 'projects' in final_component:
+            # Find the last occurrence of 'projects-' to handle edge cases
+            idx = final_component.rfind('projects-')
+            if idx != -1:
+                return final_component[idx + len('projects-'):]
 
-        #
-
-        return project_path
-
-        # Otherwise extract from path
-        return path_obj.name
+        # For regular paths, just return the directory name
+        return final_component if final_component else path.parent.name
 
     def _project_matches(self, stored_project: str, target_project: str) -> bool:
         """
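
The net effect of the _normalize_project_name rewrite is easiest to see on concrete inputs. The old code split the entire flattened path on dashes and hunted for known parent directories; the new code defers to the shared module and keeps only a simple 'projects-' special case as its fallback. A self-contained sketch of that fallback, with hypothetical example paths:

from pathlib import Path

def normalize_fallback(project_path: str) -> str:
    """Mirror of the fallback branch in the diff above."""
    if not project_path:
        return ""
    path = Path(project_path.rstrip('/'))
    final_component = path.name
    # Unwrap Claude's dash-separated directory format.
    if final_component.startswith('-') and 'projects' in final_component:
        idx = final_component.rfind('projects-')
        if idx != -1:
            return final_component[idx + len('projects-'):]
    return final_component if final_component else path.parent.name

# Hypothetical inputs:
print(normalize_fallback('/Users/alice/code/my-app'))      # -> my-app
print(normalize_fallback('-Users-alice-projects-my-app'))  # -> my-app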
package/mcp-server/src/reflection_tools.py (new file)

@@ -0,0 +1,206 @@
+"""Reflection tools for Claude Self Reflect MCP server."""
+
+import os
+import json
+import hashlib
+import logging
+from typing import Optional, List, Dict, Any
+from datetime import datetime, timezone
+from pathlib import Path
+import uuid
+
+from fastmcp import Context
+from pydantic import Field
+from qdrant_client import AsyncQdrantClient
+from qdrant_client.models import PointStruct, VectorParams, Distance
+
+logger = logging.getLogger(__name__)
+
+
+class ReflectionTools:
+    """Handles reflection storage and conversation retrieval operations."""
+
+    def __init__(
+        self,
+        qdrant_client: AsyncQdrantClient,
+        qdrant_url: str,
+        get_embedding_manager,
+        normalize_project_name
+    ):
+        """Initialize reflection tools with dependencies."""
+        self.qdrant_client = qdrant_client
+        self.qdrant_url = qdrant_url
+        self.get_embedding_manager = get_embedding_manager
+        self.normalize_project_name = normalize_project_name
+
+    async def store_reflection(
+        self,
+        ctx: Context,
+        content: str,
+        tags: List[str] = []
+    ) -> str:
+        """Store an important insight or reflection for future reference."""
+
+        await ctx.debug(f"Storing reflection with {len(tags)} tags")
+
+        try:
+            # Determine collection name based on embedding type
+            embedding_manager = self.get_embedding_manager()
+            embedding_type = "local" if embedding_manager.prefer_local else "voyage"
+            collection_name = f"reflections_{embedding_type}"
+
+            # Ensure reflections collection exists
+            try:
+                await self.qdrant_client.get_collection(collection_name)
+                await ctx.debug(f"Using existing {collection_name} collection")
+            except Exception:
+                # Collection doesn't exist, create it
+                await ctx.debug(f"Creating {collection_name} collection")
+
+                # Determine embedding dimensions
+                embedding_dim = embedding_manager.get_vector_dimension()
+
+                await self.qdrant_client.create_collection(
+                    collection_name=collection_name,
+                    vectors_config=VectorParams(
+                        size=embedding_dim,
+                        distance=Distance.COSINE
+                    )
+                )
+
+            # Generate embedding for the reflection
+            embedding_manager = self.get_embedding_manager()
+            embedding = await embedding_manager.generate_embedding(content)
+
+            # Create unique ID
+            reflection_id = hashlib.md5(f"{content}{datetime.now().isoformat()}".encode()).hexdigest()
+
+            # Prepare metadata
+            metadata = {
+                "content": content,
+                "tags": tags,
+                "timestamp": datetime.now(timezone.utc).isoformat(),
+                "type": "reflection"
+            }
+
+            # Store in Qdrant
+            await self.qdrant_client.upsert(
+                collection_name=collection_name,
+                points=[
+                    PointStruct(
+                        id=reflection_id,
+                        vector=embedding,
+                        payload=metadata
+                    )
+                ]
+            )
+
+            await ctx.debug(f"Stored reflection with ID {reflection_id}")
+
+            return f"""Reflection stored successfully.
+ID: {reflection_id}
+Tags: {', '.join(tags) if tags else 'none'}
+Timestamp: {metadata['timestamp']}"""
+
+        except Exception as e:
+            logger.error(f"Failed to store reflection: {e}", exc_info=True)
+            return f"Failed to store reflection: {str(e)}"
+
+    async def get_full_conversation(
+        self,
+        ctx: Context,
+        conversation_id: str,
+        project: Optional[str] = None
+    ) -> str:
+        """Get the full JSONL conversation file path for a conversation ID.
+        This allows agents to read complete conversations instead of truncated excerpts."""
+
+        await ctx.debug(f"Getting full conversation for ID: {conversation_id}, project: {project}")
+
+        try:
+            # Base path for conversations
+            base_path = Path.home() / '.claude' / 'projects'
+
+            # If project is specified, try to find it in that project
+            if project:
+                # Normalize project name for path matching
+                project_normalized = self.normalize_project_name(project)
+
+                # Look for project directories that match
+                for project_dir in base_path.glob('*'):
+                    if project_normalized in project_dir.name.lower():
+                        # Look for JSONL files in this project
+                        for jsonl_file in project_dir.glob('*.jsonl'):
+                            # Check if filename matches conversation_id (with or without .jsonl)
+                            if conversation_id in jsonl_file.stem or conversation_id == jsonl_file.stem:
+                                await ctx.debug(f"Found conversation by filename in {jsonl_file}")
+                                return f"""<conversation_file>
+<conversation_id>{conversation_id}</conversation_id>
+<file_path>{str(jsonl_file)}</file_path>
+<project>{project_dir.name}</project>
+<message>Use the Read tool with this file path to read the complete conversation.</message>
+</conversation_file>"""
+
+            # If not found in specific project or no project specified, search all
+            await ctx.debug("Searching all projects for conversation")
+            for project_dir in base_path.glob('*'):
+                for jsonl_file in project_dir.glob('*.jsonl'):
+                    # Check if filename matches conversation_id (with or without .jsonl)
+                    if conversation_id in jsonl_file.stem or conversation_id == jsonl_file.stem:
+                        await ctx.debug(f"Found conversation by filename in {jsonl_file}")
+                        return f"""<conversation_file>
+<conversation_id>{conversation_id}</conversation_id>
+<file_path>{str(jsonl_file)}</file_path>
+<project>{project_dir.name}</project>
+<message>Use the Read tool with this file path to read the complete conversation.</message>
+</conversation_file>"""
+
+            # Not found
+            return f"""<conversation_file>
+<error>Conversation ID '{conversation_id}' not found in any project.</error>
+<suggestion>The conversation may not have been imported yet, or the ID may be incorrect.</suggestion>
+</conversation_file>"""
+
+        except Exception as e:
+            logger.error(f"Failed to get conversation file: {e}", exc_info=True)
+            return f"""<conversation_file>
+<error>Failed to locate conversation: {str(e)}</error>
+</conversation_file>"""
+
+
+def register_reflection_tools(
+    mcp,
+    qdrant_client: AsyncQdrantClient,
+    qdrant_url: str,
+    get_embedding_manager,
+    normalize_project_name
+):
+    """Register reflection tools with the MCP server."""
+
+    tools = ReflectionTools(
+        qdrant_client,
+        qdrant_url,
+        get_embedding_manager,
+        normalize_project_name
+    )
+
+    @mcp.tool()
+    async def store_reflection(
+        ctx: Context,
+        content: str = Field(description="The insight or reflection to store"),
+        tags: List[str] = Field(default=[], description="Tags to categorize this reflection")
+    ) -> str:
+        """Store an important insight or reflection for future reference."""
+        return await tools.store_reflection(ctx, content, tags)
+
+    @mcp.tool()
+    async def get_full_conversation(
+        ctx: Context,
+        conversation_id: str = Field(description="The conversation ID from search results (cid)"),
+        project: Optional[str] = Field(default=None, description="Optional project name to help locate the file")
+    ) -> str:
+        """Get the full JSONL conversation file path for a conversation ID.
+        This allows agents to read complete conversations instead of truncated excerpts."""
+        return await tools.get_full_conversation(ctx, conversation_id, project)
+
+    logger.info("Reflection tools registered successfully")
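
The new module exposes a register_reflection_tools hook instead of importing server state directly, so server.py can construct the Qdrant client and embedding manager once and inject them. A hypothetical wiring sketch; the stub embedding manager and the str.lower normalizer are stand-ins for the package's real embedding_manager and shared.normalization, and the 384 dimension is an assumption:

# Hypothetical wiring sketch; import path is illustrative (the package
# resolves these modules within mcp-server/src).
from fastmcp import FastMCP
from qdrant_client import AsyncQdrantClient

from reflection_tools import register_reflection_tools


class StubEmbeddingManager:
    """Stand-in exposing only the attributes reflection_tools touches."""
    prefer_local = True  # selects the 'reflections_local' collection

    def get_vector_dimension(self) -> int:
        return 384  # assumed dimension of a small local model

    async def generate_embedding(self, text: str):
        return [0.0] * self.get_vector_dimension()  # placeholder vector


mcp = FastMCP("claude-self-reflect")
QDRANT_URL = "http://localhost:6333"

register_reflection_tools(
    mcp,
    qdrant_client=AsyncQdrantClient(url=QDRANT_URL),
    qdrant_url=QDRANT_URL,
    get_embedding_manager=lambda: StubEmbeddingManager(),
    normalize_project_name=str.lower,  # stand-in for shared.normalization
)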
package/mcp-server/src/rich_formatting.py (new file)

@@ -0,0 +1,196 @@
+"""Rich formatting for search results with emojis and enhanced display."""
+
+import json
+import time
+from datetime import datetime, timezone
+from typing import List, Dict, Any, Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def format_search_results_rich(
+    results: List[Dict],
+    query: str,
+    target_project: str,
+    collections_searched: int,
+    timing_info: Dict[str, float],
+    start_time: float,
+    brief: bool = False,
+    include_raw: bool = False,
+    indexing_status: Optional[Dict] = None
+) -> str:
+    """Format search results with rich formatting including emojis and performance metrics."""
+
+    # Initialize upfront summary
+    upfront_summary = ""
+
+    # Show result summary with emojis
+    if results:
+        score_info = "high" if results[0]['score'] >= 0.85 else "good" if results[0]['score'] >= 0.75 else "partial"
+        upfront_summary += f"🎯 RESULTS: {len(results)} matches ({score_info} relevance, top score: {results[0]['score']:.3f})\n"
+
+        # Show performance metrics
+        total_time = time.time() - start_time
+        indexing_info = ""
+        if indexing_status and indexing_status.get("percentage", 100) < 100.0:
+            indexing_info = f" | 📊 {indexing_status['indexed_conversations']}/{indexing_status['total_conversations']} indexed"
+        upfront_summary += f"⚡ PERFORMANCE: {int(total_time * 1000)}ms ({collections_searched} collections searched{indexing_info})\n"
+    else:
+        upfront_summary += f"❌ NO RESULTS: No conversations found matching '{query}'\n"
+
+    # Start XML format with upfront summary
+    result_text = upfront_summary + "\n<search>\n"
+
+    # Add indexing status if not fully baselined
+    if indexing_status and indexing_status.get("percentage", 100) < 95.0:
+        result_text += f'  <info status="indexing" progress="{indexing_status["percentage"]:.1f}%" backlog="{indexing_status.get("backlog_count", 0)}">\n'
+        result_text += f'    <message>📊 Indexing: {indexing_status["indexed_conversations"]}/{indexing_status["total_conversations"]} conversations ({indexing_status["percentage"]:.1f}% complete)</message>\n'
+        result_text += f"  </info>\n"
+
+    # Add high-level result summary
+    if results:
+        # Count time-based results
+        now = datetime.now(timezone.utc)
+        today_count = 0
+        yesterday_count = 0
+        week_count = 0
+
+        for result in results:
+            timestamp_str = result.get('timestamp', '')
+            if timestamp_str:
+                try:
+                    # Clean timestamp
+                    timestamp_clean = timestamp_str.replace('Z', '+00:00') if timestamp_str.endswith('Z') else timestamp_str
+                    timestamp_dt = datetime.fromisoformat(timestamp_clean)
+                    if timestamp_dt.tzinfo is None:
+                        timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
+
+                    days_ago = (now - timestamp_dt).days
+                    if days_ago == 0:
+                        today_count += 1
+                    elif days_ago == 1:
+                        yesterday_count += 1
+                    if days_ago <= 7:
+                        week_count += 1
+                except:
+                    pass
+
+        # Compact summary with key info
+        time_info = ""
+        if today_count > 0:
+            time_info = f"{today_count} today"
+        elif yesterday_count > 0:
+            time_info = f"{yesterday_count} yesterday"
+        elif week_count > 0:
+            time_info = f"{week_count} this week"
+        else:
+            time_info = "older results"
+
+        score_info = "high" if results[0]['score'] >= 0.85 else "good" if results[0]['score'] >= 0.75 else "partial"
+
+        result_text += f'  <summary count="{len(results)}" relevance="{score_info}" recency="{time_info}" top-score="{results[0]["score"]:.3f}">\n'
+
+        # Short preview of top result
+        top_excerpt = results[0].get('excerpt', results[0].get('content', ''))[:100].strip()
+        if '...' not in top_excerpt:
+            top_excerpt += "..."
+        result_text += f'    <preview>{top_excerpt}</preview>\n'
+        result_text += f"  </summary>\n"
+    else:
+        result_text += f"  <result-summary>\n"
+        result_text += f"    <headline>No matches found</headline>\n"
+        result_text += f"    <relevance>No conversations matched your query</relevance>\n"
+        result_text += f"  </result-summary>\n"
+
+    # Add metadata
+    result_text += f"  <meta>\n"
+    result_text += f"    <q>{query}</q>\n"
+    result_text += f"    <scope>{target_project if target_project != 'all' else 'all'}</scope>\n"
+    result_text += f"    <count>{len(results)}</count>\n"
+    if results:
+        result_text += f"    <range>{results[-1]['score']:.3f}-{results[0]['score']:.3f}</range>\n"
+
+    # Add performance metadata
+    total_time = time.time() - start_time
+    result_text += f"    <perf>\n"
+    result_text += f"      <ttl>{int(total_time * 1000)}</ttl>\n"
+    result_text += f"      <emb>{int((timing_info.get('embedding_end', 0) - timing_info.get('embedding_start', 0)) * 1000)}</emb>\n"
+    result_text += f"      <srch>{int((timing_info.get('search_all_end', 0) - timing_info.get('search_all_start', 0)) * 1000)}</srch>\n"
+    result_text += f"      <cols>{collections_searched}</cols>\n"
+    result_text += f"    </perf>\n"
+    result_text += f"  </meta>\n"
+
+    # Add individual results
+    result_text += "  <results>\n"
+    for i, result in enumerate(results):
+        result_text += f'    <r rank="{i+1}">\n'
+        result_text += f"      <s>{result['score']:.3f}</s>\n"
+        result_text += f"      <p>{result.get('project_name', 'unknown')}</p>\n"
+
+        # Calculate relative time
+        timestamp_str = result.get('timestamp', '')
+        if timestamp_str:
+            try:
+                timestamp_clean = timestamp_str.replace('Z', '+00:00') if timestamp_str.endswith('Z') else timestamp_str
+                timestamp_dt = datetime.fromisoformat(timestamp_clean)
+                if timestamp_dt.tzinfo is None:
+                    timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
+                now = datetime.now(timezone.utc)
+                days_ago = (now - timestamp_dt).days
+                if days_ago == 0:
+                    time_str = "today"
+                elif days_ago == 1:
+                    time_str = "yesterday"
+                else:
+                    time_str = f"{days_ago}d"
+                result_text += f"      <t>{time_str}</t>\n"
+            except:
+                result_text += f"      <t>unknown</t>\n"
+
+        # Get excerpt/content
+        excerpt = result.get('excerpt', result.get('content', ''))
+
+        if not brief and excerpt:
+            # Extract title from first line of excerpt
+            excerpt_lines = excerpt.split('\n')
+            title = excerpt_lines[0][:80] + "..." if len(excerpt_lines[0]) > 80 else excerpt_lines[0]
+            result_text += f"      <title>{title}</title>\n"
+
+            # Key finding - summarize the main point
+            key_finding = excerpt[:100] + "..." if len(excerpt) > 100 else excerpt
+            result_text += f"      <key-finding>{key_finding.strip()}</key-finding>\n"
+
+        # Always include excerpt
+        if brief:
+            brief_excerpt = excerpt[:100] + "..." if len(excerpt) > 100 else excerpt
+            result_text += f"      <excerpt>{brief_excerpt.strip()}</excerpt>\n"
+        else:
+            result_text += f"      <excerpt><![CDATA[{excerpt}]]></excerpt>\n"
+
+        # Add conversation ID if present
+        if result.get('conversation_id'):
+            result_text += f"      <cid>{result['conversation_id']}</cid>\n"
+
+        # Include raw data if requested
+        if include_raw and result.get('raw_payload'):
+            result_text += "      <raw>\n"
+            payload = result['raw_payload']
+            result_text += f"        <txt><![CDATA[{payload.get('text', '')}]]></txt>\n"
+            result_text += f"        <id>{result.get('id', '')}</id>\n"
+            result_text += "      </raw>\n"
+
+        # Add metadata fields if present
+        if result.get('files_analyzed'):
+            result_text += f"      <files>{', '.join(result['files_analyzed'][:5])}</files>\n"
+        if result.get('tools_used'):
+            result_text += f"      <tools>{', '.join(result['tools_used'][:5])}</tools>\n"
+        if result.get('concepts'):
+            result_text += f"      <concepts>{', '.join(result['concepts'][:5])}</concepts>\n"
+
+        result_text += "    </r>\n"
+
+    result_text += "  </results>\n"
+    result_text += "</search>\n"
+
+    return result_text
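
To make the formatter's output shape concrete, here is an illustrative call; every value in sample_results and timing below is fabricated, chosen only to exercise the fields format_search_results_rich reads:

# Illustrative usage with made-up sample data; import path assumes the
# module is on sys.path.
import time
from datetime import datetime, timezone

from rich_formatting import format_search_results_rich

start = time.time()
sample_results = [{
    'score': 0.91,                                  # hypothetical
    'project_name': 'my-app',                       # hypothetical
    'timestamp': datetime.now(timezone.utc).isoformat(),
    'excerpt': 'Fixed the Qdrant timeout by raising the client limit.',
    'conversation_id': 'abc123',                    # hypothetical
}]
timing = {
    'embedding_start': start, 'embedding_end': start + 0.012,
    'search_all_start': start + 0.012, 'search_all_end': start + 0.045,
}

print(format_search_results_rich(
    sample_results,
    query="qdrant timeout",
    target_project="my-app",
    collections_searched=3,
    timing_info=timing,
    start_time=start,
))

Note that the plain-text emoji summary precedes the <search> XML block, so a calling agent can surface the one-line verdict without parsing the XML.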