crackerjack 0.38.15__py3-none-any.whl → 0.39.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crackerjack may be problematic.
- crackerjack/__main__.py +134 -13
- crackerjack/agents/__init__.py +2 -0
- crackerjack/agents/base.py +1 -0
- crackerjack/agents/claude_code_bridge.py +319 -0
- crackerjack/agents/coordinator.py +6 -3
- crackerjack/agents/dry_agent.py +187 -3
- crackerjack/agents/enhanced_coordinator.py +279 -0
- crackerjack/agents/enhanced_proactive_agent.py +185 -0
- crackerjack/agents/performance_agent.py +324 -3
- crackerjack/agents/refactoring_agent.py +254 -5
- crackerjack/agents/semantic_agent.py +479 -0
- crackerjack/agents/semantic_helpers.py +356 -0
- crackerjack/cli/options.py +27 -0
- crackerjack/cli/semantic_handlers.py +290 -0
- crackerjack/core/async_workflow_orchestrator.py +9 -8
- crackerjack/core/enhanced_container.py +1 -1
- crackerjack/core/phase_coordinator.py +1 -1
- crackerjack/core/proactive_workflow.py +1 -1
- crackerjack/core/workflow_orchestrator.py +9 -6
- crackerjack/documentation/ai_templates.py +1 -1
- crackerjack/interactive.py +1 -1
- crackerjack/mcp/server_core.py +2 -0
- crackerjack/mcp/tools/__init__.py +2 -0
- crackerjack/mcp/tools/semantic_tools.py +584 -0
- crackerjack/models/semantic_models.py +271 -0
- crackerjack/plugins/loader.py +2 -2
- crackerjack/py313.py +4 -1
- crackerjack/services/embeddings.py +444 -0
- crackerjack/services/quality_intelligence.py +11 -1
- crackerjack/services/smart_scheduling.py +1 -1
- crackerjack/services/vector_store.py +681 -0
- crackerjack/slash_commands/run.md +84 -50
- {crackerjack-0.38.15.dist-info → crackerjack-0.39.0.dist-info}/METADATA +7 -2
- {crackerjack-0.38.15.dist-info → crackerjack-0.39.0.dist-info}/RECORD +37 -27
- {crackerjack-0.38.15.dist-info → crackerjack-0.39.0.dist-info}/WHEEL +0 -0
- {crackerjack-0.38.15.dist-info → crackerjack-0.39.0.dist-info}/entry_points.txt +0 -0
- {crackerjack-0.38.15.dist-info → crackerjack-0.39.0.dist-info}/licenses/LICENSE +0 -0
crackerjack/agents/semantic_helpers.py
ADDED
@@ -0,0 +1,356 @@
+"""Semantic enhancement helpers for AI agents.
+
+This module provides shared utilities for integrating semantic search
+capabilities into existing agents, enabling them to discover related
+code patterns and make more informed decisions.
+"""
+
+import typing as t
+from dataclasses import dataclass
+from pathlib import Path
+
+from ..models.semantic_models import SearchQuery, SemanticConfig
+from ..services.vector_store import VectorStore
+
+
+@dataclass
+class SemanticInsight:
+    """Container for semantic analysis results."""
+
+    query: str
+    related_patterns: list[dict[str, t.Any]]
+    similarity_threshold: float
+    total_matches: int
+    high_confidence_matches: int
+    session_id: str | None = None
+    timestamp: str | None = None
+
+    def to_session_data(self) -> dict[str, t.Any]:
+        """Convert insight to session-storable data."""
+        return {
+            "query": self.query,
+            "related_patterns": self.related_patterns[:3],  # Limit for storage
+            "similarity_threshold": self.similarity_threshold,
+            "total_matches": self.total_matches,
+            "high_confidence_matches": self.high_confidence_matches,
+            "session_id": self.session_id,
+            "timestamp": self.timestamp,
+        }
+
+
+class SemanticEnhancer:
+    """Helper class to add semantic capabilities to existing agents."""
+
+    def __init__(self, project_path: Path) -> None:
+        self.project_path = project_path
+        self._vector_store: VectorStore | None = None
+        self._config = self._create_semantic_config()
+        self._session_insights: dict[str, SemanticInsight] = {}
+
+    @staticmethod
+    def _create_semantic_config() -> SemanticConfig:
+        """Create semantic search configuration."""
+        return SemanticConfig(
+            embedding_model="sentence-transformers/all-MiniLM-L6-v2",
+            chunk_size=512,
+            chunk_overlap=50,
+            max_search_results=10,
+            similarity_threshold=0.7,
+            embedding_dimension=384,
+        )
+
+    def _get_vector_store(self) -> VectorStore:
+        """Get or create vector store instance."""
+        if self._vector_store is None:
+            db_path = self._get_persistent_db_path()
+            self._vector_store = VectorStore(self._config, db_path=db_path)
+        return self._vector_store
+
+    def _get_persistent_db_path(self) -> Path:
+        """Get the path to the persistent semantic search database."""
+        db_path = self.project_path / ".crackerjack" / "semantic_index.db"
+        db_path.parent.mkdir(exist_ok=True)
+        return db_path
+
+    async def find_similar_patterns(
+        self,
+        query: str,
+        current_file: Path | None = None,
+        min_similarity: float = 0.6,
+        max_results: int = 5,
+    ) -> SemanticInsight:
+        """Find similar code patterns using semantic search.
+
+        Args:
+            query: Search query (code snippet, function signature, or description)
+            current_file: File to exclude from results (to avoid self-matches)
+            min_similarity: Minimum similarity threshold (0.0-1.0)
+            max_results: Maximum number of results to return
+
+        Returns:
+            SemanticInsight with related patterns and analysis
+        """
+        vector_store = self._get_vector_store()
+
+        search_query = SearchQuery(
+            query=query,
+            max_results=max_results,
+            min_similarity=min_similarity,
+            file_types=["py"],
+        )
+
+        try:
+            results = vector_store.search(search_query)
+
+            # Filter out results from current file if specified
+            if current_file:
+                results = [
+                    result for result in results if result.file_path != current_file
+                ]
+
+            # Categorize results by confidence
+            high_confidence = [
+                result for result in results if result.similarity_score >= 0.8
+            ]
+
+            # Convert to pattern format
+            patterns = [
+                {
+                    "file_path": str(result.file_path),
+                    "content": result.content[:300],  # Truncate for readability
+                    "similarity_score": result.similarity_score,
+                    "lines": f"{result.start_line}-{result.end_line}",
+                    "file_type": result.file_type,
+                    "confidence_level": "high"
+                    if result.similarity_score >= 0.8
+                    else "medium",
+                }
+                for result in results
+            ]
+
+            return SemanticInsight(
+                query=query,
+                related_patterns=patterns,
+                similarity_threshold=min_similarity,
+                total_matches=len(patterns),
+                high_confidence_matches=len(high_confidence),
+            )
+
+        except Exception:
+            # Return empty insight on error
+            return SemanticInsight(
+                query=query,
+                related_patterns=[],
+                similarity_threshold=min_similarity,
+                total_matches=0,
+                high_confidence_matches=0,
+            )
+
+    async def find_duplicate_patterns(
+        self, code_snippet: str, current_file: Path | None = None
+    ) -> SemanticInsight:
+        """Find potential code duplicates using semantic similarity.
+
+        Args:
+            code_snippet: Code snippet to find duplicates for
+            current_file: File to exclude from results
+
+        Returns:
+            SemanticInsight focused on potential duplicates
+        """
+        insight = await self.find_similar_patterns(
+            query=code_snippet,
+            current_file=current_file,
+            min_similarity=0.75,  # Higher threshold for duplicates
+            max_results=8,
+        )
+
+        # Store insight for session continuity
+        await self.store_insight_to_session(insight, "DuplicateDetection")
+        return insight
+
+    async def find_refactoring_opportunities(
+        self, function_signature: str, current_file: Path | None = None
+    ) -> SemanticInsight:
+        """Find similar functions for refactoring opportunities.
+
+        Args:
+            function_signature: Function signature or description
+            current_file: File to exclude from results
+
+        Returns:
+            SemanticInsight focused on refactoring opportunities
+        """
+        return await self.find_similar_patterns(
+            query=function_signature,
+            current_file=current_file,
+            min_similarity=0.6,
+            max_results=6,
+        )
+
+    async def find_implementation_examples(
+        self, pattern_description: str, current_file: Path | None = None
+    ) -> SemanticInsight:
+        """Find implementation examples for a given pattern.
+
+        Args:
+            pattern_description: Description of the pattern to find
+            current_file: File to exclude from results
+
+        Returns:
+            SemanticInsight with implementation examples
+        """
+        return await self.find_similar_patterns(
+            query=pattern_description,
+            current_file=current_file,
+            min_similarity=0.5,  # Lower threshold for broader examples
+            max_results=10,
+        )
+
+    def enhance_recommendations(
+        self,
+        base_recommendations: list[str],
+        semantic_insight: SemanticInsight,
+    ) -> list[str]:
+        """Enhance existing recommendations with semantic insights.
+
+        Args:
+            base_recommendations: Original agent recommendations
+            semantic_insight: Semantic analysis results
+
+        Returns:
+            Enhanced recommendations including semantic insights
+        """
+        enhanced = base_recommendations.copy()
+
+        if semantic_insight.total_matches > 0:
+            # Add semantic-based recommendations
+            if semantic_insight.high_confidence_matches > 0:
+                enhanced.append(
+                    f"Semantic analysis found {semantic_insight.high_confidence_matches} "
+                    f"highly similar patterns - consider consolidation"
+                )
+
+            # Add pattern discovery insights
+            if semantic_insight.total_matches >= 3:
+                enhanced.append(
+                    f"Found {semantic_insight.total_matches} related patterns "
+                    f"across codebase - review for consistency"
+                )
+
+            # Add specific file references for high-confidence matches
+            high_conf_files = {
+                pattern["file_path"]
+                for pattern in semantic_insight.related_patterns
+                if pattern["confidence_level"] == "high"
+            }
+
+            if high_conf_files:
+                file_list = ", ".join(Path(f).name for f in sorted(high_conf_files)[:3])
+                if len(high_conf_files) > 3:
+                    file_list += f" (+{len(high_conf_files) - 3} more)"
+
+                enhanced.append(f"Similar implementations found in: {file_list}")
+
+        return enhanced
+
+    def get_semantic_context_summary(self, insight: SemanticInsight) -> str:
+        """Generate a summary of semantic context for logging.
+
+        Args:
+            insight: Semantic analysis results
+
+        Returns:
+            Human-readable summary string
+        """
+        if insight.total_matches == 0:
+            return "No similar patterns found in semantic analysis"
+
+        high_conf_pct = (
+            insight.high_confidence_matches / insight.total_matches * 100
+            if insight.total_matches > 0
+            else 0
+        )
+
+        return (
+            f"Semantic context: {insight.total_matches} similar patterns found "
+            f"({insight.high_confidence_matches} high-confidence, {high_conf_pct:.0f}%)"
+        )
+
+    async def store_insight_to_session(
+        self, insight: SemanticInsight, agent_type: str
+    ) -> bool:
+        """Store semantic insight to session for continuity.
+
+        Args:
+            insight: Semantic insight to store
+            agent_type: Type of agent storing the insight
+
+        Returns:
+            True if stored successfully, False otherwise
+        """
+        try:
+            # Create a unique session key for this insight
+            session_key = f"{agent_type}_{hash(insight.query)}"
+
+            # Store in local cache
+            self._session_insights[session_key] = insight
+
+            return True
+        except Exception:
+            return False
+
+
+def create_semantic_enhancer(project_path: Path) -> SemanticEnhancer:
+    """Factory function to create a semantic enhancer.
+
+    Args:
+        project_path: Path to the project root
+
+    Returns:
+        Configured SemanticEnhancer instance
+    """
+    return SemanticEnhancer(project_path)
+
+
+async def get_session_enhanced_recommendations(
+    base_recommendations: list[str], agent_type: str, project_path: Path
+) -> list[str]:
+    """Get enhanced recommendations based on session insights.
+
+    Args:
+        base_recommendations: Original recommendations
+        agent_type: Type of agent requesting enhancements
+        project_path: Path to the project root
+
+    Returns:
+        Enhanced recommendations with session context
+    """
+    try:
+        enhancer = create_semantic_enhancer(project_path)
+
+        # Try to find stored insights for this agent type
+        session_insights = [
+            insight
+            for key, insight in enhancer._session_insights.items()
+            if key.startswith(agent_type)
+        ]
+
+        if not session_insights:
+            return base_recommendations
+
+        enhanced = base_recommendations.copy()
+
+        # Add session-based recommendations
+        total_patterns = sum(insight.total_matches for insight in session_insights)
+        if total_patterns > 5:
+            enhanced.append(
+                f"Session context: {total_patterns} similar patterns found "
+                "across recent analyses - consider broader refactoring"
+            )
+
+        return enhanced

+    except Exception:
+        # Fallback to original recommendations on error
+        return base_recommendations
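
The enhancer above is meant to be driven from an agent's async code path. A minimal sketch of that flow (the calling script, the query string, and the seed recommendation below are illustrative, not part of the release):

import asyncio
from pathlib import Path

from crackerjack.agents.semantic_helpers import create_semantic_enhancer


async def main() -> None:
    enhancer = create_semantic_enhancer(Path.cwd())

    # Duplicate scan uses the stricter 0.75 similarity floor defined above.
    insight = await enhancer.find_duplicate_patterns(
        "def cosine_similarity(a: list[float], b: list[float]) -> float: ..."
    )

    # Fold the semantic findings into an agent's own recommendation list.
    recommendations = enhancer.enhance_recommendations(
        ["Extract a shared similarity helper"], insight
    )
    print(enhancer.get_semantic_context_summary(insight))
    for rec in recommendations:
        print("-", rec)


asyncio.run(main())

One caveat visible in the source: _session_insights lives on the instance, and get_session_enhanced_recommendations builds a fresh enhancer via create_semantic_enhancer, so it can only see insights stored during that same call; cross-run continuity would require sharing one SemanticEnhancer instance.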
crackerjack/cli/options.py
CHANGED
@@ -137,6 +137,12 @@ class Options(BaseModel):
     quick: bool = False
     thorough: bool = False
     clear_cache: bool = False
+
+    # Semantic search options
+    index: str | None = None
+    search: str | None = None
+    semantic_stats: bool = False
+    remove_from_index: str | None = None
     cache_stats: bool = False

     # Semantic field names (new primary interface)
@@ -924,6 +930,27 @@ CLI_OPTIONS = {
         "--refresh-cache",
         help="Refresh pre-commit cache to ensure fresh environment.",
     ),
+    # Semantic search options
+    "index": typer.Option(
+        None,
+        "--index",
+        help="Index a file or directory for semantic search (e.g., --index path/to/file.py).",
+    ),
+    "search": typer.Option(
+        None,
+        "--search",
+        help="Perform semantic search across indexed files (e.g., --search 'similarity calculation').",
+    ),
+    "semantic_stats": typer.Option(
+        False,
+        "--semantic-stats",
+        help="Display statistics about the semantic search index.",
+    ),
+    "remove_from_index": typer.Option(
+        None,
+        "--remove-from-index",
+        help="Remove a file from the semantic search index (e.g., --remove-from-index path/to/file.py).",
+    ),
 }

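
Per the help text, the new flags are invoked as crackerjack --index path/to/file.py, crackerjack --search 'similarity calculation', crackerjack --semantic-stats, and crackerjack --remove-from-index path/to/file.py. A short sketch driving the matching handlers from crackerjack/cli/semantic_handlers.py (shown next) directly; the flag-to-handler dispatch itself lives in __main__.py and is assumed here, and the paths are illustrative:

from crackerjack.cli.semantic_handlers import (
    handle_remove_from_semantic_index,
    handle_semantic_index,
    handle_semantic_search,
    handle_semantic_stats,
)

# Presumably equivalent to: crackerjack --index crackerjack/services
handle_semantic_index("crackerjack/services")

# Presumably equivalent to: crackerjack --search 'similarity calculation'
handle_semantic_search("similarity calculation")

# Presumably equivalent to: crackerjack --semantic-stats
handle_semantic_stats()

# Presumably equivalent to: crackerjack --remove-from-index crackerjack/services/embeddings.py
handle_remove_from_semantic_index("crackerjack/services/embeddings.py")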
crackerjack/cli/semantic_handlers.py
ADDED
@@ -0,0 +1,290 @@
+"""CLI handlers for semantic search operations."""
+
+from pathlib import Path
+from textwrap import dedent
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+
+from crackerjack.models.semantic_models import SearchQuery, SemanticConfig
+from crackerjack.services.vector_store import VectorStore
+
+console = Console()
+
+
+def handle_semantic_index(file_path: str) -> None:
+    """Handle indexing a file for semantic search.
+
+    Args:
+        file_path: Path to the file or directory to index
+    """
+    try:
+        console.print(f"[cyan]Indexing file for semantic search:[/cyan] {file_path}")
+
+        # Validate path
+        path_obj = Path(file_path)
+        if not path_obj.exists():
+            console.print(f"[red]Error:[/red] File does not exist: {file_path}")
+            return
+
+        # Create default configuration
+        config = SemanticConfig(
+            embedding_model="sentence-transformers/all-MiniLM-L6-v2",
+            chunk_size=512,
+            chunk_overlap=50,
+            max_search_results=10,
+            similarity_threshold=0.7,
+            embedding_dimension=384,
+        )
+
+        # Initialize vector store with persistent database
+        db_path = Path.cwd() / ".crackerjack" / "semantic_index.db"
+        db_path.parent.mkdir(exist_ok=True)
+        vector_store = VectorStore(config, db_path=db_path)
+
+        if path_obj.is_file():
+            # Index single file
+            embeddings = vector_store.index_file(path_obj)
+            console.print(
+                f"[green]✅ Successfully indexed {len(embeddings)} chunks from {path_obj.name}[/green]"
+            )
+        else:
+            # Index directory (recursively)
+            total_files = 0
+            total_chunks = 0
+
+            for file in path_obj.rglob("*.py"):  # Index Python files
+                try:
+                    embeddings = vector_store.index_file(file)
+                    total_files += 1
+                    total_chunks += len(embeddings)
+                    console.print(
+                        f"[dim]Indexed {len(embeddings)} chunks from {file.relative_to(path_obj)}[/dim]"
+                    )
+                except Exception as e:
+                    console.print(
+                        f"[yellow]Warning:[/yellow] Failed to index {file}: {e}"
+                    )
+
+            console.print(
+                f"[green]✅ Successfully indexed {total_files} files with {total_chunks} total chunks[/green]"
+            )
+
+        # Show index stats
+        stats = vector_store.get_stats()
+        console.print(
+            f"[cyan]Index now contains:[/cyan] {stats.total_files} files, {stats.total_chunks} chunks"
+        )
+
+    except Exception as e:
+        console.print(
+            f"[red]Error indexing file:[/red] {str(e).replace('[', '\\[').replace(']', '\\]')}"
+        )
+
+
+def handle_semantic_search(query: str) -> None:
+    """Handle semantic search across indexed files.
+
+    Args:
+        query: The search query text
+    """
+    try:
+        console.print(f"[cyan]Performing semantic search for:[/cyan] {query}")
+
+        # Create default configuration
+        config = SemanticConfig(
+            embedding_model="sentence-transformers/all-MiniLM-L6-v2",
+            chunk_size=512,
+            chunk_overlap=50,
+            max_search_results=10,
+            similarity_threshold=0.7,
+            embedding_dimension=384,
+        )
+
+        # Create search query
+        search_query = SearchQuery(
+            query=query,
+            max_results=10,
+            min_similarity=0.3,  # Lower threshold for CLI to show more results
+        )
+
+        # Initialize vector store with persistent database
+        db_path = Path.cwd() / ".crackerjack" / "semantic_index.db"
+        db_path.parent.mkdir(exist_ok=True)
+        vector_store = VectorStore(config, db_path=db_path)
+        results = vector_store.search(search_query)
+
+        if not results:
+            console.print(
+                "[yellow]No results found. Try a different search term or index more files.[/yellow]"
+            )
+            return
+
+        # Display results in a table
+        table = Table(title=f"Semantic Search Results for: '{query}'")
+        table.add_column("File", style="cyan", no_wrap=True)
+        table.add_column("Lines", style="magenta", justify="center")
+        table.add_column("Score", style="green", justify="center")
+        table.add_column("Content Preview", style="white")
+
+        for result in results:
+            # Truncate content for display
+            content_preview = (
+                result.content[:80] + "..."
+                if len(result.content) > 80
+                else result.content
+            )
+            content_preview = content_preview.replace("\n", " ").strip()
+
+            # Escape Rich markup in content to prevent rendering issues
+            content_preview = content_preview.replace("[", "\\[").replace("]", "\\]")
+
+            table.add_row(
+                str(result.file_path.name),
+                f"{result.start_line}-{result.end_line}",
+                f"{result.similarity_score:.3f}",
+                content_preview,
+            )
+
+        console.print(table)
+
+        # Show detailed content for top result
+        if results:
+            top_result = results[0]
+            # Escape Rich markup in the detailed content
+            escaped_content = (
+                top_result.content.strip().replace("[", "\\[").replace("]", "\\]")
+            )
+            console.print(
+                Panel(
+                    dedent(f"""
+                    [cyan]Top Result Details:[/cyan]
+                    [bold]File:[/bold] {top_result.file_path}
+                    [bold]Lines:[/bold] {top_result.start_line}-{top_result.end_line}
+                    [bold]Similarity Score:[/bold] {top_result.similarity_score:.4f}
+
+                    [bold]Content:[/bold]
+                    {escaped_content}
+                    """).strip(),
+                    title="🎯 Best Match",
+                    border_style="green",
+                )
+            )
+
+    except Exception as e:
+        console.print(
+            f"[red]Error performing search:[/red] {str(e).replace('[', '\\[').replace(']', '\\]')}"
+        )
+
+
+def handle_semantic_stats() -> None:
+    """Handle displaying semantic search index statistics."""
+    try:
+        console.print("[cyan]Retrieving semantic search index statistics...[/cyan]")
+
+        # Create default configuration
+        config = SemanticConfig(
+            embedding_model="sentence-transformers/all-MiniLM-L6-v2",
+            chunk_size=512,
+            chunk_overlap=50,
+            max_search_results=10,
+            similarity_threshold=0.7,
+            embedding_dimension=384,
+        )
+
+        # Initialize vector store with persistent database
+        db_path = Path.cwd() / ".crackerjack" / "semantic_index.db"
+        db_path.parent.mkdir(exist_ok=True)
+        vector_store = VectorStore(config, db_path=db_path)
+        stats = vector_store.get_stats()
+
+        # Create stats table
+        table = Table(title="Semantic Search Index Statistics")
+        table.add_column("Metric", style="cyan", no_wrap=True)
+        table.add_column("Value", style="green")
+
+        table.add_row("Total Files", str(stats.total_files))
+        table.add_row("Total Chunks", str(stats.total_chunks))
+        table.add_row("Index Size", f"{stats.index_size_mb:.2f} MB")
+
+        # Calculate average chunks per file
+        avg_chunks = (
+            stats.total_chunks / stats.total_files if stats.total_files > 0 else 0.0
+        )
+        table.add_row("Average Chunks per File", f"{avg_chunks:.1f}")
+
+        table.add_row("Embedding Model", config.embedding_model)
+        table.add_row("Embedding Dimension", "384")  # ONNX fallback uses 384 dimensions
+
+        if stats.last_updated:
+            table.add_row(
+                "Last Updated", stats.last_updated.strftime("%Y-%m-%d %H:%M:%S")
+            )
+
+        console.print(table)
+
+        if stats.total_files == 0:
+            console.print(
+                Panel(
+                    "[yellow]The semantic search index is empty. Use [bold]--index[/bold] to add files.[/yellow]",
+                    title="💡 Tip",
+                    border_style="yellow",
+                )
+            )
+
+    except Exception as e:
+        console.print(
+            f"[red]Error retrieving stats:[/red] {str(e).replace('[', '\\[').replace(']', '\\]')}"
+        )
+
+
+def handle_remove_from_semantic_index(file_path: str) -> None:
+    """Handle removing a file from the semantic search index.
+
+    Args:
+        file_path: Path to the file to remove
+    """
+    try:
+        console.print(
+            f"[cyan]Removing file from semantic search index:[/cyan] {file_path}"
+        )
+
+        # Validate path
+        path_obj = Path(file_path)
+
+        # Create default configuration
+        config = SemanticConfig(
+            embedding_model="sentence-transformers/all-MiniLM-L6-v2",
+            chunk_size=512,
+            chunk_overlap=50,
+            max_search_results=10,
+            similarity_threshold=0.7,
+            embedding_dimension=384,
+        )
+
+        # Initialize vector store with persistent database
+        db_path = Path.cwd() / ".crackerjack" / "semantic_index.db"
+        db_path.parent.mkdir(exist_ok=True)
+        vector_store = VectorStore(config, db_path=db_path)
+        success = vector_store.remove_file(path_obj)
+
+        if success:
+            console.print(
+                f"[green]✅ Successfully removed {path_obj.name} from index[/green]"
+            )
+        else:
+            console.print(
+                f"[yellow]Warning:[/yellow] File {path_obj.name} was not found in index"
+            )
+
+        # Show updated stats
+        stats = vector_store.get_stats()
+        console.print(
+            f"[cyan]Index now contains:[/cyan] {stats.total_files} files, {stats.total_chunks} chunks"
+        )
+
+    except Exception as e:
+        console.print(
+            f"[red]Error removing file:[/red] {str(e).replace('[', '\\[').replace(']', '\\]')}"
+        )
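
Each of the four handlers rebuilds the same SemanticConfig and reopens the same .crackerjack/semantic_index.db, so the VectorStore surface they exercise is just index_file, search, get_stats, and remove_file. A minimal sketch of that service API, using only the calls visible in this diff (the query text and indexed path are illustrative):

from pathlib import Path

from crackerjack.models.semantic_models import SearchQuery, SemanticConfig
from crackerjack.services.vector_store import VectorStore

config = SemanticConfig(
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    chunk_size=512,
    chunk_overlap=50,
    max_search_results=10,
    similarity_threshold=0.7,
    embedding_dimension=384,
)
db_path = Path.cwd() / ".crackerjack" / "semantic_index.db"
db_path.parent.mkdir(exist_ok=True)
store = VectorStore(config, db_path=db_path)

# Index one module, then query the persistent index.
store.index_file(Path("crackerjack/agents/semantic_helpers.py"))
results = store.search(
    SearchQuery(query="persistent database path", max_results=3, min_similarity=0.3)
)
for result in results:
    print(result.file_path, f"{result.similarity_score:.3f}")
print(store.get_stats().total_chunks)

Hoisting that repeated config-and-store setup into a shared helper (as SemanticEnhancer._get_vector_store already does on the agent side) would be a natural cleanup for a later release.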