claude-code-workflow 6.2.4 → 6.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/ccw/dist/core/lite-scanner-complete.d.ts.map +1 -1
  2. package/ccw/dist/core/lite-scanner-complete.js +4 -1
  3. package/ccw/dist/core/lite-scanner-complete.js.map +1 -1
  4. package/ccw/dist/core/lite-scanner.d.ts.map +1 -1
  5. package/ccw/dist/core/lite-scanner.js +4 -1
  6. package/ccw/dist/core/lite-scanner.js.map +1 -1
  7. package/ccw/dist/core/routes/claude-routes.d.ts.map +1 -1
  8. package/ccw/dist/core/routes/claude-routes.js +3 -5
  9. package/ccw/dist/core/routes/claude-routes.js.map +1 -1
  10. package/ccw/dist/core/routes/cli-routes.d.ts.map +1 -1
  11. package/ccw/dist/core/routes/cli-routes.js +2 -1
  12. package/ccw/dist/core/routes/cli-routes.js.map +1 -1
  13. package/ccw/dist/core/routes/codexlens-routes.d.ts.map +1 -1
  14. package/ccw/dist/core/routes/codexlens-routes.js +31 -6
  15. package/ccw/dist/core/routes/codexlens-routes.js.map +1 -1
  16. package/ccw/dist/core/routes/rules-routes.d.ts.map +1 -1
  17. package/ccw/dist/core/routes/rules-routes.js +4 -3
  18. package/ccw/dist/core/routes/rules-routes.js.map +1 -1
  19. package/ccw/dist/core/routes/skills-routes.d.ts.map +1 -1
  20. package/ccw/dist/core/routes/skills-routes.js +124 -6
  21. package/ccw/dist/core/routes/skills-routes.js.map +1 -1
  22. package/ccw/dist/tools/cli-executor.d.ts +4 -1
  23. package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
  24. package/ccw/dist/tools/cli-executor.js +54 -2
  25. package/ccw/dist/tools/cli-executor.js.map +1 -1
  26. package/ccw/dist/tools/codex-lens.d.ts +20 -3
  27. package/ccw/dist/tools/codex-lens.d.ts.map +1 -1
  28. package/ccw/dist/tools/codex-lens.js +166 -37
  29. package/ccw/dist/tools/codex-lens.js.map +1 -1
  30. package/ccw/package.json +1 -1
  31. package/ccw/src/core/lite-scanner-complete.ts +5 -1
  32. package/ccw/src/core/lite-scanner.ts +5 -1
  33. package/ccw/src/core/routes/claude-routes.ts +3 -5
  34. package/ccw/src/core/routes/cli-routes.ts +2 -1
  35. package/ccw/src/core/routes/codexlens-routes.ts +34 -6
  36. package/ccw/src/core/routes/rules-routes.ts +4 -3
  37. package/ccw/src/core/routes/skills-routes.ts +144 -6
  38. package/ccw/src/templates/dashboard-js/components/mcp-manager.js +7 -12
  39. package/ccw/src/templates/dashboard-js/i18n.js +167 -5
  40. package/ccw/src/templates/dashboard-js/views/claude-manager.js +18 -4
  41. package/ccw/src/templates/dashboard-js/views/cli-manager.js +5 -3
  42. package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +790 -25
  43. package/ccw/src/templates/dashboard-js/views/rules-manager.js +35 -6
  44. package/ccw/src/templates/dashboard-js/views/skills-manager.js +385 -21
  45. package/ccw/src/tools/cli-executor.ts +70 -2
  46. package/ccw/src/tools/codex-lens.ts +183 -35
  47. package/codex-lens/pyproject.toml +66 -48
  48. package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
  49. package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
  50. package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
  51. package/codex-lens/src/codexlens/cli/embedding_manager.py +3 -3
  52. package/codex-lens/src/codexlens/cli/model_manager.py +24 -2
  53. package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
  54. package/codex-lens/src/codexlens/search/hybrid_search.py +313 -313
  55. package/codex-lens/src/codexlens/semantic/__init__.py +76 -39
  56. package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
  57. package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
  58. package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-313.pyc +0 -0
  59. package/codex-lens/src/codexlens/semantic/__pycache__/ollama_backend.cpython-313.pyc +0 -0
  60. package/codex-lens/src/codexlens/semantic/embedder.py +244 -185
  61. package/codex-lens/src/codexlens/semantic/gpu_support.py +192 -0
  62. package/package.json +1 -1
package/codex-lens/src/codexlens/search/hybrid_search.py
@@ -1,313 +1,313 @@
- """Hybrid search engine orchestrating parallel exact/fuzzy/vector searches with RRF fusion.
-
- Coordinates multiple search backends in parallel using ThreadPoolExecutor and combines
- results via Reciprocal Rank Fusion (RRF) algorithm.
- """
-
- from __future__ import annotations
-
- import logging
- from concurrent.futures import ThreadPoolExecutor, as_completed
- from pathlib import Path
- from typing import Dict, List, Optional
-
- from codexlens.entities import SearchResult
- from codexlens.search.ranking import reciprocal_rank_fusion, tag_search_source
- from codexlens.storage.dir_index import DirIndexStore
-
-
- class HybridSearchEngine:
-     """Hybrid search engine with parallel execution and RRF fusion.
-
-     Orchestrates searches across exact FTS, fuzzy FTS, and optional vector backends,
-     executing them in parallel and fusing results via Reciprocal Rank Fusion.
-
-     Attributes:
-         logger: Python logger instance
-         default_weights: Default RRF weights for each source
-     """
-
-     # Default RRF weights (exact: 40%, fuzzy: 30%, vector: 30%)
-     DEFAULT_WEIGHTS = {
-         "exact": 0.4,
-         "fuzzy": 0.3,
-         "vector": 0.3,
-     }
-
-     def __init__(self, weights: Optional[Dict[str, float]] = None):
-         """Initialize hybrid search engine.
-
-         Args:
-             weights: Optional custom RRF weights (default: DEFAULT_WEIGHTS)
-         """
-         self.logger = logging.getLogger(__name__)
-         self.weights = weights or self.DEFAULT_WEIGHTS.copy()
-
-     def search(
-         self,
-         index_path: Path,
-         query: str,
-         limit: int = 20,
-         enable_fuzzy: bool = True,
-         enable_vector: bool = False,
-         pure_vector: bool = False,
-     ) -> List[SearchResult]:
-         """Execute hybrid search with parallel retrieval and RRF fusion.
-
-         Args:
-             index_path: Path to _index.db file
-             query: FTS5 query string (for FTS) or natural language query (for vector)
-             limit: Maximum results to return after fusion
-             enable_fuzzy: Enable fuzzy FTS search (default True)
-             enable_vector: Enable vector search (default False)
-             pure_vector: If True, only use vector search without FTS fallback (default False)
-
-         Returns:
-             List of SearchResult objects sorted by fusion score
-
-         Examples:
-             >>> engine = HybridSearchEngine()
-             >>> # Hybrid search (exact + fuzzy + vector)
-             >>> results = engine.search(Path("project/_index.db"), "authentication",
-             ... enable_vector=True)
-             >>> # Pure vector search (semantic only)
-             >>> results = engine.search(Path("project/_index.db"),
-             ... "how to authenticate users",
-             ... enable_vector=True, pure_vector=True)
-             >>> for r in results[:5]:
-             ...     print(f"{r.path}: {r.score:.3f}")
-         """
-         # Determine which backends to use
-         backends = {}
-
-         if pure_vector:
-             # Pure vector mode: only use vector search, no FTS fallback
-             if enable_vector:
-                 backends["vector"] = True
-             else:
-                 # Invalid configuration: pure_vector=True but enable_vector=False
-                 self.logger.warning(
-                     "pure_vector=True requires enable_vector=True. "
-                     "Falling back to exact search. "
-                     "To use pure vector search, enable vector search mode."
-                 )
-                 backends["exact"] = True
-         else:
-             # Hybrid mode: always include exact search as baseline
-             backends["exact"] = True
-             if enable_fuzzy:
-                 backends["fuzzy"] = True
-             if enable_vector:
-                 backends["vector"] = True
-
-         # Execute parallel searches
-         results_map = self._search_parallel(index_path, query, backends, limit)
-
-         # Provide helpful message if pure-vector mode returns no results
-         if pure_vector and enable_vector and len(results_map.get("vector", [])) == 0:
-             self.logger.warning(
-                 "Pure vector search returned no results. "
-                 "This usually means embeddings haven't been generated. "
-                 "Run: codexlens embeddings-generate %s",
-                 index_path.parent if index_path.name == "_index.db" else index_path
-             )
-
-         # Apply RRF fusion
-         # Filter weights to only active backends
-         active_weights = {
-             source: weight
-             for source, weight in self.weights.items()
-             if source in results_map
-         }
-
-         fused_results = reciprocal_rank_fusion(results_map, active_weights)
-
-         # Apply final limit
-         return fused_results[:limit]
-
-     def _search_parallel(
-         self,
-         index_path: Path,
-         query: str,
-         backends: Dict[str, bool],
-         limit: int,
-     ) -> Dict[str, List[SearchResult]]:
-         """Execute parallel searches across enabled backends.
-
-         Args:
-             index_path: Path to _index.db file
-             query: FTS5 query string
-             backends: Dictionary of backend name to enabled flag
-             limit: Results limit per backend
-
-         Returns:
-             Dictionary mapping source name to results list
-         """
-         results_map: Dict[str, List[SearchResult]] = {}
-
-         # Use ThreadPoolExecutor for parallel I/O-bound searches
-         with ThreadPoolExecutor(max_workers=len(backends)) as executor:
-             # Submit search tasks
-             future_to_source = {}
-
-             if backends.get("exact"):
-                 future = executor.submit(
-                     self._search_exact, index_path, query, limit
-                 )
-                 future_to_source[future] = "exact"
-
-             if backends.get("fuzzy"):
-                 future = executor.submit(
-                     self._search_fuzzy, index_path, query, limit
-                 )
-                 future_to_source[future] = "fuzzy"
-
-             if backends.get("vector"):
-                 future = executor.submit(
-                     self._search_vector, index_path, query, limit
-                 )
-                 future_to_source[future] = "vector"
-
-             # Collect results as they complete
-             for future in as_completed(future_to_source):
-                 source = future_to_source[future]
-                 try:
-                     results = future.result()
-                     # Tag results with source for debugging
-                     tagged_results = tag_search_source(results, source)
-                     results_map[source] = tagged_results
-                     self.logger.debug(
-                         "Got %d results from %s search", len(results), source
-                     )
-                 except Exception as exc:
-                     self.logger.error("Search failed for %s: %s", source, exc)
-                     results_map[source] = []
-
-         return results_map
-
-     def _search_exact(
-         self, index_path: Path, query: str, limit: int
-     ) -> List[SearchResult]:
-         """Execute exact FTS search using unicode61 tokenizer.
-
-         Args:
-             index_path: Path to _index.db file
-             query: FTS5 query string
-             limit: Maximum results
-
-         Returns:
-             List of SearchResult objects
-         """
-         try:
-             with DirIndexStore(index_path) as store:
-                 return store.search_fts_exact(query, limit=limit)
-         except Exception as exc:
-             self.logger.debug("Exact search error: %s", exc)
-             return []
-
-     def _search_fuzzy(
-         self, index_path: Path, query: str, limit: int
-     ) -> List[SearchResult]:
-         """Execute fuzzy FTS search using trigram/extended unicode61 tokenizer.
-
-         Args:
-             index_path: Path to _index.db file
-             query: FTS5 query string
-             limit: Maximum results
-
-         Returns:
-             List of SearchResult objects
-         """
-         try:
-             with DirIndexStore(index_path) as store:
-                 return store.search_fts_fuzzy(query, limit=limit)
-         except Exception as exc:
-             self.logger.debug("Fuzzy search error: %s", exc)
-             return []
-
-     def _search_vector(
-         self, index_path: Path, query: str, limit: int
-     ) -> List[SearchResult]:
-         """Execute vector similarity search using semantic embeddings.
-
-         Args:
-             index_path: Path to _index.db file
-             query: Natural language query string
-             limit: Maximum results
-
-         Returns:
-             List of SearchResult objects ordered by semantic similarity
-         """
-         try:
-             # Check if semantic chunks table exists
-             import sqlite3
-             try:
-                 with sqlite3.connect(index_path) as conn:
-                     cursor = conn.execute(
-                         "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
-                     )
-                     has_semantic_table = cursor.fetchone() is not None
-             except sqlite3.Error as e:
-                 self.logger.error("Database check failed in vector search: %s", e)
-                 return []
-
-             if not has_semantic_table:
-                 self.logger.info(
-                     "No embeddings found in index. "
-                     "Generate embeddings with: codexlens embeddings-generate %s",
-                     index_path.parent if index_path.name == "_index.db" else index_path
-                 )
-                 return []
-
-             # Initialize embedder and vector store
-             from codexlens.semantic.embedder import get_embedder
-             from codexlens.semantic.vector_store import VectorStore
-
-             vector_store = VectorStore(index_path)
-
-             # Check if vector store has data
-             if vector_store.count_chunks() == 0:
-                 self.logger.info(
-                     "Vector store is empty (0 chunks). "
-                     "Generate embeddings with: codexlens embeddings-generate %s",
-                     index_path.parent if index_path.name == "_index.db" else index_path
-                 )
-                 return []
-
-             # Auto-detect embedding dimension and select appropriate profile
-             detected_dim = vector_store.dimension
-             if detected_dim is None:
-                 self.logger.info("Vector store dimension unknown, using default profile")
-                 profile = "code" # Default fallback
-             elif detected_dim == 384:
-                 profile = "fast"
-             elif detected_dim == 768:
-                 profile = "code"
-             elif detected_dim == 1024:
-                 profile = "multilingual" # or balanced, both are 1024
-             else:
-                 profile = "code" # Default fallback
-
-             # Use cached embedder (singleton) for performance
-             embedder = get_embedder(profile=profile)
-
-             # Generate query embedding
-             query_embedding = embedder.embed_single(query)
-
-             # Search for similar chunks
-             results = vector_store.search_similar(
-                 query_embedding=query_embedding,
-                 top_k=limit,
-                 min_score=0.0, # Return all results, let RRF handle filtering
-                 return_full_content=True,
-             )
-
-             self.logger.debug("Vector search found %d results", len(results))
-             return results
-
-         except ImportError as exc:
-             self.logger.debug("Semantic dependencies not available: %s", exc)
-             return []
-         except Exception as exc:
-             self.logger.error("Vector search error: %s", exc)
-             return []
+ """Hybrid search engine orchestrating parallel exact/fuzzy/vector searches with RRF fusion.
+
+ Coordinates multiple search backends in parallel using ThreadPoolExecutor and combines
+ results via Reciprocal Rank Fusion (RRF) algorithm.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from pathlib import Path
+ from typing import Dict, List, Optional
+
+ from codexlens.entities import SearchResult
+ from codexlens.search.ranking import reciprocal_rank_fusion, tag_search_source
+ from codexlens.storage.dir_index import DirIndexStore
+
+
+ class HybridSearchEngine:
+     """Hybrid search engine with parallel execution and RRF fusion.
+
+     Orchestrates searches across exact FTS, fuzzy FTS, and optional vector backends,
+     executing them in parallel and fusing results via Reciprocal Rank Fusion.
+
+     Attributes:
+         logger: Python logger instance
+         default_weights: Default RRF weights for each source
+     """
+
+     # Default RRF weights (exact: 40%, fuzzy: 30%, vector: 30%)
+     DEFAULT_WEIGHTS = {
+         "exact": 0.4,
+         "fuzzy": 0.3,
+         "vector": 0.3,
+     }
+
+     def __init__(self, weights: Optional[Dict[str, float]] = None):
+         """Initialize hybrid search engine.
+
+         Args:
+             weights: Optional custom RRF weights (default: DEFAULT_WEIGHTS)
+         """
+         self.logger = logging.getLogger(__name__)
+         self.weights = weights or self.DEFAULT_WEIGHTS.copy()
+
+     def search(
+         self,
+         index_path: Path,
+         query: str,
+         limit: int = 20,
+         enable_fuzzy: bool = True,
+         enable_vector: bool = False,
+         pure_vector: bool = False,
+     ) -> List[SearchResult]:
+         """Execute hybrid search with parallel retrieval and RRF fusion.
+
+         Args:
+             index_path: Path to _index.db file
+             query: FTS5 query string (for FTS) or natural language query (for vector)
+             limit: Maximum results to return after fusion
+             enable_fuzzy: Enable fuzzy FTS search (default True)
+             enable_vector: Enable vector search (default False)
+             pure_vector: If True, only use vector search without FTS fallback (default False)
+
+         Returns:
+             List of SearchResult objects sorted by fusion score
+
+         Examples:
+             >>> engine = HybridSearchEngine()
+             >>> # Hybrid search (exact + fuzzy + vector)
+             >>> results = engine.search(Path("project/_index.db"), "authentication",
+             ... enable_vector=True)
+             >>> # Pure vector search (semantic only)
+             >>> results = engine.search(Path("project/_index.db"),
+             ... "how to authenticate users",
+             ... enable_vector=True, pure_vector=True)
+             >>> for r in results[:5]:
+             ...     print(f"{r.path}: {r.score:.3f}")
+         """
+         # Determine which backends to use
+         backends = {}
+
+         if pure_vector:
+             # Pure vector mode: only use vector search, no FTS fallback
+             if enable_vector:
+                 backends["vector"] = True
+             else:
+                 # Invalid configuration: pure_vector=True but enable_vector=False
+                 self.logger.warning(
+                     "pure_vector=True requires enable_vector=True. "
+                     "Falling back to exact search. "
+                     "To use pure vector search, enable vector search mode."
+                 )
+                 backends["exact"] = True
+         else:
+             # Hybrid mode: always include exact search as baseline
+             backends["exact"] = True
+             if enable_fuzzy:
+                 backends["fuzzy"] = True
+             if enable_vector:
+                 backends["vector"] = True
+
+         # Execute parallel searches
+         results_map = self._search_parallel(index_path, query, backends, limit)
+
+         # Provide helpful message if pure-vector mode returns no results
+         if pure_vector and enable_vector and len(results_map.get("vector", [])) == 0:
+             self.logger.warning(
+                 "Pure vector search returned no results. "
+                 "This usually means embeddings haven't been generated. "
+                 "Run: codexlens embeddings-generate %s",
+                 index_path.parent if index_path.name == "_index.db" else index_path
+             )
+
+         # Apply RRF fusion
+         # Filter weights to only active backends
+         active_weights = {
+             source: weight
+             for source, weight in self.weights.items()
+             if source in results_map
+         }
+
+         fused_results = reciprocal_rank_fusion(results_map, active_weights)
+
+         # Apply final limit
+         return fused_results[:limit]
+
+     def _search_parallel(
+         self,
+         index_path: Path,
+         query: str,
+         backends: Dict[str, bool],
+         limit: int,
+     ) -> Dict[str, List[SearchResult]]:
+         """Execute parallel searches across enabled backends.
+
+         Args:
+             index_path: Path to _index.db file
+             query: FTS5 query string
+             backends: Dictionary of backend name to enabled flag
+             limit: Results limit per backend
+
+         Returns:
+             Dictionary mapping source name to results list
+         """
+         results_map: Dict[str, List[SearchResult]] = {}
+
+         # Use ThreadPoolExecutor for parallel I/O-bound searches
+         with ThreadPoolExecutor(max_workers=len(backends)) as executor:
+             # Submit search tasks
+             future_to_source = {}
+
+             if backends.get("exact"):
+                 future = executor.submit(
+                     self._search_exact, index_path, query, limit
+                 )
+                 future_to_source[future] = "exact"
+
+             if backends.get("fuzzy"):
+                 future = executor.submit(
+                     self._search_fuzzy, index_path, query, limit
+                 )
+                 future_to_source[future] = "fuzzy"
+
+             if backends.get("vector"):
+                 future = executor.submit(
+                     self._search_vector, index_path, query, limit
+                 )
+                 future_to_source[future] = "vector"
+
+             # Collect results as they complete
+             for future in as_completed(future_to_source):
+                 source = future_to_source[future]
+                 try:
+                     results = future.result()
+                     # Tag results with source for debugging
+                     tagged_results = tag_search_source(results, source)
+                     results_map[source] = tagged_results
+                     self.logger.debug(
+                         "Got %d results from %s search", len(results), source
+                     )
+                 except Exception as exc:
+                     self.logger.error("Search failed for %s: %s", source, exc)
+                     results_map[source] = []
+
+         return results_map
+
+     def _search_exact(
+         self, index_path: Path, query: str, limit: int
+     ) -> List[SearchResult]:
+         """Execute exact FTS search using unicode61 tokenizer.
+
+         Args:
+             index_path: Path to _index.db file
+             query: FTS5 query string
+             limit: Maximum results
+
+         Returns:
+             List of SearchResult objects
+         """
+         try:
+             with DirIndexStore(index_path) as store:
+                 return store.search_fts_exact(query, limit=limit)
+         except Exception as exc:
+             self.logger.debug("Exact search error: %s", exc)
+             return []
+
+     def _search_fuzzy(
+         self, index_path: Path, query: str, limit: int
+     ) -> List[SearchResult]:
+         """Execute fuzzy FTS search using trigram/extended unicode61 tokenizer.
+
+         Args:
+             index_path: Path to _index.db file
+             query: FTS5 query string
+             limit: Maximum results
+
+         Returns:
+             List of SearchResult objects
+         """
+         try:
+             with DirIndexStore(index_path) as store:
+                 return store.search_fts_fuzzy(query, limit=limit)
+         except Exception as exc:
+             self.logger.debug("Fuzzy search error: %s", exc)
+             return []
+
+     def _search_vector(
+         self, index_path: Path, query: str, limit: int
+     ) -> List[SearchResult]:
+         """Execute vector similarity search using semantic embeddings.
+
+         Args:
+             index_path: Path to _index.db file
+             query: Natural language query string
+             limit: Maximum results
+
+         Returns:
+             List of SearchResult objects ordered by semantic similarity
+         """
+         try:
+             # Check if semantic chunks table exists
+             import sqlite3
+             try:
+                 with sqlite3.connect(index_path) as conn:
+                     cursor = conn.execute(
+                         "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
+                     )
+                     has_semantic_table = cursor.fetchone() is not None
+             except sqlite3.Error as e:
+                 self.logger.error("Database check failed in vector search: %s", e)
+                 return []
+
+             if not has_semantic_table:
+                 self.logger.info(
+                     "No embeddings found in index. "
+                     "Generate embeddings with: codexlens embeddings-generate %s",
+                     index_path.parent if index_path.name == "_index.db" else index_path
+                 )
+                 return []
+
+             # Initialize embedder and vector store
+             from codexlens.semantic.embedder import get_embedder
+             from codexlens.semantic.vector_store import VectorStore
+
+             vector_store = VectorStore(index_path)
+
+             # Check if vector store has data
+             if vector_store.count_chunks() == 0:
+                 self.logger.info(
+                     "Vector store is empty (0 chunks). "
+                     "Generate embeddings with: codexlens embeddings-generate %s",
+                     index_path.parent if index_path.name == "_index.db" else index_path
+                 )
+                 return []
+
+             # Auto-detect embedding dimension and select appropriate profile
+             detected_dim = vector_store.dimension
+             if detected_dim is None:
+                 self.logger.info("Vector store dimension unknown, using default profile")
+                 profile = "code" # Default fallback
+             elif detected_dim == 384:
+                 profile = "fast"
+             elif detected_dim == 768:
+                 profile = "code"
+             elif detected_dim == 1024:
+                 profile = "multilingual" # or balanced, both are 1024
+             else:
+                 profile = "code" # Default fallback
+
+             # Use cached embedder (singleton) for performance
+             embedder = get_embedder(profile=profile)
+
+             # Generate query embedding
+             query_embedding = embedder.embed_single(query)
+
+             # Search for similar chunks
+             results = vector_store.search_similar(
+                 query_embedding=query_embedding,
+                 top_k=limit,
+                 min_score=0.0, # Return all results, let RRF handle filtering
+                 return_full_content=True,
+             )
+
+             self.logger.debug("Vector search found %d results", len(results))
+             return results
+
+         except ImportError as exc:
+             self.logger.debug("Semantic dependencies not available: %s", exc)
+             return []
+         except Exception as exc:
+             self.logger.error("Vector search error: %s", exc)
+             return []
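
The hunk above re-emits hybrid_search.py in full, but the fusion step it relies on, reciprocal_rank_fusion from codexlens.search.ranking, is imported rather than shown in this diff. The sketch below is a minimal illustration of the weighted Reciprocal Rank Fusion technique the docstrings describe. It is not the codexlens implementation: the rrf_fuse name, the SimpleNamespace results keyed by "path", and the constant k=60 are assumptions made for the example; only the 0.4/0.3/0.3 weights mirror DEFAULT_WEIGHTS from the diff.

# Illustrative sketch only: a weighted Reciprocal Rank Fusion, not the
# actual codexlens.search.ranking.reciprocal_rank_fusion implementation.
# Assumes each backend returns results best-first and that a result is
# identified by its "path"; k=60 is the commonly used RRF constant.
from types import SimpleNamespace
from typing import Dict, List


def rrf_fuse(results_map: Dict[str, List[SimpleNamespace]],
             weights: Dict[str, float],
             k: int = 60) -> List[SimpleNamespace]:
    """Fuse per-backend rankings: score(d) = sum_s weights[s] / (k + rank_s(d))."""
    scores: Dict[str, float] = {}
    docs: Dict[str, SimpleNamespace] = {}
    for source, results in results_map.items():
        weight = weights.get(source, 0.0)
        for rank, result in enumerate(results, start=1):
            # Each backend contributes its weight discounted by the result's rank.
            scores[result.path] = scores.get(result.path, 0.0) + weight / (k + rank)
            docs.setdefault(result.path, result)
    fused = sorted(docs.values(), key=lambda r: scores[r.path], reverse=True)
    for result in fused:
        result.score = scores[result.path]
    return fused


# Hypothetical rankings: "auth.py" is first for the exact backend and second
# for the vector backend, so it wins under the 0.4/0.3/0.3 default weights.
exact = [SimpleNamespace(path="auth.py"), SimpleNamespace(path="login.py")]
vector = [SimpleNamespace(path="login.py"), SimpleNamespace(path="auth.py")]
fused = rrf_fuse({"exact": exact, "vector": vector},
                 {"exact": 0.4, "fuzzy": 0.3, "vector": 0.3})
print([(r.path, round(r.score, 5)) for r in fused])

With those inputs, "auth.py" edges out "login.py" because its top rank comes from the exact backend, which carries a higher weight (0.4) than the vector backend (0.3); that is the same weighting behavior the engine's active_weights filtering applies before calling the real fusion function.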