claude-code-workflow 6.3.2 → 6.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/.claude/CLAUDE.md +9 -1
  2. package/.claude/commands/workflow/lite-plan.md +1 -1
  3. package/.claude/workflows/cli-tools-usage.md +515 -516
  4. package/ccw/dist/cli.d.ts.map +1 -1
  5. package/ccw/dist/cli.js +6 -1
  6. package/ccw/dist/cli.js.map +1 -1
  7. package/ccw/dist/commands/cli.d.ts +1 -1
  8. package/ccw/dist/commands/cli.d.ts.map +1 -1
  9. package/ccw/dist/commands/cli.js +71 -7
  10. package/ccw/dist/commands/cli.js.map +1 -1
  11. package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
  12. package/ccw/dist/tools/cli-executor.js +19 -7
  13. package/ccw/dist/tools/cli-executor.js.map +1 -1
  14. package/ccw/dist/tools/cli-history-store.d.ts +33 -0
  15. package/ccw/dist/tools/cli-history-store.d.ts.map +1 -1
  16. package/ccw/dist/tools/cli-history-store.js +89 -5
  17. package/ccw/dist/tools/cli-history-store.js.map +1 -1
  18. package/ccw/src/cli.ts +263 -258
  19. package/ccw/src/commands/cli.ts +967 -884
  20. package/ccw/src/tools/cli-executor.ts +20 -7
  21. package/ccw/src/tools/cli-history-store.ts +125 -5
  22. package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
  23. package/codex-lens/src/codexlens/config.py +3 -0
  24. package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
  25. package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
  26. package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
  27. package/codex-lens/src/codexlens/search/chain_search.py +71 -1
  28. package/codex-lens/src/codexlens/search/ranking.py +274 -274
  29. package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
  30. package/codex-lens/src/codexlens/storage/__pycache__/dir_index.cpython-313.pyc +0 -0
  31. package/codex-lens/src/codexlens/storage/__pycache__/global_index.cpython-313.pyc +0 -0
  32. package/codex-lens/src/codexlens/storage/__pycache__/index_tree.cpython-313.pyc +0 -0
  33. package/codex-lens/src/codexlens/storage/dir_index.py +1888 -1850
  34. package/codex-lens/src/codexlens/storage/global_index.py +365 -0
  35. package/codex-lens/src/codexlens/storage/index_tree.py +83 -10
  36. package/package.json +1 -1
@@ -1,274 +1,274 @@
1
- """Ranking algorithms for hybrid search result fusion.
2
-
3
- Implements Reciprocal Rank Fusion (RRF) and score normalization utilities
4
- for combining results from heterogeneous search backends (exact FTS, fuzzy FTS, vector search).
5
- """
6
-
7
- from __future__ import annotations
8
-
9
- import math
10
- from typing import Dict, List
11
-
12
- from codexlens.entities import SearchResult, AdditionalLocation
13
-
14
-
15
- def reciprocal_rank_fusion(
16
- results_map: Dict[str, List[SearchResult]],
17
- weights: Dict[str, float] = None,
18
- k: int = 60,
19
- ) -> List[SearchResult]:
20
- """Combine search results from multiple sources using Reciprocal Rank Fusion.
21
-
22
- RRF formula: score(d) = Σ weight_source / (k + rank_source(d))
23
-
24
- Args:
25
- results_map: Dictionary mapping source name to list of SearchResult objects
26
- Sources: 'exact', 'fuzzy', 'vector'
27
- weights: Dictionary mapping source name to weight (default: equal weights)
28
- Example: {'exact': 0.3, 'fuzzy': 0.1, 'vector': 0.6}
29
- k: Constant to avoid division by zero and control rank influence (default 60)
30
-
31
- Returns:
32
- List of SearchResult objects sorted by fused score (descending)
33
-
34
- Examples:
35
- >>> exact_results = [SearchResult(path="a.py", score=10.0, excerpt="...")]
36
- >>> fuzzy_results = [SearchResult(path="b.py", score=8.0, excerpt="...")]
37
- >>> results_map = {'exact': exact_results, 'fuzzy': fuzzy_results}
38
- >>> fused = reciprocal_rank_fusion(results_map)
39
- """
40
- if not results_map:
41
- return []
42
-
43
- # Default equal weights if not provided
44
- if weights is None:
45
- num_sources = len(results_map)
46
- weights = {source: 1.0 / num_sources for source in results_map}
47
-
48
- # Validate weights sum to 1.0
49
- weight_sum = sum(weights.values())
50
- if not math.isclose(weight_sum, 1.0, abs_tol=0.01):
51
- # Normalize weights to sum to 1.0
52
- weights = {source: w / weight_sum for source, w in weights.items()}
53
-
54
- # Build unified result set with RRF scores
55
- path_to_result: Dict[str, SearchResult] = {}
56
- path_to_fusion_score: Dict[str, float] = {}
57
-
58
- for source_name, results in results_map.items():
59
- weight = weights.get(source_name, 0.0)
60
- if weight == 0:
61
- continue
62
-
63
- for rank, result in enumerate(results, start=1):
64
- path = result.path
65
- rrf_contribution = weight / (k + rank)
66
-
67
- # Initialize or accumulate fusion score
68
- if path not in path_to_fusion_score:
69
- path_to_fusion_score[path] = 0.0
70
- path_to_result[path] = result
71
-
72
- path_to_fusion_score[path] += rrf_contribution
73
-
74
- # Create final results with fusion scores
75
- fused_results = []
76
- for path, base_result in path_to_result.items():
77
- fusion_score = path_to_fusion_score[path]
78
-
79
- # Create new SearchResult with fusion_score in metadata
80
- fused_result = SearchResult(
81
- path=base_result.path,
82
- score=fusion_score,
83
- excerpt=base_result.excerpt,
84
- content=base_result.content,
85
- symbol=base_result.symbol,
86
- chunk=base_result.chunk,
87
- metadata={
88
- **base_result.metadata,
89
- "fusion_score": fusion_score,
90
- "original_score": base_result.score,
91
- },
92
- start_line=base_result.start_line,
93
- end_line=base_result.end_line,
94
- symbol_name=base_result.symbol_name,
95
- symbol_kind=base_result.symbol_kind,
96
- )
97
- fused_results.append(fused_result)
98
-
99
- # Sort by fusion score descending
100
- fused_results.sort(key=lambda r: r.score, reverse=True)
101
-
102
- return fused_results
103
-
104
-
105
- def normalize_bm25_score(score: float) -> float:
106
- """Normalize BM25 scores from SQLite FTS5 to 0-1 range.
107
-
108
- SQLite FTS5 returns negative BM25 scores (more negative = better match).
109
- Uses sigmoid transformation for normalization.
110
-
111
- Args:
112
- score: Raw BM25 score from SQLite (typically negative)
113
-
114
- Returns:
115
- Normalized score in range [0, 1]
116
-
117
- Examples:
118
- >>> normalize_bm25_score(-10.5) # Good match
119
- 0.85
120
- >>> normalize_bm25_score(-1.2) # Weak match
121
- 0.62
122
- """
123
- # Take absolute value (BM25 is negative in SQLite)
124
- abs_score = abs(score)
125
-
126
- # Sigmoid transformation: 1 / (1 + e^(-x))
127
- # Scale factor of 0.1 maps typical BM25 range (-20 to 0) to (0, 1)
128
- normalized = 1.0 / (1.0 + math.exp(-abs_score * 0.1))
129
-
130
- return normalized
131
-
132
-
133
- def tag_search_source(results: List[SearchResult], source: str) -> List[SearchResult]:
134
- """Tag search results with their source for RRF tracking.
135
-
136
- Args:
137
- results: List of SearchResult objects
138
- source: Source identifier ('exact', 'fuzzy', 'vector')
139
-
140
- Returns:
141
- List of SearchResult objects with 'search_source' in metadata
142
- """
143
- tagged_results = []
144
- for result in results:
145
- tagged_result = SearchResult(
146
- path=result.path,
147
- score=result.score,
148
- excerpt=result.excerpt,
149
- content=result.content,
150
- symbol=result.symbol,
151
- chunk=result.chunk,
152
- metadata={**result.metadata, "search_source": source},
153
- start_line=result.start_line,
154
- end_line=result.end_line,
155
- symbol_name=result.symbol_name,
156
- symbol_kind=result.symbol_kind,
157
- )
158
- tagged_results.append(tagged_result)
159
-
160
- return tagged_results
161
-
162
-
163
- def group_similar_results(
164
- results: List[SearchResult],
165
- score_threshold_abs: float = 0.01,
166
- content_field: str = "excerpt"
167
- ) -> List[SearchResult]:
168
- """Group search results by content and score similarity.
169
-
170
- Groups results that have similar content and similar scores into a single
171
- representative result, with other locations stored in additional_locations.
172
-
173
- Algorithm:
174
- 1. Group results by content (using excerpt or content field)
175
- 2. Within each content group, create subgroups based on score similarity
176
- 3. Select highest-scoring result as representative for each subgroup
177
- 4. Store other results in subgroup as additional_locations
178
-
179
- Args:
180
- results: A list of SearchResult objects (typically sorted by score)
181
- score_threshold_abs: Absolute score difference to consider results similar.
182
- Results with |score_a - score_b| <= threshold are grouped.
183
- Default 0.01 is suitable for RRF fusion scores.
184
- content_field: The field to use for content grouping ('excerpt' or 'content')
185
-
186
- Returns:
187
- A new list of SearchResult objects where similar items are grouped.
188
- The list is sorted by score descending.
189
-
190
- Examples:
191
- >>> results = [SearchResult(path="a.py", score=0.5, excerpt="def foo()"),
192
- ... SearchResult(path="b.py", score=0.5, excerpt="def foo()")]
193
- >>> grouped = group_similar_results(results)
194
- >>> len(grouped) # Two results merged into one
195
- 1
196
- >>> len(grouped[0].additional_locations) # One additional location
197
- 1
198
- """
199
- if not results:
200
- return []
201
-
202
- # Group results by content
203
- content_map: Dict[str, List[SearchResult]] = {}
204
- unidentifiable_results: List[SearchResult] = []
205
-
206
- for r in results:
207
- key = getattr(r, content_field, None)
208
- if key and key.strip():
209
- content_map.setdefault(key, []).append(r)
210
- else:
211
- # Results without content can't be grouped by content
212
- unidentifiable_results.append(r)
213
-
214
- final_results: List[SearchResult] = []
215
-
216
- # Process each content group
217
- for content_group in content_map.values():
218
- # Sort by score descending within group
219
- content_group.sort(key=lambda r: r.score, reverse=True)
220
-
221
- while content_group:
222
- # Take highest scoring as representative
223
- representative = content_group.pop(0)
224
- others_in_group = []
225
- remaining_for_next_pass = []
226
-
227
- # Find results with similar scores
228
- for item in content_group:
229
- if abs(representative.score - item.score) <= score_threshold_abs:
230
- others_in_group.append(item)
231
- else:
232
- remaining_for_next_pass.append(item)
233
-
234
- # Create grouped result with additional locations
235
- if others_in_group:
236
- # Build new result with additional_locations populated
237
- grouped_result = SearchResult(
238
- path=representative.path,
239
- score=representative.score,
240
- excerpt=representative.excerpt,
241
- content=representative.content,
242
- symbol=representative.symbol,
243
- chunk=representative.chunk,
244
- metadata={
245
- **representative.metadata,
246
- "grouped_count": len(others_in_group) + 1,
247
- },
248
- start_line=representative.start_line,
249
- end_line=representative.end_line,
250
- symbol_name=representative.symbol_name,
251
- symbol_kind=representative.symbol_kind,
252
- additional_locations=[
253
- AdditionalLocation(
254
- path=other.path,
255
- score=other.score,
256
- start_line=other.start_line,
257
- end_line=other.end_line,
258
- symbol_name=other.symbol_name,
259
- ) for other in others_in_group
260
- ],
261
- )
262
- final_results.append(grouped_result)
263
- else:
264
- final_results.append(representative)
265
-
266
- content_group = remaining_for_next_pass
267
-
268
- # Add ungroupable results
269
- final_results.extend(unidentifiable_results)
270
-
271
- # Sort final results by score descending
272
- final_results.sort(key=lambda r: r.score, reverse=True)
273
-
274
- return final_results
1
+ """Ranking algorithms for hybrid search result fusion.
2
+
3
+ Implements Reciprocal Rank Fusion (RRF) and score normalization utilities
4
+ for combining results from heterogeneous search backends (exact FTS, fuzzy FTS, vector search).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import math
10
+ from typing import Dict, List
11
+
12
+ from codexlens.entities import SearchResult, AdditionalLocation
13
+
14
+
15
def reciprocal_rank_fusion(
    results_map: Dict[str, List[SearchResult]],
    weights: Dict[str, float] | None = None,
    k: int = 60,
) -> List[SearchResult]:
    """Combine search results from multiple sources using Reciprocal Rank Fusion.

    RRF formula: score(d) = Σ weight_source / (k + rank_source(d))

    Args:
        results_map: Dictionary mapping source name to list of SearchResult objects
            Sources: 'exact', 'fuzzy', 'vector'
        weights: Dictionary mapping source name to weight (default: equal weights)
            Example: {'exact': 0.3, 'fuzzy': 0.1, 'vector': 0.6}
        k: Constant to avoid division by zero and control rank influence (default 60)

    Returns:
        List of SearchResult objects sorted by fused score (descending)

    Examples:
        >>> exact_results = [SearchResult(path="a.py", score=10.0, excerpt="...")]
        >>> fuzzy_results = [SearchResult(path="b.py", score=8.0, excerpt="...")]
        >>> results_map = {'exact': exact_results, 'fuzzy': fuzzy_results}
        >>> fused = reciprocal_rank_fusion(results_map)
    """
    if not results_map:
        return []

    # Default to equal weights across the provided sources.
    if weights is None:
        num_sources = len(results_map)
        weights = {source: 1.0 / num_sources for source in results_map}

    # Renormalize weights so they sum to 1.0. Skip when the sum is zero or
    # negative: dividing by it would raise ZeroDivisionError (or flip signs),
    # and zero weights already mean "ignore every source" below.
    weight_sum = sum(weights.values())
    if weight_sum > 0 and not math.isclose(weight_sum, 1.0, abs_tol=0.01):
        weights = {source: w / weight_sum for source, w in weights.items()}

    # Accumulate RRF contributions per result path across all sources.
    path_to_result: Dict[str, SearchResult] = {}
    path_to_fusion_score: Dict[str, float] = {}

    for source_name, results in results_map.items():
        weight = weights.get(source_name, 0.0)
        if weight == 0:
            continue

        for rank, result in enumerate(results, start=1):
            path = result.path
            rrf_contribution = weight / (k + rank)

            # First sighting of this path keeps that source's result object
            # as the representative; later sources only add to its score.
            if path not in path_to_fusion_score:
                path_to_fusion_score[path] = 0.0
                path_to_result[path] = result

            path_to_fusion_score[path] += rrf_contribution

    # Rebuild each representative with the fused score; the pre-fusion score
    # is preserved in metadata for debugging/inspection.
    fused_results = []
    for path, base_result in path_to_result.items():
        fusion_score = path_to_fusion_score[path]

        fused_result = SearchResult(
            path=base_result.path,
            score=fusion_score,
            excerpt=base_result.excerpt,
            content=base_result.content,
            symbol=base_result.symbol,
            chunk=base_result.chunk,
            metadata={
                **base_result.metadata,
                "fusion_score": fusion_score,
                "original_score": base_result.score,
            },
            start_line=base_result.start_line,
            end_line=base_result.end_line,
            symbol_name=base_result.symbol_name,
            symbol_kind=base_result.symbol_kind,
        )
        fused_results.append(fused_result)

    # Best fused score first.
    fused_results.sort(key=lambda r: r.score, reverse=True)

    return fused_results
103
+
104
+
105
def normalize_bm25_score(score: float) -> float:
    """Normalize BM25 scores from SQLite FTS5 to a bounded range.

    SQLite FTS5 returns negative BM25 scores (more negative = better match).
    Applies a sigmoid to the score magnitude, so the output lies in
    [0.5, 1.0): exactly 0.5 for a zero score, approaching 1.0 as the
    match strength grows.

    Args:
        score: Raw BM25 score from SQLite (typically negative)

    Returns:
        Normalized score in range [0.5, 1.0)

    Examples:
        >>> round(normalize_bm25_score(-10.5), 2)  # Good match
        0.74
        >>> round(normalize_bm25_score(-1.2), 2)  # Weak match
        0.53
    """
    # BM25 from SQLite is negative; only the magnitude measures quality.
    abs_score = abs(score)

    # Sigmoid transformation: 1 / (1 + e^(-x))
    # Scale factor 0.1 maps the typical BM25 magnitude range (0 to 20)
    # onto roughly (0.5, 0.88).
    normalized = 1.0 / (1.0 + math.exp(-abs_score * 0.1))

    return normalized
131
+
132
+
133
def tag_search_source(results: List[SearchResult], source: str) -> List[SearchResult]:
    """Annotate each result with the backend it came from, for RRF tracking.

    Args:
        results: List of SearchResult objects
        source: Source identifier ('exact', 'fuzzy', 'vector')

    Returns:
        A new list of SearchResult objects whose metadata carries a
        'search_source' entry; the input objects are left untouched.
    """
    return [
        SearchResult(
            path=item.path,
            score=item.score,
            excerpt=item.excerpt,
            content=item.content,
            symbol=item.symbol,
            chunk=item.chunk,
            metadata={**item.metadata, "search_source": source},
            start_line=item.start_line,
            end_line=item.end_line,
            symbol_name=item.symbol_name,
            symbol_kind=item.symbol_kind,
        )
        for item in results
    ]
161
+
162
+
163
def group_similar_results(
    results: List[SearchResult],
    score_threshold_abs: float = 0.01,
    content_field: str = "excerpt"
) -> List[SearchResult]:
    """Collapse results that share content and have near-equal scores.

    Results with identical content (per ``content_field``) whose scores
    differ by at most ``score_threshold_abs`` are merged into a single
    representative result; the merged siblings are kept as
    ``additional_locations`` on that representative.

    Algorithm:
        1. Bucket results by their content text.
        2. Within each bucket, greedily peel off the best-scoring result and
           gather every remaining result within the score threshold of it.
        3. Emit the leader (with siblings as additional_locations) and repeat
           on whatever is left in the bucket.

    Args:
        results: A list of SearchResult objects (typically sorted by score)
        score_threshold_abs: Absolute score difference to consider results similar.
            Results with |score_a - score_b| <= threshold are grouped.
            Default 0.01 is suitable for RRF fusion scores.
        content_field: The field to use for content grouping ('excerpt' or 'content')

    Returns:
        A new list of SearchResult objects where similar items are grouped,
        sorted by score descending.

    Examples:
        >>> results = [SearchResult(path="a.py", score=0.5, excerpt="def foo()"),
        ...            SearchResult(path="b.py", score=0.5, excerpt="def foo()")]
        >>> grouped = group_similar_results(results)
        >>> len(grouped)  # Two results merged into one
        1
        >>> len(grouped[0].additional_locations)  # One additional location
        1
    """
    if not results:
        return []

    # Bucket by content text; results with no usable content cannot be
    # deduplicated and simply pass through unchanged.
    buckets: Dict[str, List[SearchResult]] = {}
    passthrough: List[SearchResult] = []

    for candidate in results:
        text = getattr(candidate, content_field, None)
        if text and text.strip():
            buckets.setdefault(text, []).append(candidate)
        else:
            passthrough.append(candidate)

    grouped: List[SearchResult] = []

    for bucket in buckets.values():
        # Best score first, so each pass's leader is the subgroup maximum.
        pending = sorted(bucket, key=lambda r: r.score, reverse=True)

        while pending:
            leader, rest = pending[0], pending[1:]

            # Partition the remainder into this leader's subgroup vs. the
            # candidates for the next pass.
            siblings = [x for x in rest
                        if abs(leader.score - x.score) <= score_threshold_abs]
            pending = [x for x in rest
                       if abs(leader.score - x.score) > score_threshold_abs]

            if siblings:
                # Re-emit the leader carrying its merged siblings.
                grouped.append(SearchResult(
                    path=leader.path,
                    score=leader.score,
                    excerpt=leader.excerpt,
                    content=leader.content,
                    symbol=leader.symbol,
                    chunk=leader.chunk,
                    metadata={
                        **leader.metadata,
                        "grouped_count": len(siblings) + 1,
                    },
                    start_line=leader.start_line,
                    end_line=leader.end_line,
                    symbol_name=leader.symbol_name,
                    symbol_kind=leader.symbol_kind,
                    additional_locations=[
                        AdditionalLocation(
                            path=loc.path,
                            score=loc.score,
                            start_line=loc.start_line,
                            end_line=loc.end_line,
                            symbol_name=loc.symbol_name,
                        )
                        for loc in siblings
                    ],
                ))
            else:
                grouped.append(leader)

    # Ungroupable results rejoin the output, then everything is re-ranked.
    grouped.extend(passthrough)
    grouped.sort(key=lambda r: r.score, reverse=True)

    return grouped