claude-code-workflow 6.3.2 → 6.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/CLAUDE.md +9 -1
- package/.claude/commands/{clean.md → workflow/clean.md} +5 -5
- package/.claude/commands/workflow/docs/analyze.md +1467 -0
- package/.claude/commands/workflow/docs/copyright.md +1265 -0
- package/.claude/commands/workflow/lite-plan.md +1 -1
- package/.claude/commands/workflow/tools/conflict-resolution.md +76 -240
- package/.claude/commands/workflow/tools/task-generate-agent.md +81 -8
- package/.claude/skills/_shared/mermaid-utils.md +584 -0
- package/.claude/skills/copyright-docs/SKILL.md +132 -0
- package/.claude/skills/copyright-docs/phases/01-metadata-collection.md +78 -0
- package/.claude/skills/copyright-docs/phases/02-deep-analysis.md +454 -0
- package/.claude/skills/copyright-docs/phases/02.5-consolidation.md +192 -0
- package/.claude/skills/copyright-docs/phases/04-document-assembly.md +261 -0
- package/.claude/skills/copyright-docs/phases/05-compliance-refinement.md +192 -0
- package/.claude/skills/copyright-docs/specs/cpcc-requirements.md +121 -0
- package/.claude/skills/copyright-docs/templates/agent-base.md +200 -0
- package/.claude/skills/project-analyze/SKILL.md +162 -0
- package/.claude/skills/project-analyze/phases/01-requirements-discovery.md +79 -0
- package/.claude/skills/project-analyze/phases/02-project-exploration.md +75 -0
- package/.claude/skills/project-analyze/phases/03-deep-analysis.md +640 -0
- package/.claude/skills/project-analyze/phases/03.5-consolidation.md +208 -0
- package/.claude/skills/project-analyze/phases/04-report-generation.md +217 -0
- package/.claude/skills/project-analyze/phases/05-iterative-refinement.md +124 -0
- package/.claude/skills/project-analyze/specs/quality-standards.md +115 -0
- package/.claude/skills/project-analyze/specs/writing-style.md +152 -0
- package/.claude/workflows/cli-templates/schemas/conflict-resolution-schema.json +79 -65
- package/.claude/workflows/cli-tools-usage.md +515 -516
- package/README.md +11 -1
- package/ccw/dist/cli.d.ts.map +1 -1
- package/ccw/dist/cli.js +7 -1
- package/ccw/dist/cli.js.map +1 -1
- package/ccw/dist/commands/cli.d.ts +1 -1
- package/ccw/dist/commands/cli.d.ts.map +1 -1
- package/ccw/dist/commands/cli.js +116 -14
- package/ccw/dist/commands/cli.js.map +1 -1
- package/ccw/dist/core/routes/cli-routes.js +2 -2
- package/ccw/dist/core/routes/cli-routes.js.map +1 -1
- package/ccw/dist/tools/claude-cli-tools.d.ts +7 -3
- package/ccw/dist/tools/claude-cli-tools.d.ts.map +1 -1
- package/ccw/dist/tools/claude-cli-tools.js +31 -17
- package/ccw/dist/tools/claude-cli-tools.js.map +1 -1
- package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
- package/ccw/dist/tools/cli-executor.js +19 -7
- package/ccw/dist/tools/cli-executor.js.map +1 -1
- package/ccw/dist/tools/cli-history-store.d.ts +33 -0
- package/ccw/dist/tools/cli-history-store.d.ts.map +1 -1
- package/ccw/dist/tools/cli-history-store.js +89 -5
- package/ccw/dist/tools/cli-history-store.js.map +1 -1
- package/ccw/dist/tools/smart-search.d.ts +25 -0
- package/ccw/dist/tools/smart-search.d.ts.map +1 -1
- package/ccw/dist/tools/smart-search.js +121 -17
- package/ccw/dist/tools/smart-search.js.map +1 -1
- package/ccw/src/cli.ts +264 -258
- package/ccw/src/commands/cli.ts +1009 -884
- package/ccw/src/core/routes/cli-routes.ts +3 -3
- package/ccw/src/templates/dashboard-js/components/cli-history.js +40 -13
- package/ccw/src/templates/dashboard-js/components/cli-status.js +26 -2
- package/ccw/src/templates/dashboard-js/views/cli-manager.js +5 -0
- package/ccw/src/templates/dashboard-js/views/history.js +19 -4
- package/ccw/src/tools/claude-cli-tools.ts +37 -20
- package/ccw/src/tools/cli-executor.ts +20 -7
- package/ccw/src/tools/cli-history-store.ts +125 -5
- package/ccw/src/tools/smart-search.ts +157 -16
- package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/config.py +8 -0
- package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/chain_search.py +71 -1
- package/codex-lens/src/codexlens/search/hybrid_search.py +144 -11
- package/codex-lens/src/codexlens/search/ranking.py +540 -274
- package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/chunker.py +55 -10
- package/codex-lens/src/codexlens/storage/__pycache__/dir_index.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/global_index.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/index_tree.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/dir_index.py +1888 -1850
- package/codex-lens/src/codexlens/storage/global_index.py +365 -0
- package/codex-lens/src/codexlens/storage/index_tree.py +83 -10
- package/package.json +2 -2
|
@@ -24,6 +24,39 @@ import {
|
|
|
24
24
|
import type { ProgressInfo } from './codex-lens.js';
|
|
25
25
|
import { getProjectRoot } from '../utils/path-validator.js';
|
|
26
26
|
|
|
27
|
+
// Timing utilities for performance analysis
|
|
28
|
+
const TIMING_ENABLED = process.env.SMART_SEARCH_TIMING === '1' || process.env.DEBUG?.includes('timing');
|
|
29
|
+
|
|
30
|
+
interface TimingData {
|
|
31
|
+
[key: string]: number;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
 * Create a lightweight stopwatch for profiling the phases of a single search call.
 *
 * The returned functions close over the timer state rather than reading it through
 * `this`, so they keep working when destructured or handed off as callbacks
 * (e.g. `const { log } = createTimer()`).
 *
 * @returns An object with:
 *   - `mark(name)`: record the milliseconds elapsed since the previous mark (or since
 *     the timer was created, for the first mark) under `name`;
 *   - `getTimings()`: a snapshot of every mark plus `_total` (milliseconds since the
 *     timer was created), each rounded to two decimals. When the same name is marked
 *     more than once, the most recent duration wins;
 *   - `log()`: write the snapshot to stderr, but only when `TIMING_ENABLED` is truthy.
 */
function createTimer(): { mark: (name: string) => void; getTimings: () => TimingData; log: () => void } {
  const startTime = performance.now();
  const marks: { name: string; time: number }[] = [];
  let lastMark = startTime;

  // Round to two decimals so the logged JSON stays compact and readable.
  const roundMs = (ms: number): number => Math.round(ms * 100) / 100;

  const mark = (name: string): void => {
    const now = performance.now();
    marks.push({ name, time: now - lastMark });
    lastMark = now;
  };

  const getTimings = (): TimingData => {
    const timings: TimingData = {};
    for (const { name, time } of marks) {
      timings[name] = roundMs(time);
    }
    timings['_total'] = roundMs(performance.now() - startTime);
    return timings;
  };

  const log = (): void => {
    if (TIMING_ENABLED) {
      // stderr keeps timing output out of any result written to stdout.
      console.error(`[TIMING] smart-search: ${JSON.stringify(getTimings())}`);
    }
  };

  return { mark, getTimings, log };
}
|
|
59
|
+
|
|
27
60
|
// Define Zod schema for validation
|
|
28
61
|
const ParamsSchema = z.object({
|
|
29
62
|
// Action: search (content), find_files (path/name pattern), init, status
|
|
@@ -48,6 +81,9 @@ const ParamsSchema = z.object({
|
|
|
48
81
|
regex: z.boolean().default(true), // Use regex pattern matching (default: enabled)
|
|
49
82
|
caseSensitive: z.boolean().default(true), // Case sensitivity (default: case-sensitive)
|
|
50
83
|
tokenize: z.boolean().default(true), // Tokenize multi-word queries for OR matching (default: enabled)
|
|
84
|
+
// File type filtering
|
|
85
|
+
excludeExtensions: z.array(z.string()).optional().describe('File extensions to exclude from results (e.g., ["md", "txt"])'),
|
|
86
|
+
codeOnly: z.boolean().default(false).describe('Only return code files (excludes md, txt, json, yaml, xml, etc.)'),
|
|
51
87
|
// Fuzzy matching is implicit in hybrid mode (RRF fusion)
|
|
52
88
|
});
|
|
53
89
|
|
|
@@ -254,6 +290,8 @@ interface SearchMetadata {
|
|
|
254
290
|
tokenized?: boolean; // Whether tokenization was applied
|
|
255
291
|
// Pagination metadata
|
|
256
292
|
pagination?: PaginationInfo;
|
|
293
|
+
// Performance timing data (when SMART_SEARCH_TIMING=1 or DEBUG includes 'timing')
|
|
294
|
+
timing?: TimingData;
|
|
257
295
|
// Init action specific
|
|
258
296
|
action?: string;
|
|
259
297
|
path?: string;
|
|
@@ -1086,7 +1124,8 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
|
|
|
1086
1124
|
* Requires index with embeddings
|
|
1087
1125
|
*/
|
|
1088
1126
|
async function executeHybridMode(params: Params): Promise<SearchResult> {
|
|
1089
|
-
const
|
|
1127
|
+
const timer = createTimer();
|
|
1128
|
+
const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false } = params;
|
|
1090
1129
|
|
|
1091
1130
|
if (!query) {
|
|
1092
1131
|
return {
|
|
@@ -1097,6 +1136,7 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
|
|
|
1097
1136
|
|
|
1098
1137
|
// Check CodexLens availability
|
|
1099
1138
|
const readyStatus = await ensureCodexLensReady();
|
|
1139
|
+
timer.mark('codexlens_ready_check');
|
|
1100
1140
|
if (!readyStatus.ready) {
|
|
1101
1141
|
return {
|
|
1102
1142
|
success: false,
|
|
@@ -1106,6 +1146,7 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
|
|
|
1106
1146
|
|
|
1107
1147
|
// Check index status
|
|
1108
1148
|
const indexStatus = await checkIndexStatus(path);
|
|
1149
|
+
timer.mark('index_status_check');
|
|
1109
1150
|
|
|
1110
1151
|
// Request more results to support split (full content + extra files)
|
|
1111
1152
|
const totalToFetch = maxResults + extraFilesCount;
|
|
@@ -1114,8 +1155,10 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
|
|
|
1114
1155
|
args.push('--enrich');
|
|
1115
1156
|
}
|
|
1116
1157
|
const result = await executeCodexLens(args, { cwd: path });
|
|
1158
|
+
timer.mark('codexlens_search');
|
|
1117
1159
|
|
|
1118
1160
|
if (!result.success) {
|
|
1161
|
+
timer.log();
|
|
1119
1162
|
return {
|
|
1120
1163
|
success: false,
|
|
1121
1164
|
error: result.error,
|
|
@@ -1150,6 +1193,7 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
|
|
|
1150
1193
|
symbol: item.symbol || null,
|
|
1151
1194
|
};
|
|
1152
1195
|
});
|
|
1196
|
+
timer.mark('parse_results');
|
|
1153
1197
|
|
|
1154
1198
|
initialCount = allResults.length;
|
|
1155
1199
|
|
|
@@ -1159,14 +1203,15 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
|
|
|
1159
1203
|
allResults = baselineResult.filteredResults;
|
|
1160
1204
|
baselineInfo = baselineResult.baselineInfo;
|
|
1161
1205
|
|
|
1162
|
-
// 1. Filter noisy files (coverage, node_modules, etc.)
|
|
1163
|
-
allResults = filterNoisyFiles(allResults);
|
|
1206
|
+
// 1. Filter noisy files (coverage, node_modules, etc.) and excluded extensions
|
|
1207
|
+
allResults = filterNoisyFiles(allResults, { excludeExtensions, codeOnly });
|
|
1164
1208
|
// 2. Boost results containing query keywords
|
|
1165
1209
|
allResults = applyKeywordBoosting(allResults, query);
|
|
1166
1210
|
// 3. Enforce score diversity (penalize identical scores)
|
|
1167
1211
|
allResults = enforceScoreDiversity(allResults);
|
|
1168
1212
|
// 4. Re-sort by adjusted scores
|
|
1169
1213
|
allResults.sort((a, b) => b.score - a.score);
|
|
1214
|
+
timer.mark('post_processing');
|
|
1170
1215
|
} catch {
|
|
1171
1216
|
return {
|
|
1172
1217
|
success: true,
|
|
@@ -1184,6 +1229,7 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
|
|
|
1184
1229
|
|
|
1185
1230
|
// Split results: first N with full content, rest as file paths only
|
|
1186
1231
|
const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
|
|
1232
|
+
timer.mark('split_results');
|
|
1187
1233
|
|
|
1188
1234
|
// Build metadata with baseline info if detected
|
|
1189
1235
|
let note = 'Hybrid mode uses RRF fusion (exact + fuzzy + vector) for best results';
|
|
@@ -1191,6 +1237,10 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
|
|
|
1191
1237
|
note += ` | Filtered ${initialCount - allResults.length} hot-spot results with baseline score ~${baselineInfo.score.toFixed(4)}`;
|
|
1192
1238
|
}
|
|
1193
1239
|
|
|
1240
|
+
// Log timing data
|
|
1241
|
+
timer.log();
|
|
1242
|
+
const timings = timer.getTimings();
|
|
1243
|
+
|
|
1194
1244
|
return {
|
|
1195
1245
|
success: true,
|
|
1196
1246
|
results,
|
|
@@ -1203,22 +1253,82 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
|
|
|
1203
1253
|
note,
|
|
1204
1254
|
warning: indexStatus.warning,
|
|
1205
1255
|
suggested_weights: getRRFWeights(query),
|
|
1256
|
+
timing: TIMING_ENABLED ? timings : undefined,
|
|
1206
1257
|
},
|
|
1207
1258
|
};
|
|
1208
1259
|
}
|
|
1209
1260
|
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1261
|
+
/**
|
|
1262
|
+
* Query intent used to adapt RRF weights (Python parity).
|
|
1263
|
+
*
|
|
1264
|
+
* Keep this logic aligned with CodexLens Python hybrid search:
|
|
1265
|
+
* `codex-lens/src/codexlens/search/hybrid_search.py`
|
|
1266
|
+
*/
|
|
1267
|
+
export type QueryIntent = 'keyword' | 'semantic' | 'mixed';
|
|
1268
|
+
|
|
1269
|
+
// Python default: vector 60%, exact 30%, fuzzy 10%
|
|
1270
|
+
const DEFAULT_RRF_WEIGHTS = {
|
|
1271
|
+
exact: 0.3,
|
|
1272
|
+
fuzzy: 0.1,
|
|
1273
|
+
vector: 0.6,
|
|
1274
|
+
} as const;
|
|
1275
|
+
|
|
1276
|
+
/**
 * Scale a weight map so that its values add up to 1.
 *
 * @param weights - Map from source name to raw weight. It is not mutated.
 * @returns A new map with every weight divided by the total. When the total is
 *   not finite or is not positive, an unscaled shallow copy is returned instead.
 */
function normalizeWeights(weights: Record<string, number>): Record<string, number> {
  let total = 0;
  for (const value of Object.values(weights)) {
    total += value;
  }

  // A non-finite or non-positive total cannot be scaled meaningfully.
  const canScale = Number.isFinite(total) && total > 0;
  if (!canScale) {
    return { ...weights };
  }

  const scaledEntries = Object.entries(weights).map(
    ([source, value]): [string, number] => [source, value / total],
  );
  return Object.fromEntries(scaledEntries);
}
|
|
1281
|
+
|
|
1282
|
+
/**
 * Classify a search query as code-like, natural-language-like, or both/neither.
 *
 * Signals (kept identical to the Python heuristic this mirrors):
 * - Code: `.`, `::`, `->`, CamelCase, snake_case, or a common code keyword.
 * - Natural language: more than 5 words, a question mark, an interrogative
 *   (how/what/why/when/where), or a common action verb.
 *
 * @param query - Raw user query; surrounding whitespace is ignored.
 * @returns `'keyword'` when only code signals are present, `'semantic'` when only
 *   natural-language signals are present, and `'mixed'` when both or neither are
 *   present (including an empty or whitespace-only query).
 */
export function detectQueryIntent(query: string): QueryIntent {
  const text = query.trim();
  if (text.length === 0) {
    return 'mixed';
  }

  const structuralCodePatterns: RegExp[] = [
    /(::|->|\.)/,
    /[A-Z][a-z]+[A-Z]/,
    /\b\w+_\w+\b/,
  ];
  const looksLikeCode =
    structuralCodePatterns.some(pattern => pattern.test(text)) ||
    /\b(def|class|function|const|let|var|import|from|return|async|await|interface|type)\b/i.test(text.toLowerCase());

  const words = text.split(/\s+/).filter(token => token.length > 0);
  const naturalLanguagePatterns: RegExp[] = [
    /\?/,
    /\b(how|what|why|when|where)\b/i,
    /\b(handle|explain|fix|implement|create|build|use|find|search|convert|parse|generate|support)\b/i,
  ];
  const looksLikeNaturalLanguage =
    words.length > 5 || naturalLanguagePatterns.some(pattern => pattern.test(text));

  // Both kinds of signal, or none at all, give no clear preference.
  if (looksLikeCode === looksLikeNaturalLanguage) {
    return 'mixed';
  }
  return looksLikeCode ? 'keyword' : 'semantic';
}
|
|
1311
|
+
|
|
1312
|
+
/**
 * Pick the RRF weight profile for a detected query intent.
 *
 * - `keyword`: exact-heavy profile (exact 0.5, fuzzy 0.1, vector 0.4)
 * - `semantic`: vector-heavy profile (exact 0.2, fuzzy 0.1, vector 0.7)
 * - `mixed`: the caller's base weights
 *
 * @param intent - Intent classification of the query.
 * @param baseWeights - Weights used for `mixed` queries; ignored for the other
 *   intents. A copy is passed on, so the argument is not mutated here.
 * @returns The selected profile after it has been passed through `normalizeWeights`.
 */
export function adjustWeightsByIntent(
  intent: QueryIntent,
  baseWeights: Record<string, number>,
): Record<string, number> {
  switch (intent) {
    case 'keyword':
      return normalizeWeights({ exact: 0.5, fuzzy: 0.1, vector: 0.4 });
    case 'semantic':
      return normalizeWeights({ exact: 0.2, fuzzy: 0.1, vector: 0.7 });
    default:
      return normalizeWeights({ ...baseWeights });
  }
}
|
|
1215
1326
|
|
|
1216
|
-
function getRRFWeights(
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
return RRF_WEIGHTS.default;
|
|
1327
|
+
/**
 * Compute the RRF weights to use for a query.
 *
 * @param query - Raw search query; its intent is detected with `detectQueryIntent`.
 * @param baseWeights - Weights handed to `adjustWeightsByIntent` as the base profile.
 *   Defaults to `DEFAULT_RRF_WEIGHTS`.
 * @returns The weight map produced by `adjustWeightsByIntent` for the detected intent.
 */
export function getRRFWeights(
  query: string,
  baseWeights: Record<string, number> = DEFAULT_RRF_WEIGHTS,
): Record<string, number> {
  const intent = detectQueryIntent(query);
  return adjustWeightsByIntent(intent, baseWeights);
}
|
|
1223
1333
|
|
|
1224
1334
|
/**
|
|
@@ -1231,7 +1341,29 @@ const FILE_EXCLUDE_REGEXES = [...FILTER_CONFIG.exclude_files].map(pattern =>
|
|
|
1231
1341
|
new RegExp('^' + pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').replace(/\\\*/g, '.*') + '$')
|
|
1232
1342
|
);
|
|
1233
1343
|
|
|
1234
|
-
|
|
1344
|
+
// Non-code file extensions (for codeOnly filter)
|
|
1345
|
+
const NON_CODE_EXTENSIONS = new Set([
|
|
1346
|
+
'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'csv', 'log',
|
|
1347
|
+
'ini', 'cfg', 'conf', 'toml', 'env', 'properties',
|
|
1348
|
+
'html', 'htm', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'ico', 'webp',
|
|
1349
|
+
'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx',
|
|
1350
|
+
'lock', 'sum', 'mod',
|
|
1351
|
+
]);
|
|
1352
|
+
|
|
1353
|
+
interface FilterOptions {
|
|
1354
|
+
excludeExtensions?: string[];
|
|
1355
|
+
codeOnly?: boolean;
|
|
1356
|
+
}
|
|
1357
|
+
|
|
1358
|
+
function filterNoisyFiles(results: SemanticMatch[], options: FilterOptions = {}): SemanticMatch[] {
|
|
1359
|
+
const { excludeExtensions = [], codeOnly = false } = options;
|
|
1360
|
+
|
|
1361
|
+
// Build extension filter set
|
|
1362
|
+
const excludedExtSet = new Set(excludeExtensions.map(ext => ext.toLowerCase().replace(/^\./, '')));
|
|
1363
|
+
if (codeOnly) {
|
|
1364
|
+
NON_CODE_EXTENSIONS.forEach(ext => excludedExtSet.add(ext));
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1235
1367
|
return results.filter(r => {
|
|
1236
1368
|
const filePath = r.file || '';
|
|
1237
1369
|
if (!filePath) return true;
|
|
@@ -1249,6 +1381,14 @@ function filterNoisyFiles(results: SemanticMatch[]): SemanticMatch[] {
|
|
|
1249
1381
|
return false;
|
|
1250
1382
|
}
|
|
1251
1383
|
|
|
1384
|
+
// Extension filter check
|
|
1385
|
+
if (excludedExtSet.size > 0) {
|
|
1386
|
+
const ext = filename.split('.').pop()?.toLowerCase() || '';
|
|
1387
|
+
if (excludedExtSet.has(ext)) {
|
|
1388
|
+
return false;
|
|
1389
|
+
}
|
|
1390
|
+
}
|
|
1391
|
+
|
|
1252
1392
|
return true;
|
|
1253
1393
|
});
|
|
1254
1394
|
}
|
|
@@ -1396,10 +1536,11 @@ function filterDominantBaselineScores(
|
|
|
1396
1536
|
*/
|
|
1397
1537
|
function applyRRFFusion(
|
|
1398
1538
|
resultsMap: Map<string, any[]>,
|
|
1399
|
-
|
|
1539
|
+
weightsOrQuery: Record<string, number> | string,
|
|
1400
1540
|
limit: number,
|
|
1401
1541
|
k: number = 60,
|
|
1402
1542
|
): any[] {
|
|
1543
|
+
const weights = typeof weightsOrQuery === 'string' ? getRRFWeights(weightsOrQuery) : weightsOrQuery;
|
|
1403
1544
|
const pathScores = new Map<string, { score: number; result: any; sources: string[] }>();
|
|
1404
1545
|
|
|
1405
1546
|
resultsMap.forEach((results, source) => {
|
|
Binary file
|
|
@@ -100,6 +100,14 @@ class Config:
|
|
|
100
100
|
# For litellm: model name from config (e.g., "qwen3-embedding")
|
|
101
101
|
embedding_use_gpu: bool = True # For fastembed: whether to use GPU acceleration
|
|
102
102
|
|
|
103
|
+
# Indexing/search optimizations
|
|
104
|
+
global_symbol_index_enabled: bool = True # Enable project-wide symbol index fast path
|
|
105
|
+
|
|
106
|
+
# Optional search reranking (disabled by default)
|
|
107
|
+
enable_reranking: bool = False
|
|
108
|
+
reranking_top_k: int = 50
|
|
109
|
+
symbol_boost_factor: float = 1.5
|
|
110
|
+
|
|
103
111
|
# Multi-endpoint configuration for litellm backend
|
|
104
112
|
embedding_endpoints: List[Dict[str, Any]] = field(default_factory=list)
|
|
105
113
|
# List of endpoint configs: [{"model": "...", "api_key": "...", "api_base": "...", "weight": 1.0}]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -11,11 +11,14 @@ from dataclasses import dataclass, field
|
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
from typing import List, Optional, Dict, Any
|
|
13
13
|
import logging
|
|
14
|
+
import os
|
|
14
15
|
import time
|
|
15
16
|
|
|
16
17
|
from codexlens.entities import SearchResult, Symbol
|
|
18
|
+
from codexlens.config import Config
|
|
17
19
|
from codexlens.storage.registry import RegistryStore, DirMapping
|
|
18
20
|
from codexlens.storage.dir_index import DirIndexStore, SubdirLink
|
|
21
|
+
from codexlens.storage.global_index import GlobalSymbolIndex
|
|
19
22
|
from codexlens.storage.path_mapper import PathMapper
|
|
20
23
|
from codexlens.storage.sqlite_store import SQLiteStore
|
|
21
24
|
from codexlens.search.hybrid_search import HybridSearchEngine
|
|
@@ -107,7 +110,8 @@ class ChainSearchEngine:
|
|
|
107
110
|
def __init__(self,
|
|
108
111
|
registry: RegistryStore,
|
|
109
112
|
mapper: PathMapper,
|
|
110
|
-
max_workers: int = 8
|
|
113
|
+
max_workers: int = 8,
|
|
114
|
+
config: Config | None = None):
|
|
111
115
|
"""Initialize chain search engine.
|
|
112
116
|
|
|
113
117
|
Args:
|
|
@@ -120,6 +124,7 @@ class ChainSearchEngine:
|
|
|
120
124
|
self.logger = logging.getLogger(__name__)
|
|
121
125
|
self._max_workers = max_workers
|
|
122
126
|
self._executor: Optional[ThreadPoolExecutor] = None
|
|
127
|
+
self._config = config
|
|
123
128
|
|
|
124
129
|
def _get_executor(self, max_workers: Optional[int] = None) -> ThreadPoolExecutor:
|
|
125
130
|
"""Get or create the shared thread pool executor.
|
|
@@ -294,6 +299,71 @@ class ChainSearchEngine:
|
|
|
294
299
|
self.logger.warning(f"No index found for {source_path}")
|
|
295
300
|
return []
|
|
296
301
|
|
|
302
|
+
# Fast path: project-wide global symbol index (avoids chain traversal).
|
|
303
|
+
if self._config is None or getattr(self._config, "global_symbol_index_enabled", True):
|
|
304
|
+
try:
|
|
305
|
+
# Avoid relying on index_to_source() here; use the same logic as _find_start_index
|
|
306
|
+
# to determine the effective search root directory.
|
|
307
|
+
search_root = source_path.resolve()
|
|
308
|
+
exact_index = self.mapper.source_to_index_db(search_root)
|
|
309
|
+
if not exact_index.exists():
|
|
310
|
+
nearest = self.registry.find_nearest_index(search_root)
|
|
311
|
+
if nearest:
|
|
312
|
+
search_root = nearest.source_path
|
|
313
|
+
|
|
314
|
+
project = self.registry.find_by_source_path(str(search_root))
|
|
315
|
+
if project:
|
|
316
|
+
global_db_path = Path(project["index_root"]) / GlobalSymbolIndex.DEFAULT_DB_NAME
|
|
317
|
+
if global_db_path.exists():
|
|
318
|
+
query_limit = max(int(options.total_limit) * 10, int(options.total_limit))
|
|
319
|
+
with GlobalSymbolIndex(global_db_path, project_id=int(project["id"])) as global_index:
|
|
320
|
+
candidates = global_index.search(name=name, kind=kind, limit=query_limit)
|
|
321
|
+
|
|
322
|
+
# Apply depth constraint relative to the start index directory.
|
|
323
|
+
filtered: List[Symbol] = []
|
|
324
|
+
for sym in candidates:
|
|
325
|
+
if not sym.file:
|
|
326
|
+
continue
|
|
327
|
+
try:
|
|
328
|
+
root_str = str(search_root)
|
|
329
|
+
file_dir_str = str(Path(sym.file).parent)
|
|
330
|
+
|
|
331
|
+
# Normalize Windows long-path prefix (\\?\) if present.
|
|
332
|
+
if root_str.startswith("\\\\?\\"):
|
|
333
|
+
root_str = root_str[4:]
|
|
334
|
+
if file_dir_str.startswith("\\\\?\\"):
|
|
335
|
+
file_dir_str = file_dir_str[4:]
|
|
336
|
+
|
|
337
|
+
root_cmp = root_str.lower().rstrip("\\/")
|
|
338
|
+
dir_cmp = file_dir_str.lower().rstrip("\\/")
|
|
339
|
+
|
|
340
|
+
if os.path.commonpath([root_cmp, dir_cmp]) != root_cmp:
|
|
341
|
+
continue
|
|
342
|
+
|
|
343
|
+
rel = os.path.relpath(dir_cmp, root_cmp)
|
|
344
|
+
rel_depth = 0 if rel == "." else len(rel.split(os.sep))
|
|
345
|
+
except Exception:
|
|
346
|
+
continue
|
|
347
|
+
|
|
348
|
+
if options.depth >= 0 and rel_depth > options.depth:
|
|
349
|
+
continue
|
|
350
|
+
filtered.append(sym)
|
|
351
|
+
|
|
352
|
+
if filtered:
|
|
353
|
+
# Match existing semantics: dedupe by (name, kind, range), sort by name.
|
|
354
|
+
seen = set()
|
|
355
|
+
unique_symbols: List[Symbol] = []
|
|
356
|
+
for sym in filtered:
|
|
357
|
+
key = (sym.name, sym.kind, sym.range)
|
|
358
|
+
if key in seen:
|
|
359
|
+
continue
|
|
360
|
+
seen.add(key)
|
|
361
|
+
unique_symbols.append(sym)
|
|
362
|
+
unique_symbols.sort(key=lambda s: s.name)
|
|
363
|
+
return unique_symbols[: options.total_limit]
|
|
364
|
+
except Exception as exc:
|
|
365
|
+
self.logger.debug("Global symbol index fast path failed: %s", exc)
|
|
366
|
+
|
|
297
367
|
index_paths = self._collect_index_paths(start_index, options.depth)
|
|
298
368
|
if not index_paths:
|
|
299
369
|
return []
|