claude-code-workflow 6.3.18 → 6.3.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/CLAUDE.md +8 -5
- package/.claude/agents/action-planning-agent.md +26 -2
- package/.claude/agents/code-developer.md +132 -43
- package/.claude/agents/debug-explore-agent.md +434 -0
- package/.claude/agents/test-fix-agent.md +14 -0
- package/.claude/commands/issue/discover.md +41 -0
- package/.claude/commands/issue/execute.md +200 -19
- package/.claude/commands/issue/new.md +1 -1
- package/.claude/commands/issue/plan.md +6 -1
- package/.claude/commands/issue/queue.md +94 -39
- package/.claude/commands/memory/swagger-docs.md +773 -0
- package/.claude/commands/workflow/brainstorm/auto-parallel.md +21 -21
- package/.claude/commands/workflow/execute.md +54 -34
- package/.claude/commands/workflow/lite-execute.md +48 -164
- package/.claude/commands/workflow/lite-fix.md +4 -4
- package/.claude/commands/workflow/lite-plan.md +5 -5
- package/.claude/commands/workflow/plan.md +27 -27
- package/.claude/commands/workflow/review.md +42 -17
- package/.claude/commands/workflow/tdd-plan.md +25 -25
- package/.claude/commands/workflow/test-fix-gen.md +10 -10
- package/.claude/commands/workflow/test-gen.md +14 -14
- package/.claude/commands/workflow/ui-design/explore-auto.md +21 -21
- package/.claude/commands/workflow/ui-design/imitate-auto.md +24 -24
- package/.claude/skills/_shared/SKILL-DESIGN-SPEC.md +693 -0
- package/.claude/skills/ccw/SKILL.md +462 -0
- package/.claude/skills/ccw/index/command-capabilities.json +127 -0
- package/.claude/skills/ccw/index/intent-rules.json +136 -0
- package/.claude/skills/ccw/index/workflow-chains.json +451 -0
- package/.claude/skills/ccw/phases/actions/bugfix.md +218 -0
- package/.claude/skills/ccw/phases/actions/coupled.md +194 -0
- package/.claude/skills/ccw/phases/actions/docs.md +93 -0
- package/.claude/skills/ccw/phases/actions/full.md +154 -0
- package/.claude/skills/ccw/phases/actions/issue.md +201 -0
- package/.claude/skills/ccw/phases/actions/rapid.md +104 -0
- package/.claude/skills/ccw/phases/actions/review-fix.md +84 -0
- package/.claude/skills/ccw/phases/actions/tdd.md +66 -0
- package/.claude/skills/ccw/phases/actions/ui.md +79 -0
- package/.claude/skills/ccw/phases/orchestrator.md +435 -0
- package/.claude/skills/ccw/specs/intent-classification.md +336 -0
- package/.claude/skills/ccw-help/SKILL.md +177 -0
- package/.claude/skills/ccw-help/index/all-agents.json +82 -0
- package/.claude/skills/{command-guide → ccw-help}/index/all-commands.json +183 -73
- package/.claude/skills/{command-guide → ccw-help}/index/by-category.json +187 -73
- package/.claude/skills/{command-guide → ccw-help}/index/by-use-case.json +295 -185
- package/.claude/skills/{command-guide → ccw-help}/index/command-relationships.json +19 -166
- package/.claude/skills/{command-guide → ccw-help}/index/essential-commands.json +10 -10
- package/.claude/skills/ccw-help/scripts/analyze_commands.py +337 -0
- package/.claude/skills/code-reviewer/README.md +340 -0
- package/.claude/skills/code-reviewer/SKILL.md +308 -0
- package/.claude/skills/code-reviewer/phases/01-code-discovery.md +246 -0
- package/.claude/skills/code-reviewer/phases/02-security-analysis.md +442 -0
- package/.claude/skills/code-reviewer/phases/03-best-practices-review.md +36 -0
- package/.claude/skills/code-reviewer/phases/04-report-generation.md +278 -0
- package/.claude/skills/code-reviewer/specs/best-practices-requirements.md +346 -0
- package/.claude/skills/code-reviewer/specs/quality-standards.md +252 -0
- package/.claude/skills/code-reviewer/specs/security-requirements.md +243 -0
- package/.claude/skills/code-reviewer/templates/best-practice-finding.md +234 -0
- package/.claude/skills/code-reviewer/templates/report-template.md +316 -0
- package/.claude/skills/code-reviewer/templates/security-finding.md +161 -0
- package/.claude/skills/skill-generator/SKILL.md +187 -0
- package/.claude/skills/skill-generator/phases/01-requirements-discovery.md +239 -0
- package/.claude/skills/skill-generator/phases/02-structure-generation.md +207 -0
- package/.claude/skills/skill-generator/phases/03-phase-generation.md +802 -0
- package/.claude/skills/skill-generator/phases/04-specs-templates.md +328 -0
- package/.claude/skills/skill-generator/phases/05-validation.md +334 -0
- package/.claude/skills/skill-generator/specs/cli-integration.md +448 -0
- package/.claude/skills/skill-generator/specs/execution-modes.md +396 -0
- package/.claude/skills/skill-generator/specs/scripting-integration.md +265 -0
- package/.claude/skills/skill-generator/specs/skill-requirements.md +466 -0
- package/.claude/skills/skill-generator/templates/autonomous-action.md +517 -0
- package/.claude/skills/skill-generator/templates/autonomous-orchestrator.md +276 -0
- package/.claude/skills/skill-generator/templates/code-analysis-action.md +503 -0
- package/.claude/skills/skill-generator/templates/llm-action.md +355 -0
- package/.claude/skills/skill-generator/templates/script-bash.md +277 -0
- package/.claude/skills/skill-generator/templates/script-python.md +198 -0
- package/.claude/skills/skill-generator/templates/sequential-phase.md +441 -0
- package/.claude/skills/skill-generator/templates/skill-md.md +156 -0
- package/.claude/workflows/chinese-response.md +15 -28
- package/.claude/workflows/cli-templates/prompts/documentation/swagger-api.txt +266 -0
- package/.claude/workflows/cli-tools-usage.md +221 -177
- package/.claude/workflows/windows-platform.md +13 -10
- package/.codex/prompts/issue-execute.md +305 -82
- package/.codex/prompts/issue-queue.md +22 -0
- package/.codex/prompts/lite-execute.md +36 -11
- package/README.md +309 -305
- package/ccw/README.md +10 -4
- package/ccw/dist/cli.d.ts.map +1 -1
- package/ccw/dist/cli.js +4 -1
- package/ccw/dist/cli.js.map +1 -1
- package/ccw/dist/commands/cli.d.ts.map +1 -1
- package/ccw/dist/commands/cli.js +131 -34
- package/ccw/dist/commands/cli.js.map +1 -1
- package/ccw/dist/commands/issue.d.ts +152 -0
- package/ccw/dist/commands/issue.d.ts.map +1 -1
- package/ccw/dist/commands/issue.js +550 -85
- package/ccw/dist/commands/issue.js.map +1 -1
- package/ccw/dist/commands/serve.d.ts +1 -0
- package/ccw/dist/commands/serve.d.ts.map +1 -1
- package/ccw/dist/commands/serve.js +12 -5
- package/ccw/dist/commands/serve.js.map +1 -1
- package/ccw/dist/commands/stop.d.ts.map +1 -1
- package/ccw/dist/commands/stop.js +29 -5
- package/ccw/dist/commands/stop.js.map +1 -1
- package/ccw/dist/commands/tool.d.ts.map +1 -1
- package/ccw/dist/commands/tool.js +19 -2
- package/ccw/dist/commands/tool.js.map +1 -1
- package/ccw/dist/commands/view.d.ts +1 -0
- package/ccw/dist/commands/view.d.ts.map +1 -1
- package/ccw/dist/commands/view.js +10 -3
- package/ccw/dist/commands/view.js.map +1 -1
- package/ccw/dist/config/cli-settings-manager.d.ts +86 -0
- package/ccw/dist/config/cli-settings-manager.d.ts.map +1 -0
- package/ccw/dist/config/cli-settings-manager.js +392 -0
- package/ccw/dist/config/cli-settings-manager.js.map +1 -0
- package/ccw/dist/config/litellm-api-config-manager.d.ts +71 -5
- package/ccw/dist/config/litellm-api-config-manager.d.ts.map +1 -1
- package/ccw/dist/config/litellm-api-config-manager.js +290 -20
- package/ccw/dist/config/litellm-api-config-manager.js.map +1 -1
- package/ccw/dist/core/auth/csrf-manager.d.ts +18 -0
- package/ccw/dist/core/auth/csrf-manager.d.ts.map +1 -0
- package/ccw/dist/core/auth/csrf-manager.js +80 -0
- package/ccw/dist/core/auth/csrf-manager.js.map +1 -0
- package/ccw/dist/core/auth/csrf-middleware.d.ts +8 -0
- package/ccw/dist/core/auth/csrf-middleware.d.ts.map +1 -0
- package/ccw/dist/core/auth/csrf-middleware.js +141 -0
- package/ccw/dist/core/auth/csrf-middleware.js.map +1 -0
- package/ccw/dist/core/auth/middleware.d.ts +15 -0
- package/ccw/dist/core/auth/middleware.d.ts.map +1 -0
- package/ccw/dist/core/auth/middleware.js +76 -0
- package/ccw/dist/core/auth/middleware.js.map +1 -0
- package/ccw/dist/core/auth/token-manager.d.ts +41 -0
- package/ccw/dist/core/auth/token-manager.d.ts.map +1 -0
- package/ccw/dist/core/auth/token-manager.js +171 -0
- package/ccw/dist/core/auth/token-manager.js.map +1 -0
- package/ccw/dist/core/cache-manager.d.ts +6 -6
- package/ccw/dist/core/cache-manager.d.ts.map +1 -1
- package/ccw/dist/core/cache-manager.js +70 -48
- package/ccw/dist/core/cache-manager.js.map +1 -1
- package/ccw/dist/core/claude-freshness.d.ts.map +1 -1
- package/ccw/dist/core/claude-freshness.js +23 -3
- package/ccw/dist/core/claude-freshness.js.map +1 -1
- package/ccw/dist/core/core-memory-store.d.ts.map +1 -1
- package/ccw/dist/core/core-memory-store.js +2 -1
- package/ccw/dist/core/core-memory-store.js.map +1 -1
- package/ccw/dist/core/cors.d.ts +3 -0
- package/ccw/dist/core/cors.d.ts.map +1 -0
- package/ccw/dist/core/cors.js +10 -0
- package/ccw/dist/core/cors.js.map +1 -0
- package/ccw/dist/core/dashboard-generator-patch.js +0 -1
- package/ccw/dist/core/dashboard-generator-patch.js.map +1 -1
- package/ccw/dist/core/dashboard-generator.d.ts.map +1 -1
- package/ccw/dist/core/dashboard-generator.js +417 -416
- package/ccw/dist/core/dashboard-generator.js.map +1 -1
- package/ccw/dist/core/data-aggregator.js +2 -2
- package/ccw/dist/core/data-aggregator.js.map +1 -1
- package/ccw/dist/core/lite-scanner.d.ts +1 -1
- package/ccw/dist/core/lite-scanner.d.ts.map +1 -1
- package/ccw/dist/core/lite-scanner.js +130 -127
- package/ccw/dist/core/lite-scanner.js.map +1 -1
- package/ccw/dist/core/routes/auth-routes.d.ts +12 -0
- package/ccw/dist/core/routes/auth-routes.d.ts.map +1 -0
- package/ccw/dist/core/routes/auth-routes.js +80 -0
- package/ccw/dist/core/routes/auth-routes.js.map +1 -0
- package/ccw/dist/core/routes/ccw-routes.d.ts +1 -14
- package/ccw/dist/core/routes/ccw-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/ccw-routes.js +9 -4
- package/ccw/dist/core/routes/ccw-routes.js.map +1 -1
- package/ccw/dist/core/routes/claude-routes.d.ts +1 -14
- package/ccw/dist/core/routes/claude-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/claude-routes.js +98 -39
- package/ccw/dist/core/routes/claude-routes.js.map +1 -1
- package/ccw/dist/core/routes/cli-routes.d.ts +14 -12
- package/ccw/dist/core/routes/cli-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/cli-routes.js +122 -43
- package/ccw/dist/core/routes/cli-routes.js.map +1 -1
- package/ccw/dist/core/routes/cli-settings-routes.d.ts +11 -0
- package/ccw/dist/core/routes/cli-settings-routes.d.ts.map +1 -0
- package/ccw/dist/core/routes/cli-settings-routes.js +204 -0
- package/ccw/dist/core/routes/cli-settings-routes.js.map +1 -0
- package/ccw/dist/core/routes/codexlens/config-handlers.d.ts +6 -0
- package/ccw/dist/core/routes/codexlens/config-handlers.d.ts.map +1 -0
- package/ccw/dist/core/routes/codexlens/config-handlers.js +1195 -0
- package/ccw/dist/core/routes/codexlens/config-handlers.js.map +1 -0
- package/ccw/dist/core/routes/codexlens/index-handlers.d.ts +10 -0
- package/ccw/dist/core/routes/codexlens/index-handlers.d.ts.map +1 -0
- package/ccw/dist/core/routes/codexlens/index-handlers.js +322 -0
- package/ccw/dist/core/routes/codexlens/index-handlers.js.map +1 -0
- package/ccw/dist/core/routes/codexlens/semantic-handlers.d.ts +6 -0
- package/ccw/dist/core/routes/codexlens/semantic-handlers.d.ts.map +1 -0
- package/ccw/dist/core/routes/codexlens/semantic-handlers.js +865 -0
- package/ccw/dist/core/routes/codexlens/semantic-handlers.js.map +1 -0
- package/ccw/dist/core/routes/codexlens/utils.d.ts +23 -0
- package/ccw/dist/core/routes/codexlens/utils.d.ts.map +1 -0
- package/ccw/dist/core/routes/codexlens/utils.js +85 -0
- package/ccw/dist/core/routes/codexlens/utils.js.map +1 -0
- package/ccw/dist/core/routes/codexlens/watcher-handlers.d.ts +13 -0
- package/ccw/dist/core/routes/codexlens/watcher-handlers.d.ts.map +1 -0
- package/ccw/dist/core/routes/codexlens/watcher-handlers.js +235 -0
- package/ccw/dist/core/routes/codexlens/watcher-handlers.js.map +1 -0
- package/ccw/dist/core/routes/codexlens-routes.d.ts +2 -11
- package/ccw/dist/core/routes/codexlens-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/codexlens-routes.js +10 -981
- package/ccw/dist/core/routes/codexlens-routes.js.map +1 -1
- package/ccw/dist/core/routes/discovery-routes.d.ts +1 -35
- package/ccw/dist/core/routes/discovery-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/discovery-routes.js +25 -0
- package/ccw/dist/core/routes/discovery-routes.js.map +1 -1
- package/ccw/dist/core/routes/files-routes.d.ts +1 -14
- package/ccw/dist/core/routes/files-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/files-routes.js +57 -14
- package/ccw/dist/core/routes/files-routes.js.map +1 -1
- package/ccw/dist/core/routes/graph-routes.d.ts +1 -14
- package/ccw/dist/core/routes/graph-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/graph-routes.js +36 -37
- package/ccw/dist/core/routes/graph-routes.js.map +1 -1
- package/ccw/dist/core/routes/help-routes.d.ts +1 -14
- package/ccw/dist/core/routes/help-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/help-routes.js +5 -0
- package/ccw/dist/core/routes/help-routes.js.map +1 -1
- package/ccw/dist/core/routes/hooks-routes.d.ts +4 -14
- package/ccw/dist/core/routes/hooks-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/hooks-routes.js +43 -21
- package/ccw/dist/core/routes/hooks-routes.js.map +1 -1
- package/ccw/dist/core/routes/issue-routes.d.ts +1 -34
- package/ccw/dist/core/routes/issue-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/issue-routes.js +24 -0
- package/ccw/dist/core/routes/issue-routes.js.map +1 -1
- package/ccw/dist/core/routes/litellm-api-routes.d.ts +1 -14
- package/ccw/dist/core/routes/litellm-api-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/litellm-api-routes.js +505 -48
- package/ccw/dist/core/routes/litellm-api-routes.js.map +1 -1
- package/ccw/dist/core/routes/litellm-routes.d.ts +1 -14
- package/ccw/dist/core/routes/litellm-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/litellm-routes.js +28 -11
- package/ccw/dist/core/routes/litellm-routes.js.map +1 -1
- package/ccw/dist/core/routes/mcp-routes.d.ts +1 -14
- package/ccw/dist/core/routes/mcp-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/mcp-routes.js +99 -30
- package/ccw/dist/core/routes/mcp-routes.js.map +1 -1
- package/ccw/dist/core/routes/mcp-templates-db.d.ts.map +1 -1
- package/ccw/dist/core/routes/mcp-templates-db.js +30 -31
- package/ccw/dist/core/routes/mcp-templates-db.js.map +1 -1
- package/ccw/dist/core/routes/memory-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/memory-routes.js +74 -24
- package/ccw/dist/core/routes/memory-routes.js.map +1 -1
- package/ccw/dist/core/routes/nav-status-routes.d.ts +3 -0
- package/ccw/dist/core/routes/nav-status-routes.d.ts.map +1 -0
- package/ccw/dist/core/routes/nav-status-routes.js +217 -0
- package/ccw/dist/core/routes/nav-status-routes.js.map +1 -0
- package/ccw/dist/core/routes/rules-routes.d.ts +1 -14
- package/ccw/dist/core/routes/rules-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/rules-routes.js +481 -58
- package/ccw/dist/core/routes/rules-routes.js.map +1 -1
- package/ccw/dist/core/routes/session-routes.d.ts +1 -14
- package/ccw/dist/core/routes/session-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/session-routes.js +15 -3
- package/ccw/dist/core/routes/session-routes.js.map +1 -1
- package/ccw/dist/core/routes/skills-routes.d.ts +1 -14
- package/ccw/dist/core/routes/skills-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/skills-routes.js +394 -112
- package/ccw/dist/core/routes/skills-routes.js.map +1 -1
- package/ccw/dist/core/routes/status-routes.d.ts +1 -14
- package/ccw/dist/core/routes/status-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/status-routes.js +4 -0
- package/ccw/dist/core/routes/status-routes.js.map +1 -1
- package/ccw/dist/core/routes/system-routes.d.ts +4 -10
- package/ccw/dist/core/routes/system-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/system-routes.js +6 -4
- package/ccw/dist/core/routes/system-routes.js.map +1 -1
- package/ccw/dist/core/routes/types.d.ts +19 -0
- package/ccw/dist/core/routes/types.d.ts.map +1 -0
- package/ccw/dist/core/routes/types.js +2 -0
- package/ccw/dist/core/routes/types.js.map +1 -0
- package/ccw/dist/core/server.d.ts.map +1 -1
- package/ccw/dist/core/server.js +201 -29
- package/ccw/dist/core/server.js.map +1 -1
- package/ccw/dist/core/services/api-key-tester.d.ts +31 -0
- package/ccw/dist/core/services/api-key-tester.d.ts.map +1 -0
- package/ccw/dist/core/services/api-key-tester.js +106 -0
- package/ccw/dist/core/services/api-key-tester.js.map +1 -0
- package/ccw/dist/core/services/health-check-service.d.ts +82 -0
- package/ccw/dist/core/services/health-check-service.d.ts.map +1 -0
- package/ccw/dist/core/services/health-check-service.js +271 -0
- package/ccw/dist/core/services/health-check-service.js.map +1 -0
- package/ccw/dist/core/websocket.d.ts +9 -7
- package/ccw/dist/core/websocket.d.ts.map +1 -1
- package/ccw/dist/core/websocket.js +9 -4
- package/ccw/dist/core/websocket.js.map +1 -1
- package/ccw/dist/tools/claude-cli-tools.d.ts +152 -28
- package/ccw/dist/tools/claude-cli-tools.d.ts.map +1 -1
- package/ccw/dist/tools/claude-cli-tools.js +490 -100
- package/ccw/dist/tools/claude-cli-tools.js.map +1 -1
- package/ccw/dist/tools/cli-config-manager.d.ts +24 -8
- package/ccw/dist/tools/cli-config-manager.d.ts.map +1 -1
- package/ccw/dist/tools/cli-config-manager.js +76 -156
- package/ccw/dist/tools/cli-config-manager.js.map +1 -1
- package/ccw/dist/tools/cli-executor-core.d.ts +85 -0
- package/ccw/dist/tools/cli-executor-core.d.ts.map +1 -0
- package/ccw/dist/tools/cli-executor-core.js +1310 -0
- package/ccw/dist/tools/cli-executor-core.js.map +1 -0
- package/ccw/dist/tools/cli-executor-state.d.ts +241 -0
- package/ccw/dist/tools/cli-executor-state.d.ts.map +1 -0
- package/ccw/dist/tools/cli-executor-state.js +392 -0
- package/ccw/dist/tools/cli-executor-state.js.map +1 -0
- package/ccw/dist/tools/cli-executor-utils.d.ts +36 -0
- package/ccw/dist/tools/cli-executor-utils.d.ts.map +1 -0
- package/ccw/dist/tools/cli-executor-utils.js +298 -0
- package/ccw/dist/tools/cli-executor-utils.js.map +1 -0
- package/ccw/dist/tools/cli-executor.d.ts +3 -377
- package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
- package/ccw/dist/tools/cli-executor.js +3 -1884
- package/ccw/dist/tools/cli-executor.js.map +1 -1
- package/ccw/dist/tools/cli-history-store.d.ts +2 -0
- package/ccw/dist/tools/cli-history-store.d.ts.map +1 -1
- package/ccw/dist/tools/cli-history-store.js.map +1 -1
- package/ccw/dist/tools/cli-output-converter.d.ts +192 -0
- package/ccw/dist/tools/cli-output-converter.d.ts.map +1 -0
- package/ccw/dist/tools/cli-output-converter.js +1047 -0
- package/ccw/dist/tools/cli-output-converter.js.map +1 -0
- package/ccw/dist/tools/cli-prompt-builder.d.ts +113 -0
- package/ccw/dist/tools/cli-prompt-builder.d.ts.map +1 -0
- package/ccw/dist/tools/cli-prompt-builder.js +363 -0
- package/ccw/dist/tools/cli-prompt-builder.js.map +1 -0
- package/ccw/dist/tools/codex-lens.d.ts +15 -1
- package/ccw/dist/tools/codex-lens.d.ts.map +1 -1
- package/ccw/dist/tools/codex-lens.js +289 -55
- package/ccw/dist/tools/codex-lens.js.map +1 -1
- package/ccw/dist/tools/detect-changed-modules.d.ts.map +1 -1
- package/ccw/dist/tools/detect-changed-modules.js +22 -4
- package/ccw/dist/tools/detect-changed-modules.js.map +1 -1
- package/ccw/dist/tools/index.d.ts.map +1 -1
- package/ccw/dist/tools/index.js +2 -0
- package/ccw/dist/tools/index.js.map +1 -1
- package/ccw/dist/tools/litellm-client.d.ts.map +1 -1
- package/ccw/dist/tools/litellm-client.js +10 -4
- package/ccw/dist/tools/litellm-client.js.map +1 -1
- package/ccw/dist/tools/litellm-executor.d.ts +2 -4
- package/ccw/dist/tools/litellm-executor.d.ts.map +1 -1
- package/ccw/dist/tools/litellm-executor.js +39 -8
- package/ccw/dist/tools/litellm-executor.js.map +1 -1
- package/ccw/dist/tools/native-session-discovery.d.ts +2 -0
- package/ccw/dist/tools/native-session-discovery.d.ts.map +1 -1
- package/ccw/dist/tools/native-session-discovery.js +197 -1
- package/ccw/dist/tools/native-session-discovery.js.map +1 -1
- package/ccw/dist/tools/session-manager.d.ts.map +1 -1
- package/ccw/dist/tools/session-manager.js +79 -0
- package/ccw/dist/tools/session-manager.js.map +1 -1
- package/ccw/dist/tools/skill-context-loader.d.ts +15 -0
- package/ccw/dist/tools/skill-context-loader.d.ts.map +1 -0
- package/ccw/dist/tools/skill-context-loader.js +198 -0
- package/ccw/dist/tools/skill-context-loader.js.map +1 -0
- package/ccw/dist/tools/smart-search.d.ts +8 -3
- package/ccw/dist/tools/smart-search.d.ts.map +1 -1
- package/ccw/dist/tools/smart-search.js +378 -75
- package/ccw/dist/tools/smart-search.js.map +1 -1
- package/ccw/dist/types/cli-settings.d.ts +86 -0
- package/ccw/dist/types/cli-settings.d.ts.map +1 -0
- package/ccw/dist/types/cli-settings.js +54 -0
- package/ccw/dist/types/cli-settings.js.map +1 -0
- package/ccw/dist/types/litellm-api-config.d.ts +40 -1
- package/ccw/dist/types/litellm-api-config.d.ts.map +1 -1
- package/ccw/dist/utils/exec-constants.d.ts +25 -0
- package/ccw/dist/utils/exec-constants.d.ts.map +1 -0
- package/ccw/dist/utils/exec-constants.js +25 -0
- package/ccw/dist/utils/exec-constants.js.map +1 -0
- package/ccw/dist/utils/path-resolver.d.ts +1 -0
- package/ccw/dist/utils/path-resolver.d.ts.map +1 -1
- package/ccw/dist/utils/path-resolver.js +48 -3
- package/ccw/dist/utils/path-resolver.js.map +1 -1
- package/ccw/dist/utils/path-validator.d.ts.map +1 -1
- package/ccw/dist/utils/path-validator.js +25 -6
- package/ccw/dist/utils/path-validator.js.map +1 -1
- package/ccw/dist/utils/python-utils.d.ts.map +1 -1
- package/ccw/dist/utils/python-utils.js +27 -7
- package/ccw/dist/utils/python-utils.js.map +1 -1
- package/ccw/dist/utils/shell-escape.d.ts +8 -0
- package/ccw/dist/utils/shell-escape.d.ts.map +1 -0
- package/ccw/dist/utils/shell-escape.js +24 -0
- package/ccw/dist/utils/shell-escape.js.map +1 -0
- package/ccw/dist/utils/uv-manager.d.ts +167 -0
- package/ccw/dist/utils/uv-manager.d.ts.map +1 -0
- package/ccw/dist/utils/uv-manager.js +644 -0
- package/ccw/dist/utils/uv-manager.js.map +1 -0
- package/ccw/src/cli.ts +4 -1
- package/ccw/src/commands/cli.ts +132 -34
- package/ccw/src/commands/issue.ts +605 -91
- package/ccw/src/commands/serve.ts +15 -5
- package/ccw/src/commands/stop.ts +32 -5
- package/ccw/src/commands/tool.ts +17 -2
- package/ccw/src/commands/view.ts +13 -3
- package/ccw/src/config/cli-settings-manager.ts +460 -0
- package/ccw/src/config/litellm-api-config-manager.ts +392 -57
- package/ccw/src/core/auth/csrf-manager.ts +104 -0
- package/ccw/src/core/auth/csrf-middleware.ts +159 -0
- package/ccw/src/core/auth/middleware.ts +94 -0
- package/ccw/src/core/auth/token-manager.ts +219 -0
- package/ccw/src/core/cache-manager.ts +64 -52
- package/ccw/src/core/claude-freshness.ts +26 -6
- package/ccw/src/core/core-memory-store.ts +2 -1
- package/ccw/src/core/cors.ts +10 -0
- package/ccw/src/core/dashboard-generator-patch.ts +47 -48
- package/ccw/src/core/dashboard-generator.ts +797 -744
- package/ccw/src/core/data-aggregator.ts +667 -667
- package/ccw/src/core/lite-scanner.ts +156 -140
- package/ccw/src/core/routes/auth-routes.ts +98 -0
- package/ccw/src/core/routes/ccw-routes.ts +10 -20
- package/ccw/src/core/routes/claude-routes.ts +101 -51
- package/ccw/src/core/routes/cli-routes.ts +152 -55
- package/ccw/src/core/routes/cli-settings-routes.ts +232 -0
- package/ccw/src/core/routes/codexlens/README.md +37 -0
- package/ccw/src/core/routes/codexlens/config-handlers.ts +1269 -0
- package/ccw/src/core/routes/codexlens/index-handlers.ts +354 -0
- package/ccw/src/core/routes/codexlens/semantic-handlers.ts +931 -0
- package/ccw/src/core/routes/codexlens/utils.ts +96 -0
- package/ccw/src/core/routes/codexlens/watcher-handlers.ts +265 -0
- package/ccw/src/core/routes/codexlens-routes.ts +11 -1044
- package/ccw/src/core/routes/discovery-routes.ts +1 -12
- package/ccw/src/core/routes/files-routes.ts +112 -40
- package/ccw/src/core/routes/graph-routes.ts +39 -46
- package/ccw/src/core/routes/help-routes.ts +2 -12
- package/ccw/src/core/routes/hooks-routes.ts +83 -44
- package/ccw/src/core/routes/issue-routes.ts +1 -12
- package/ccw/src/core/routes/litellm-api-routes.ts +566 -60
- package/ccw/src/core/routes/litellm-routes.ts +35 -27
- package/ccw/src/core/routes/mcp-routes.ts +157 -60
- package/ccw/src/core/routes/mcp-routes.ts.backup +549 -550
- package/ccw/src/core/routes/mcp-templates-db.ts +267 -268
- package/ccw/src/core/routes/memory-routes.ts +76 -22
- package/ccw/src/core/routes/nav-status-routes.ts +231 -0
- package/ccw/src/core/routes/rules-routes.ts +600 -81
- package/ccw/src/core/routes/session-routes.ts +28 -22
- package/ccw/src/core/routes/skills-routes.ts +452 -132
- package/ccw/src/core/routes/status-routes.ts +1 -12
- package/ccw/src/core/routes/system-routes.ts +15 -22
- package/ccw/src/core/routes/types.ts +25 -0
- package/ccw/src/core/server.ts +651 -468
- package/ccw/src/core/services/api-key-tester.ts +137 -0
- package/ccw/src/core/services/health-check-service.ts +340 -0
- package/ccw/src/core/websocket.ts +20 -12
- package/ccw/src/templates/dashboard-css/01-base.css +109 -0
- package/ccw/src/templates/dashboard-css/10-cli-status.css +202 -0
- package/ccw/src/templates/dashboard-css/21-cli-toolmgmt.css +308 -0
- package/ccw/src/templates/dashboard-css/30-core-memory.css +20 -0
- package/ccw/src/templates/dashboard-css/31-api-settings.css +751 -14
- package/ccw/src/templates/dashboard-css/33-cli-stream-viewer.css +230 -2
- package/ccw/src/templates/dashboard-js/api.js +5 -0
- package/ccw/src/templates/dashboard-js/components/cli-status.js +279 -107
- package/ccw/src/templates/dashboard-js/components/cli-stream-viewer.js +262 -20
- package/ccw/src/templates/dashboard-js/components/hook-manager.js +105 -5
- package/ccw/src/templates/dashboard-js/components/mcp-manager.js +317 -0
- package/ccw/src/templates/dashboard-js/components/navigation.js +45 -0
- package/ccw/src/templates/dashboard-js/components/notifications.js +128 -0
- package/ccw/src/templates/dashboard-js/i18n.js +4438 -3983
- package/ccw/src/templates/dashboard-js/main.js +71 -0
- package/ccw/src/templates/dashboard-js/services.js +289 -0
- package/ccw/src/templates/dashboard-js/views/api-settings.js +5613 -3361
- package/ccw/src/templates/dashboard-js/views/claude-manager.js +1 -7
- package/ccw/src/templates/dashboard-js/views/cli-manager.js +581 -87
- package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +6091 -1965
- package/ccw/src/templates/dashboard-js/views/core-memory.js +129 -20
- package/ccw/src/templates/dashboard-js/views/hook-manager.js +17 -3
- package/ccw/src/templates/dashboard-js/views/mcp-manager.js +63 -0
- package/ccw/src/templates/dashboard-js/views/project-overview.js +182 -37
- package/ccw/src/templates/dashboard-js/views/rules-manager.js +26 -3
- package/ccw/src/templates/dashboard-js/views/skills-manager.js +2 -42
- package/ccw/src/templates/dashboard.html +6 -0
- package/ccw/src/tools/README.md +29 -0
- package/ccw/src/tools/claude-cli-tools.ts +640 -125
- package/ccw/src/tools/cli-config-manager.ts +102 -172
- package/ccw/src/tools/cli-executor-core.ts +1533 -0
- package/ccw/src/tools/cli-executor-state.ts +560 -0
- package/ccw/src/tools/cli-executor-utils.ts +349 -0
- package/ccw/src/tools/cli-executor.ts +3 -2309
- package/ccw/src/tools/cli-history-store.ts +2 -0
- package/ccw/src/tools/cli-output-converter.ts +1237 -0
- package/ccw/src/tools/cli-prompt-builder.ts +487 -0
- package/ccw/src/tools/codex-lens.ts +324 -59
- package/ccw/src/tools/detect-changed-modules.ts +24 -6
- package/ccw/src/tools/index.ts +2 -0
- package/ccw/src/tools/litellm-client.ts +10 -4
- package/ccw/src/tools/litellm-executor.ts +146 -114
- package/ccw/src/tools/native-session-discovery.ts +209 -1
- package/ccw/src/tools/session-manager.ts +88 -0
- package/ccw/src/tools/skill-context-loader.ts +213 -0
- package/ccw/src/tools/smart-search.ts +427 -76
- package/ccw/src/types/cli-settings.ts +137 -0
- package/ccw/src/types/litellm-api-config.ts +55 -1
- package/ccw/src/utils/exec-constants.ts +24 -0
- package/ccw/src/utils/path-resolver.ts +49 -3
- package/ccw/src/utils/path-validator.ts +28 -6
- package/ccw/src/utils/python-utils.ts +140 -121
- package/ccw/src/utils/shell-escape.ts +30 -0
- package/ccw/src/utils/uv-manager.ts +796 -0
- package/ccw-litellm/src/ccw_litellm/__pycache__/__init__.cpython-310.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/__pycache__/__init__.cpython-312.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/__init__.cpython-310.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/__init__.cpython-312.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_embedder.cpython-310.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_embedder.cpython-312.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_llm.cpython-310.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_llm.cpython-312.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_llm.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py +270 -251
- package/ccw-litellm/src/ccw_litellm/clients/litellm_llm.py +33 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/__init__.cpython-310.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/__init__.cpython-312.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/loader.cpython-310.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/loader.cpython-312.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/loader.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/models.cpython-310.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/models.cpython-312.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/models.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/loader.py +343 -316
- package/ccw-litellm/src/ccw_litellm/config/models.py +162 -130
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/__init__.cpython-310.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/__init__.cpython-312.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/embedder.cpython-310.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/embedder.cpython-312.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/llm.cpython-310.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/llm.cpython-312.pyc +0 -0
- package/codex-lens/pyproject.toml +43 -0
- package/codex-lens/src/codexlens/__pycache__/__init__.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/__init__.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/__main__.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/__main__.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/config.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/config.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/entities.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/entities.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/entities.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/env_config.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/env_config.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/env_config.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/errors.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/errors.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/__init__.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/cli/commands.py +4416 -2295
- package/codex-lens/src/codexlens/cli/embedding_manager.py +767 -14
- package/codex-lens/src/codexlens/cli/model_manager.py +676 -0
- package/codex-lens/src/codexlens/config.py +321 -12
- package/codex-lens/src/codexlens/entities.py +4 -1
- package/codex-lens/src/codexlens/env_config.py +298 -0
- package/codex-lens/src/codexlens/indexing/__init__.py +23 -1
- package/codex-lens/src/codexlens/indexing/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/indexing/__pycache__/embedding.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/indexing/__pycache__/symbol_extractor.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/indexing/embedding.py +582 -0
- package/codex-lens/src/codexlens/indexing/symbol_extractor.py +62 -28
- package/codex-lens/src/codexlens/parsers/__pycache__/__init__.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/__init__.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/factory.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/factory.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/factory.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/tokenizer.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/tokenizer.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/treesitter_parser.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/treesitter_parser.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/treesitter_parser.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/factory.py +139 -10
- package/codex-lens/src/codexlens/parsers/treesitter_parser.py +487 -13
- package/codex-lens/src/codexlens/search/__pycache__/__init__.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/__init__.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/binary_searcher.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/enrichment.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/graph_expander.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/binary_searcher.py +277 -0
- package/codex-lens/src/codexlens/search/chain_search.py +1642 -8
- package/codex-lens/src/codexlens/search/enrichment.py +21 -0
- package/codex-lens/src/codexlens/search/graph_expander.py +264 -0
- package/codex-lens/src/codexlens/search/hybrid_search.py +772 -37
- package/codex-lens/src/codexlens/search/ranking.py +347 -8
- package/codex-lens/src/codexlens/semantic/SPLADE_IMPLEMENTATION.md +225 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/ann_index.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/ann_index.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/ann_index.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/base.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/base.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/factory.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/factory.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/factory.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/litellm_embedder.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/litellm_embedder.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/reranker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/splade_encoder.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/splade_encoder.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/splade_encoder.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/ann_index.py +654 -0
- package/codex-lens/src/codexlens/semantic/factory.py +63 -3
- package/codex-lens/src/codexlens/semantic/gpu_support.py +19 -2
- package/codex-lens/src/codexlens/semantic/litellm_embedder.py +144 -144
- package/codex-lens/src/codexlens/semantic/reranker/__init__.py +25 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/__init__.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/api_reranker.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/api_reranker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/base.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/base.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/factory.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/factory.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/fastembed_reranker.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/fastembed_reranker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/legacy.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/legacy.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/litellm_reranker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/onnx_reranker.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/__pycache__/onnx_reranker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/reranker/api_reranker.py +403 -0
- package/codex-lens/src/codexlens/semantic/reranker/base.py +46 -0
- package/codex-lens/src/codexlens/semantic/reranker/factory.py +159 -0
- package/codex-lens/src/codexlens/semantic/reranker/fastembed_reranker.py +257 -0
- package/codex-lens/src/codexlens/semantic/reranker/legacy.py +91 -0
- package/codex-lens/src/codexlens/semantic/reranker/litellm_reranker.py +214 -0
- package/codex-lens/src/codexlens/semantic/reranker/onnx_reranker.py +268 -0
- package/codex-lens/src/codexlens/semantic/splade_encoder.py +567 -0
- package/codex-lens/src/codexlens/semantic/vector_store.py +472 -352
- package/codex-lens/src/codexlens/storage/__init__.py +3 -0
- package/codex-lens/src/codexlens/storage/__pycache__/__init__.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/__init__.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/dir_index.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/dir_index.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/dir_index.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/global_index.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/global_index.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/index_tree.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/index_tree.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/index_tree.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/merkle_tree.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/registry.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/registry.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/splade_index.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/splade_index.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/splade_index.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/sqlite_store.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/sqlite_store.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/sqlite_store.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/sqlite_utils.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/sqlite_utils.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/vector_meta_store.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/vector_meta_store.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/vector_meta_store.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/dir_index.py +310 -12
- package/codex-lens/src/codexlens/storage/index_tree.py +194 -23
- package/codex-lens/src/codexlens/storage/merkle_tree.py +136 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/__init__.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/__init__.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_006_enhance_relationships.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_007_add_graph_neighbors.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_007_add_graph_neighbors.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_007_add_graph_neighbors.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_008_add_merkle_hashes.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_009_add_splade.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_010_add_multi_vector_chunks.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_006_enhance_relationships.py +37 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_007_add_graph_neighbors.py +47 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_008_add_merkle_hashes.py +81 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_009_add_splade.py +103 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_010_add_multi_vector_chunks.py +162 -0
- package/codex-lens/src/codexlens/storage/splade_index.py +578 -0
- package/codex-lens/src/codexlens/storage/sqlite_store.py +508 -184
- package/codex-lens/src/codexlens/storage/vector_meta_store.py +415 -0
- package/codex-lens/src/codexlens/watcher/__init__.py +17 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/__init__.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/__init__.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/events.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/events.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/events.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/file_watcher.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/file_watcher.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/file_watcher.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/incremental_indexer.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/incremental_indexer.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/incremental_indexer.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/manager.cpython-310.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/manager.cpython-312.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/__pycache__/manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/watcher/events.py +82 -0
- package/codex-lens/src/codexlens/watcher/file_watcher.py +347 -0
- package/codex-lens/src/codexlens/watcher/incremental_indexer.py +369 -0
- package/codex-lens/src/codexlens/watcher/manager.py +255 -0
- package/package.json +4 -1
- package/.claude/commands/workflow/docs/analyze.md +0 -1467
- package/.claude/commands/workflow/docs/copyright.md +0 -1265
- package/.claude/skills/command-guide/SKILL.md +0 -388
- package/.claude/skills/command-guide/UPDATE-GUIDELINE.md +0 -592
- package/.claude/skills/command-guide/guides/cli-tools-guide.md +0 -410
- package/.claude/skills/command-guide/guides/examples.md +0 -537
- package/.claude/skills/command-guide/guides/getting-started.md +0 -242
- package/.claude/skills/command-guide/guides/implementation-details.md +0 -1010
- package/.claude/skills/command-guide/guides/index-structure.md +0 -326
- package/.claude/skills/command-guide/guides/troubleshooting.md +0 -92
- package/.claude/skills/command-guide/guides/ui-design-workflow-guide.md +0 -316
- package/.claude/skills/command-guide/guides/workflow-patterns.md +0 -662
- package/.claude/skills/command-guide/reference/agents/action-planning-agent.md +0 -855
- package/.claude/skills/command-guide/reference/agents/cli-execution-agent.md +0 -267
- package/.claude/skills/command-guide/reference/agents/cli-explore-agent.md +0 -182
- package/.claude/skills/command-guide/reference/agents/cli-lite-planning-agent.md +0 -446
- package/.claude/skills/command-guide/reference/agents/cli-planning-agent.md +0 -558
- package/.claude/skills/command-guide/reference/agents/code-developer.md +0 -311
- package/.claude/skills/command-guide/reference/agents/conceptual-planning-agent.md +0 -308
- package/.claude/skills/command-guide/reference/agents/context-search-agent.md +0 -581
- package/.claude/skills/command-guide/reference/agents/doc-generator.md +0 -330
- package/.claude/skills/command-guide/reference/agents/memory-bridge.md +0 -94
- package/.claude/skills/command-guide/reference/agents/test-context-search-agent.md +0 -400
- package/.claude/skills/command-guide/reference/agents/test-fix-agent.md +0 -344
- package/.claude/skills/command-guide/reference/agents/ui-design-agent.md +0 -593
- package/.claude/skills/command-guide/reference/agents/universal-executor.md +0 -131
- package/.claude/skills/command-guide/reference/commands/cli/cli-init.md +0 -440
- package/.claude/skills/command-guide/reference/commands/enhance-prompt.md +0 -93
- package/.claude/skills/command-guide/reference/commands/memory/code-map-memory.md +0 -687
- package/.claude/skills/command-guide/reference/commands/memory/docs-full-cli.md +0 -471
- package/.claude/skills/command-guide/reference/commands/memory/docs-related-cli.md +0 -386
- package/.claude/skills/command-guide/reference/commands/memory/docs.md +0 -616
- package/.claude/skills/command-guide/reference/commands/memory/load-skill-memory.md +0 -182
- package/.claude/skills/command-guide/reference/commands/memory/load.md +0 -240
- package/.claude/skills/command-guide/reference/commands/memory/skill-memory.md +0 -525
- package/.claude/skills/command-guide/reference/commands/memory/style-skill-memory.md +0 -396
- package/.claude/skills/command-guide/reference/commands/memory/tech-research.md +0 -314
- package/.claude/skills/command-guide/reference/commands/memory/update-full.md +0 -332
- package/.claude/skills/command-guide/reference/commands/memory/update-related.md +0 -332
- package/.claude/skills/command-guide/reference/commands/memory/workflow-skill-memory.md +0 -517
- package/.claude/skills/command-guide/reference/commands/task/breakdown.md +0 -204
- package/.claude/skills/command-guide/reference/commands/task/create.md +0 -152
- package/.claude/skills/command-guide/reference/commands/task/execute.md +0 -270
- package/.claude/skills/command-guide/reference/commands/task/replan.md +0 -437
- package/.claude/skills/command-guide/reference/commands/version.md +0 -254
- package/.claude/skills/command-guide/reference/commands/workflow/action-plan-verify.md +0 -447
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/api-designer.md +0 -585
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/artifacts.md +0 -452
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/auto-parallel.md +0 -443
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/data-architect.md +0 -220
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/product-manager.md +0 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/product-owner.md +0 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/scrum-master.md +0 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/subject-matter-expert.md +0 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/synthesis.md +0 -398
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/system-architect.md +0 -387
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/ui-designer.md +0 -221
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/ux-expert.md +0 -221
- package/.claude/skills/command-guide/reference/commands/workflow/execute.md +0 -465
- package/.claude/skills/command-guide/reference/commands/workflow/init.md +0 -164
- package/.claude/skills/command-guide/reference/commands/workflow/lite-execute.md +0 -748
- package/.claude/skills/command-guide/reference/commands/workflow/lite-fix.md +0 -664
- package/.claude/skills/command-guide/reference/commands/workflow/lite-plan.md +0 -645
- package/.claude/skills/command-guide/reference/commands/workflow/plan.md +0 -551
- package/.claude/skills/command-guide/reference/commands/workflow/replan.md +0 -515
- package/.claude/skills/command-guide/reference/commands/workflow/review-fix.md +0 -606
- package/.claude/skills/command-guide/reference/commands/workflow/review-module-cycle.md +0 -765
- package/.claude/skills/command-guide/reference/commands/workflow/review-session-cycle.md +0 -776
- package/.claude/skills/command-guide/reference/commands/workflow/review.md +0 -298
- package/.claude/skills/command-guide/reference/commands/workflow/session/complete.md +0 -547
- package/.claude/skills/command-guide/reference/commands/workflow/session/list.md +0 -114
- package/.claude/skills/command-guide/reference/commands/workflow/session/resume.md +0 -77
- package/.claude/skills/command-guide/reference/commands/workflow/session/start.md +0 -257
- package/.claude/skills/command-guide/reference/commands/workflow/tdd-plan.md +0 -460
- package/.claude/skills/command-guide/reference/commands/workflow/tdd-verify.md +0 -400
- package/.claude/skills/command-guide/reference/commands/workflow/test-cycle-execute.md +0 -498
- package/.claude/skills/command-guide/reference/commands/workflow/test-fix-gen.md +0 -699
- package/.claude/skills/command-guide/reference/commands/workflow/test-gen.md +0 -529
- package/.claude/skills/command-guide/reference/commands/workflow/tools/conflict-resolution.md +0 -766
- package/.claude/skills/command-guide/reference/commands/workflow/tools/context-gather.md +0 -433
- package/.claude/skills/command-guide/reference/commands/workflow/tools/task-generate-agent.md +0 -487
- package/.claude/skills/command-guide/reference/commands/workflow/tools/task-generate-tdd.md +0 -518
- package/.claude/skills/command-guide/reference/commands/workflow/tools/tdd-coverage-analysis.md +0 -309
- package/.claude/skills/command-guide/reference/commands/workflow/tools/test-concept-enhanced.md +0 -163
- package/.claude/skills/command-guide/reference/commands/workflow/tools/test-context-gather.md +0 -232
- package/.claude/skills/command-guide/reference/commands/workflow/tools/test-task-generate.md +0 -254
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/animation-extract.md +0 -1150
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/codify-style.md +0 -652
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/design-sync.md +0 -454
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/explore-auto.md +0 -678
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/generate.md +0 -504
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/imitate-auto.md +0 -745
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/import-from-code.md +0 -537
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/layout-extract.md +0 -788
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/reference-page-generator.md +0 -356
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/style-extract.md +0 -773
- package/.claude/skills/command-guide/scripts/analyze_commands.py +0 -502
- package/.claude/skills/command-guide/scripts/update-index.sh +0 -130
- package/.claude/skills/command-guide/templates/issue-bug.md +0 -104
- package/.claude/skills/command-guide/templates/issue-diagnosis.md +0 -275
- package/.claude/skills/command-guide/templates/issue-feature.md +0 -97
- package/.claude/skills/command-guide/templates/issue-question.md +0 -141
|
@@ -9,18 +9,30 @@ from __future__ import annotations
|
|
|
9
9
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
10
10
|
from dataclasses import dataclass, field
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import List, Optional, Dict, Any
|
|
12
|
+
from typing import List, Optional, Dict, Any, Literal, Tuple, TYPE_CHECKING
|
|
13
|
+
import json
|
|
13
14
|
import logging
|
|
14
15
|
import os
|
|
15
16
|
import time
|
|
16
17
|
|
|
17
18
|
from codexlens.entities import SearchResult, Symbol
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
import numpy as np
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
import numpy as np
|
|
25
|
+
NUMPY_AVAILABLE = True
|
|
26
|
+
except ImportError:
|
|
27
|
+
NUMPY_AVAILABLE = False
|
|
18
28
|
from codexlens.config import Config
|
|
19
29
|
from codexlens.storage.registry import RegistryStore, DirMapping
|
|
20
30
|
from codexlens.storage.dir_index import DirIndexStore, SubdirLink
|
|
21
31
|
from codexlens.storage.global_index import GlobalSymbolIndex
|
|
22
32
|
from codexlens.storage.path_mapper import PathMapper
|
|
23
33
|
from codexlens.storage.sqlite_store import SQLiteStore
|
|
34
|
+
from codexlens.storage.vector_meta_store import VectorMetadataStore
|
|
35
|
+
from codexlens.config import VECTORS_META_DB_NAME
|
|
24
36
|
from codexlens.search.hybrid_search import HybridSearchEngine
|
|
25
37
|
|
|
26
38
|
|
|
@@ -33,13 +45,18 @@ class SearchOptions:
|
|
|
33
45
|
max_workers: Number of parallel worker threads
|
|
34
46
|
limit_per_dir: Maximum results per directory
|
|
35
47
|
total_limit: Total result limit across all directories
|
|
48
|
+
offset: Pagination offset - skip first N results (default 0)
|
|
36
49
|
include_symbols: Whether to include symbol search results
|
|
37
50
|
files_only: Return only file paths without excerpts
|
|
38
51
|
include_semantic: Whether to include semantic keyword search results
|
|
52
|
+
code_only: Only return code files (excludes md, txt, json, yaml, xml, etc.)
|
|
53
|
+
exclude_extensions: List of file extensions to exclude (e.g., ["md", "txt", "json"])
|
|
39
54
|
hybrid_mode: Enable hybrid search with RRF fusion (default False)
|
|
40
55
|
enable_fuzzy: Enable fuzzy FTS in hybrid mode (default True)
|
|
41
56
|
enable_vector: Enable vector semantic search (default False)
|
|
42
57
|
pure_vector: If True, only use vector search without FTS fallback (default False)
|
|
58
|
+
enable_splade: Enable SPLADE sparse neural search (default False)
|
|
59
|
+
enable_cascade: Enable cascade (binary+dense) two-stage retrieval (default False)
|
|
43
60
|
hybrid_weights: Custom RRF weights for hybrid search (optional)
|
|
44
61
|
group_results: Enable grouping of similar results (default False)
|
|
45
62
|
grouping_threshold: Score threshold for grouping similar results (default 0.01)
|
|
@@ -48,13 +65,18 @@ class SearchOptions:
|
|
|
48
65
|
max_workers: int = 8
|
|
49
66
|
limit_per_dir: int = 10
|
|
50
67
|
total_limit: int = 100
|
|
68
|
+
offset: int = 0
|
|
51
69
|
include_symbols: bool = False
|
|
52
70
|
files_only: bool = False
|
|
53
71
|
include_semantic: bool = False
|
|
72
|
+
code_only: bool = False
|
|
73
|
+
exclude_extensions: Optional[List[str]] = None
|
|
54
74
|
hybrid_mode: bool = False
|
|
55
75
|
enable_fuzzy: bool = True
|
|
56
76
|
enable_vector: bool = False
|
|
57
77
|
pure_vector: bool = False
|
|
78
|
+
enable_splade: bool = False
|
|
79
|
+
enable_cascade: bool = False
|
|
58
80
|
hybrid_weights: Optional[Dict[str, float]] = None
|
|
59
81
|
group_results: bool = False
|
|
60
82
|
grouping_threshold: float = 0.01
|
|
@@ -83,6 +105,7 @@ class ChainSearchResult:
|
|
|
83
105
|
Attributes:
|
|
84
106
|
query: Original search query
|
|
85
107
|
results: List of SearchResult objects
|
|
108
|
+
related_results: Expanded results from graph neighbors (optional)
|
|
86
109
|
symbols: List of Symbol objects (if include_symbols=True)
|
|
87
110
|
stats: SearchStats with execution metrics
|
|
88
111
|
"""
|
|
@@ -90,6 +113,7 @@ class ChainSearchResult:
|
|
|
90
113
|
results: List[SearchResult]
|
|
91
114
|
symbols: List[Symbol]
|
|
92
115
|
stats: SearchStats
|
|
116
|
+
related_results: List[SearchResult] = field(default_factory=list)
|
|
93
117
|
|
|
94
118
|
|
|
95
119
|
class ChainSearchEngine:
|
|
@@ -217,8 +241,14 @@ class ChainSearchEngine:
|
|
|
217
241
|
)
|
|
218
242
|
stats.errors = search_stats.errors
|
|
219
243
|
|
|
244
|
+
# Step 3.5: Filter by extension if requested
|
|
245
|
+
if options.code_only or options.exclude_extensions:
|
|
246
|
+
results = self._filter_by_extension(
|
|
247
|
+
results, options.code_only, options.exclude_extensions
|
|
248
|
+
)
|
|
249
|
+
|
|
220
250
|
# Step 4: Merge and rank
|
|
221
|
-
final_results = self._merge_and_rank(results, options.total_limit)
|
|
251
|
+
final_results = self._merge_and_rank(results, options.total_limit, options.offset)
|
|
222
252
|
|
|
223
253
|
# Step 5: Optional grouping of similar results
|
|
224
254
|
if options.group_results:
|
|
@@ -236,13 +266,1562 @@ class ChainSearchEngine:
|
|
|
236
266
|
index_paths, query, None, options.total_limit
|
|
237
267
|
)
|
|
238
268
|
|
|
269
|
+
# Optional: graph expansion using precomputed neighbors
|
|
270
|
+
related_results: List[SearchResult] = []
|
|
271
|
+
if self._config is not None and getattr(self._config, "enable_graph_expansion", False):
|
|
272
|
+
try:
|
|
273
|
+
from codexlens.search.enrichment import SearchEnrichmentPipeline
|
|
274
|
+
|
|
275
|
+
pipeline = SearchEnrichmentPipeline(self.mapper, config=self._config)
|
|
276
|
+
related_results = pipeline.expand_related_results(final_results)
|
|
277
|
+
except Exception as exc:
|
|
278
|
+
self.logger.debug("Graph expansion failed: %s", exc)
|
|
279
|
+
related_results = []
|
|
280
|
+
|
|
239
281
|
stats.time_ms = (time.time() - start_time) * 1000
|
|
240
282
|
|
|
241
283
|
return ChainSearchResult(
|
|
242
284
|
query=query,
|
|
243
285
|
results=final_results,
|
|
244
286
|
symbols=symbols,
|
|
245
|
-
stats=stats
|
|
287
|
+
stats=stats,
|
|
288
|
+
related_results=related_results,
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
def hybrid_cascade_search(
    self,
    query: str,
    source_path: Path,
    k: int = 10,
    coarse_k: int = 100,
    options: Optional[SearchOptions] = None,
) -> ChainSearchResult:
    """Run the two-stage "hybrid" cascade: broad retrieval, then reranking.

    Stage 1 (coarse) runs ``_search_parallel`` in hybrid mode with vector
    search enabled and merges the hits down to at most ``coarse_k``
    candidates. Stage 2 (fine) hands those candidates to
    ``_cross_encoder_rerank`` to obtain the final ``k`` results.

    For the binary-vector variant of the cascade use
    ``binary_cascade_search()`` instead.

    Args:
        query: Natural language or keyword query string.
        source_path: Directory at which the index lookup starts.
        k: Number of final results to return. A falsy value is replaced
            by ``config.cascade_fine_k`` when the config defines it.
        coarse_k: Number of first-stage candidates. A falsy value is
            replaced by ``config.cascade_coarse_k`` when the config
            defines it.
        options: Search configuration; a default ``SearchOptions()`` is
            used when omitted.

    Returns:
        ChainSearchResult holding the reranked results and statistics.
        ``results`` is empty when no index is found for ``source_path``,
        when no index paths are collected, or when the coarse stage
        yields no candidates.

    Examples:
        >>> engine = ChainSearchEngine(registry, mapper, config=config)
        >>> result = engine.hybrid_cascade_search(
        ...     "how to authenticate users",
        ...     Path("D:/project/src"),
        ...     k=10,
        ...     coarse_k=100
        ... )
        >>> for r in result.results:
        ...     print(f"{r.path}: {r.score:.3f}")
    """
    options = options or SearchOptions()
    start_time = time.time()
    stats = SearchStats()

    def _empty_result() -> ChainSearchResult:
        # Shared early-exit path: stamp the elapsed time, return no hits.
        stats.time_ms = (time.time() - start_time) * 1000
        return ChainSearchResult(
            query=query,
            results=[],
            symbols=[],
            stats=stats
        )

    # Config values only fill in for falsy (e.g. 0) arguments.
    cfg = self._config
    if cfg is not None:
        if hasattr(cfg, "cascade_coarse_k") and not coarse_k:
            coarse_k = cfg.cascade_coarse_k
        if hasattr(cfg, "cascade_fine_k") and not k:
            k = cfg.cascade_fine_k

    # Locate the index that covers source_path.
    start_index = self._find_start_index(source_path)
    if not start_index:
        self.logger.warning(f"No index found for {source_path}")
        return _empty_result()

    # Gather every index reachable from the start index.
    index_paths = self._collect_index_paths(start_index, options.depth)
    stats.dirs_searched = len(index_paths)
    if not index_paths:
        self.logger.warning(f"No indexes collected from {start_index}")
        return _empty_result()

    # ---- Stage 1: coarse retrieval in hybrid mode ----
    # Spread the candidate budget over the directories, but never ask a
    # single directory for fewer than 20 hits.
    per_dir_limit = max(coarse_k // len(index_paths), 20)
    coarse_options = SearchOptions(
        depth=options.depth,
        max_workers=1,  # Single thread for GPU safety
        limit_per_dir=per_dir_limit,
        total_limit=coarse_k,
        hybrid_mode=True,
        enable_fuzzy=options.enable_fuzzy,
        enable_vector=True,  # Enable vector for semantic matching
        pure_vector=False,
        hybrid_weights=options.hybrid_weights,
    )

    self.logger.debug(
        "Cascade Stage 1: Coarse retrieval for %d candidates", coarse_k
    )
    raw_hits, coarse_stats = self._search_parallel(
        index_paths, query, coarse_options
    )
    stats.errors = coarse_stats.errors

    # Merge and deduplicate the per-directory hits.
    candidates = self._merge_and_rank(raw_hits, coarse_k)
    self.logger.debug(
        "Cascade Stage 1 complete: %d candidates retrieved", len(candidates)
    )
    if not candidates:
        return _empty_result()

    # ---- Stage 2: cross-encoder reranking ----
    self.logger.debug(
        "Cascade Stage 2: Cross-encoder reranking %d candidates to top-%d",
        len(candidates),
        k,
    )
    final_results = self._cross_encoder_rerank(query, candidates, k)

    # Optional: collapse near-duplicate results into groups.
    if options.group_results:
        from codexlens.search.ranking import group_similar_results

        final_results = group_similar_results(
            final_results, score_threshold_abs=options.grouping_threshold
        )

    stats.files_matched = len(final_results)
    stats.time_ms = (time.time() - start_time) * 1000

    self.logger.debug(
        "Cascade search complete: %d results in %.2fms",
        len(final_results),
        stats.time_ms,
    )

    return ChainSearchResult(
        query=query,
        results=final_results,
        symbols=[],
        stats=stats,
    )
|
|
438
|
+
|
|
439
|
+
def binary_cascade_search(
    self,
    query: str,
    source_path: Path,
    k: int = 10,
    coarse_k: int = 100,
    options: Optional[SearchOptions] = None,
) -> ChainSearchResult:
    """Execute binary cascade search: binary coarse ranking, dense fine ranking.

    Process:
        1. Stage 1 (coarse): binary vector search ranked by Hamming
           distance, keeping the ``coarse_k`` closest chunks. A
           centralized searcher under the index root is tried first; if
           it is unavailable or raises, each per-directory binary index
           is searched instead.
        2. Stage 2 (fine): the stored dense embeddings (read as float32)
           of those chunks are scored against the dense query embedding
           with cosine similarity. The best-scoring chunk per file path
           is kept and the top ``k`` are returned.

    Fallbacks:
        * NumPy missing, embedding modules not importable, binary query
          embedding failure, or no binary candidates at all: the call is
          delegated to ``hybrid_cascade_search`` with the same arguments.
        * Dense query embedding failure: results are built from the
          coarse candidates via ``_build_results_from_candidates``.
        * A failure while reranking one index is logged, appended to
          ``stats.errors`` and that index is skipped.

    Args:
        query: Natural language or keyword query string.
        source_path: Directory at which the index lookup starts.
        k: Number of final results to return. A falsy value is replaced
            by ``config.cascade_fine_k`` when the config defines it.
        coarse_k: Number of first-stage candidates. A falsy value is
            replaced by ``config.cascade_coarse_k`` when the config
            defines it.
        options: Search configuration; a default ``SearchOptions()`` is
            used when omitted.

    Returns:
        ChainSearchResult with reranked results and statistics.

    Examples:
        >>> engine = ChainSearchEngine(registry, mapper, config=config)
        >>> result = engine.binary_cascade_search(
        ...     "how to authenticate users",
        ...     Path("D:/project/src"),
        ...     k=10,
        ...     coarse_k=100
        ... )
        >>> for r in result.results:
        ...     print(f"{r.path}: {r.score:.3f}")
    """
    if not NUMPY_AVAILABLE:
        self.logger.warning(
            "NumPy not available, falling back to hybrid cascade search"
        )
        return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)

    options = options or SearchOptions()
    start_time = time.time()
    stats = SearchStats()

    # Use config defaults if available (only replaces falsy arguments)
    if self._config is not None:
        if hasattr(self._config, "cascade_coarse_k"):
            coarse_k = coarse_k or self._config.cascade_coarse_k
        if hasattr(self._config, "cascade_fine_k"):
            k = k or self._config.cascade_fine_k

    # Step 1: Find starting index
    start_index = self._find_start_index(source_path)
    if not start_index:
        self.logger.warning(f"No index found for {source_path}")
        stats.time_ms = (time.time() - start_time) * 1000
        return ChainSearchResult(
            query=query,
            results=[],
            symbols=[],
            stats=stats
        )

    # Step 2: Collect all index paths
    index_paths = self._collect_index_paths(start_index, options.depth)
    stats.dirs_searched = len(index_paths)

    if not index_paths:
        self.logger.warning(f"No indexes collected from {start_index}")
        stats.time_ms = (time.time() - start_time) * 1000
        return ChainSearchResult(
            query=query,
            results=[],
            symbols=[],
            stats=stats
        )

    # Initialize embedding backends
    try:
        from codexlens.indexing.embedding import (
            BinaryEmbeddingBackend,
            DenseEmbeddingBackend,
        )
        # Imported only to confirm the per-directory index module is
        # importable; the name itself is not used below.
        from codexlens.semantic.ann_index import BinaryANNIndex  # noqa: F401
    except ImportError as exc:
        self.logger.warning(
            "Binary cascade dependencies not available: %s. "
            "Falling back to hybrid cascade search.",
            exc
        )
        return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)

    # Stage 1: Binary vector coarse retrieval
    self.logger.debug(
        "Binary Cascade Stage 1: Binary coarse retrieval for %d candidates",
        coarse_k,
    )

    use_gpu = True
    if self._config is not None:
        use_gpu = getattr(self._config, "embedding_use_gpu", True)

    try:
        binary_backend = BinaryEmbeddingBackend(use_gpu=use_gpu)
        query_binary_packed = binary_backend.embed_packed([query])[0]
    except Exception as exc:
        self.logger.warning(
            "Failed to generate binary query embedding: %s. "
            "Falling back to hybrid cascade search.",
            exc
        )
        return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)

    # Try centralized BinarySearcher first (preferred for mmap indexes)
    # The index root is the parent of the first index path
    index_root = index_paths[0].parent if index_paths else None
    all_candidates: List[Tuple[int, int, Path]] = []  # (chunk_id, distance, index_path)
    used_centralized = False

    if index_root:
        centralized_searcher = self._get_centralized_binary_searcher(index_root)
        if centralized_searcher is not None:
            try:
                # BinarySearcher expects dense vector, not packed binary
                from codexlens.semantic.embedder import Embedder
                embedder = Embedder()
                query_dense = embedder.embed_to_numpy([query])[0]

                # Centralized search - returns (chunk_id, hamming_distance) tuples
                results = centralized_searcher.search(query_dense, top_k=coarse_k)
                for chunk_id, dist in results:
                    all_candidates.append((chunk_id, dist, index_root))
                used_centralized = True
                self.logger.debug(
                    "Centralized binary search found %d candidates", len(results)
                )
            except Exception as exc:
                self.logger.debug(
                    "Centralized binary search failed: %s, falling back to per-directory",
                    exc
                )
                centralized_searcher.clear()

    # Fallback: Search per-directory indexes with legacy BinaryANNIndex
    if not used_centralized:
        for index_path in index_paths:
            try:
                # Get or create binary index for this path (uses deprecated BinaryANNIndex)
                binary_index = self._get_or_create_binary_index(index_path)
                if binary_index is None or binary_index.count() == 0:
                    continue

                # Search binary index
                ids, distances = binary_index.search(query_binary_packed, coarse_k)
                for chunk_id, dist in zip(ids, distances):
                    all_candidates.append((chunk_id, dist, index_path))

            except Exception as exc:
                self.logger.debug(
                    "Binary search failed for %s: %s", index_path, exc
                )
                stats.errors.append(f"Binary search failed for {index_path}: {exc}")

    if not all_candidates:
        self.logger.debug("No binary candidates found, falling back to hybrid")
        return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)

    # Sort by Hamming distance (ascending) and take top coarse_k
    all_candidates.sort(key=lambda x: x[1])
    coarse_candidates = all_candidates[:coarse_k]

    self.logger.debug(
        "Binary Cascade Stage 1 complete: %d candidates retrieved",
        len(coarse_candidates),
    )

    # Stage 2: Dense vector fine ranking
    self.logger.debug(
        "Binary Cascade Stage 2: Dense reranking %d candidates to top-%d",
        len(coarse_candidates),
        k,
    )

    try:
        dense_backend = DenseEmbeddingBackend(use_gpu=use_gpu)
        query_dense = dense_backend.embed_to_numpy([query])[0]
    except Exception as exc:
        self.logger.warning(
            "Failed to generate dense query embedding: %s. "
            "Using Hamming distance scores only.",
            exc
        )
        # Fall back to using Hamming distance as score
        return self._build_results_from_candidates(
            coarse_candidates[:k], index_paths, stats, query, start_time,
            use_centralized=used_centralized
        )

    # Group candidates by index path for batch retrieval
    candidates_by_index: Dict[Path, List[int]] = {}
    for chunk_id, _, index_path in coarse_candidates:
        if index_path not in candidates_by_index:
            candidates_by_index[index_path] = []
        candidates_by_index[index_path].append(chunk_id)

    # Retrieve dense embeddings and compute cosine similarity
    scored_results: List[Tuple[float, SearchResult]] = []
    import sqlite3
    # closing() guarantees Connection.close() even when a query raises;
    # a bare sqlite3 connection used as a context manager would only
    # commit/rollback, not close.
    from contextlib import closing

    for index_path, chunk_ids in candidates_by_index.items():
        try:
            # Collect valid rows and dense vectors for batch processing
            valid_rows: List[Dict[str, Any]] = []
            dense_vectors: List["np.ndarray"] = []

            if used_centralized:
                # Centralized mode: index_path is actually index_root directory
                # Dense embeddings are in per-directory _index.db files
                # referenced by source_index_db in chunk_metadata
                meta_db_path = index_path / VECTORS_META_DB_NAME
                if not meta_db_path.exists():
                    self.logger.debug(
                        "VectorMetadataStore not found at %s, skipping dense reranking", meta_db_path
                    )
                    continue

                # Get chunk metadata with source_index_db references
                meta_store = VectorMetadataStore(meta_db_path)
                chunks_meta = meta_store.get_chunks_by_ids(chunk_ids)

                # Group chunks by source_index_db; chunks without one are dropped
                chunks_by_source: Dict[str, List[Dict[str, Any]]] = {}
                for chunk in chunks_meta:
                    source_db = chunk.get("source_index_db")
                    if source_db:
                        if source_db not in chunks_by_source:
                            chunks_by_source[source_db] = []
                        chunks_by_source[source_db].append(chunk)

                # Retrieve dense embeddings from each source_index_db
                for source_db, source_chunks in chunks_by_source.items():
                    try:
                        source_chunk_ids = [c["chunk_id"] for c in source_chunks]
                        placeholders = ",".join("?" * len(source_chunk_ids))

                        with closing(sqlite3.connect(source_db)) as conn:
                            conn.row_factory = sqlite3.Row
                            # Try semantic_chunks first (newer schema), fall back to chunks
                            try:
                                rows = conn.execute(
                                    f"SELECT id, embedding_dense FROM semantic_chunks WHERE id IN ({placeholders})",
                                    source_chunk_ids
                                ).fetchall()
                            except sqlite3.OperationalError:
                                rows = conn.execute(
                                    f"SELECT id, embedding_dense FROM chunks WHERE id IN ({placeholders})",
                                    source_chunk_ids
                                ).fetchall()

                        # Build dense vector lookup
                        dense_lookup = {row["id"]: row["embedding_dense"] for row in rows}

                        # Process chunks with their embeddings
                        for chunk in source_chunks:
                            chunk_id = chunk["chunk_id"]
                            dense_bytes = dense_lookup.get(chunk_id)
                            if dense_bytes is not None:
                                valid_rows.append({
                                    "id": chunk_id,
                                    "file_path": chunk["file_path"],
                                    "content": chunk["content"],
                                })
                                dense_vectors.append(np.frombuffer(dense_bytes, dtype=np.float32))
                    except Exception as exc:
                        # Best effort: one unreadable source DB must not
                        # discard candidates from the others.
                        self.logger.debug(
                            "Failed to get dense embeddings from %s: %s", source_db, exc
                        )
            else:
                # Per-directory mode: index_path is the _index.db file
                placeholders = ",".join("?" * len(chunk_ids))
                with closing(sqlite3.connect(str(index_path))) as conn:
                    conn.row_factory = sqlite3.Row
                    rows = conn.execute(
                        f"SELECT id, file_path, content, embedding_dense FROM semantic_chunks WHERE id IN ({placeholders})",
                        chunk_ids
                    ).fetchall()

                for row in rows:
                    dense_bytes = row["embedding_dense"]
                    if dense_bytes is not None:
                        valid_rows.append(dict(row))
                        dense_vectors.append(np.frombuffer(dense_bytes, dtype=np.float32))

            # Skip if no dense embeddings found
            if not dense_vectors:
                continue

            # Stack into matrix for batch computation
            doc_matrix = np.vstack(dense_vectors)

            # Batch compute cosine similarities
            scores = self._compute_cosine_similarity_batch(query_dense, doc_matrix)

            # Create search results (excerpt is capped at 500 characters)
            for i, row in enumerate(valid_rows):
                score = float(scores[i])
                excerpt = (row.get("content") or "")[:500]
                result = SearchResult(
                    path=row.get("file_path") or "",
                    score=score,
                    excerpt=excerpt,
                )
                scored_results.append((score, result))

        except Exception as exc:
            self.logger.debug(
                "Dense reranking failed for %s: %s", index_path, exc
            )
            stats.errors.append(f"Dense reranking failed for {index_path}: {exc}")

    # Sort by score descending and deduplicate by path; because of the
    # sort, the first result seen for a path is its best-scoring chunk.
    scored_results.sort(key=lambda x: x[0], reverse=True)

    path_to_result: Dict[str, SearchResult] = {}
    for _, result in scored_results:
        if result.path not in path_to_result:
            path_to_result[result.path] = result

    final_results = list(path_to_result.values())[:k]

    # Optional: grouping of similar results
    if options.group_results:
        from codexlens.search.ranking import group_similar_results
        final_results = group_similar_results(
            final_results, score_threshold_abs=options.grouping_threshold
        )

    stats.files_matched = len(final_results)
    stats.time_ms = (time.time() - start_time) * 1000

    self.logger.debug(
        "Binary cascade search complete: %d results in %.2fms",
        len(final_results),
        stats.time_ms,
    )

    return ChainSearchResult(
        query=query,
        results=final_results,
        symbols=[],
        stats=stats,
    )
|
|
805
|
+
|
|
806
|
+
def cascade_search(
    self,
    query: str,
    source_path: Path,
    k: int = 10,
    coarse_k: int = 100,
    options: Optional[SearchOptions] = None,
    strategy: Optional[Literal["binary", "hybrid", "binary_rerank", "dense_rerank"]] = None,
) -> ChainSearchResult:
    """Single entry point that dispatches to one of the cascade searches.

    Supported strategies and the method each one is routed to:
        - "binary": ``binary_cascade_search``
        - "hybrid": ``hybrid_cascade_search``
        - "binary_rerank": ``binary_rerank_cascade_search``
        - "dense_rerank": ``dense_rerank_cascade_search``

    The strategy is resolved in this order:
        1. The ``strategy`` argument, when it is not None.
        2. ``config.cascade_strategy``, consulted only when ``strategy``
           is None and only accepted if it names a supported strategy.
        3. "binary" otherwise. This also applies when an unsupported
           ``strategy`` argument is passed; the config is not consulted
           in that case.

    Args:
        query: Natural language or keyword query string.
        source_path: Directory at which the index lookup starts.
        k: Number of final results to return (default 10).
        coarse_k: Number of first-stage candidates (default 100).
        options: Search configuration, forwarded unchanged.
        strategy: One of "binary", "hybrid", "binary_rerank" or
            "dense_rerank"; None defers to config, then the default.

    Returns:
        The ChainSearchResult produced by the selected search method.

    Examples:
        >>> engine = ChainSearchEngine(registry, mapper, config=config)
        >>> result = engine.cascade_search("auth", Path("D:/project"))
        >>> result = engine.cascade_search("auth", Path("D:/project"), strategy="hybrid")
        >>> result = engine.cascade_search("auth", Path("D:/project"), strategy="binary_rerank")
    """
    supported = ("binary", "hybrid", "binary_rerank", "dense_rerank")

    # Priority: explicit argument > config setting > built-in default.
    chosen = strategy
    if chosen is None and self._config is not None:
        from_config = getattr(self._config, "cascade_strategy", None)
        if from_config in supported:
            chosen = from_config

    # Anything still unset or unrecognised falls back to the default.
    if chosen not in supported:
        chosen = "binary"

    # Resolve the handler by name so that only the selected method is
    # looked up on the instance.
    handler_names = {
        "binary": "binary_cascade_search",
        "binary_rerank": "binary_rerank_cascade_search",
        "dense_rerank": "dense_rerank_cascade_search",
        "hybrid": "hybrid_cascade_search",
    }
    handler = getattr(self, handler_names[chosen])
    return handler(query, source_path, k, coarse_k, options)
|
|
870
|
+
|
|
871
|
+
def binary_rerank_cascade_search(
|
|
872
|
+
self,
|
|
873
|
+
query: str,
|
|
874
|
+
source_path: Path,
|
|
875
|
+
k: int = 10,
|
|
876
|
+
coarse_k: int = 100,
|
|
877
|
+
options: Optional[SearchOptions] = None,
|
|
878
|
+
) -> ChainSearchResult:
|
|
879
|
+
"""Execute binary cascade search with cross-encoder reranking.
|
|
880
|
+
|
|
881
|
+
Combines the speed of binary vector coarse search with the quality of
|
|
882
|
+
cross-encoder reranking for the best balance of speed and accuracy.
|
|
883
|
+
|
|
884
|
+
Binary + Reranker cascade process:
|
|
885
|
+
1. Stage 1 (Coarse): Fast binary vector search using Hamming distance
|
|
886
|
+
to quickly filter to coarse_k candidates (256-dim binary, 32 bytes/vector)
|
|
887
|
+
2. Stage 2 (Fine): Cross-encoder reranking for precise semantic ranking
|
|
888
|
+
of candidates using query-document attention
|
|
889
|
+
|
|
890
|
+
This approach is typically faster than hybrid_cascade_search while
|
|
891
|
+
achieving similar or better quality through cross-encoder reranking.
|
|
892
|
+
|
|
893
|
+
Performance characteristics:
|
|
894
|
+
- Binary search: O(N) with SIMD-accelerated XOR + popcount (~8ms)
|
|
895
|
+
- Cross-encoder: Applied to top coarse_k candidates (~15-20s for API)
|
|
896
|
+
- Total: Faster coarse + high-quality fine = best balance
|
|
897
|
+
|
|
898
|
+
Args:
|
|
899
|
+
query: Natural language or keyword query string
|
|
900
|
+
source_path: Starting directory path
|
|
901
|
+
k: Number of final results to return (default 10)
|
|
902
|
+
coarse_k: Number of coarse candidates from first stage (default 100)
|
|
903
|
+
options: Search configuration (uses defaults if None)
|
|
904
|
+
|
|
905
|
+
Returns:
|
|
906
|
+
ChainSearchResult with cross-encoder reranked results and statistics
|
|
907
|
+
|
|
908
|
+
Examples:
|
|
909
|
+
>>> engine = ChainSearchEngine(registry, mapper, config=config)
|
|
910
|
+
>>> result = engine.binary_rerank_cascade_search(
|
|
911
|
+
... "how to authenticate users",
|
|
912
|
+
... Path("D:/project/src"),
|
|
913
|
+
... k=10,
|
|
914
|
+
... coarse_k=100
|
|
915
|
+
... )
|
|
916
|
+
>>> for r in result.results:
|
|
917
|
+
... print(f"{r.path}: {r.score:.3f}")
|
|
918
|
+
"""
|
|
919
|
+
if not NUMPY_AVAILABLE:
|
|
920
|
+
self.logger.warning(
|
|
921
|
+
"NumPy not available, falling back to hybrid cascade search"
|
|
922
|
+
)
|
|
923
|
+
return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
|
|
924
|
+
|
|
925
|
+
options = options or SearchOptions()
|
|
926
|
+
start_time = time.time()
|
|
927
|
+
stats = SearchStats()
|
|
928
|
+
|
|
929
|
+
# Use config defaults if available
|
|
930
|
+
if self._config is not None:
|
|
931
|
+
if hasattr(self._config, "cascade_coarse_k"):
|
|
932
|
+
coarse_k = coarse_k or self._config.cascade_coarse_k
|
|
933
|
+
if hasattr(self._config, "cascade_fine_k"):
|
|
934
|
+
k = k or self._config.cascade_fine_k
|
|
935
|
+
|
|
936
|
+
# Step 1: Find starting index
|
|
937
|
+
start_index = self._find_start_index(source_path)
|
|
938
|
+
if not start_index:
|
|
939
|
+
self.logger.warning(f"No index found for {source_path}")
|
|
940
|
+
stats.time_ms = (time.time() - start_time) * 1000
|
|
941
|
+
return ChainSearchResult(
|
|
942
|
+
query=query,
|
|
943
|
+
results=[],
|
|
944
|
+
symbols=[],
|
|
945
|
+
stats=stats
|
|
946
|
+
)
|
|
947
|
+
|
|
948
|
+
# Step 2: Collect all index paths
|
|
949
|
+
index_paths = self._collect_index_paths(start_index, options.depth)
|
|
950
|
+
stats.dirs_searched = len(index_paths)
|
|
951
|
+
|
|
952
|
+
if not index_paths:
|
|
953
|
+
self.logger.warning(f"No indexes collected from {start_index}")
|
|
954
|
+
stats.time_ms = (time.time() - start_time) * 1000
|
|
955
|
+
return ChainSearchResult(
|
|
956
|
+
query=query,
|
|
957
|
+
results=[],
|
|
958
|
+
symbols=[],
|
|
959
|
+
stats=stats
|
|
960
|
+
)
|
|
961
|
+
|
|
962
|
+
# Initialize binary embedding backend
|
|
963
|
+
try:
|
|
964
|
+
from codexlens.indexing.embedding import BinaryEmbeddingBackend
|
|
965
|
+
except ImportError as exc:
|
|
966
|
+
self.logger.warning(
|
|
967
|
+
"BinaryEmbeddingBackend not available: %s, falling back to hybrid cascade",
|
|
968
|
+
exc
|
|
969
|
+
)
|
|
970
|
+
return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
|
|
971
|
+
|
|
972
|
+
# Step 4: Binary coarse search (same as binary_cascade_search)
|
|
973
|
+
binary_coarse_time = time.time()
|
|
974
|
+
coarse_candidates: List[Tuple[int, int, Path]] = []
|
|
975
|
+
|
|
976
|
+
# Try centralized BinarySearcher first (preferred for mmap indexes)
|
|
977
|
+
# The index root is the parent of the first index path
|
|
978
|
+
index_root = index_paths[0].parent if index_paths else None
|
|
979
|
+
used_centralized = False
|
|
980
|
+
|
|
981
|
+
if index_root:
|
|
982
|
+
binary_searcher = self._get_centralized_binary_searcher(index_root)
|
|
983
|
+
if binary_searcher is not None:
|
|
984
|
+
try:
|
|
985
|
+
# BinarySearcher expects dense vector, not packed binary
|
|
986
|
+
from codexlens.semantic.embedder import Embedder
|
|
987
|
+
embedder = Embedder()
|
|
988
|
+
query_dense = embedder.embed_to_numpy([query])[0]
|
|
989
|
+
|
|
990
|
+
results = binary_searcher.search(query_dense, top_k=coarse_k)
|
|
991
|
+
for chunk_id, distance in results:
|
|
992
|
+
coarse_candidates.append((chunk_id, distance, index_root))
|
|
993
|
+
# Only mark as used if we got actual results
|
|
994
|
+
if coarse_candidates:
|
|
995
|
+
used_centralized = True
|
|
996
|
+
self.logger.debug(
|
|
997
|
+
"Binary coarse search (centralized): %d candidates in %.2fms",
|
|
998
|
+
len(results), (time.time() - binary_coarse_time) * 1000
|
|
999
|
+
)
|
|
1000
|
+
except Exception as exc:
|
|
1001
|
+
self.logger.debug(f"Centralized binary search failed: {exc}")
|
|
1002
|
+
|
|
1003
|
+
if not used_centralized:
|
|
1004
|
+
# Get GPU preference from config
|
|
1005
|
+
use_gpu = True
|
|
1006
|
+
if self._config is not None:
|
|
1007
|
+
use_gpu = getattr(self._config, "embedding_use_gpu", True)
|
|
1008
|
+
|
|
1009
|
+
try:
|
|
1010
|
+
binary_backend = BinaryEmbeddingBackend(use_gpu=use_gpu)
|
|
1011
|
+
query_binary = binary_backend.embed_packed([query])[0]
|
|
1012
|
+
except Exception as exc:
|
|
1013
|
+
self.logger.warning(f"Failed to generate binary query embedding: {exc}")
|
|
1014
|
+
return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
|
|
1015
|
+
|
|
1016
|
+
# Fallback to per-directory binary indexes
|
|
1017
|
+
for index_path in index_paths:
|
|
1018
|
+
try:
|
|
1019
|
+
binary_index = self._get_or_create_binary_index(index_path)
|
|
1020
|
+
if binary_index is None or binary_index.count() == 0:
|
|
1021
|
+
continue
|
|
1022
|
+
# BinaryANNIndex returns (ids, distances) arrays
|
|
1023
|
+
ids, distances = binary_index.search(query_binary, coarse_k)
|
|
1024
|
+
for chunk_id, dist in zip(ids, distances):
|
|
1025
|
+
coarse_candidates.append((chunk_id, dist, index_path))
|
|
1026
|
+
except Exception as exc:
|
|
1027
|
+
self.logger.debug(
|
|
1028
|
+
"Binary search failed for %s: %s", index_path, exc
|
|
1029
|
+
)
|
|
1030
|
+
|
|
1031
|
+
if not coarse_candidates:
|
|
1032
|
+
self.logger.info("No binary candidates found, falling back to hybrid cascade for reranking")
|
|
1033
|
+
# Fall back to hybrid_cascade_search which uses FTS+Vector coarse + cross-encoder rerank
|
|
1034
|
+
return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)
|
|
1035
|
+
|
|
1036
|
+
# Sort by Hamming distance and take top coarse_k
|
|
1037
|
+
coarse_candidates.sort(key=lambda x: x[1])
|
|
1038
|
+
coarse_candidates = coarse_candidates[:coarse_k]
|
|
1039
|
+
|
|
1040
|
+
self.logger.debug(
|
|
1041
|
+
"Binary coarse search: %d candidates in %.2fms",
|
|
1042
|
+
len(coarse_candidates), (time.time() - binary_coarse_time) * 1000
|
|
1043
|
+
)
|
|
1044
|
+
|
|
1045
|
+
# Step 5: Build SearchResult objects for cross-encoder reranking
|
|
1046
|
+
# Group candidates by index path for efficient retrieval
|
|
1047
|
+
candidates_by_index: Dict[Path, List[int]] = {}
|
|
1048
|
+
for chunk_id, distance, index_path in coarse_candidates:
|
|
1049
|
+
if index_path not in candidates_by_index:
|
|
1050
|
+
candidates_by_index[index_path] = []
|
|
1051
|
+
candidates_by_index[index_path].append(chunk_id)
|
|
1052
|
+
|
|
1053
|
+
# Retrieve chunk content for reranking
|
|
1054
|
+
# Always use centralized VectorMetadataStore since chunks are stored there
|
|
1055
|
+
import sqlite3
|
|
1056
|
+
coarse_results: List[SearchResult] = []
|
|
1057
|
+
|
|
1058
|
+
# Find the centralized metadata store path (project root)
|
|
1059
|
+
# index_root was computed earlier, use it for chunk retrieval
|
|
1060
|
+
central_meta_path = index_root / VECTORS_META_DB_NAME if index_root else None
|
|
1061
|
+
central_meta_store = None
|
|
1062
|
+
if central_meta_path and central_meta_path.exists():
|
|
1063
|
+
central_meta_store = VectorMetadataStore(central_meta_path)
|
|
1064
|
+
|
|
1065
|
+
for index_path, chunk_ids in candidates_by_index.items():
|
|
1066
|
+
try:
|
|
1067
|
+
chunks_data = []
|
|
1068
|
+
if central_meta_store:
|
|
1069
|
+
# Try centralized VectorMetadataStore first (preferred)
|
|
1070
|
+
chunks_data = central_meta_store.get_chunks_by_ids(chunk_ids)
|
|
1071
|
+
|
|
1072
|
+
if not chunks_data and used_centralized:
|
|
1073
|
+
# Fallback to per-index-path meta store
|
|
1074
|
+
meta_db_path = index_path / VECTORS_META_DB_NAME
|
|
1075
|
+
if meta_db_path.exists():
|
|
1076
|
+
meta_store = VectorMetadataStore(meta_db_path)
|
|
1077
|
+
chunks_data = meta_store.get_chunks_by_ids(chunk_ids)
|
|
1078
|
+
|
|
1079
|
+
if not chunks_data:
|
|
1080
|
+
# Final fallback: query semantic_chunks table directly
|
|
1081
|
+
# This handles per-directory indexes with semantic_chunks table
|
|
1082
|
+
try:
|
|
1083
|
+
conn = sqlite3.connect(str(index_path))
|
|
1084
|
+
conn.row_factory = sqlite3.Row
|
|
1085
|
+
placeholders = ",".join("?" * len(chunk_ids))
|
|
1086
|
+
cursor = conn.execute(
|
|
1087
|
+
f"""
|
|
1088
|
+
SELECT id, file_path, content, metadata, category
|
|
1089
|
+
FROM semantic_chunks
|
|
1090
|
+
WHERE id IN ({placeholders})
|
|
1091
|
+
""",
|
|
1092
|
+
chunk_ids
|
|
1093
|
+
)
|
|
1094
|
+
chunks_data = [
|
|
1095
|
+
{
|
|
1096
|
+
"id": row["id"],
|
|
1097
|
+
"file_path": row["file_path"],
|
|
1098
|
+
"content": row["content"],
|
|
1099
|
+
"metadata": row["metadata"],
|
|
1100
|
+
"category": row["category"],
|
|
1101
|
+
}
|
|
1102
|
+
for row in cursor.fetchall()
|
|
1103
|
+
]
|
|
1104
|
+
conn.close()
|
|
1105
|
+
except Exception:
|
|
1106
|
+
pass # Skip if table doesn't exist
|
|
1107
|
+
|
|
1108
|
+
for chunk in chunks_data:
|
|
1109
|
+
# Find the Hamming distance for this chunk
|
|
1110
|
+
chunk_id = chunk.get("id") or chunk.get("chunk_id")
|
|
1111
|
+
distance = next(
|
|
1112
|
+
(d for cid, d, _ in coarse_candidates if cid == chunk_id),
|
|
1113
|
+
256
|
|
1114
|
+
)
|
|
1115
|
+
# Initial score from Hamming distance (will be replaced by reranker)
|
|
1116
|
+
score = 1.0 - (distance / 256.0)
|
|
1117
|
+
|
|
1118
|
+
content = chunk.get("content", "")
|
|
1119
|
+
result = SearchResult(
|
|
1120
|
+
path=chunk.get("file_path", ""),
|
|
1121
|
+
score=float(score),
|
|
1122
|
+
excerpt=content[:500] if content else "",
|
|
1123
|
+
content=content,
|
|
1124
|
+
)
|
|
1125
|
+
coarse_results.append(result)
|
|
1126
|
+
except Exception as exc:
|
|
1127
|
+
self.logger.debug(
|
|
1128
|
+
"Failed to retrieve chunks from %s: %s", index_path, exc
|
|
1129
|
+
)
|
|
1130
|
+
|
|
1131
|
+
if not coarse_results:
|
|
1132
|
+
stats.time_ms = (time.time() - start_time) * 1000
|
|
1133
|
+
return ChainSearchResult(
|
|
1134
|
+
query=query, results=[], symbols=[], stats=stats
|
|
1135
|
+
)
|
|
1136
|
+
|
|
1137
|
+
self.logger.debug(
|
|
1138
|
+
"Retrieved %d chunks for cross-encoder reranking", len(coarse_results)
|
|
1139
|
+
)
|
|
1140
|
+
|
|
1141
|
+
# Step 6: Cross-encoder reranking (same as hybrid_cascade_search)
|
|
1142
|
+
rerank_time = time.time()
|
|
1143
|
+
reranked_results = self._cross_encoder_rerank(query, coarse_results, top_k=k)
|
|
1144
|
+
|
|
1145
|
+
self.logger.debug(
|
|
1146
|
+
"Cross-encoder reranking: %d results in %.2fms",
|
|
1147
|
+
len(reranked_results), (time.time() - rerank_time) * 1000
|
|
1148
|
+
)
|
|
1149
|
+
|
|
1150
|
+
# Deduplicate by path (keep highest score)
|
|
1151
|
+
path_to_result: Dict[str, SearchResult] = {}
|
|
1152
|
+
for result in reranked_results:
|
|
1153
|
+
if result.path not in path_to_result or result.score > path_to_result[result.path].score:
|
|
1154
|
+
path_to_result[result.path] = result
|
|
1155
|
+
|
|
1156
|
+
final_results = list(path_to_result.values())[:k]
|
|
1157
|
+
|
|
1158
|
+
stats.files_matched = len(final_results)
|
|
1159
|
+
stats.time_ms = (time.time() - start_time) * 1000
|
|
1160
|
+
|
|
1161
|
+
self.logger.debug(
|
|
1162
|
+
"Binary+Rerank cascade search complete: %d results in %.2fms",
|
|
1163
|
+
len(final_results),
|
|
1164
|
+
stats.time_ms,
|
|
1165
|
+
)
|
|
1166
|
+
|
|
1167
|
+
return ChainSearchResult(
|
|
1168
|
+
query=query,
|
|
1169
|
+
results=final_results,
|
|
1170
|
+
symbols=[],
|
|
1171
|
+
stats=stats,
|
|
1172
|
+
)
|
|
1173
|
+
|
|
1174
|
+
def dense_rerank_cascade_search(
    self,
    query: str,
    source_path: Path,
    k: int = 10,
    coarse_k: int = 100,
    options: Optional[SearchOptions] = None,
) -> ChainSearchResult:
    """Execute dense cascade search with cross-encoder reranking.

    Combines a dense vector coarse search (HNSW) with cross-encoder
    reranking, for comparison with the binary_rerank strategy.

    Dense + Reranker cascade process:
        1. Stage 1 (Coarse): dense vector search over the HNSW index to get
           ``coarse_k`` candidates. The centralized index is preferred;
           per-directory indexes are tried when it yields nothing.
        2. Stage 2 (Fine): cross-encoder reranking of the retrieved chunks.

    The method delegates to ``hybrid_cascade_search`` when NumPy is not
    available, when the query embedding cannot be generated, or when the
    coarse stage produces no candidates.

    Args:
        query: Natural language or keyword query string
        source_path: Starting directory path
        k: Number of final results to return (default 10)
        coarse_k: Number of coarse candidates from first stage (default 100)
        options: Search configuration (uses defaults if None)

    Returns:
        ChainSearchResult with cross-encoder reranked results and statistics
    """
    import sqlite3
    from contextlib import closing

    if not NUMPY_AVAILABLE:
        self.logger.warning(
            "NumPy not available, falling back to hybrid cascade search"
        )
        return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)

    options = options or SearchOptions()
    start_time = time.time()
    stats = SearchStats()

    # Use config defaults if available (only applies when a falsy value was passed)
    if self._config is not None:
        if hasattr(self._config, "cascade_coarse_k"):
            coarse_k = coarse_k or self._config.cascade_coarse_k
        if hasattr(self._config, "cascade_fine_k"):
            k = k or self._config.cascade_fine_k

    # Step 1: Find starting index
    start_index = self._find_start_index(source_path)
    if not start_index:
        self.logger.warning(f"No index found for {source_path}")
        stats.time_ms = (time.time() - start_time) * 1000
        return ChainSearchResult(
            query=query,
            results=[],
            symbols=[],
            stats=stats
        )

    # Step 2: Collect all index paths
    index_paths = self._collect_index_paths(start_index, options.depth)
    stats.dirs_searched = len(index_paths)

    if not index_paths:
        self.logger.warning(f"No indexes collected from {start_index}")
        stats.time_ms = (time.time() - start_time) * 1000
        return ChainSearchResult(
            query=query,
            results=[],
            symbols=[],
            stats=stats
        )

    # Step 3: Find centralized HNSW index by walking up from the start index
    from codexlens.config import VECTORS_HNSW_NAME
    central_hnsw_path = None
    index_root = start_index.parent
    current_dir = index_root
    for _ in range(10):  # Limit search depth
        candidate = current_dir / VECTORS_HNSW_NAME
        if candidate.exists():
            central_hnsw_path = candidate
            index_root = current_dir  # Update to where HNSW was found
            break
        parent = current_dir.parent
        if parent == current_dir:  # Reached root
            break
        current_dir = parent

    # Candidates coming from the centralized index are tagged with this path
    central_index_db = index_root / "_index.db"

    # Step 4: Generate query dense embedding using same model as centralized index
    dense_coarse_time = time.time()
    try:
        from codexlens.semantic.factory import get_embedder

        embedding_backend = "litellm"  # Default to API for dense
        embedding_model = "qwen3-embedding-sf"  # Default model
        use_gpu = True
        # Tracked explicitly: comparing against the defaults above cannot tell
        # "not read" apart from "index was built with the default model".
        config_from_index = False

        # Try to read model config from centralized index's embeddings_config table
        if central_index_db.exists():
            try:
                from codexlens.semantic.vector_store import VectorStore
                with VectorStore(central_index_db) as vs:
                    model_config = vs.get_model_config()
                    if model_config:
                        embedding_backend = model_config.get("backend", embedding_backend)
                        embedding_model = model_config.get("model_name", embedding_model)
                        config_from_index = True
                        self.logger.debug(
                            "Read model config from centralized index: %s/%s",
                            embedding_backend, embedding_model
                        )
            except Exception as e:
                self.logger.debug("Failed to read centralized model config: %s", e)

        if self._config is not None:
            if not config_from_index:
                # Only use config values if we didn't read from centralized index
                config_backend = getattr(self._config, "embedding_backend", None)
                config_model = getattr(self._config, "embedding_model", None)
                if config_backend:
                    embedding_backend = config_backend
                if config_model:
                    embedding_model = config_model
            use_gpu = getattr(self._config, "embedding_use_gpu", True)

        # Create embedder matching index configuration
        if embedding_backend == "litellm":
            embedder = get_embedder(backend="litellm", model=embedding_model)
        else:
            embedder = get_embedder(backend="fastembed", profile=embedding_model, use_gpu=use_gpu)

        query_dense = embedder.embed_to_numpy([query])[0]
        self.logger.debug(f"Dense query embedding: {query_dense.shape[0]}-dim via {embedding_backend}/{embedding_model}")
    except Exception as exc:
        self.logger.warning(f"Failed to generate dense query embedding: {exc}")
        return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)

    # Step 5: Dense coarse search using centralized HNSW index
    coarse_candidates: List[Tuple[int, float, Path]] = []  # (chunk_id, distance, index_path)

    if central_hnsw_path is not None:
        try:
            from codexlens.semantic.ann_index import ANNIndex
            ann_index = ANNIndex.create_central(
                index_root=index_root,
                dim=query_dense.shape[0],
            )
            if ann_index.load() and ann_index.count() > 0:
                ids, distances = ann_index.search(query_dense, top_k=coarse_k)
                for chunk_id, dist in zip(ids, distances):
                    coarse_candidates.append((chunk_id, dist, central_index_db))
                self.logger.debug(
                    "Centralized dense search: %d candidates from %s",
                    len(ids), central_hnsw_path
                )
        except Exception as exc:
            self.logger.debug(
                "Centralized dense search failed for %s: %s", central_hnsw_path, exc
            )

    # Fallback: try per-directory HNSW indexes if the centralized one gave nothing
    if not coarse_candidates:
        for index_path in index_paths:
            try:
                # Imported inside the try so a missing backend is logged per index
                from codexlens.semantic.ann_index import ANNIndex
                ann_index = ANNIndex(index_path, dim=query_dense.shape[0])
                if not ann_index.load():
                    continue
                if ann_index.count() == 0:
                    continue

                ids, distances = ann_index.search(query_dense, top_k=coarse_k)
                for chunk_id, dist in zip(ids, distances):
                    coarse_candidates.append((chunk_id, dist, index_path))
            except Exception as exc:
                self.logger.debug(
                    "Dense search failed for %s: %s", index_path, exc
                )

    if not coarse_candidates:
        self.logger.info("No dense candidates found, falling back to hybrid cascade")
        return self.hybrid_cascade_search(query, source_path, k, coarse_k, options)

    # Sort by distance (ascending for cosine distance) and take top coarse_k
    coarse_candidates.sort(key=lambda x: x[1])
    coarse_candidates = coarse_candidates[:coarse_k]

    self.logger.debug(
        "Dense coarse search: %d candidates in %.2fms",
        len(coarse_candidates), (time.time() - dense_coarse_time) * 1000
    )

    # Step 6: Build SearchResult objects for cross-encoder reranking.
    # Distances are keyed by (index_path, chunk_id): chunk ids are only unique
    # within one index, so the id alone is ambiguous across per-directory indexes.
    candidates_by_index: Dict[Path, List[int]] = {}
    distance_by_candidate: Dict[Tuple[Path, int], float] = {}
    for chunk_id, distance, index_path in coarse_candidates:
        candidates_by_index.setdefault(index_path, []).append(chunk_id)
        # setdefault keeps the first (smallest) distance, candidates being sorted
        distance_by_candidate.setdefault((index_path, chunk_id), distance)

    code_suffixes = ('.py', '.ts', '.js', '.java', '.go', '.rs', '.cpp', '.c')
    coarse_results: List[SearchResult] = []

    for index_path, chunk_ids in candidates_by_index.items():
        try:
            # Only "?" markers are interpolated into the SQL; ids are bound as parameters
            placeholders = ",".join("?" * len(chunk_ids))

            if central_hnsw_path is not None and index_path == central_index_db:
                # For centralized index, use _vectors_meta.db for chunk metadata
                # which contains file_path, content, start_line, end_line
                meta_db_path = index_root / "_vectors_meta.db"
                chunks_data = []
                if meta_db_path.exists():
                    # closing() releases the connection even when the query raises
                    with closing(sqlite3.connect(str(meta_db_path))) as conn:
                        conn.row_factory = sqlite3.Row
                        rows = conn.execute(
                            f"""
                            SELECT chunk_id, file_path, content, start_line, end_line
                            FROM chunk_metadata
                            WHERE chunk_id IN ({placeholders})
                            """,
                            chunk_ids
                        ).fetchall()
                    chunks_data = [
                        {
                            "id": row["chunk_id"],
                            "file_path": row["file_path"],
                            "content": row["content"],
                            "metadata": json.dumps({
                                "start_line": row["start_line"],
                                "end_line": row["end_line"]
                            }),
                            "category": "code" if row["file_path"].endswith(code_suffixes) else "doc",
                        }
                        for row in rows
                    ]
            else:
                # Fall back to per-directory semantic_chunks table
                with closing(sqlite3.connect(str(index_path))) as conn:
                    conn.row_factory = sqlite3.Row
                    rows = conn.execute(
                        f"""
                        SELECT id, file_path, content, metadata, category
                        FROM semantic_chunks
                        WHERE id IN ({placeholders})
                        """,
                        chunk_ids
                    ).fetchall()
                chunks_data = [
                    {
                        "id": row["id"],
                        "file_path": row["file_path"],
                        "content": row["content"],
                        "metadata": row["metadata"],
                        "category": row["category"],
                    }
                    for row in rows
                ]

            for chunk in chunks_data:
                distance = distance_by_candidate.get((index_path, chunk.get("id")), 1.0)
                # Convert cosine distance to score, clamped to [0, 1] for Pydantic
                # validation: distance can exceed 1 for anti-correlated vectors.
                score = min(1.0, max(0.0, 1.0 - distance))

                content = chunk.get("content", "")
                result = SearchResult(
                    path=chunk.get("file_path", ""),
                    score=float(score),
                    excerpt=content[:500] if content else "",
                    content=content,
                )
                coarse_results.append(result)
        except Exception as exc:
            self.logger.debug(
                "Failed to retrieve chunks from %s: %s", index_path, exc
            )

    if not coarse_results:
        stats.time_ms = (time.time() - start_time) * 1000
        return ChainSearchResult(
            query=query, results=[], symbols=[], stats=stats
        )

    self.logger.debug(
        "Retrieved %d chunks for cross-encoder reranking", len(coarse_results)
    )

    # Step 7: Cross-encoder reranking
    rerank_time = time.time()
    reranked_results = self._cross_encoder_rerank(query, coarse_results, top_k=k)

    self.logger.debug(
        "Cross-encoder reranking: %d results in %.2fms",
        len(reranked_results), (time.time() - rerank_time) * 1000
    )

    # Deduplicate by path (keep highest score)
    path_to_result: Dict[str, SearchResult] = {}
    for result in reranked_results:
        if result.path not in path_to_result or result.score > path_to_result[result.path].score:
            path_to_result[result.path] = result

    # Stable sort: a no-op for already ranked input, but it guarantees
    # best-first order if the reranker returned its input unranked.
    final_results = sorted(
        path_to_result.values(),
        key=lambda r: r.score,
        reverse=True,
    )[:k]

    stats.files_matched = len(final_results)
    stats.time_ms = (time.time() - start_time) * 1000

    self.logger.debug(
        "Dense+Rerank cascade search complete: %d results in %.2fms",
        len(final_results),
        stats.time_ms,
    )

    return ChainSearchResult(
        query=query,
        results=final_results,
        symbols=[],
        stats=stats,
    )
|
|
1510
|
+
|
|
1511
|
+
def _get_or_create_binary_index(self, index_path: Path) -> Optional[Any]:
    """Load the legacy BinaryANNIndex that belongs to ``index_path``.

    .. deprecated::
        BinaryANNIndex is deprecated. Centralized indexes should go through
        _get_centralized_binary_searcher() instead.

    Nothing is built here: the index is only loaded from disk, on the
    expectation that it was produced during indexing.

    Args:
        index_path: Path to the _index.db file

    Returns:
        The loaded BinaryANNIndex, or None when loading reports failure or
        raises (the exception is logged at debug level).
    """
    import warnings

    try:
        # The deprecated class is used on purpose for legacy support, so its
        # DeprecationWarning is silenced for the duration of the load.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            from codexlens.semantic.ann_index import BinaryANNIndex

            legacy_index = BinaryANNIndex(index_path, dim=256)
            loaded = legacy_index.load()
    except Exception as exc:
        self.logger.debug("Failed to load binary index for %s: %s", index_path, exc)
        return None

    return legacy_index if loaded else None
|
|
1541
|
+
|
|
1542
|
+
def _get_centralized_binary_searcher(self, index_root: Path) -> Optional[Any]:
    """Return a loaded BinarySearcher for the centralized index, if any.

    This is the preferred route for centralized indexes; the searcher
    reports through ``is_memmap`` whether its vectors are memory-mapped.

    Args:
        index_root: Root directory containing centralized index files

    Returns:
        The loaded BinarySearcher, or None when loading reports failure or
        raises (the exception is logged at debug level).
    """
    try:
        from codexlens.search.binary_searcher import BinarySearcher

        searcher = BinarySearcher(index_root)
        if not searcher.load():
            return None

        self.logger.debug(
            "Using centralized BinarySearcher with %d vectors (mmap=%s)",
            searcher.vector_count,
            searcher.is_memmap
        )
        return searcher
    except Exception as exc:
        self.logger.debug("Failed to load centralized binary searcher: %s", exc)
        return None
|
|
1569
|
+
|
|
1570
|
+
def _compute_cosine_similarity(
    self,
    query_vec: "np.ndarray",
    doc_vec: "np.ndarray",
) -> float:
    """Compute cosine similarity between query and document vectors.

    Vectors of different length are compared over their common prefix (the
    longer one is truncated). A zero-norm vector yields 0.0.

    Args:
        query_vec: Query embedding vector
        doc_vec: Document embedding vector

    Returns:
        Cosine similarity score in range [-1, 1]; 0.0 when NumPy is
        unavailable or either vector has zero norm.
    """
    if not NUMPY_AVAILABLE:
        return 0.0

    # Ensure same shape by truncating to the shorter vector
    min_len = min(len(query_vec), len(doc_vec))
    q = query_vec[:min_len]
    d = doc_vec[:min_len]

    norm_q = np.linalg.norm(q)
    norm_d = np.linalg.norm(d)
    if norm_q == 0 or norm_d == 0:
        return 0.0

    similarity = np.dot(q, d) / (norm_q * norm_d)
    # Rounding can land marginally outside [-1, 1] (e.g. 1.0000001 for identical
    # float32 vectors); clip to honour the documented range. np.clip keeps NaN.
    return float(np.clip(similarity, -1.0, 1.0))
|
|
1601
|
+
|
|
1602
|
+
def _compute_cosine_similarity_batch(
    self,
    query_vec: "np.ndarray",
    doc_matrix: "np.ndarray",
) -> "np.ndarray":
    """Compute cosine similarity between query and multiple document vectors.

    Uses vectorized matrix operations for efficient batch computation.
    When the query and document dimensions differ, both are truncated to the
    smaller dimension. Rows with zero norm score 0.0.

    Args:
        query_vec: Query embedding vector of shape (dim,)
        doc_matrix: Document embeddings matrix of shape (n_docs, dim)

    Returns:
        Array of cosine similarity scores of shape (n_docs,), dtype float32,
        each in [-1, 1]. When NumPy is unavailable a plain list of
        ``len(doc_matrix)`` zeros is returned instead, since no array can be
        built in that case.
    """
    if not NUMPY_AVAILABLE:
        # Must not touch ``np`` here: this branch runs when NumPy is unusable.
        return [0.0] * len(doc_matrix)

    # Ensure query is 1D
    if query_vec.ndim > 1:
        query_vec = query_vec.flatten()

    # Handle dimension mismatch by truncating to smaller dimension
    min_dim = min(len(query_vec), doc_matrix.shape[1])
    q = query_vec[:min_dim]
    docs = doc_matrix[:, :min_dim]

    # Compute query norm once
    norm_q = np.linalg.norm(q)
    if norm_q == 0:
        return np.zeros(docs.shape[0])

    q_normalized = q / norm_q

    # Compute document norms (vectorized)
    doc_norms = np.linalg.norm(docs, axis=1)

    # Avoid division by zero: zero-norm rows keep their initial 0.0 score
    nonzero_mask = doc_norms > 0
    scores = np.zeros(docs.shape[0], dtype=np.float32)

    if np.any(nonzero_mask):
        docs_normalized = docs[nonzero_mask] / doc_norms[nonzero_mask, np.newaxis]

        # Batch dot product: (n_docs, dim) @ (dim,) = (n_docs,)
        scores[nonzero_mask] = docs_normalized @ q_normalized

        # Rounding can push values marginally past +/-1; keep the documented range
        np.clip(scores, -1.0, 1.0, out=scores)

    return scores
|
|
1653
|
+
|
|
1654
|
+
def _build_results_from_candidates(
    self,
    candidates: List[Tuple[int, int, Path]],
    index_paths: List[Path],
    stats: SearchStats,
    query: str,
    start_time: float,
    use_centralized: bool = False,
) -> ChainSearchResult:
    """Build ChainSearchResult from binary candidates using Hamming distance scores.

    Used as fallback when dense embeddings are not available. Chunks are
    fetched per index, scored as ``1 - distance / 256``, deduplicated by file
    path (best score wins) and returned best-first.

    Args:
        candidates: List of (chunk_id, hamming_distance, index_path) tuples
        index_paths: List of all searched index paths (not read by this method)
        stats: SearchStats to update (``files_matched`` and ``time_ms`` are set)
        query: Original query string
        start_time: Search start time for timing
        use_centralized: If True, index_path is the index_root directory
            and VectorMetadataStore should be used instead of SQLiteStore

    Returns:
        ChainSearchResult with results scored by Hamming distance
    """
    # Group (chunk_id, distance) pairs by the index they came from
    candidates_by_index: Dict[Path, List[Tuple[int, int]]] = {}
    for chunk_id, distance, index_path in candidates:
        candidates_by_index.setdefault(index_path, []).append((chunk_id, distance))

    results: List[SearchResult] = []

    for index_path, chunk_tuples in candidates_by_index.items():
        try:
            chunk_ids = [chunk_id for chunk_id, _ in chunk_tuples]

            # Use VectorMetadataStore for centralized search, SQLiteStore for per-directory
            if use_centralized:
                # index_path is actually index_root directory for centralized search
                meta_db_path = index_path / VECTORS_META_DB_NAME
                if not meta_db_path.exists():
                    self.logger.debug(
                        "VectorMetadataStore not found at %s, skipping", meta_db_path
                    )
                    continue
                meta_store = VectorMetadataStore(meta_db_path)
                chunks_data = meta_store.get_chunks_by_ids(chunk_ids)
            else:
                store = SQLiteStore(index_path)
                chunks_data = store.get_chunks_by_ids(chunk_ids)

            # Rows may be keyed by "id" or "chunk_id" (the rerank cascade accepts
            # both); a hard c["id"] would raise and drop every result of this index.
            chunk_content: Dict[int, Dict[str, Any]] = {}
            for chunk in chunks_data:
                key = chunk.get("id")
                if key is None:
                    key = chunk.get("chunk_id")
                if key is not None:
                    chunk_content[key] = chunk

            for chunk_id, distance in chunk_tuples:
                chunk_info = chunk_content.get(chunk_id)
                if chunk_info is None:
                    continue

                # Convert Hamming distance to score (lower distance = higher score)
                # Max Hamming distance for 256-bit is 256
                score = 1.0 - (distance / 256.0)

                # "content" can be present but None; treat that as empty text
                content = chunk_info.get("content") or ""
                result = SearchResult(
                    path=chunk_info.get("file_path", ""),
                    score=float(score),
                    excerpt=content[:500],
                )
                results.append(result)

        except Exception as exc:
            self.logger.debug(
                "Failed to build results from %s: %s", index_path, exc
            )

    # Deduplicate by path, keeping the highest-scoring chunk of each file
    path_to_result: Dict[str, SearchResult] = {}
    for result in results:
        if result.path not in path_to_result or result.score > path_to_result[result.path].score:
            path_to_result[result.path] = result

    final_results = sorted(
        path_to_result.values(),
        key=lambda r: r.score,
        reverse=True,
    )

    stats.files_matched = len(final_results)
    stats.time_ms = (time.time() - start_time) * 1000

    return ChainSearchResult(
        query=query,
        results=final_results,
        symbols=[],
        stats=stats,
    )
|
|
1754
|
+
|
|
1755
|
+
def _cross_encoder_rerank(
    self,
    query: str,
    results: "List[SearchResult]",
    top_k: int,
) -> "List[SearchResult]":
    """Rerank results using cross-encoder model.

    Backend, model and GPU usage are read from ``self._config`` when it is
    set (``reranker_backend``, ``reranker_model``, ``embedding_use_gpu``);
    otherwise the "onnx" backend with GPU enabled is used.

    If the reranker cannot be imported, is reported unavailable, or fails to
    initialize, the candidates are not reranked: the ``top_k`` best by their
    existing score are returned instead.

    Args:
        query: Search query string
        results: Candidate results to rerank
        top_k: Number of top results to return

    Returns:
        Reranked results sorted by cross-encoder score, or the best-scored
        candidates when no reranker is usable. Empty list for empty input.
    """
    if not results:
        return []

    def _best_by_existing_score():
        # Candidates arrive in retrieval order, not score order; sort before
        # truncating so the fallback does not discard the strongest ones.
        return sorted(results, key=lambda r: r.score, reverse=True)[:top_k]

    try:
        from codexlens.semantic.reranker import (
            check_reranker_available,
            get_reranker,
        )
        # Imported here so a missing ranking module also degrades gracefully
        from codexlens.search.ranking import cross_encoder_rerank

        # Determine backend and model from config
        backend = "onnx"
        model_name = None
        use_gpu = True

        if self._config is not None:
            backend = getattr(self._config, "reranker_backend", "onnx") or "onnx"
            model_name = getattr(self._config, "reranker_model", None)
            use_gpu = getattr(self._config, "embedding_use_gpu", True)

        ok, err = check_reranker_available(backend)
        if not ok:
            self.logger.debug("Reranker backend unavailable (%s): %s", backend, err)
            return _best_by_existing_score()

        # Backend-specific constructor arguments
        kwargs = {}
        if backend == "onnx":
            kwargs["use_gpu"] = use_gpu
        elif backend == "api":
            # Pass max_input_tokens for adaptive batching
            max_tokens = getattr(self._config, "reranker_max_input_tokens", None)
            if max_tokens:
                kwargs["max_input_tokens"] = max_tokens

        reranker = get_reranker(backend=backend, model_name=model_name, **kwargs)

    except ImportError as exc:
        self.logger.debug("Reranker not available: %s", exc)
        return _best_by_existing_score()
    except Exception as exc:
        self.logger.debug("Failed to initialize reranker: %s", exc)
        return _best_by_existing_score()

    # Errors raised by the reranking call itself are left to propagate
    return cross_encoder_rerank(
        query=query,
        results=results,
        reranker=reranker,
        top_k=top_k,
        batch_size=32,
    )
|
|
247
1826
|
|
|
248
1827
|
def search_files_only(self, query: str,
|
|
@@ -522,6 +2101,7 @@ class ChainSearchEngine:
|
|
|
522
2101
|
options.enable_fuzzy,
|
|
523
2102
|
options.enable_vector,
|
|
524
2103
|
options.pure_vector,
|
|
2104
|
+
options.enable_splade,
|
|
525
2105
|
options.hybrid_weights
|
|
526
2106
|
): idx_path
|
|
527
2107
|
for idx_path in index_paths
|
|
@@ -550,6 +2130,7 @@ class ChainSearchEngine:
|
|
|
550
2130
|
enable_fuzzy: bool = True,
|
|
551
2131
|
enable_vector: bool = False,
|
|
552
2132
|
pure_vector: bool = False,
|
|
2133
|
+
enable_splade: bool = False,
|
|
553
2134
|
hybrid_weights: Optional[Dict[str, float]] = None) -> List[SearchResult]:
|
|
554
2135
|
"""Search a single index database.
|
|
555
2136
|
|
|
@@ -565,6 +2146,7 @@ class ChainSearchEngine:
|
|
|
565
2146
|
enable_fuzzy: Enable fuzzy FTS in hybrid mode
|
|
566
2147
|
enable_vector: Enable vector semantic search
|
|
567
2148
|
pure_vector: If True, only use vector search without FTS fallback
|
|
2149
|
+
enable_splade: If True, force SPLADE sparse neural search
|
|
568
2150
|
hybrid_weights: Custom RRF weights for hybrid search
|
|
569
2151
|
|
|
570
2152
|
Returns:
|
|
@@ -581,6 +2163,7 @@ class ChainSearchEngine:
|
|
|
581
2163
|
enable_fuzzy=enable_fuzzy,
|
|
582
2164
|
enable_vector=enable_vector,
|
|
583
2165
|
pure_vector=pure_vector,
|
|
2166
|
+
enable_splade=enable_splade,
|
|
584
2167
|
)
|
|
585
2168
|
else:
|
|
586
2169
|
# Single-FTS search (exact or fuzzy mode)
|
|
@@ -624,21 +2207,72 @@ class ChainSearchEngine:
|
|
|
624
2207
|
self.logger.debug(f"Search error in {index_path}: {exc}")
|
|
625
2208
|
return []
|
|
626
2209
|
|
|
2210
|
+
def _filter_by_extension(self, results: List[SearchResult],
|
|
2211
|
+
code_only: bool = False,
|
|
2212
|
+
exclude_extensions: Optional[List[str]] = None) -> List[SearchResult]:
|
|
2213
|
+
"""Filter search results by file extension.
|
|
2214
|
+
|
|
2215
|
+
Args:
|
|
2216
|
+
results: Search results to filter
|
|
2217
|
+
code_only: If True, exclude non-code files (md, txt, json, yaml, xml, etc.)
|
|
2218
|
+
exclude_extensions: List of extensions to exclude (e.g., ["md", "txt"])
|
|
2219
|
+
|
|
2220
|
+
Returns:
|
|
2221
|
+
Filtered results
|
|
2222
|
+
"""
|
|
2223
|
+
# Non-code file extensions (same as MCP tool smart-search.ts)
|
|
2224
|
+
NON_CODE_EXTENSIONS = {
|
|
2225
|
+
'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'csv', 'log',
|
|
2226
|
+
'ini', 'cfg', 'conf', 'toml', 'env', 'properties',
|
|
2227
|
+
'html', 'htm', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'ico', 'webp',
|
|
2228
|
+
'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx',
|
|
2229
|
+
'lock', 'sum', 'mod',
|
|
2230
|
+
}
|
|
2231
|
+
|
|
2232
|
+
# Build exclusion set
|
|
2233
|
+
excluded_exts = set()
|
|
2234
|
+
if exclude_extensions:
|
|
2235
|
+
# Normalize extensions (remove leading dots, lowercase)
|
|
2236
|
+
excluded_exts = {ext.lower().lstrip('.') for ext in exclude_extensions}
|
|
2237
|
+
if code_only:
|
|
2238
|
+
excluded_exts.update(NON_CODE_EXTENSIONS)
|
|
2239
|
+
|
|
2240
|
+
if not excluded_exts:
|
|
2241
|
+
return results
|
|
2242
|
+
|
|
2243
|
+
# Filter results
|
|
2244
|
+
filtered = []
|
|
2245
|
+
for result in results:
|
|
2246
|
+
path_str = result.path
|
|
2247
|
+
if not path_str:
|
|
2248
|
+
continue
|
|
2249
|
+
|
|
2250
|
+
# Extract extension from path
|
|
2251
|
+
if '.' in path_str:
|
|
2252
|
+
ext = path_str.rsplit('.', 1)[-1].lower()
|
|
2253
|
+
if ext in excluded_exts:
|
|
2254
|
+
continue # Skip this result
|
|
2255
|
+
|
|
2256
|
+
filtered.append(result)
|
|
2257
|
+
|
|
2258
|
+
return filtered
|
|
2259
|
+
|
|
627
2260
|
def _merge_and_rank(self, results: List[SearchResult],
|
|
628
|
-
limit: int) -> List[SearchResult]:
|
|
2261
|
+
limit: int, offset: int = 0) -> List[SearchResult]:
|
|
629
2262
|
"""Aggregate, deduplicate, and rank results.
|
|
630
2263
|
|
|
631
2264
|
Process:
|
|
632
2265
|
1. Deduplicate by path (keep highest score)
|
|
633
2266
|
2. Sort by score descending
|
|
634
|
-
3.
|
|
2267
|
+
3. Apply offset and limit for pagination
|
|
635
2268
|
|
|
636
2269
|
Args:
|
|
637
2270
|
results: Raw results from all indexes
|
|
638
2271
|
limit: Maximum results to return
|
|
2272
|
+
offset: Number of results to skip (pagination offset)
|
|
639
2273
|
|
|
640
2274
|
Returns:
|
|
641
|
-
Deduplicated and ranked results
|
|
2275
|
+
Deduplicated and ranked results with pagination
|
|
642
2276
|
"""
|
|
643
2277
|
# Deduplicate by path, keeping best score
|
|
644
2278
|
path_to_result: Dict[str, SearchResult] = {}
|
|
@@ -651,8 +2285,8 @@ class ChainSearchEngine:
|
|
|
651
2285
|
unique_results = list(path_to_result.values())
|
|
652
2286
|
unique_results.sort(key=lambda r: r.score, reverse=True)
|
|
653
2287
|
|
|
654
|
-
# Apply limit
|
|
655
|
-
return unique_results[:limit]
|
|
2288
|
+
# Apply offset and limit for pagination
|
|
2289
|
+
return unique_results[offset:offset + limit]
|
|
656
2290
|
|
|
657
2291
|
def _search_symbols_parallel(self, index_paths: List[Path],
|
|
658
2292
|
name: str,
|