claude-code-workflow 6.1.4 → 6.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/CLAUDE.md +10 -0
- package/.claude/agents/action-planning-agent.md +857 -778
- package/.claude/agents/cli-execution-agent.md +266 -269
- package/.claude/agents/cli-explore-agent.md +2 -2
- package/.claude/agents/cli-lite-planning-agent.md +142 -92
- package/.claude/agents/cli-planning-agent.md +4 -4
- package/.claude/agents/code-developer.md +7 -6
- package/.claude/agents/conceptual-planning-agent.md +2 -2
- package/.claude/agents/context-search-agent.md +31 -32
- package/.claude/agents/doc-generator.md +4 -4
- package/.claude/agents/memory-bridge.md +93 -93
- package/.claude/agents/test-context-search-agent.md +8 -7
- package/.claude/agents/test-fix-agent.md +7 -6
- package/.claude/commands/clean.md +516 -0
- package/.claude/commands/memory/compact.md +383 -0
- package/.claude/commands/memory/docs-full-cli.md +471 -471
- package/.claude/commands/memory/docs-related-cli.md +386 -386
- package/.claude/commands/memory/docs.md +615 -615
- package/.claude/commands/memory/load.md +5 -5
- package/.claude/commands/memory/tech-research-rules.md +310 -0
- package/.claude/commands/memory/update-full.md +332 -332
- package/.claude/commands/memory/workflow-skill-memory.md +4 -4
- package/.claude/commands/task/create.md +151 -151
- package/.claude/commands/version.md +254 -254
- package/.claude/commands/workflow/brainstorm/api-designer.md +587 -585
- package/.claude/commands/workflow/brainstorm/artifacts.md +1 -0
- package/.claude/commands/workflow/brainstorm/auto-parallel.md +443 -443
- package/.claude/commands/workflow/brainstorm/data-architect.md +220 -220
- package/.claude/commands/workflow/brainstorm/product-manager.md +200 -200
- package/.claude/commands/workflow/brainstorm/product-owner.md +200 -200
- package/.claude/commands/workflow/brainstorm/scrum-master.md +200 -200
- package/.claude/commands/workflow/brainstorm/subject-matter-expert.md +200 -200
- package/.claude/commands/workflow/brainstorm/system-architect.md +389 -387
- package/.claude/commands/workflow/brainstorm/ui-designer.md +221 -221
- package/.claude/commands/workflow/brainstorm/ux-expert.md +221 -221
- package/.claude/commands/workflow/debug.md +321 -0
- package/.claude/commands/workflow/execute.md +13 -0
- package/.claude/commands/workflow/init.md +165 -164
- package/.claude/commands/workflow/lite-execute.md +119 -13
- package/.claude/commands/workflow/lite-fix.md +623 -621
- package/.claude/commands/workflow/lite-plan.md +610 -592
- package/.claude/commands/workflow/plan.md +5 -5
- package/.claude/commands/workflow/review-module-cycle.md +2 -0
- package/.claude/commands/workflow/review-session-cycle.md +2 -0
- package/.claude/commands/workflow/review.md +297 -291
- package/.claude/commands/workflow/session/complete.md +153 -500
- package/.claude/commands/workflow/session/list.md +95 -95
- package/.claude/commands/workflow/session/resume.md +60 -60
- package/.claude/commands/workflow/session/start.md +199 -199
- package/.claude/commands/workflow/tdd-plan.md +3 -3
- package/.claude/commands/workflow/tdd-verify.md +23 -9
- package/.claude/commands/workflow/test-cycle-execute.md +2 -0
- package/.claude/commands/workflow/test-fix-gen.md +699 -699
- package/.claude/commands/workflow/tools/conflict-resolution.md +104 -18
- package/.claude/commands/workflow/tools/context-gather.md +436 -434
- package/.claude/commands/workflow/tools/task-generate-agent.md +490 -291
- package/.claude/commands/workflow/tools/task-generate-tdd.md +18 -10
- package/.claude/commands/workflow/tools/test-concept-enhanced.md +2 -1
- package/.claude/commands/workflow/tools/test-context-gather.md +1 -0
- package/.claude/commands/workflow/tools/test-task-generate.md +1 -0
- package/.claude/commands/workflow/ui-design/import-from-code.md +9 -6
- package/.claude/skills/command-guide/SKILL.md +5 -5
- package/.claude/skills/command-guide/index/all-commands.json +1 -1
- package/.claude/skills/command-guide/index/by-category.json +1 -1
- package/.claude/skills/command-guide/index/by-use-case.json +1 -1
- package/.claude/skills/command-guide/reference/agents/action-planning-agent.md +857 -778
- package/.claude/skills/command-guide/reference/agents/cli-execution-agent.md +266 -269
- package/.claude/skills/command-guide/reference/agents/cli-explore-agent.md +2 -2
- package/.claude/skills/command-guide/reference/agents/cli-lite-planning-agent.md +142 -92
- package/.claude/skills/command-guide/reference/agents/cli-planning-agent.md +4 -4
- package/.claude/skills/command-guide/reference/agents/code-developer.md +7 -6
- package/.claude/skills/command-guide/reference/agents/conceptual-planning-agent.md +2 -2
- package/.claude/skills/command-guide/reference/agents/context-search-agent.md +31 -32
- package/.claude/skills/command-guide/reference/agents/doc-generator.md +4 -4
- package/.claude/skills/command-guide/reference/agents/memory-bridge.md +93 -93
- package/.claude/skills/command-guide/reference/agents/test-context-search-agent.md +8 -7
- package/.claude/skills/command-guide/reference/agents/test-fix-agent.md +7 -6
- package/.claude/skills/command-guide/reference/commands/memory/docs-full-cli.md +471 -471
- package/.claude/skills/command-guide/reference/commands/memory/docs-related-cli.md +386 -386
- package/.claude/skills/command-guide/reference/commands/memory/docs.md +17 -16
- package/.claude/skills/command-guide/reference/commands/memory/load.md +5 -5
- package/.claude/skills/command-guide/reference/commands/memory/tech-research.md +194 -357
- package/.claude/skills/command-guide/reference/commands/memory/update-full.md +332 -332
- package/.claude/skills/command-guide/reference/commands/memory/workflow-skill-memory.md +4 -4
- package/.claude/skills/command-guide/reference/commands/task/create.md +151 -151
- package/.claude/skills/command-guide/reference/commands/version.md +254 -254
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/api-designer.md +585 -585
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/auto-parallel.md +443 -443
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/data-architect.md +220 -220
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/product-manager.md +200 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/product-owner.md +200 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/scrum-master.md +200 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/subject-matter-expert.md +200 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/system-architect.md +387 -387
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/ui-designer.md +221 -221
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/ux-expert.md +221 -221
- package/.claude/skills/command-guide/reference/commands/workflow/execute.md +25 -20
- package/.claude/skills/command-guide/reference/commands/workflow/init.md +164 -164
- package/.claude/skills/command-guide/reference/commands/workflow/lite-execute.md +748 -686
- package/.claude/skills/command-guide/reference/commands/workflow/lite-fix.md +664 -621
- package/.claude/skills/command-guide/reference/commands/workflow/lite-plan.md +645 -592
- package/.claude/skills/command-guide/reference/commands/workflow/plan.md +5 -5
- package/.claude/skills/command-guide/reference/commands/workflow/review.md +25 -18
- package/.claude/skills/command-guide/reference/commands/workflow/session/complete.md +547 -500
- package/.claude/skills/command-guide/reference/commands/workflow/session/list.md +45 -27
- package/.claude/skills/command-guide/reference/commands/workflow/session/resume.md +35 -19
- package/.claude/skills/command-guide/reference/commands/workflow/session/start.md +90 -33
- package/.claude/skills/command-guide/reference/commands/workflow/tdd-plan.md +3 -3
- package/.claude/skills/command-guide/reference/commands/workflow/tdd-verify.md +23 -9
- package/.claude/skills/command-guide/reference/commands/workflow/test-fix-gen.md +699 -699
- package/.claude/skills/command-guide/reference/commands/workflow/tools/conflict-resolution.md +103 -17
- package/.claude/skills/command-guide/reference/commands/workflow/tools/context-gather.md +434 -434
- package/.claude/skills/command-guide/reference/commands/workflow/tools/task-generate-agent.md +487 -291
- package/.claude/skills/command-guide/reference/commands/workflow/tools/task-generate-tdd.md +17 -10
- package/.claude/skills/command-guide/reference/commands/workflow/tools/test-concept-enhanced.md +1 -1
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/import-from-code.md +6 -6
- package/.claude/workflows/chinese-response.md +38 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-api.txt +122 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-components.txt +122 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-config.txt +89 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-core.txt +60 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-patterns.txt +70 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-testing.txt +81 -0
- package/.claude/workflows/cli-templates/prompts/rules/tech-rules-agent-prompt.txt +89 -0
- package/.claude/workflows/cli-templates/prompts/workflow/gemini-solution-design.txt +131 -131
- package/.claude/workflows/cli-templates/prompts/workflow/skill-conflict-patterns.txt +5 -9
- package/.claude/workflows/cli-templates/prompts/workflow/skill-lessons-learned.txt +5 -9
- package/.claude/workflows/cli-templates/protocols/analysis-protocol.md +112 -0
- package/.claude/workflows/cli-templates/protocols/write-protocol.md +201 -0
- package/.claude/workflows/cli-templates/schemas/conflict-resolution-schema.json +137 -0
- package/.claude/workflows/cli-templates/schemas/debug-log-json-schema.json +127 -0
- package/.claude/workflows/cli-templates/schemas/fix-plan-json-schema.json +25 -0
- package/.claude/workflows/cli-templates/schemas/plan-json-schema.json +25 -0
- package/.claude/workflows/cli-tools-usage.md +526 -0
- package/{CLAUDE.md → .claude/workflows/coding-philosophy.md} +24 -45
- package/.claude/workflows/context-tools.md +84 -0
- package/.claude/workflows/file-modification.md +64 -0
- package/.claude/workflows/tool-strategy.md +216 -79
- package/.claude/workflows/windows-platform.md +16 -0
- package/.claude/workflows/workflow-architecture.md +942 -942
- package/.codex/AGENTS.md +63 -330
- package/.codex/prompts/debug.md +318 -0
- package/.codex/prompts/execute.md +273 -0
- package/.codex/prompts/lite-execute.md +164 -0
- package/.codex/prompts/lite-plan.md +469 -0
- package/.codex/prompts.zip +0 -0
- package/.gemini/GEMINI.md +25 -164
- package/.qwen/QWEN.md +0 -139
- package/README.md +29 -9
- package/ccw/README.md +30 -6
- package/ccw/bin/ccw-mcp.js +7 -0
- package/ccw/bin/ccw.js +9 -9
- package/ccw/package.json +65 -47
- package/ccw/src/.workflow/.cli-history/history.db +0 -0
- package/ccw/src/.workflow/.cli-history/history.db-shm +0 -0
- package/ccw/src/.workflow/.cli-history/history.db-wal +0 -0
- package/ccw/src/cli.ts +244 -0
- package/ccw/src/commands/cli.ts +740 -0
- package/ccw/src/commands/core-memory.ts +770 -0
- package/ccw/src/commands/hook.ts +315 -0
- package/ccw/src/commands/install.ts +519 -0
- package/ccw/src/commands/{list.js → list.ts} +1 -1
- package/ccw/src/commands/memory.ts +1090 -0
- package/ccw/src/commands/{serve.js → serve.ts} +14 -5
- package/ccw/src/commands/session-path-resolver.ts +372 -0
- package/ccw/src/commands/session.ts +1141 -0
- package/ccw/src/commands/{stop.js → stop.ts} +16 -6
- package/ccw/src/commands/tool.ts +201 -0
- package/ccw/src/commands/{uninstall.js → uninstall.ts} +89 -40
- package/ccw/src/commands/{upgrade.js → upgrade.ts} +68 -23
- package/ccw/src/commands/{view.js → view.ts} +22 -8
- package/ccw/src/config/storage-paths.ts +670 -0
- package/ccw/src/core/cache-manager.ts +294 -0
- package/ccw/src/core/claude-freshness.ts +319 -0
- package/ccw/src/core/core-memory-store.ts +1528 -0
- package/ccw/src/core/{dashboard-generator-patch.js → dashboard-generator-patch.ts} +18 -0
- package/ccw/src/core/{dashboard-generator.js → dashboard-generator.ts} +69 -12
- package/ccw/src/core/data-aggregator.ts +584 -0
- package/ccw/src/core/history-importer.ts +625 -0
- package/ccw/src/core/{lite-scanner.js → lite-scanner-complete.ts} +162 -66
- package/ccw/src/core/lite-scanner.ts +469 -0
- package/ccw/src/core/{manifest.js → manifest.ts} +104 -34
- package/ccw/src/core/memory-embedder-bridge.ts +262 -0
- package/ccw/src/core/memory-store.ts +978 -0
- package/ccw/src/core/routes/ccw-routes.ts +96 -0
- package/ccw/src/core/routes/claude-routes.ts +1183 -0
- package/ccw/src/core/routes/cli-routes.ts +561 -0
- package/ccw/src/core/routes/codexlens-routes.ts +806 -0
- package/ccw/src/core/routes/core-memory-routes.ts +605 -0
- package/ccw/src/core/routes/files-routes.ts +428 -0
- package/ccw/src/core/routes/graph-routes.md +164 -0
- package/ccw/src/core/routes/graph-routes.ts +626 -0
- package/ccw/src/core/routes/help-routes.ts +308 -0
- package/ccw/src/core/routes/hooks-routes.ts +405 -0
- package/ccw/src/core/routes/mcp-routes.ts +1271 -0
- package/ccw/src/core/routes/mcp-routes.ts.backup +550 -0
- package/ccw/src/core/routes/mcp-templates-db.ts +268 -0
- package/ccw/src/core/routes/memory-routes.ts +1206 -0
- package/ccw/src/core/routes/rules-routes.ts +526 -0
- package/ccw/src/core/routes/session-routes.ts +467 -0
- package/ccw/src/core/routes/skills-routes.ts +599 -0
- package/ccw/src/core/routes/status-routes.ts +57 -0
- package/ccw/src/core/routes/system-routes.ts +427 -0
- package/ccw/src/core/server.ts +431 -0
- package/ccw/src/core/session-clustering-service.ts +1258 -0
- package/ccw/src/core/session-scanner.ts +283 -0
- package/ccw/src/core/websocket.ts +190 -0
- package/ccw/src/{index.js → index.ts} +1 -0
- package/ccw/src/mcp-server/index.ts +186 -0
- package/ccw/src/templates/assets/css/github-dark.min.css +10 -0
- package/ccw/src/templates/assets/css/github.min.css +10 -0
- package/ccw/src/templates/assets/js/cytoscape.min.js +32 -0
- package/ccw/src/templates/assets/js/d3.min.js +2 -0
- package/ccw/src/templates/assets/js/highlight.min.js +1244 -0
- package/ccw/src/templates/assets/js/lucide.min.js +12 -0
- package/ccw/src/templates/assets/js/marked.min.js +69 -0
- package/ccw/src/templates/assets/js/tailwind.js +83 -0
- package/ccw/src/templates/dashboard-css/01-base.css +11 -0
- package/ccw/src/templates/dashboard-css/02-session.css +22 -0
- package/ccw/src/templates/dashboard-css/04-lite-tasks.css +10 -0
- package/ccw/src/templates/dashboard-css/06-cards.css +10 -4
- package/ccw/src/templates/dashboard-css/07-managers.css +1178 -7
- package/ccw/src/templates/dashboard-css/09-explorer.css +23 -12
- package/ccw/src/templates/dashboard-css/10-cli-status.css +337 -0
- package/ccw/src/templates/dashboard-css/11-cli-history.css +271 -0
- package/ccw/src/templates/dashboard-css/12-cli-legacy.css +796 -0
- package/ccw/src/templates/dashboard-css/13-cli-ccw.css +199 -0
- package/ccw/src/templates/dashboard-css/14-cli-modals.css +258 -0
- package/ccw/src/templates/dashboard-css/15-cli-endpoints.css +305 -0
- package/ccw/src/templates/dashboard-css/16-cli-session.css +241 -0
- package/ccw/src/templates/dashboard-css/17-cli-conversation.css +283 -0
- package/ccw/src/templates/dashboard-css/18-cli-settings.css +160 -0
- package/ccw/src/templates/dashboard-css/19-cli-native-session.css +496 -0
- package/ccw/src/templates/dashboard-css/20-cli-taskqueue.css +188 -0
- package/ccw/src/templates/dashboard-css/21-cli-toolmgmt.css +310 -0
- package/ccw/src/templates/dashboard-css/22-cli-semantic.css +240 -0
- package/ccw/src/templates/dashboard-css/23-memory.css +2390 -0
- package/ccw/src/templates/dashboard-css/24-prompt-history.css +1089 -0
- package/ccw/src/templates/dashboard-css/25-skills-rules.css +326 -0
- package/ccw/src/templates/dashboard-css/26-claude-manager.css +908 -0
- package/ccw/src/templates/dashboard-css/27-graph-explorer.css +1678 -0
- package/ccw/src/templates/dashboard-css/28-mcp-manager.css +748 -0
- package/ccw/src/templates/dashboard-css/29-help.css +264 -0
- package/ccw/src/templates/dashboard-css/30-core-memory.css +1700 -0
- package/ccw/src/templates/dashboard-js/api.js +162 -142
- package/ccw/src/templates/dashboard-js/components/carousel.js +4 -4
- package/ccw/src/templates/dashboard-js/components/cli-history.js +876 -0
- package/ccw/src/templates/dashboard-js/components/cli-status.js +978 -0
- package/ccw/src/templates/dashboard-js/components/global-notifications.js +508 -219
- package/ccw/src/templates/dashboard-js/components/hook-manager.js +1277 -282
- package/ccw/src/templates/dashboard-js/components/index-manager.js +302 -0
- package/ccw/src/templates/dashboard-js/components/mcp-manager.js +718 -27
- package/ccw/src/templates/dashboard-js/components/modals.js +66 -0
- package/ccw/src/templates/dashboard-js/components/navigation.js +80 -12
- package/ccw/src/templates/dashboard-js/components/notifications.js +758 -194
- package/ccw/src/templates/dashboard-js/components/storage-manager.js +478 -0
- package/ccw/src/templates/dashboard-js/components/tabs-other.js +157 -6
- package/ccw/src/templates/dashboard-js/components/task-queue-sidebar.js +716 -0
- package/ccw/src/templates/dashboard-js/help-i18n.js +272 -0
- package/ccw/src/templates/dashboard-js/i18n.js +2807 -0
- package/ccw/src/templates/dashboard-js/main.js +15 -0
- package/ccw/src/templates/dashboard-js/state.js +243 -42
- package/ccw/src/templates/dashboard-js/utils.js +47 -1
- package/ccw/src/templates/dashboard-js/views/claude-manager.js +912 -0
- package/ccw/src/templates/dashboard-js/views/cli-manager.js +2272 -0
- package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +964 -0
- package/ccw/src/templates/dashboard-js/views/core-memory-clusters.js +503 -0
- package/ccw/src/templates/dashboard-js/views/core-memory.js +782 -0
- package/ccw/src/templates/dashboard-js/views/explorer.js +888 -852
- package/ccw/src/templates/dashboard-js/views/graph-explorer.js +1157 -0
- package/ccw/src/templates/dashboard-js/views/help.js +856 -0
- package/ccw/src/templates/dashboard-js/views/history.js +337 -0
- package/ccw/src/templates/dashboard-js/views/home.js +61 -15
- package/ccw/src/templates/dashboard-js/views/hook-manager.js +311 -43
- package/ccw/src/templates/dashboard-js/views/lite-tasks.js +204 -28
- package/ccw/src/templates/dashboard-js/views/mcp-manager.js +2187 -411
- package/ccw/src/templates/dashboard-js/views/mcp-manager.js.backup +1729 -0
- package/ccw/src/templates/dashboard-js/views/mcp-manager.js.new +928 -0
- package/ccw/src/templates/dashboard-js/views/memory.js +1221 -0
- package/ccw/src/templates/dashboard-js/views/prompt-history.js +713 -0
- package/ccw/src/templates/dashboard-js/views/rules-manager.js +828 -0
- package/ccw/src/templates/dashboard-js/views/session-detail.js +54 -53
- package/ccw/src/templates/dashboard-js/views/skills-manager.js +819 -0
- package/ccw/src/templates/dashboard.html +185 -85
- package/ccw/src/templates/hooks-config-example.json +60 -0
- package/ccw/src/tools/classify-folders.ts +245 -0
- package/ccw/src/tools/cli-config-manager.ts +268 -0
- package/ccw/src/tools/cli-executor.ts +2014 -0
- package/ccw/src/tools/cli-history-store.ts +1195 -0
- package/ccw/src/tools/codex-lens.ts +1141 -0
- package/ccw/src/tools/{convert-tokens-to-css.js → convert-tokens-to-css.ts} +73 -23
- package/ccw/src/tools/core-memory.ts +444 -0
- package/ccw/src/tools/detect-changed-modules.ts +325 -0
- package/ccw/src/tools/{discover-design-files.js → discover-design-files.ts} +74 -24
- package/ccw/src/tools/edit-file.ts +568 -0
- package/ccw/src/tools/{generate-module-docs.js → generate-module-docs.ts} +207 -185
- package/ccw/src/tools/{get-modules-by-depth.js → get-modules-by-depth.ts} +120 -79
- package/ccw/src/tools/index.ts +370 -0
- package/ccw/src/tools/native-session-discovery.ts +795 -0
- package/ccw/src/tools/notifier.ts +129 -0
- package/ccw/src/tools/read-file.ts +410 -0
- package/ccw/src/tools/resume-strategy.ts +345 -0
- package/ccw/src/tools/session-content-parser.ts +619 -0
- package/ccw/src/tools/session-manager.ts +1026 -0
- package/ccw/src/tools/smart-context.ts +228 -0
- package/ccw/src/tools/smart-search.ts +2065 -0
- package/ccw/src/tools/smart-search.ts.backup +1233 -0
- package/ccw/src/tools/storage-manager.ts +455 -0
- package/ccw/src/tools/write-file.ts +222 -0
- package/ccw/src/types/config.ts +11 -0
- package/ccw/src/types/index.ts +3 -0
- package/ccw/src/types/session.ts +25 -0
- package/ccw/src/types/tool.ts +41 -0
- package/ccw/src/utils/{browser-launcher.js → browser-launcher.ts} +10 -8
- package/ccw/src/utils/file-utils.ts +48 -0
- package/ccw/src/utils/{path-resolver.js → path-resolver.ts} +114 -78
- package/ccw/src/utils/path-validator.ts +153 -0
- package/ccw/src/utils/{ui.js → ui.ts} +32 -25
- package/codex-lens/pyproject.toml +48 -0
- package/codex-lens/src/codexlens/.workflow/.cli-history/history.db +0 -0
- package/codex-lens/src/codexlens/__init__.py +28 -0
- package/codex-lens/src/codexlens/__main__.py +14 -0
- package/codex-lens/src/codexlens/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/__main__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/entities.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/errors.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__init__.py +27 -0
- package/codex-lens/src/codexlens/cli/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/commands.py +1931 -0
- package/codex-lens/src/codexlens/cli/embedding_manager.py +620 -0
- package/codex-lens/src/codexlens/cli/model_manager.py +289 -0
- package/codex-lens/src/codexlens/cli/output.py +124 -0
- package/codex-lens/src/codexlens/config.py +201 -0
- package/codex-lens/src/codexlens/entities.py +121 -0
- package/codex-lens/src/codexlens/errors.py +55 -0
- package/codex-lens/src/codexlens/indexing/README.md +77 -0
- package/codex-lens/src/codexlens/indexing/__init__.py +4 -0
- package/codex-lens/src/codexlens/indexing/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/indexing/__pycache__/symbol_extractor.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/indexing/symbol_extractor.py +243 -0
- package/codex-lens/src/codexlens/parsers/__init__.py +8 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/encoding.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/factory.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/tokenizer.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/treesitter_parser.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/encoding.py +202 -0
- package/codex-lens/src/codexlens/parsers/factory.py +256 -0
- package/codex-lens/src/codexlens/parsers/tokenizer.py +98 -0
- package/codex-lens/src/codexlens/parsers/treesitter_parser.py +335 -0
- package/codex-lens/src/codexlens/search/__init__.py +15 -0
- package/codex-lens/src/codexlens/search/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/enrichment.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/query_parser.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/chain_search.py +647 -0
- package/codex-lens/src/codexlens/search/enrichment.py +150 -0
- package/codex-lens/src/codexlens/search/hybrid_search.py +313 -0
- package/codex-lens/src/codexlens/search/query_parser.py +242 -0
- package/codex-lens/src/codexlens/search/ranking.py +274 -0
- package/codex-lens/src/codexlens/semantic/__init__.py +39 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/ann_index.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/code_extractor.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/graph_analyzer.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/llm_enhancer.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/ann_index.py +414 -0
- package/codex-lens/src/codexlens/semantic/chunker.py +448 -0
- package/codex-lens/src/codexlens/semantic/code_extractor.py +274 -0
- package/codex-lens/src/codexlens/semantic/embedder.py +185 -0
- package/codex-lens/src/codexlens/semantic/vector_store.py +955 -0
- package/codex-lens/src/codexlens/storage/__init__.py +29 -0
- package/codex-lens/src/codexlens/storage/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/dir_index.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/file_cache.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/index_tree.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/migration_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/registry.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/sqlite_store.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/sqlite_utils.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/dir_index.py +1850 -0
- package/codex-lens/src/codexlens/storage/file_cache.py +32 -0
- package/codex-lens/src/codexlens/storage/index_tree.py +776 -0
- package/codex-lens/src/codexlens/storage/migration_manager.py +154 -0
- package/codex-lens/src/codexlens/storage/migrations/__init__.py +1 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_001_normalize_keywords.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_002_add_token_metadata.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_003_code_relationships.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_004_dual_fts.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_005_cleanup_unused_fields.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_001_normalize_keywords.py +123 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_002_add_token_metadata.py +48 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_004_dual_fts.py +232 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_005_cleanup_unused_fields.py +196 -0
- package/codex-lens/src/codexlens/storage/path_mapper.py +274 -0
- package/codex-lens/src/codexlens/storage/registry.py +670 -0
- package/codex-lens/src/codexlens/storage/sqlite_store.py +576 -0
- package/codex-lens/src/codexlens/storage/sqlite_utils.py +64 -0
- package/package.json +4 -1
- package/.claude/commands/memory/tech-research.md +0 -477
- package/.claude/scripts/classify-folders.sh +0 -39
- package/.claude/scripts/convert_tokens_to_css.sh +0 -229
- package/.claude/scripts/detect_changed_modules.sh +0 -161
- package/.claude/scripts/discover-design-files.sh +0 -87
- package/.claude/scripts/extract-animations.js +0 -243
- package/.claude/scripts/extract-computed-styles.js +0 -118
- package/.claude/scripts/extract-layout-structure.js +0 -411
- package/.claude/scripts/generate_module_docs.sh +0 -717
- package/.claude/scripts/get_modules_by_depth.sh +0 -170
- package/.claude/scripts/ui-generate-preview.sh +0 -395
- package/.claude/scripts/ui-instantiate-prototypes.sh +0 -815
- package/.claude/scripts/update_module_claude.sh +0 -337
- package/.claude/workflows/context-search-strategy.md +0 -77
- package/.claude/workflows/intelligent-tools-strategy.md +0 -662
- package/ccw/src/cli.js +0 -119
- package/ccw/src/commands/install.js +0 -324
- package/ccw/src/commands/tool.js +0 -138
- package/ccw/src/core/data-aggregator.js +0 -409
- package/ccw/src/core/server.js +0 -2063
- package/ccw/src/core/session-scanner.js +0 -235
- package/ccw/src/tools/classify-folders.js +0 -204
- package/ccw/src/tools/detect-changed-modules.js +0 -288
- package/ccw/src/tools/edit-file.js +0 -266
- package/ccw/src/tools/index.js +0 -176
- package/ccw/src/utils/file-utils.js +0 -48
|
@@ -0,0 +1,620 @@
|
|
|
1
|
+
"""Embedding Manager - Manage semantic embeddings for code indexes."""
|
|
2
|
+
|
|
3
|
+
import gc
|
|
4
|
+
import logging
|
|
5
|
+
import sqlite3
|
|
6
|
+
import time
|
|
7
|
+
from itertools import islice
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, Generator, List, Optional, Tuple
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
from codexlens.semantic import SEMANTIC_AVAILABLE
|
|
13
|
+
if SEMANTIC_AVAILABLE:
|
|
14
|
+
from codexlens.semantic.embedder import Embedder, get_embedder, clear_embedder_cache
|
|
15
|
+
from codexlens.semantic.vector_store import VectorStore
|
|
16
|
+
from codexlens.semantic.chunker import Chunker, ChunkConfig
|
|
17
|
+
except ImportError:
|
|
18
|
+
SEMANTIC_AVAILABLE = False
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
# Embedding batch size - larger values improve throughput on modern hardware
|
|
23
|
+
# Default 64 balances memory usage and GPU/CPU utilization
|
|
24
|
+
EMBEDDING_BATCH_SIZE = 64 # Increased from 8 for better performance
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _generate_chunks_from_cursor(
    cursor,
    chunker,
    path_column: str,
    file_batch_size: int,
    failed_files: List[Tuple[str, str]],
) -> Generator[Tuple, None, Tuple[int, int]]:
    """Stream ``(chunk, file_path)`` pairs from a database cursor.

    Rows are pulled with ``cursor.fetchmany(file_batch_size)`` and each file is
    chunked only when it is reached, so this generator never holds more than
    one fetched batch of rows plus the chunks of the file being processed.

    Args:
        cursor: Cursor whose rows can be indexed by ``path_column``,
            ``"content"`` and ``"language"``.
        chunker: Object exposing
            ``chunk_sliding_window(content, file_path=..., language=...)``.
        path_column: Name of the row field that holds the file path.
        file_batch_size: Number of rows to fetch per ``fetchmany`` call.
        failed_files: List mutated in place; ``(file_path, error_message)`` is
            appended for every file whose chunking raises.

    Yields:
        ``(chunk, file_path)`` tuples.

    Returns:
        ``(total_files_processed, batch_count)``, delivered to the consumer as
        ``StopIteration.value`` once the cursor is exhausted.
        ``total_files_processed`` counts only files that produced at least one
        chunk; ``batch_count`` is the number of non-empty ``fetchmany`` calls.
    """
    total_files = 0
    batch_count = 0

    while True:
        file_batch = cursor.fetchmany(file_batch_size)
        if not file_batch:
            break

        batch_count += 1

        for file_row in file_batch:
            file_path = file_row[path_column]
            content = file_row["content"]
            # Rows without a recorded language are chunked as Python.
            language = file_row["language"] or "python"

            try:
                chunks = chunker.chunk_sliding_window(
                    content,
                    file_path=file_path,
                    language=language,
                )
                if chunks:
                    total_files += 1
                    for chunk in chunks:
                        yield (chunk, file_path)
            except Exception as e:
                # One bad file must not abort the whole stream: record it and
                # move on to the next row.
                logger.error(f"Failed to chunk {file_path}: {e}")
                failed_files.append((file_path, str(e)))

    # Honour the documented contract: without this the generator's return
    # value (StopIteration.value) was always None.
    return total_files, batch_count
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _get_path_column(conn: sqlite3.Connection) -> str:
    """Return the name of the column holding file paths in the ``files`` table.

    ``full_path`` wins when both candidate columns are present.

    Args:
        conn: SQLite connection to the index database

    Returns:
        Either ``'full_path'`` or ``'path'``

    Raises:
        ValueError: If the files table exposes neither column
    """
    # Each PRAGMA table_info row carries the column name at index 1.
    available = set()
    for info_row in conn.execute("PRAGMA table_info(files)").fetchall():
        available.add(info_row[1])

    for candidate in ("full_path", "path"):
        if candidate in available:
            return candidate

    raise ValueError("files table has neither 'path' nor 'full_path' column")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def check_index_embeddings(index_path: Path) -> Dict[str, any]:
    """Check if an index has embeddings and return coverage statistics.

    Args:
        index_path: Path to _index.db file

    Returns:
        ``{"success": True, "result": {...}}`` where ``result`` holds
        ``has_embeddings``, ``total_chunks``, ``total_files``,
        ``files_with_chunks``, ``files_without_chunks``, ``coverage_percent``,
        ``missing_files_sample`` (at most 5 paths) and ``index_path``; or
        ``{"success": False, "error": str}`` when the file is missing or any
        database operation fails. This function does not raise.
    """
    # Local import keeps the block self-contained (no file-level import change).
    from contextlib import closing

    if not index_path.exists():
        return {
            "success": False,
            "error": f"Index not found: {index_path}",
        }

    try:
        # sqlite3's own context manager only scopes a transaction and leaves
        # the connection open; closing() guarantees the handle is released on
        # every exit path (important on platforms that lock open db files).
        with closing(sqlite3.connect(index_path)) as conn:
            # Check if semantic_chunks table exists
            cursor = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
            )
            table_exists = cursor.fetchone() is not None

            if not table_exists:
                # Count total indexed files even without embeddings
                cursor = conn.execute("SELECT COUNT(*) FROM files")
                total_files = cursor.fetchone()[0]

                return {
                    "success": True,
                    "result": {
                        "has_embeddings": False,
                        "total_chunks": 0,
                        "total_files": total_files,
                        "files_with_chunks": 0,
                        "files_without_chunks": total_files,
                        "coverage_percent": 0.0,
                        "missing_files_sample": [],
                        "index_path": str(index_path),
                    },
                }

            # Count total chunks
            cursor = conn.execute("SELECT COUNT(*) FROM semantic_chunks")
            total_chunks = cursor.fetchone()[0]

            # Count total indexed files
            cursor = conn.execute("SELECT COUNT(*) FROM files")
            total_files = cursor.fetchone()[0]

            # Count files with embeddings
            cursor = conn.execute(
                "SELECT COUNT(DISTINCT file_path) FROM semantic_chunks"
            )
            files_with_chunks = cursor.fetchone()[0]

            # Get a sample of files without embeddings. The interpolated
            # column name comes from _get_path_column, which only ever
            # returns 'path' or 'full_path', never caller input.
            path_column = _get_path_column(conn)
            cursor = conn.execute(f"""
                SELECT {path_column}
                FROM files
                WHERE {path_column} NOT IN (
                    SELECT DISTINCT file_path FROM semantic_chunks
                )
                LIMIT 5
            """)
            missing_files = [row[0] for row in cursor.fetchall()]

            return {
                "success": True,
                "result": {
                    "has_embeddings": total_chunks > 0,
                    "total_chunks": total_chunks,
                    "total_files": total_files,
                    "files_with_chunks": files_with_chunks,
                    "files_without_chunks": total_files - files_with_chunks,
                    "coverage_percent": round((files_with_chunks / total_files * 100) if total_files > 0 else 0, 1),
                    "missing_files_sample": missing_files,
                    "index_path": str(index_path),
                },
            }

    except Exception as e:
        # Callers consume a result dict, so report the failure instead of raising.
        return {
            "success": False,
            "error": f"Failed to check embeddings: {str(e)}",
        }
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def generate_embeddings(
    index_path: Path,
    model_profile: str = "code",
    force: bool = False,
    chunk_size: int = 2000,
    progress_callback: Optional[callable] = None,
) -> Dict[str, any]:
    """Generate embeddings for one index database using streaming batches.

    Files are read from the ``files`` table ``FILE_BATCH_SIZE`` rows at a time,
    chunked lazily, and embedded ``EMBEDDING_BATCH_SIZE`` chunks at a time, so
    memory use is driven by the batch sizes rather than by the project size.

    Args:
        index_path: Path to _index.db file
        model_profile: Model profile (fast, code, multilingual, balanced)
        force: If True, delete existing chunks and regenerate
        chunk_size: Maximum chunk size in characters
        progress_callback: Optional callable invoked with a single
            human-readable status string

    Returns:
        On success ``{"success": True, "result": {...}}`` where ``result``
        holds ``chunks_created``, ``files_processed``, ``files_failed``,
        ``elapsed_time``, ``model_profile``, ``model_name``, ``failed_files``
        (first 5) and ``index_path``. On failure
        ``{"success": False, "error": str}`` (plus ``existing_chunks`` when the
        index already has chunks and ``force`` is False).
    """
    # Local import keeps the block self-contained (no file-level import change).
    from contextlib import closing

    if not SEMANTIC_AVAILABLE:
        return {
            "success": False,
            "error": "Semantic search not available. Install with: pip install codexlens[semantic]",
        }

    if not index_path.exists():
        return {
            "success": False,
            "error": f"Index not found: {index_path}",
        }

    # Check existing chunks
    status = check_index_embeddings(index_path)
    if not status["success"]:
        return status

    existing_chunks = status["result"]["total_chunks"]

    if existing_chunks > 0 and not force:
        return {
            "success": False,
            "error": f"Index already has {existing_chunks} chunks. Use --force to regenerate.",
            "existing_chunks": existing_chunks,
        }

    if force and existing_chunks > 0:
        if progress_callback:
            progress_callback(f"Clearing {existing_chunks} existing chunks...")

        try:
            # closing() releases the handle; sqlite3's own context manager
            # would only commit/rollback and leave the connection open.
            with closing(sqlite3.connect(index_path)) as conn:
                conn.execute("DELETE FROM semantic_chunks")
                conn.commit()
        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to clear existing chunks: {str(e)}",
            }

    # Initialize components
    try:
        # Initialize embedder (singleton, reused throughout the function)
        embedder = get_embedder(profile=model_profile)
        # skip_token_count=True: Use fast estimation (len/4) instead of expensive tiktoken
        # This significantly reduces CPU usage with minimal impact on metadata accuracy
        chunker = Chunker(config=ChunkConfig(max_chunk_size=chunk_size, skip_token_count=True))

        if progress_callback:
            progress_callback(f"Using model: {embedder.model_name} ({embedder.embedding_dim} dimensions)")

    except Exception as e:
        return {
            "success": False,
            "error": f"Failed to initialize components: {str(e)}",
        }

    # --- STREAMING PROCESSING ---
    # Process files in batches to control memory usage
    start_time = time.time()
    failed_files = []
    total_chunks_created = 0
    total_files_processed = 0
    FILE_BATCH_SIZE = 100  # Process 100 files at a time
    # EMBEDDING_BATCH_SIZE is defined at module level (default: 64)

    try:
        with VectorStore(index_path) as vector_store:
            # Use bulk insert mode for efficient batch ANN index building
            # This defers ANN updates until end_bulk_insert() is called
            with vector_store.bulk_insert():
                with closing(sqlite3.connect(index_path)) as conn:
                    conn.row_factory = sqlite3.Row
                    path_column = _get_path_column(conn)

                    # Get total file count for progress reporting
                    total_files = conn.execute("SELECT COUNT(*) FROM files").fetchone()[0]
                    if total_files == 0:
                        # The finally clause below still releases the embedder.
                        return {"success": False, "error": "No files found in index"}

                    if progress_callback:
                        # Format must match Node.js parseProgressLine: "Processing N files" with 'embed' keyword
                        progress_callback(f"Processing {total_files} files for embeddings in batches of {FILE_BATCH_SIZE}...")

                    cursor = conn.execute(f"SELECT {path_column}, content, language FROM files")

                    # --- STREAMING GENERATOR APPROACH ---
                    # Instead of accumulating all chunks from 100 files, we use a generator
                    # that yields chunks on-demand, keeping memory usage low and constant.
                    chunk_generator = _generate_chunks_from_cursor(
                        cursor, chunker, path_column, FILE_BATCH_SIZE, failed_files
                    )

                    batch_number = 0
                    files_seen = set()

                    while True:
                        # Get a small batch of chunks from the generator (EMBEDDING_BATCH_SIZE at a time)
                        chunk_batch = list(islice(chunk_generator, EMBEDDING_BATCH_SIZE))
                        if not chunk_batch:
                            break

                        batch_number += 1

                        # Generate embeddings directly to numpy (no tolist() conversion)
                        try:
                            batch_contents = [chunk.content for chunk, _ in chunk_batch]
                            embeddings_numpy = embedder.embed_to_numpy(batch_contents)

                            # Use add_chunks_batch_numpy to avoid numpy->list->numpy roundtrip
                            vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)

                            total_chunks_created += len(chunk_batch)
                            # Count a file as processed only once one of its
                            # chunks has actually been stored, so files from
                            # failed batches are not reported as processed.
                            files_seen.update(path for _, path in chunk_batch)
                            total_files_processed = len(files_seen)

                            if progress_callback and batch_number % 10 == 0:
                                progress_callback(f"  Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")

                            # Cleanup intermediate data
                            del batch_contents, embeddings_numpy, chunk_batch

                        except Exception as e:
                            logger.error(f"Failed to process embedding batch {batch_number}: {str(e)}")
                            # Continue to next batch instead of failing entirely
                            continue

                # Notify before ANN index finalization (happens when bulk_insert context exits)
                if progress_callback:
                    progress_callback(f"Finalizing index... Building ANN index for {total_chunks_created} chunks")

        elapsed_time = time.time() - start_time

    except Exception as e:
        return {"success": False, "error": f"Failed to read or process files: {str(e)}"}

    finally:
        # Release ONNX resources on EVERY exit path (success, error and the
        # early "no files" return). This is critical - without it, ONNX Runtime
        # threads prevent Python from exiting.
        try:
            clear_embedder_cache()
            gc.collect()
        except Exception:
            # Best-effort cleanup: never let it mask the real result.
            logger.debug("Embedder cleanup failed", exc_info=True)

    return {
        "success": True,
        "result": {
            "chunks_created": total_chunks_created,
            "files_processed": total_files_processed,
            "files_failed": len(failed_files),
            "elapsed_time": elapsed_time,
            "model_profile": model_profile,
            "model_name": embedder.model_name,
            "failed_files": failed_files[:5],  # First 5 failures
            "index_path": str(index_path),
        },
    }
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def discover_all_index_dbs(index_root: Path) -> List[Path]:
    """Collect every ``_index.db`` file below ``index_root``, in sorted order.

    Args:
        index_root: Root directory to scan for _index.db files

    Returns:
        Sorted list of matching paths; empty when ``index_root`` does not exist
    """
    found: List[Path] = []
    if index_root.exists():
        found.extend(index_root.rglob("_index.db"))
        found.sort()
    return found
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def find_all_indexes(scan_dir: Path) -> List[Path]:
    """Locate every ``_index.db`` file underneath ``scan_dir``.

    Args:
        scan_dir: Directory to scan

    Returns:
        Matching paths in the order the recursive glob produces them (not
        sorted); empty when ``scan_dir`` does not exist
    """
    if scan_dir.exists():
        return [*scan_dir.rglob("_index.db")]
    return []
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def generate_embeddings_recursive(
    index_root: Path,
    model_profile: str = "code",
    force: bool = False,
    chunk_size: int = 2000,
    progress_callback: Optional[callable] = None,
) -> Dict[str, any]:
    """Run ``generate_embeddings`` over every index database under a root.

    Args:
        index_root: Root index directory containing _index.db files
        model_profile: Model profile (fast, code, multilingual, balanced)
        force: If True, regenerate even if embeddings exist
        chunk_size: Maximum chunk size in characters
        progress_callback: Optional callback for progress updates; it receives
            per-database messages only and is not forwarded to the
            per-index runs

    Returns:
        Aggregated result dictionary; ``success`` is True when at least one
        index database was processed successfully
    """
    index_files = discover_all_index_dbs(index_root)
    if not index_files:
        return {
            "success": False,
            "error": f"No index databases found in {index_root}",
        }

    if progress_callback:
        progress_callback(f"Found {len(index_files)} index databases to process")

    details = []
    chunk_total = 0
    processed_total = 0
    failed_total = 0

    for position, db_path in enumerate(index_files, start=1):
        if progress_callback:
            try:
                shown_path = db_path.relative_to(index_root)
            except ValueError:
                shown_path = db_path
            # Format: "Processing file X/Y: path" to match Node.js parseProgressLine
            progress_callback(f"Processing file {position}/{len(index_files)}: {shown_path}")

        outcome = generate_embeddings(
            db_path,
            model_profile=model_profile,
            force=force,
            chunk_size=chunk_size,
            progress_callback=None,  # per-index progress is deliberately not cascaded
        )

        details.append({
            "path": str(db_path),
            "success": outcome["success"],
            "result": outcome.get("result"),
            "error": outcome.get("error"),
        })

        if not outcome["success"]:
            continue

        stats = outcome["result"]
        chunk_total += stats["chunks_created"]
        processed_total += stats["files_processed"]
        failed_total += stats["files_failed"]

    succeeded = len([entry for entry in details if entry["success"]])

    # Every generate_embeddings() call cleans up after itself; this last pass
    # is a best-effort safety net, so any error here is ignored.
    try:
        if SEMANTIC_AVAILABLE:
            clear_embedder_cache()
            gc.collect()
    except Exception:
        pass

    summary = {
        "indexes_processed": len(index_files),
        "indexes_successful": succeeded,
        "indexes_failed": len(index_files) - succeeded,
        "total_chunks_created": chunk_total,
        "total_files_processed": processed_total,
        "total_files_failed": failed_total,
        "model_profile": model_profile,
        "details": details,
    }
    return {"success": succeeded > 0, "result": summary}
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def get_embeddings_status(index_root: Path) -> Dict[str, any]:
    """Aggregate embedding coverage across every index database under a root.

    Index databases whose status check fails still count toward
    ``total_indexes`` but contribute nothing to the file and chunk totals.

    Args:
        index_root: Root index directory

    Returns:
        ``{"success": True, "result": {...}}`` with combined file, chunk and
        index counts plus the overall ``coverage_percent``
    """
    index_files = discover_all_index_dbs(index_root)

    if not index_files:
        empty_summary = {
            "total_indexes": 0,
            "total_files": 0,
            "files_with_embeddings": 0,
            "files_without_embeddings": 0,
            "total_chunks": 0,
            "coverage_percent": 0.0,
            "indexes_with_embeddings": 0,
            "indexes_without_embeddings": 0,
        }
        return {"success": True, "result": empty_summary}

    file_total = 0
    embedded_files = 0
    chunk_total = 0
    embedded_indexes = 0

    for db_path in index_files:
        report = check_index_embeddings(db_path)
        if not report["success"]:
            continue
        stats = report["result"]
        file_total += stats["total_files"]
        embedded_files += stats["files_with_chunks"]
        chunk_total += stats["total_chunks"]
        if stats["has_embeddings"]:
            embedded_indexes += 1

    # Guard against division by zero when no files were counted.
    if file_total > 0:
        coverage = embedded_files / file_total * 100
    else:
        coverage = 0

    index_total = len(index_files)
    return {
        "success": True,
        "result": {
            "total_indexes": index_total,
            "total_files": file_total,
            "files_with_embeddings": embedded_files,
            "files_without_embeddings": file_total - embedded_files,
            "total_chunks": chunk_total,
            "coverage_percent": round(coverage, 1),
            "indexes_with_embeddings": embedded_indexes,
            "indexes_without_embeddings": index_total - embedded_indexes,
        },
    }
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def get_embedding_stats_summary(index_root: Path) -> Dict[str, any]:
    """Summarise embedding statistics for each index found under a root.

    Indexes whose status check fails are counted in ``total_indexes`` but get
    no entry in the per-index list.

    Args:
        index_root: Root directory containing indexes

    Returns:
        ``{"success": True, "result": {...}}`` with overall counts and one
        entry per successfully inspected index under ``"indexes"``
    """
    indexes = find_all_indexes(index_root)

    if not indexes:
        return {
            "success": True,
            "result": {
                "total_indexes": 0,
                "indexes_with_embeddings": 0,
                "total_chunks": 0,
                "indexes": [],
            },
        }

    per_index = []
    for db_path in indexes:
        report = check_index_embeddings(db_path)
        if not report["success"]:
            continue

        stats = report["result"]
        per_index.append({
            # The directory holding the database is used as the project name.
            "project": db_path.parent.name,
            "path": str(db_path),
            "has_embeddings": stats["has_embeddings"],
            "total_chunks": stats["total_chunks"],
            "total_files": stats["total_files"],
            "coverage_percent": stats.get("coverage_percent", 0),
        })

    embedded = [entry for entry in per_index if entry["has_embeddings"]]

    return {
        "success": True,
        "result": {
            "total_indexes": len(indexes),
            "indexes_with_embeddings": len(embedded),
            "total_chunks": sum(entry["total_chunks"] for entry in embedded),
            "indexes": per_index,
        },
    }
|