claude-code-workflow 6.1.4 → 6.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/CLAUDE.md +10 -0
- package/.claude/agents/action-planning-agent.md +857 -778
- package/.claude/agents/cli-execution-agent.md +266 -269
- package/.claude/agents/cli-explore-agent.md +2 -2
- package/.claude/agents/cli-lite-planning-agent.md +142 -92
- package/.claude/agents/cli-planning-agent.md +4 -4
- package/.claude/agents/code-developer.md +7 -6
- package/.claude/agents/conceptual-planning-agent.md +2 -2
- package/.claude/agents/context-search-agent.md +31 -32
- package/.claude/agents/doc-generator.md +4 -4
- package/.claude/agents/memory-bridge.md +93 -93
- package/.claude/agents/test-context-search-agent.md +8 -7
- package/.claude/agents/test-fix-agent.md +7 -6
- package/.claude/commands/clean.md +516 -0
- package/.claude/commands/memory/compact.md +383 -0
- package/.claude/commands/memory/docs-full-cli.md +471 -471
- package/.claude/commands/memory/docs-related-cli.md +386 -386
- package/.claude/commands/memory/docs.md +615 -615
- package/.claude/commands/memory/load.md +5 -5
- package/.claude/commands/memory/tech-research-rules.md +310 -0
- package/.claude/commands/memory/update-full.md +332 -332
- package/.claude/commands/memory/workflow-skill-memory.md +4 -4
- package/.claude/commands/task/create.md +151 -151
- package/.claude/commands/version.md +254 -254
- package/.claude/commands/workflow/brainstorm/api-designer.md +587 -585
- package/.claude/commands/workflow/brainstorm/artifacts.md +1 -0
- package/.claude/commands/workflow/brainstorm/auto-parallel.md +443 -443
- package/.claude/commands/workflow/brainstorm/data-architect.md +220 -220
- package/.claude/commands/workflow/brainstorm/product-manager.md +200 -200
- package/.claude/commands/workflow/brainstorm/product-owner.md +200 -200
- package/.claude/commands/workflow/brainstorm/scrum-master.md +200 -200
- package/.claude/commands/workflow/brainstorm/subject-matter-expert.md +200 -200
- package/.claude/commands/workflow/brainstorm/system-architect.md +389 -387
- package/.claude/commands/workflow/brainstorm/ui-designer.md +221 -221
- package/.claude/commands/workflow/brainstorm/ux-expert.md +221 -221
- package/.claude/commands/workflow/debug.md +321 -0
- package/.claude/commands/workflow/execute.md +13 -0
- package/.claude/commands/workflow/init.md +165 -164
- package/.claude/commands/workflow/lite-execute.md +119 -13
- package/.claude/commands/workflow/lite-fix.md +623 -621
- package/.claude/commands/workflow/lite-plan.md +610 -592
- package/.claude/commands/workflow/plan.md +5 -5
- package/.claude/commands/workflow/review-module-cycle.md +2 -0
- package/.claude/commands/workflow/review-session-cycle.md +2 -0
- package/.claude/commands/workflow/review.md +297 -291
- package/.claude/commands/workflow/session/complete.md +153 -500
- package/.claude/commands/workflow/session/list.md +95 -95
- package/.claude/commands/workflow/session/resume.md +60 -60
- package/.claude/commands/workflow/session/start.md +199 -199
- package/.claude/commands/workflow/tdd-plan.md +3 -3
- package/.claude/commands/workflow/tdd-verify.md +23 -9
- package/.claude/commands/workflow/test-cycle-execute.md +2 -0
- package/.claude/commands/workflow/test-fix-gen.md +699 -699
- package/.claude/commands/workflow/tools/conflict-resolution.md +104 -18
- package/.claude/commands/workflow/tools/context-gather.md +436 -434
- package/.claude/commands/workflow/tools/task-generate-agent.md +490 -291
- package/.claude/commands/workflow/tools/task-generate-tdd.md +18 -10
- package/.claude/commands/workflow/tools/test-concept-enhanced.md +2 -1
- package/.claude/commands/workflow/tools/test-context-gather.md +1 -0
- package/.claude/commands/workflow/tools/test-task-generate.md +1 -0
- package/.claude/commands/workflow/ui-design/import-from-code.md +9 -6
- package/.claude/skills/command-guide/SKILL.md +5 -5
- package/.claude/skills/command-guide/index/all-commands.json +1 -1
- package/.claude/skills/command-guide/index/by-category.json +1 -1
- package/.claude/skills/command-guide/index/by-use-case.json +1 -1
- package/.claude/skills/command-guide/reference/agents/action-planning-agent.md +857 -778
- package/.claude/skills/command-guide/reference/agents/cli-execution-agent.md +266 -269
- package/.claude/skills/command-guide/reference/agents/cli-explore-agent.md +2 -2
- package/.claude/skills/command-guide/reference/agents/cli-lite-planning-agent.md +142 -92
- package/.claude/skills/command-guide/reference/agents/cli-planning-agent.md +4 -4
- package/.claude/skills/command-guide/reference/agents/code-developer.md +7 -6
- package/.claude/skills/command-guide/reference/agents/conceptual-planning-agent.md +2 -2
- package/.claude/skills/command-guide/reference/agents/context-search-agent.md +31 -32
- package/.claude/skills/command-guide/reference/agents/doc-generator.md +4 -4
- package/.claude/skills/command-guide/reference/agents/memory-bridge.md +93 -93
- package/.claude/skills/command-guide/reference/agents/test-context-search-agent.md +8 -7
- package/.claude/skills/command-guide/reference/agents/test-fix-agent.md +7 -6
- package/.claude/skills/command-guide/reference/commands/memory/docs-full-cli.md +471 -471
- package/.claude/skills/command-guide/reference/commands/memory/docs-related-cli.md +386 -386
- package/.claude/skills/command-guide/reference/commands/memory/docs.md +17 -16
- package/.claude/skills/command-guide/reference/commands/memory/load.md +5 -5
- package/.claude/skills/command-guide/reference/commands/memory/tech-research.md +194 -357
- package/.claude/skills/command-guide/reference/commands/memory/update-full.md +332 -332
- package/.claude/skills/command-guide/reference/commands/memory/workflow-skill-memory.md +4 -4
- package/.claude/skills/command-guide/reference/commands/task/create.md +151 -151
- package/.claude/skills/command-guide/reference/commands/version.md +254 -254
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/api-designer.md +585 -585
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/auto-parallel.md +443 -443
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/data-architect.md +220 -220
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/product-manager.md +200 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/product-owner.md +200 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/scrum-master.md +200 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/subject-matter-expert.md +200 -200
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/system-architect.md +387 -387
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/ui-designer.md +221 -221
- package/.claude/skills/command-guide/reference/commands/workflow/brainstorm/ux-expert.md +221 -221
- package/.claude/skills/command-guide/reference/commands/workflow/execute.md +25 -20
- package/.claude/skills/command-guide/reference/commands/workflow/init.md +164 -164
- package/.claude/skills/command-guide/reference/commands/workflow/lite-execute.md +748 -686
- package/.claude/skills/command-guide/reference/commands/workflow/lite-fix.md +664 -621
- package/.claude/skills/command-guide/reference/commands/workflow/lite-plan.md +645 -592
- package/.claude/skills/command-guide/reference/commands/workflow/plan.md +5 -5
- package/.claude/skills/command-guide/reference/commands/workflow/review.md +25 -18
- package/.claude/skills/command-guide/reference/commands/workflow/session/complete.md +547 -500
- package/.claude/skills/command-guide/reference/commands/workflow/session/list.md +45 -27
- package/.claude/skills/command-guide/reference/commands/workflow/session/resume.md +35 -19
- package/.claude/skills/command-guide/reference/commands/workflow/session/start.md +90 -33
- package/.claude/skills/command-guide/reference/commands/workflow/tdd-plan.md +3 -3
- package/.claude/skills/command-guide/reference/commands/workflow/tdd-verify.md +23 -9
- package/.claude/skills/command-guide/reference/commands/workflow/test-fix-gen.md +699 -699
- package/.claude/skills/command-guide/reference/commands/workflow/tools/conflict-resolution.md +103 -17
- package/.claude/skills/command-guide/reference/commands/workflow/tools/context-gather.md +434 -434
- package/.claude/skills/command-guide/reference/commands/workflow/tools/task-generate-agent.md +487 -291
- package/.claude/skills/command-guide/reference/commands/workflow/tools/task-generate-tdd.md +17 -10
- package/.claude/skills/command-guide/reference/commands/workflow/tools/test-concept-enhanced.md +1 -1
- package/.claude/skills/command-guide/reference/commands/workflow/ui-design/import-from-code.md +6 -6
- package/.claude/workflows/chinese-response.md +38 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-api.txt +122 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-components.txt +122 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-config.txt +89 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-core.txt +60 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-patterns.txt +70 -0
- package/.claude/workflows/cli-templates/prompts/rules/rule-testing.txt +81 -0
- package/.claude/workflows/cli-templates/prompts/rules/tech-rules-agent-prompt.txt +89 -0
- package/.claude/workflows/cli-templates/prompts/workflow/gemini-solution-design.txt +131 -131
- package/.claude/workflows/cli-templates/prompts/workflow/skill-conflict-patterns.txt +5 -9
- package/.claude/workflows/cli-templates/prompts/workflow/skill-lessons-learned.txt +5 -9
- package/.claude/workflows/cli-templates/protocols/analysis-protocol.md +112 -0
- package/.claude/workflows/cli-templates/protocols/write-protocol.md +201 -0
- package/.claude/workflows/cli-templates/schemas/conflict-resolution-schema.json +137 -0
- package/.claude/workflows/cli-templates/schemas/debug-log-json-schema.json +127 -0
- package/.claude/workflows/cli-templates/schemas/fix-plan-json-schema.json +25 -0
- package/.claude/workflows/cli-templates/schemas/plan-json-schema.json +25 -0
- package/.claude/workflows/cli-tools-usage.md +526 -0
- package/{CLAUDE.md → .claude/workflows/coding-philosophy.md} +24 -45
- package/.claude/workflows/context-tools.md +84 -0
- package/.claude/workflows/file-modification.md +64 -0
- package/.claude/workflows/tool-strategy.md +216 -79
- package/.claude/workflows/windows-platform.md +16 -0
- package/.claude/workflows/workflow-architecture.md +942 -942
- package/.codex/AGENTS.md +63 -330
- package/.codex/prompts/debug.md +318 -0
- package/.codex/prompts/execute.md +273 -0
- package/.codex/prompts/lite-execute.md +164 -0
- package/.codex/prompts/lite-plan.md +469 -0
- package/.codex/prompts.zip +0 -0
- package/.gemini/GEMINI.md +25 -164
- package/.qwen/QWEN.md +0 -139
- package/README.md +29 -9
- package/ccw/README.md +30 -6
- package/ccw/bin/ccw-mcp.js +7 -0
- package/ccw/bin/ccw.js +9 -9
- package/ccw/package.json +65 -47
- package/ccw/src/.workflow/.cli-history/history.db +0 -0
- package/ccw/src/.workflow/.cli-history/history.db-shm +0 -0
- package/ccw/src/.workflow/.cli-history/history.db-wal +0 -0
- package/ccw/src/cli.ts +244 -0
- package/ccw/src/commands/cli.ts +740 -0
- package/ccw/src/commands/core-memory.ts +770 -0
- package/ccw/src/commands/hook.ts +315 -0
- package/ccw/src/commands/install.ts +519 -0
- package/ccw/src/commands/{list.js → list.ts} +1 -1
- package/ccw/src/commands/memory.ts +1090 -0
- package/ccw/src/commands/{serve.js → serve.ts} +14 -5
- package/ccw/src/commands/session-path-resolver.ts +372 -0
- package/ccw/src/commands/session.ts +1141 -0
- package/ccw/src/commands/{stop.js → stop.ts} +16 -6
- package/ccw/src/commands/tool.ts +201 -0
- package/ccw/src/commands/{uninstall.js → uninstall.ts} +89 -40
- package/ccw/src/commands/{upgrade.js → upgrade.ts} +68 -23
- package/ccw/src/commands/{view.js → view.ts} +22 -8
- package/ccw/src/config/storage-paths.ts +670 -0
- package/ccw/src/core/cache-manager.ts +294 -0
- package/ccw/src/core/claude-freshness.ts +319 -0
- package/ccw/src/core/core-memory-store.ts +1528 -0
- package/ccw/src/core/{dashboard-generator-patch.js → dashboard-generator-patch.ts} +18 -0
- package/ccw/src/core/{dashboard-generator.js → dashboard-generator.ts} +69 -12
- package/ccw/src/core/data-aggregator.ts +584 -0
- package/ccw/src/core/history-importer.ts +625 -0
- package/ccw/src/core/{lite-scanner.js → lite-scanner-complete.ts} +162 -66
- package/ccw/src/core/lite-scanner.ts +469 -0
- package/ccw/src/core/{manifest.js → manifest.ts} +104 -34
- package/ccw/src/core/memory-embedder-bridge.ts +262 -0
- package/ccw/src/core/memory-store.ts +978 -0
- package/ccw/src/core/routes/ccw-routes.ts +96 -0
- package/ccw/src/core/routes/claude-routes.ts +1183 -0
- package/ccw/src/core/routes/cli-routes.ts +561 -0
- package/ccw/src/core/routes/codexlens-routes.ts +806 -0
- package/ccw/src/core/routes/core-memory-routes.ts +605 -0
- package/ccw/src/core/routes/files-routes.ts +428 -0
- package/ccw/src/core/routes/graph-routes.md +164 -0
- package/ccw/src/core/routes/graph-routes.ts +626 -0
- package/ccw/src/core/routes/help-routes.ts +308 -0
- package/ccw/src/core/routes/hooks-routes.ts +405 -0
- package/ccw/src/core/routes/mcp-routes.ts +1271 -0
- package/ccw/src/core/routes/mcp-routes.ts.backup +550 -0
- package/ccw/src/core/routes/mcp-templates-db.ts +268 -0
- package/ccw/src/core/routes/memory-routes.ts +1206 -0
- package/ccw/src/core/routes/rules-routes.ts +526 -0
- package/ccw/src/core/routes/session-routes.ts +467 -0
- package/ccw/src/core/routes/skills-routes.ts +599 -0
- package/ccw/src/core/routes/status-routes.ts +57 -0
- package/ccw/src/core/routes/system-routes.ts +427 -0
- package/ccw/src/core/server.ts +431 -0
- package/ccw/src/core/session-clustering-service.ts +1258 -0
- package/ccw/src/core/session-scanner.ts +283 -0
- package/ccw/src/core/websocket.ts +190 -0
- package/ccw/src/{index.js → index.ts} +1 -0
- package/ccw/src/mcp-server/index.ts +186 -0
- package/ccw/src/templates/assets/css/github-dark.min.css +10 -0
- package/ccw/src/templates/assets/css/github.min.css +10 -0
- package/ccw/src/templates/assets/js/cytoscape.min.js +32 -0
- package/ccw/src/templates/assets/js/d3.min.js +2 -0
- package/ccw/src/templates/assets/js/highlight.min.js +1244 -0
- package/ccw/src/templates/assets/js/lucide.min.js +12 -0
- package/ccw/src/templates/assets/js/marked.min.js +69 -0
- package/ccw/src/templates/assets/js/tailwind.js +83 -0
- package/ccw/src/templates/dashboard-css/01-base.css +11 -0
- package/ccw/src/templates/dashboard-css/02-session.css +22 -0
- package/ccw/src/templates/dashboard-css/04-lite-tasks.css +10 -0
- package/ccw/src/templates/dashboard-css/06-cards.css +10 -4
- package/ccw/src/templates/dashboard-css/07-managers.css +1178 -7
- package/ccw/src/templates/dashboard-css/09-explorer.css +23 -12
- package/ccw/src/templates/dashboard-css/10-cli-status.css +337 -0
- package/ccw/src/templates/dashboard-css/11-cli-history.css +271 -0
- package/ccw/src/templates/dashboard-css/12-cli-legacy.css +796 -0
- package/ccw/src/templates/dashboard-css/13-cli-ccw.css +199 -0
- package/ccw/src/templates/dashboard-css/14-cli-modals.css +258 -0
- package/ccw/src/templates/dashboard-css/15-cli-endpoints.css +305 -0
- package/ccw/src/templates/dashboard-css/16-cli-session.css +241 -0
- package/ccw/src/templates/dashboard-css/17-cli-conversation.css +283 -0
- package/ccw/src/templates/dashboard-css/18-cli-settings.css +160 -0
- package/ccw/src/templates/dashboard-css/19-cli-native-session.css +496 -0
- package/ccw/src/templates/dashboard-css/20-cli-taskqueue.css +188 -0
- package/ccw/src/templates/dashboard-css/21-cli-toolmgmt.css +310 -0
- package/ccw/src/templates/dashboard-css/22-cli-semantic.css +240 -0
- package/ccw/src/templates/dashboard-css/23-memory.css +2390 -0
- package/ccw/src/templates/dashboard-css/24-prompt-history.css +1089 -0
- package/ccw/src/templates/dashboard-css/25-skills-rules.css +326 -0
- package/ccw/src/templates/dashboard-css/26-claude-manager.css +908 -0
- package/ccw/src/templates/dashboard-css/27-graph-explorer.css +1678 -0
- package/ccw/src/templates/dashboard-css/28-mcp-manager.css +748 -0
- package/ccw/src/templates/dashboard-css/29-help.css +264 -0
- package/ccw/src/templates/dashboard-css/30-core-memory.css +1700 -0
- package/ccw/src/templates/dashboard-js/api.js +162 -142
- package/ccw/src/templates/dashboard-js/components/carousel.js +4 -4
- package/ccw/src/templates/dashboard-js/components/cli-history.js +876 -0
- package/ccw/src/templates/dashboard-js/components/cli-status.js +978 -0
- package/ccw/src/templates/dashboard-js/components/global-notifications.js +508 -219
- package/ccw/src/templates/dashboard-js/components/hook-manager.js +1277 -282
- package/ccw/src/templates/dashboard-js/components/index-manager.js +302 -0
- package/ccw/src/templates/dashboard-js/components/mcp-manager.js +718 -27
- package/ccw/src/templates/dashboard-js/components/modals.js +66 -0
- package/ccw/src/templates/dashboard-js/components/navigation.js +80 -12
- package/ccw/src/templates/dashboard-js/components/notifications.js +758 -194
- package/ccw/src/templates/dashboard-js/components/storage-manager.js +478 -0
- package/ccw/src/templates/dashboard-js/components/tabs-other.js +157 -6
- package/ccw/src/templates/dashboard-js/components/task-queue-sidebar.js +716 -0
- package/ccw/src/templates/dashboard-js/help-i18n.js +272 -0
- package/ccw/src/templates/dashboard-js/i18n.js +2807 -0
- package/ccw/src/templates/dashboard-js/main.js +15 -0
- package/ccw/src/templates/dashboard-js/state.js +243 -42
- package/ccw/src/templates/dashboard-js/utils.js +47 -1
- package/ccw/src/templates/dashboard-js/views/claude-manager.js +912 -0
- package/ccw/src/templates/dashboard-js/views/cli-manager.js +2272 -0
- package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +964 -0
- package/ccw/src/templates/dashboard-js/views/core-memory-clusters.js +503 -0
- package/ccw/src/templates/dashboard-js/views/core-memory.js +782 -0
- package/ccw/src/templates/dashboard-js/views/explorer.js +888 -852
- package/ccw/src/templates/dashboard-js/views/graph-explorer.js +1157 -0
- package/ccw/src/templates/dashboard-js/views/help.js +856 -0
- package/ccw/src/templates/dashboard-js/views/history.js +337 -0
- package/ccw/src/templates/dashboard-js/views/home.js +61 -15
- package/ccw/src/templates/dashboard-js/views/hook-manager.js +311 -43
- package/ccw/src/templates/dashboard-js/views/lite-tasks.js +204 -28
- package/ccw/src/templates/dashboard-js/views/mcp-manager.js +2187 -411
- package/ccw/src/templates/dashboard-js/views/mcp-manager.js.backup +1729 -0
- package/ccw/src/templates/dashboard-js/views/mcp-manager.js.new +928 -0
- package/ccw/src/templates/dashboard-js/views/memory.js +1221 -0
- package/ccw/src/templates/dashboard-js/views/prompt-history.js +713 -0
- package/ccw/src/templates/dashboard-js/views/rules-manager.js +828 -0
- package/ccw/src/templates/dashboard-js/views/session-detail.js +54 -53
- package/ccw/src/templates/dashboard-js/views/skills-manager.js +819 -0
- package/ccw/src/templates/dashboard.html +185 -85
- package/ccw/src/templates/hooks-config-example.json +60 -0
- package/ccw/src/tools/classify-folders.ts +245 -0
- package/ccw/src/tools/cli-config-manager.ts +268 -0
- package/ccw/src/tools/cli-executor.ts +2014 -0
- package/ccw/src/tools/cli-history-store.ts +1195 -0
- package/ccw/src/tools/codex-lens.ts +1141 -0
- package/ccw/src/tools/{convert-tokens-to-css.js → convert-tokens-to-css.ts} +73 -23
- package/ccw/src/tools/core-memory.ts +444 -0
- package/ccw/src/tools/detect-changed-modules.ts +325 -0
- package/ccw/src/tools/{discover-design-files.js → discover-design-files.ts} +74 -24
- package/ccw/src/tools/edit-file.ts +568 -0
- package/ccw/src/tools/{generate-module-docs.js → generate-module-docs.ts} +207 -185
- package/ccw/src/tools/{get-modules-by-depth.js → get-modules-by-depth.ts} +120 -79
- package/ccw/src/tools/index.ts +370 -0
- package/ccw/src/tools/native-session-discovery.ts +795 -0
- package/ccw/src/tools/notifier.ts +129 -0
- package/ccw/src/tools/read-file.ts +410 -0
- package/ccw/src/tools/resume-strategy.ts +345 -0
- package/ccw/src/tools/session-content-parser.ts +619 -0
- package/ccw/src/tools/session-manager.ts +1026 -0
- package/ccw/src/tools/smart-context.ts +228 -0
- package/ccw/src/tools/smart-search.ts +2065 -0
- package/ccw/src/tools/smart-search.ts.backup +1233 -0
- package/ccw/src/tools/storage-manager.ts +455 -0
- package/ccw/src/tools/write-file.ts +222 -0
- package/ccw/src/types/config.ts +11 -0
- package/ccw/src/types/index.ts +3 -0
- package/ccw/src/types/session.ts +25 -0
- package/ccw/src/types/tool.ts +41 -0
- package/ccw/src/utils/{browser-launcher.js → browser-launcher.ts} +10 -8
- package/ccw/src/utils/file-utils.ts +48 -0
- package/ccw/src/utils/{path-resolver.js → path-resolver.ts} +114 -78
- package/ccw/src/utils/path-validator.ts +153 -0
- package/ccw/src/utils/{ui.js → ui.ts} +32 -25
- package/codex-lens/pyproject.toml +48 -0
- package/codex-lens/src/codexlens/.workflow/.cli-history/history.db +0 -0
- package/codex-lens/src/codexlens/__init__.py +28 -0
- package/codex-lens/src/codexlens/__main__.py +14 -0
- package/codex-lens/src/codexlens/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/__main__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/entities.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/__pycache__/errors.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__init__.py +27 -0
- package/codex-lens/src/codexlens/cli/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/commands.py +1931 -0
- package/codex-lens/src/codexlens/cli/embedding_manager.py +620 -0
- package/codex-lens/src/codexlens/cli/model_manager.py +289 -0
- package/codex-lens/src/codexlens/cli/output.py +124 -0
- package/codex-lens/src/codexlens/config.py +201 -0
- package/codex-lens/src/codexlens/entities.py +121 -0
- package/codex-lens/src/codexlens/errors.py +55 -0
- package/codex-lens/src/codexlens/indexing/README.md +77 -0
- package/codex-lens/src/codexlens/indexing/__init__.py +4 -0
- package/codex-lens/src/codexlens/indexing/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/indexing/__pycache__/symbol_extractor.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/indexing/symbol_extractor.py +243 -0
- package/codex-lens/src/codexlens/parsers/__init__.py +8 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/encoding.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/factory.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/tokenizer.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/__pycache__/treesitter_parser.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/parsers/encoding.py +202 -0
- package/codex-lens/src/codexlens/parsers/factory.py +256 -0
- package/codex-lens/src/codexlens/parsers/tokenizer.py +98 -0
- package/codex-lens/src/codexlens/parsers/treesitter_parser.py +335 -0
- package/codex-lens/src/codexlens/search/__init__.py +15 -0
- package/codex-lens/src/codexlens/search/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/enrichment.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/query_parser.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/chain_search.py +647 -0
- package/codex-lens/src/codexlens/search/enrichment.py +150 -0
- package/codex-lens/src/codexlens/search/hybrid_search.py +313 -0
- package/codex-lens/src/codexlens/search/query_parser.py +242 -0
- package/codex-lens/src/codexlens/search/ranking.py +274 -0
- package/codex-lens/src/codexlens/semantic/__init__.py +39 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/ann_index.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/code_extractor.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/graph_analyzer.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/llm_enhancer.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/ann_index.py +414 -0
- package/codex-lens/src/codexlens/semantic/chunker.py +448 -0
- package/codex-lens/src/codexlens/semantic/code_extractor.py +274 -0
- package/codex-lens/src/codexlens/semantic/embedder.py +185 -0
- package/codex-lens/src/codexlens/semantic/vector_store.py +955 -0
- package/codex-lens/src/codexlens/storage/__init__.py +29 -0
- package/codex-lens/src/codexlens/storage/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/dir_index.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/file_cache.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/index_tree.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/migration_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/registry.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/sqlite_store.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/__pycache__/sqlite_utils.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/dir_index.py +1850 -0
- package/codex-lens/src/codexlens/storage/file_cache.py +32 -0
- package/codex-lens/src/codexlens/storage/index_tree.py +776 -0
- package/codex-lens/src/codexlens/storage/migration_manager.py +154 -0
- package/codex-lens/src/codexlens/storage/migrations/__init__.py +1 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_001_normalize_keywords.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_002_add_token_metadata.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_003_code_relationships.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_004_dual_fts.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_005_cleanup_unused_fields.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_001_normalize_keywords.py +123 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_002_add_token_metadata.py +48 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_004_dual_fts.py +232 -0
- package/codex-lens/src/codexlens/storage/migrations/migration_005_cleanup_unused_fields.py +196 -0
- package/codex-lens/src/codexlens/storage/path_mapper.py +274 -0
- package/codex-lens/src/codexlens/storage/registry.py +670 -0
- package/codex-lens/src/codexlens/storage/sqlite_store.py +576 -0
- package/codex-lens/src/codexlens/storage/sqlite_utils.py +64 -0
- package/package.json +4 -1
- package/.claude/commands/memory/tech-research.md +0 -477
- package/.claude/scripts/classify-folders.sh +0 -39
- package/.claude/scripts/convert_tokens_to_css.sh +0 -229
- package/.claude/scripts/detect_changed_modules.sh +0 -161
- package/.claude/scripts/discover-design-files.sh +0 -87
- package/.claude/scripts/extract-animations.js +0 -243
- package/.claude/scripts/extract-computed-styles.js +0 -118
- package/.claude/scripts/extract-layout-structure.js +0 -411
- package/.claude/scripts/generate_module_docs.sh +0 -717
- package/.claude/scripts/get_modules_by_depth.sh +0 -170
- package/.claude/scripts/ui-generate-preview.sh +0 -395
- package/.claude/scripts/ui-instantiate-prototypes.sh +0 -815
- package/.claude/scripts/update_module_claude.sh +0 -337
- package/.claude/workflows/context-search-strategy.md +0 -77
- package/.claude/workflows/intelligent-tools-strategy.md +0 -662
- package/ccw/src/cli.js +0 -119
- package/ccw/src/commands/install.js +0 -324
- package/ccw/src/commands/tool.js +0 -138
- package/ccw/src/core/data-aggregator.js +0 -409
- package/ccw/src/core/server.js +0 -2063
- package/ccw/src/core/session-scanner.js +0 -235
- package/ccw/src/tools/classify-folders.js +0 -204
- package/ccw/src/tools/detect-changed-modules.js +0 -288
- package/ccw/src/tools/edit-file.js +0 -266
- package/ccw/src/tools/index.js +0 -176
- package/ccw/src/utils/file-utils.js +0 -48
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
"""Code chunking strategies for semantic search.
|
|
2
|
+
|
|
3
|
+
This module provides various chunking strategies for breaking down source code
|
|
4
|
+
into semantic chunks suitable for embedding and search.
|
|
5
|
+
|
|
6
|
+
Lightweight Mode:
|
|
7
|
+
The ChunkConfig supports a `skip_token_count` option for performance optimization.
|
|
8
|
+
When enabled, token counting uses a fast character-based estimation (char/4)
|
|
9
|
+
instead of expensive tiktoken encoding.
|
|
10
|
+
|
|
11
|
+
Use cases for lightweight mode:
|
|
12
|
+
- Large-scale indexing where speed is critical
|
|
13
|
+
- Scenarios where approximate token counts are acceptable
|
|
14
|
+
- Memory-constrained environments
|
|
15
|
+
- Initial prototyping and development
|
|
16
|
+
|
|
17
|
+
Example:
|
|
18
|
+
# Default mode (accurate tiktoken encoding)
|
|
19
|
+
config = ChunkConfig()
|
|
20
|
+
chunker = Chunker(config)
|
|
21
|
+
|
|
22
|
+
# Lightweight mode (fast char/4 estimation)
|
|
23
|
+
config = ChunkConfig(skip_token_count=True)
|
|
24
|
+
chunker = Chunker(config)
|
|
25
|
+
chunks = chunker.chunk_file(content, symbols, path, language)
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
from dataclasses import dataclass
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import List, Optional, Tuple
|
|
33
|
+
|
|
34
|
+
from codexlens.entities import SemanticChunk, Symbol
|
|
35
|
+
from codexlens.parsers.tokenizer import get_default_tokenizer
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ChunkConfig:
    """Tunable settings shared by the chunking strategies.

    Attributes:
        max_chunk_size: Upper bound on the number of characters in a chunk.
        overlap: Overlap used by the sliding-window strategy.
        strategy: Which chunking strategy to apply; one of ``auto``,
            ``symbol``, ``sliding_window`` or ``hybrid``.
        min_chunk_size: Minimum chunk size.
        skip_token_count: When true, skip the expensive token counting and
            fall back to a char/4 estimate.
    """

    max_chunk_size: int = 1000
    overlap: int = 100
    strategy: str = "auto"
    min_chunk_size: int = 50
    skip_token_count: bool = False
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class Chunker:
    """Chunk code files for semantic embedding.

    Two strategies are offered: one chunk per extracted symbol
    (``chunk_by_symbol``) and a line-based sliding window
    (``chunk_sliding_window``). ``chunk_file`` picks between them depending
    on whether any symbols were supplied.
    """

    def __init__(self, config: ChunkConfig | None = None) -> None:
        """Create a chunker.

        Args:
            config: Chunking settings; a default ``ChunkConfig`` is used
                when omitted.
        """
        self.config = config or ChunkConfig()
        # Lightweight mode never consults the tokenizer, so do not pay for
        # building it up front. It is created on first use if the config is
        # later switched back to accurate counting.
        self._tokenizer = (
            None if self.config.skip_token_count else get_default_tokenizer()
        )

    def _estimate_token_count(self, text: str) -> int:
        """Return the token count for ``text`` according to the config.

        With ``skip_token_count`` enabled the count is estimated as
        ``len(text) // 4`` (never less than 1); otherwise the default
        tokenizer's ``count_tokens`` is used.

        Args:
            text: Text to count tokens for.

        Returns:
            Estimated or tokenizer-reported token count.
        """
        if self.config.skip_token_count:
            # Fast character-based estimation: ~4 chars per token.
            return max(1, len(text) // 4)
        if self._tokenizer is None:
            self._tokenizer = get_default_tokenizer()
        return self._tokenizer.count_tokens(text)

    def chunk_by_symbol(
        self,
        content: str,
        symbols: List[Symbol],
        file_path: str | Path,
        language: str,
        symbol_token_counts: Optional[dict[str, int]] = None,
    ) -> List[SemanticChunk]:
        """Chunk code by extracted symbols (functions, classes).

        Each symbol becomes one chunk holding the lines of its range.
        Symbols whose stripped text is shorter than ``min_chunk_size`` are
        skipped.

        Args:
            content: Source code content.
            symbols: Extracted symbols; ``symbol.range`` is read as a
                1-indexed, inclusive ``(start_line, end_line)`` pair.
            file_path: Path to the source file (stored as a string).
            language: Programming language.
            symbol_token_counts: Optional mapping of symbol name to a
                precomputed token count; symbols missing from it are
                counted with ``_estimate_token_count``.

        Returns:
            One ``SemanticChunk`` per retained symbol, in input order.
        """
        chunks: List[SemanticChunk] = []
        lines = content.splitlines(keepends=True)

        for symbol in symbols:
            start_line, end_line = symbol.range
            # Convert the 1-indexed inclusive range to a clamped slice.
            start_idx = max(0, start_line - 1)
            end_idx = min(len(lines), end_line)

            chunk_content = "".join(lines[start_idx:end_idx])
            if len(chunk_content.strip()) < self.config.min_chunk_size:
                continue

            # Prefer a caller-supplied count; otherwise compute one.
            if symbol_token_counts and symbol.name in symbol_token_counts:
                token_count = symbol_token_counts[symbol.name]
            else:
                token_count = self._estimate_token_count(chunk_content)

            chunks.append(SemanticChunk(
                content=chunk_content,
                embedding=None,
                metadata={
                    "file": str(file_path),
                    "language": language,
                    "symbol_name": symbol.name,
                    "symbol_kind": symbol.kind,
                    "start_line": start_line,
                    "end_line": end_line,
                    "strategy": "symbol",
                    "token_count": token_count,
                }
            ))

        return chunks

    def chunk_sliding_window(
        self,
        content: str,
        file_path: str | Path,
        language: str,
        line_mapping: Optional[List[int]] = None,
    ) -> List[SemanticChunk]:
        """Chunk code using a sliding window over lines.

        Used for files without clear symbol boundaries or very long
        functions. The window size and overlap are derived from
        ``max_chunk_size`` and ``overlap`` divided by the average line
        length, with a minimum of 10 lines per window and 2 lines of
        overlap.

        Args:
            content: Source code content.
            file_path: Path to the source file (stored as a string).
            language: Programming language.
            line_mapping: Optional list mapping content line indices to
                original line numbers (1-indexed). If provided,
                ``line_mapping[i]`` is the original line number for the
                i-th line in ``content``; it is indexed by window
                boundaries, so it needs an entry for every content line.

        Returns:
            The windows whose stripped text reaches ``min_chunk_size``.
        """
        chunks: List[SemanticChunk] = []
        lines = content.splitlines(keepends=True)

        if not lines:
            return chunks

        # Derive the window geometry from the average line length.
        avg_line_len = max(len(content) / len(lines), 1)
        lines_per_chunk = max(10, int(self.config.max_chunk_size / avg_line_len))
        overlap_lines = max(2, int(self.config.overlap / avg_line_len))
        # Keep the overlap strictly below the window size so the window
        # always advances by at least one line.
        overlap_lines = min(overlap_lines, lines_per_chunk - 1)
        step = lines_per_chunk - overlap_lines

        start = 0
        chunk_idx = 0

        while start < len(lines):
            end = min(start + lines_per_chunk, len(lines))
            chunk_content = "".join(lines[start:end])

            if len(chunk_content.strip()) >= self.config.min_chunk_size:
                token_count = self._estimate_token_count(chunk_content)

                if line_mapping:
                    # Translate window bounds to original line numbers.
                    start_line = line_mapping[start]
                    end_line = line_mapping[end - 1]
                else:
                    # Default: treat content as starting at line 1.
                    start_line = start + 1
                    end_line = end

                chunks.append(SemanticChunk(
                    content=chunk_content,
                    embedding=None,
                    metadata={
                        "file": str(file_path),
                        "language": language,
                        "chunk_index": chunk_idx,
                        "start_line": start_line,
                        "end_line": end_line,
                        "strategy": "sliding_window",
                        "token_count": token_count,
                    }
                ))
                chunk_idx += 1

            # The window that touches the last line is the final one.
            if end >= len(lines):
                break
            start += step

        return chunks

    def chunk_file(
        self,
        content: str,
        symbols: List[Symbol],
        file_path: str | Path,
        language: str,
        symbol_token_counts: Optional[dict[str, int]] = None,
    ) -> List[SemanticChunk]:
        """Chunk a file, choosing the strategy from the available symbols.

        Uses symbol-based chunking if any symbols are given and falls back
        to the sliding window otherwise.

        Args:
            content: Source code content.
            symbols: List of extracted symbols.
            file_path: Path to the source file.
            language: Programming language.
            symbol_token_counts: Optional dict mapping symbol names to
                token counts (only used for symbol-based chunking).

        Returns:
            The chunks produced by the selected strategy.
        """
        if symbols:
            return self.chunk_by_symbol(content, symbols, file_path, language, symbol_token_counts)
        return self.chunk_sliding_window(content, file_path, language)
|
|
230
|
+
|
|
231
|
+
class DocstringExtractor:
    """Extract docstrings and doc comments from source code.

    Extraction is line-based: a block is recognised only when a line,
    after stripping whitespace, starts with the opening delimiter.
    """

    @staticmethod
    def _find_block_end(lines: List[str], first_body_idx: int, terminator: str) -> int:
        """Return the index of the line that closes a multi-line block.

        Scans from ``first_body_idx`` for the first line containing
        ``terminator``. If none is found the block is unterminated and the
        index of the last line is returned, so reported ranges never run
        past the end of the content.
        """
        for idx in range(first_body_idx, len(lines)):
            if terminator in lines[idx]:
                return idx
        return len(lines) - 1

    @staticmethod
    def extract_python_docstrings(content: str) -> List[Tuple[str, int, int]]:
        """Extract Python triple-quoted blocks with their line ranges.

        A block starts on a line whose stripped text begins with ``\"\"\"``
        or ``'''``. If that line contains the delimiter at least twice the
        block is a single line; otherwise it extends to the next line that
        contains the same delimiter (or to the last line if unterminated).

        Args:
            content: Source code content.

        Returns:
            List of ``(docstring_content, start_line, end_line)`` tuples
            with 1-indexed, inclusive line numbers.
        """
        docstrings: List[Tuple[str, int, int]] = []
        lines = content.splitlines(keepends=True)

        i = 0
        while i < len(lines):
            stripped = lines[i].strip()
            if not stripped.startswith(('"""', "'''")):
                i += 1
                continue

            quote_type = stripped[:3]
            if stripped.count(quote_type) >= 2:
                # Opened and closed on the same line.
                end_idx = i
            else:
                end_idx = DocstringExtractor._find_block_end(lines, i + 1, quote_type)

            docstrings.append(("".join(lines[i:end_idx + 1]), i + 1, end_idx + 1))
            i = end_idx + 1

        return docstrings

    @staticmethod
    def extract_jsdoc_comments(content: str) -> List[Tuple[str, int, int]]:
        """Extract JSDoc comments with their line ranges.

        A comment starts on a line whose stripped text begins with ``/**``
        and ends on the first line containing ``*/``, which may be the
        opening line itself (or the last line if unterminated).

        Args:
            content: Source code content.

        Returns:
            List of ``(comment_content, start_line, end_line)`` tuples with
            1-indexed, inclusive line numbers.
        """
        comments: List[Tuple[str, int, int]] = []
        lines = content.splitlines(keepends=True)

        i = 0
        while i < len(lines):
            stripped = lines[i].strip()
            if not stripped.startswith('/**'):
                i += 1
                continue

            # Look for the closer after the "/*" opener so that one-line
            # comments ("/** x */", "/**/") do not swallow following lines.
            if '*/' in stripped[2:]:
                end_idx = i
            else:
                end_idx = DocstringExtractor._find_block_end(lines, i + 1, '*/')

            comments.append(("".join(lines[i:end_idx + 1]), i + 1, end_idx + 1))
            i = end_idx + 1

        return comments

    @classmethod
    def extract_docstrings(
        cls,
        content: str,
        language: str
    ) -> List[Tuple[str, int, int]]:
        """Extract docstrings based on language.

        Args:
            content: Source code content.
            language: ``"python"`` selects triple-quoted blocks;
                ``"javascript"`` and ``"typescript"`` select JSDoc
                comments; any other value yields no results.

        Returns:
            List of ``(docstring_content, start_line, end_line)`` tuples.
        """
        if language == "python":
            return cls.extract_python_docstrings(content)
        elif language in {"javascript", "typescript"}:
            return cls.extract_jsdoc_comments(content)
        return []
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class HybridChunker:
    """Hybrid chunker that prioritizes docstrings before symbol-based chunking.

    Composition-based strategy that:
    1. Extracts docstrings as dedicated chunks
    2. For remaining code, uses base chunker (symbol or sliding window)
    """

    def __init__(
        self,
        base_chunker: Chunker | None = None,
        config: ChunkConfig | None = None
    ) -> None:
        """Initialize hybrid chunker.

        Args:
            base_chunker: Chunker to use for non-docstring content; a
                ``Chunker`` built from the effective config when omitted.
            config: Configuration for chunking; defaults to ``ChunkConfig()``.
        """
        self.config = config or ChunkConfig()
        self.base_chunker = base_chunker or Chunker(self.config)
        self.docstring_extractor = DocstringExtractor()

    def _get_excluded_line_ranges(
        self,
        docstrings: List[Tuple[str, int, int]]
    ) -> set[int]:
        """Get the set of 1-indexed line numbers that are part of docstrings."""
        excluded_lines: set[int] = set()
        for _, start_line, end_line in docstrings:
            excluded_lines.update(range(start_line, end_line + 1))
        return excluded_lines

    def _filter_symbols_outside_docstrings(
        self,
        symbols: List[Symbol],
        excluded_lines: set[int]
    ) -> List[Symbol]:
        """Filter symbols to exclude those completely within docstrings.

        A symbol is kept as soon as at least one line of its inclusive
        range is not an excluded line.
        """
        filtered: List[Symbol] = []
        for symbol in symbols:
            start_line, end_line = symbol.range
            fully_excluded = all(
                line_num in excluded_lines
                for line_num in range(start_line, end_line + 1)
            )
            if not fully_excluded:
                filtered.append(symbol)
        return filtered

    def chunk_file(
        self,
        content: str,
        symbols: List[Symbol],
        file_path: str | Path,
        language: str,
        symbol_token_counts: Optional[dict[str, int]] = None,
    ) -> List[SemanticChunk]:
        """Chunk file using hybrid strategy.

        Extracts docstrings first, then chunks the code. When symbols
        remain after filtering, they are chunked by symbol over the full
        content; otherwise the non-docstring lines are chunked with the
        sliding window.

        Args:
            content: Source code content.
            symbols: List of extracted symbols.
            file_path: Path to the source file.
            language: Programming language.
            symbol_token_counts: Optional dict mapping symbol names to
                token counts.

        Returns:
            Docstring chunks (``chunk_type`` ``"docstring"``) followed by
            code chunks (``chunk_type`` ``"code"``), all tagged with
            ``strategy`` ``"hybrid"``.
        """
        chunks: List[SemanticChunk] = []

        # Step 1: Extract docstrings as dedicated chunks
        docstrings = self.docstring_extractor.extract_docstrings(content, language)

        for docstring_content, start_line, end_line in docstrings:
            if len(docstring_content.strip()) >= self.config.min_chunk_size:
                # Use base chunker's token estimation method
                token_count = self.base_chunker._estimate_token_count(docstring_content)
                chunks.append(SemanticChunk(
                    content=docstring_content,
                    embedding=None,
                    metadata={
                        "file": str(file_path),
                        "language": language,
                        "chunk_type": "docstring",
                        "start_line": start_line,
                        "end_line": end_line,
                        "strategy": "hybrid",
                        "token_count": token_count,
                    }
                ))

        # Step 2: Get line ranges occupied by docstrings
        excluded_lines = self._get_excluded_line_ranges(docstrings)

        # Step 3: Filter symbols to exclude docstring-only ranges
        filtered_symbols = self._filter_symbols_outside_docstrings(symbols, excluded_lines)

        # Step 4: Chunk remaining content using base chunker
        if filtered_symbols:
            base_chunks = self.base_chunker.chunk_by_symbol(
                content, filtered_symbols, file_path, language, symbol_token_counts
            )
        else:
            base_chunks = []
            # Keep each surviving line together with its original number so
            # the sliding window can report positions in the real file
            # rather than in the docstring-stripped text.
            kept = [
                (line_num, line)
                for line_num, line in enumerate(content.splitlines(keepends=True), start=1)
                if line_num not in excluded_lines
            ]
            if kept:
                remaining_content = "".join(line for _, line in kept)
                if len(remaining_content.strip()) >= self.config.min_chunk_size:
                    base_chunks = self.base_chunker.chunk_sliding_window(
                        remaining_content,
                        file_path,
                        language,
                        line_mapping=[line_num for line_num, _ in kept],
                    )

        for chunk in base_chunks:
            chunk.metadata["strategy"] = "hybrid"
            chunk.metadata["chunk_type"] = "code"
            chunks.append(chunk)

        return chunks
|