codeboarding 0.11.0__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeboarding-0.11.0/codeboarding.egg-info → codeboarding-0.12.0}/PKG-INFO +2 -2
- {codeboarding-0.11.0 → codeboarding-0.12.0}/README.md +1 -1
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/agent.py +72 -51
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/agent_responses.py +186 -32
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/cluster_methods_mixin.py +53 -8
- codeboarding-0.12.0/agents/incremental_agent.py +787 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/llm_config.py +6 -6
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/__init__.py +4 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/abstract_prompt_factory.py +2 -2
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/claude_prompts.py +82 -62
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/deepseek_prompts.py +66 -42
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/gemini_flash_prompts.py +53 -54
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/glm_prompts.py +69 -36
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/gpt_prompts.py +57 -54
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/kimi_prompts.py +60 -50
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/prompt_factory.py +4 -4
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/validation.py +66 -1
- {codeboarding-0.11.0 → codeboarding-0.12.0/codeboarding.egg-info}/PKG-INFO +2 -2
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding.egg-info/SOURCES.txt +7 -16
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding.egg-info/requires.txt +1 -1
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding.egg-info/top_level.txt +0 -1
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_cli/commands/full_analysis.py +9 -0
- codeboarding-0.12.0/codeboarding_cli/commands/incremental_analysis.py +139 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_cli/commands/partial_analysis.py +0 -1
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/__init__.py +4 -3
- codeboarding-0.12.0/codeboarding_workflows/analysis.py +238 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/core/__init__.py +2 -1
- {codeboarding-0.11.0 → codeboarding-0.12.0}/core/protocols.py +2 -1
- {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/analysis_json.py +6 -1
- codeboarding-0.12.0/diagram_analysis/cluster_delta.py +455 -0
- codeboarding-0.12.0/diagram_analysis/cluster_snapshot.py +101 -0
- codeboarding-0.12.0/diagram_analysis/diagram_generator.py +735 -0
- codeboarding-0.12.0/diagram_analysis/exceptions.py +43 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/io_utils.py +143 -33
- codeboarding-0.12.0/diagram_analysis/run_mode.py +10 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/github_action.py +2 -1
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/cohesion.py +6 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/runner.py +9 -3
- {codeboarding-0.11.0 → codeboarding-0.12.0}/main.py +0 -6
- {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/paths.py +4 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/pyproject.toml +2 -4
- {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/change_detector.py +4 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/diff_parser.py +16 -3
- {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/git_ops.py +79 -43
- {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/ignore.py +33 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/__init__.py +308 -266
- codeboarding-0.12.0/static_analyzer/analysis_cache.py +475 -0
- codeboarding-0.12.0/static_analyzer/analysis_result.py +273 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/cluster_helpers.py +52 -30
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/constants.py +2 -4
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/graph.py +110 -22
- codeboarding-0.12.0/static_analyzer/incremental_orchestrator.py +125 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/java_utils.py +8 -10
- codeboarding-0.12.0/static_analyzer/language_results.py +128 -0
- codeboarding-0.12.0/static_analyzer/leiden_utils.py +103 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/reference_resolve_mixin.py +8 -4
- codeboarding-0.12.0/static_analyzer/typescript_config_scanner.py +235 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_github_action.py +18 -6
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_main.py +89 -16
- {codeboarding-0.11.0 → codeboarding-0.12.0}/user_config.py +2 -2
- {codeboarding-0.11.0 → codeboarding-0.12.0}/utils.py +17 -0
- codeboarding-0.11.0/agents/analysis_patcher.py +0 -206
- codeboarding-0.11.0/codeboarding_cli/commands/incremental_analysis.py +0 -137
- codeboarding-0.11.0/codeboarding_workflows/analysis.py +0 -144
- codeboarding-0.11.0/diagram_analysis/diagram_generator.py +0 -679
- codeboarding-0.11.0/diagram_analysis/ease.py +0 -68
- codeboarding-0.11.0/diagram_analysis/incremental/delta.py +0 -84
- codeboarding-0.11.0/diagram_analysis/incremental/models.py +0 -220
- codeboarding-0.11.0/diagram_analysis/incremental/payload.py +0 -129
- codeboarding-0.11.0/diagram_analysis/incremental/pipeline.py +0 -264
- codeboarding-0.11.0/diagram_analysis/incremental/semantic_diff.py +0 -557
- codeboarding-0.11.0/diagram_analysis/incremental/trace_planner.py +0 -435
- codeboarding-0.11.0/diagram_analysis/incremental/tracer.py +0 -458
- codeboarding-0.11.0/diagram_analysis/incremental/updater.py +0 -460
- codeboarding-0.11.0/diagram_analysis/run_metadata.py +0 -146
- codeboarding-0.11.0/duckdb_crud.py +0 -125
- codeboarding-0.11.0/health/constants.py +0 -19
- codeboarding-0.11.0/health_main.py +0 -151
- codeboarding-0.11.0/output_generators/__init__.py +0 -0
- codeboarding-0.11.0/static_analyzer/analysis_cache.py +0 -761
- codeboarding-0.11.0/static_analyzer/analysis_result.py +0 -488
- codeboarding-0.11.0/static_analyzer/cluster_change_analyzer.py +0 -391
- codeboarding-0.11.0/static_analyzer/incremental_orchestrator.py +0 -644
- codeboarding-0.11.0/static_analyzer/typescript_config_scanner.py +0 -54
- {codeboarding-0.11.0 → codeboarding-0.12.0}/LICENSE +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/PYPI.md +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/abstraction_agent.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/change_status.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/cluster_budget.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/constants.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/dependency_discovery.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/details_agent.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/meta_agent.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/model_capabilities.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/planner_agent.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/retry.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/base.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/get_external_deps.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/get_method_invocations.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_cfg.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_docs.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_file.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_file_structure.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_packages.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_source.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_structure.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/toolkit.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/caching/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/caching/cache.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/caching/details_cache.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/caching/meta_cache.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding.egg-info/dependency_links.txt +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding.egg-info/entry_points.txt +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_cli/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_cli/bootstrap.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_cli/commands/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/orchestration.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/rendering.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/sources/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/sources/local.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/sources/remote.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/constants.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/core/plugin_loader.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/core/registry.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/file_coverage.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/run_context.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/version.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/circular_deps.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/coupling.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/function_size.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/god_class.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/inheritance.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/instability.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/unused_code_diagnostics.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/config.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/health/models.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/install.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/logging_config.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/callbacks.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/context.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/mixin.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/stats.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/writers.py +0 -0
- {codeboarding-0.11.0/diagram_analysis/incremental → codeboarding-0.12.0/output_generators}/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/output_generators/html.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/output_generators/html_template.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/output_generators/markdown.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/output_generators/mdx.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/output_generators/sphinx.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/errors.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/setup.cfg +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/cfg_skip_planner.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/cluster_relations.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/csharp_config_scanner.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/csharp_adapter.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/go_adapter.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/java_adapter.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/php_adapter.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/python_adapter.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/rust_adapter.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/typescript_adapter.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/call_graph_builder.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/edge_build_context.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/edge_builder.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/hierarchy_builder.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/language_adapter.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/lsp_client.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/lsp_constants.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/models.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/progress.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/protocols.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/result_converter.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/source_inspector.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/symbol_table.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/utils.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/java_config_scanner.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/lsp_client/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/lsp_client/diagnostics.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/node.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/programming_language.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/scanner.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_cli_parser.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_install.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_logging_config.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_pyproject_packages.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_registry_coverage.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_tool_registry.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_user_config.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_vscode_constants.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_windows_compatibility.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_windows_encoding.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tool_registry/__init__.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tool_registry/installers.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tool_registry/manifest.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tool_registry/paths.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/tool_registry/registry.py +0 -0
- {codeboarding-0.11.0 → codeboarding-0.12.0}/vscode_constants.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeboarding
|
|
3
|
-
Version: 0.11.0
|
|
3
|
+
Version: 0.12.0
|
|
4
4
|
Summary: Interactive Diagrams for Code
|
|
5
5
|
Author: CodeBoarding Team
|
|
6
6
|
License-Expression: MIT
|
|
@@ -18,7 +18,6 @@ Description-Content-Type: text/markdown
|
|
|
18
18
|
License-File: LICENSE
|
|
19
19
|
Requires-Dist: docker>=7.1
|
|
20
20
|
Requires-Dist: dotenv>=0.9
|
|
21
|
-
Requires-Dist: duckdb>=1.3
|
|
22
21
|
Requires-Dist: dulwich>=0.22
|
|
23
22
|
Requires-Dist: fastapi>=0.115
|
|
24
23
|
Requires-Dist: filelock>=3.12
|
|
@@ -34,6 +33,7 @@ Requires-Dist: langchain-community>=0.4
|
|
|
34
33
|
Requires-Dist: langchain-google-genai>=3.1
|
|
35
34
|
Requires-Dist: langchain-ollama>=1.0
|
|
36
35
|
Requires-Dist: langchain-openai>=1.1
|
|
36
|
+
Requires-Dist: leidenalg>=0.10
|
|
37
37
|
Requires-Dist: markdown>=3.8
|
|
38
38
|
Requires-Dist: markdown-it-py>=3.0
|
|
39
39
|
Requires-Dist: markitdown>=0.1
|
|
@@ -143,7 +143,7 @@ python main.py full https://github.com/pytorch/pytorch
|
|
|
143
143
|
|
|
144
144
|
## Supported stack
|
|
145
145
|
|
|
146
|
-
- Languages: Python, TypeScript, JavaScript, Java, Go, PHP, Rust
|
|
146
|
+
- Languages: Python, TypeScript, JavaScript, Java, Go, PHP, Rust, C#.
|
|
147
147
|
- LLM providers: OpenAI, Anthropic, Google, Vercel AI Gateway, AWS Bedrock, Ollama, OpenRouter, and more.
|
|
148
148
|
|
|
149
149
|
## Examples
|
|
@@ -5,7 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
from google.api_core.exceptions import ResourceExhausted
|
|
6
6
|
from langchain_core.exceptions import OutputParserException
|
|
7
7
|
from langchain_core.language_models import BaseChatModel
|
|
8
|
-
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
|
|
8
|
+
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
|
|
9
9
|
from langchain_core.output_parsers import PydanticOutputParser
|
|
10
10
|
from langchain_core.prompts import PromptTemplate
|
|
11
11
|
from langchain.agents import create_agent
|
|
@@ -20,6 +20,7 @@ from agents.tools.toolkit import CodeBoardingToolkit
|
|
|
20
20
|
from agents.validation import ValidationResult, score_validation_results, VALIDATOR_WEIGHTS, DEFAULT_VALIDATOR_WEIGHT
|
|
21
21
|
from monitoring.mixin import MonitoringMixin
|
|
22
22
|
from repo_utils.ignore import RepoIgnoreManager
|
|
23
|
+
from agents.agent_responses import LLMBaseModel
|
|
23
24
|
from agents.llm_config import MONITORING_CALLBACK
|
|
24
25
|
from static_analyzer.analysis_result import StaticAnalysisResults
|
|
25
26
|
from static_analyzer.reference_resolve_mixin import ReferenceResolverMixin
|
|
@@ -43,10 +44,10 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
|
|
|
43
44
|
ReferenceResolverMixin.__init__(self, repo_dir, static_analysis)
|
|
44
45
|
MonitoringMixin.__init__(self)
|
|
45
46
|
self.parsing_llm = parsing_llm
|
|
47
|
+
self.agent_llm = agent_llm
|
|
46
48
|
self.repo_dir = repo_dir
|
|
47
49
|
self.ignore_manager = RepoIgnoreManager(repo_dir)
|
|
48
50
|
|
|
49
|
-
# Initialize the professional toolkit
|
|
50
51
|
context = RepoContext(repo_dir=repo_dir, ignore_manager=self.ignore_manager, static_analysis=static_analysis)
|
|
51
52
|
self.toolkit = CodeBoardingToolkit(context=context)
|
|
52
53
|
|
|
@@ -200,10 +201,10 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
|
|
|
200
201
|
except Empty:
|
|
201
202
|
raise RuntimeError("Agent invocation completed but no result was returned")
|
|
202
203
|
|
|
203
|
-
def _parse_invoke(self, prompt: str, type: type):
|
|
204
|
+
def _parse_invoke(self, prompt: str, type: type, include_hidden: bool = False):
|
|
204
205
|
response = self._invoke(prompt)
|
|
205
206
|
assert isinstance(response, str), f"Expected a string as response type got {response}"
|
|
206
|
-
return self._parse_response(prompt, response, type)
|
|
207
|
+
return self._parse_response(prompt, response, type, include_hidden=include_hidden)
|
|
207
208
|
|
|
208
209
|
def _score_result(self, result, validators: list, context) -> tuple[float, list[tuple[float, str]]]:
|
|
209
210
|
"""Run all validators on a result and return (score, prioritized_feedback).
|
|
@@ -233,7 +234,13 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
|
|
|
233
234
|
return score, weighted_feedback
|
|
234
235
|
|
|
235
236
|
def _validation_invoke(
|
|
236
|
-
self,
|
|
237
|
+
self,
|
|
238
|
+
prompt: str,
|
|
239
|
+
return_type: type,
|
|
240
|
+
validators: list,
|
|
241
|
+
context,
|
|
242
|
+
max_validation_attempts: int = 1,
|
|
243
|
+
include_hidden: bool = False,
|
|
237
244
|
):
|
|
238
245
|
"""
|
|
239
246
|
Invoke LLM with validation, feedback loop, and best-of-N selection.
|
|
@@ -261,7 +268,12 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
|
|
|
261
268
|
# Compute the maximum possible score so we can detect a perfect result
|
|
262
269
|
max_possible_score = sum(VALIDATOR_WEIGHTS.get(v.__name__, DEFAULT_VALIDATOR_WEIGHT) for v in validators)
|
|
263
270
|
|
|
264
|
-
result = self._parse_invoke(prompt, return_type)
|
|
271
|
+
result = self._parse_invoke(prompt, return_type, include_hidden=include_hidden)
|
|
272
|
+
logger.info(
|
|
273
|
+
"[Validation] Parsed %s: %s",
|
|
274
|
+
return_type.__name__,
|
|
275
|
+
result.llm_str()[:500],
|
|
276
|
+
)
|
|
265
277
|
|
|
266
278
|
# Track the best candidate across all attempts
|
|
267
279
|
best_result = result
|
|
@@ -314,42 +326,33 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
|
|
|
314
326
|
f"[Validation] Preparing attempt {attempt + 1}/{max_validation_attempts} "
|
|
315
327
|
f"with {len(weighted_feedback)} feedback items"
|
|
316
328
|
)
|
|
317
|
-
result = self._parse_invoke(feedback_prompt, return_type)
|
|
329
|
+
result = self._parse_invoke(feedback_prompt, return_type, include_hidden=include_hidden)
|
|
318
330
|
|
|
319
331
|
return best_result
|
|
320
332
|
|
|
321
|
-
def _parse_response(self, prompt, response, return_type, max_retries=5, attempt=0):
|
|
333
|
+
def _parse_response(self, prompt, response, return_type, max_retries=5, attempt=0, include_hidden: bool = False):
|
|
322
334
|
if response is None or response.strip() == "":
|
|
323
335
|
logger.error(f"Empty response for prompt: {prompt}")
|
|
324
336
|
|
|
337
|
+
if include_hidden and issubclass(return_type, LLMBaseModel):
|
|
338
|
+
schema = return_type.model_json_schema(include_hidden=True)
|
|
339
|
+
parser = PydanticOutputParser(pydantic_object=return_type)
|
|
340
|
+
format_instructions = (
|
|
341
|
+
f"The output should be formatted as a JSON instance that conforms to the JSON schema below.\n"
|
|
342
|
+
f"Here is the output schema:\n```json\n{json.dumps(schema, indent=2)}\n```"
|
|
343
|
+
)
|
|
344
|
+
else:
|
|
345
|
+
parser = PydanticOutputParser(pydantic_object=return_type)
|
|
346
|
+
format_instructions = parser.get_format_instructions()
|
|
347
|
+
|
|
325
348
|
def call_once():
|
|
326
|
-
# Extractor is rebuilt on every attempt — previous trustcall state
|
|
327
|
-
# may have corrupted attributes (see the tool_call_id bug below).
|
|
328
|
-
extractor = create_extractor(self.parsing_llm, tools=[return_type], tool_choice=return_type.__name__)
|
|
329
349
|
try:
|
|
330
|
-
result =
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
# 'ExtractionState' object has no attribute 'tool_call_id' during validation retry.
|
|
337
|
-
# Treat as a non-retriable fallback to the Pydantic parser.
|
|
338
|
-
if "tool_call_id" in str(e):
|
|
339
|
-
logger.warning(f"Trustcall bug encountered, falling back to Pydantic parser: {e}")
|
|
340
|
-
parser = PydanticOutputParser(pydantic_object=return_type)
|
|
341
|
-
return self._try_parse(response, parser)
|
|
342
|
-
raise
|
|
343
|
-
if "responses" in result and len(result["responses"]) != 0:
|
|
344
|
-
return return_type.model_validate(result["responses"][0])
|
|
345
|
-
if "messages" in result and len(result["messages"]) != 0:
|
|
346
|
-
message = result["messages"][0].content
|
|
347
|
-
parser = PydanticOutputParser(pydantic_object=return_type)
|
|
348
|
-
if not message:
|
|
349
|
-
raise EmptyExtractorMessageError("Extractor returned empty message content")
|
|
350
|
-
return self._try_parse(message, parser)
|
|
351
|
-
parser = PydanticOutputParser(pydantic_object=return_type)
|
|
352
|
-
return self._try_parse(response, parser)
|
|
350
|
+
result = self._structured_parse(response, parser, format_instructions=format_instructions)
|
|
351
|
+
logger.debug("[parse_response] structured_parse succeeded for %s", return_type.__name__)
|
|
352
|
+
return result
|
|
353
|
+
except Exception as e:
|
|
354
|
+
logger.warning("[parse_response] structured_parse failed for %s: %s", return_type.__name__, e)
|
|
355
|
+
return self._extractor_parse(response, return_type, parser, include_hidden=include_hidden)
|
|
353
356
|
|
|
354
357
|
def classify(exc: Exception, attempt: int) -> RetryDecision:
|
|
355
358
|
if isinstance(exc, ResourceExhausted):
|
|
@@ -359,20 +362,15 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
|
|
|
359
362
|
)
|
|
360
363
|
if isinstance(exc, (EmptyExtractorMessageError, IndexError, json.JSONDecodeError, ValueError)):
|
|
361
364
|
return RetryDecision(action=RetryAction.RETRY_NOW)
|
|
362
|
-
# AttributeError (non-tool_call_id) and any other exception: give up.
|
|
363
365
|
return RetryDecision(action=RetryAction.GIVE_UP)
|
|
364
366
|
|
|
365
367
|
def on_exhausted(exc: Exception):
|
|
366
|
-
# Preserve historic shape: ResourceExhausted surfaces the original exception;
|
|
367
|
-
# parse-error exhaustion wraps with a descriptive message naming the response.
|
|
368
368
|
if isinstance(exc, ResourceExhausted):
|
|
369
369
|
logger.error(f"Resource exhausted on final parsing attempt: {exc}")
|
|
370
370
|
raise exc
|
|
371
371
|
logger.error(f"Max retries ({max_retries}) reached for parsing response: {response}")
|
|
372
372
|
raise Exception(f"Max retries reached for parsing response: {response}")
|
|
373
373
|
|
|
374
|
-
# ``attempt`` kwarg kept for backwards-compat with callers that passed it;
|
|
375
|
-
# the effective attempt count is ``max_retries - attempt``.
|
|
376
374
|
return with_retries(
|
|
377
375
|
call_once,
|
|
378
376
|
max_attempts=max(1, max_retries - attempt),
|
|
@@ -381,19 +379,21 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
|
|
|
381
379
|
log_prefix="Parse response",
|
|
382
380
|
)
|
|
383
381
|
|
|
384
|
-
def
|
|
385
|
-
|
|
386
|
-
|
|
382
|
+
def _structured_parse(self, message_content, parser, format_instructions: str | None = None):
|
|
383
|
+
if format_instructions is None:
|
|
384
|
+
format_instructions = parser.get_format_instructions()
|
|
385
|
+
prompt_template = """You are a JSON expert. Here you need to extract information in the following json format: {format_instructions}
|
|
387
386
|
|
|
388
|
-
|
|
387
|
+
Here is the content to parse and fix: {adjective}
|
|
389
388
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
389
|
+
Please provide only the JSON output without any additional text."""
|
|
390
|
+
prompt = PromptTemplate(
|
|
391
|
+
template=prompt_template,
|
|
392
|
+
input_variables=["adjective"],
|
|
393
|
+
partial_variables={"format_instructions": format_instructions},
|
|
394
|
+
)
|
|
395
|
+
chain = prompt | self.parsing_llm | parser
|
|
396
|
+
try:
|
|
397
397
|
return chain.invoke(
|
|
398
398
|
{"adjective": message_content},
|
|
399
399
|
config={"callbacks": [MONITORING_CALLBACK, self.agent_monitoring_callback]},
|
|
@@ -401,7 +401,28 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
|
|
|
401
401
|
except (ValidationError, OutputParserException):
|
|
402
402
|
for _, v in json.loads(message_content).items():
|
|
403
403
|
try:
|
|
404
|
-
return self._try_parse(json.dumps(v), parser)
|
|
404
|
+
return self._structured_parse(json.dumps(v), parser)
|
|
405
405
|
except:
|
|
406
406
|
pass
|
|
407
407
|
raise ValueError(f"Couldn't parse {message_content}")
|
|
408
|
+
|
|
409
|
+
def _extractor_parse(self, response, return_type, parser, include_hidden: bool = False):
|
|
410
|
+
extractor = create_extractor(self.parsing_llm, tools=[return_type], tool_choice=return_type.__name__)
|
|
411
|
+
try:
|
|
412
|
+
result = extractor.invoke(
|
|
413
|
+
return_type.extractor_str(include_hidden=include_hidden) + response,
|
|
414
|
+
config={"callbacks": [MONITORING_CALLBACK, self.agent_monitoring_callback]},
|
|
415
|
+
)
|
|
416
|
+
except AttributeError as e:
|
|
417
|
+
if "tool_call_id" in str(e):
|
|
418
|
+
logger.warning(f"Trustcall bug encountered: {e}")
|
|
419
|
+
raise
|
|
420
|
+
raise
|
|
421
|
+
if "responses" in result and len(result["responses"]) != 0:
|
|
422
|
+
return return_type.model_validate(result["responses"][0])
|
|
423
|
+
if "messages" in result and len(result["messages"]) != 0:
|
|
424
|
+
message = result["messages"][0].content
|
|
425
|
+
if not message:
|
|
426
|
+
raise EmptyExtractorMessageError("Extractor returned empty message content")
|
|
427
|
+
return self._structured_parse(message, parser)
|
|
428
|
+
raise EmptyExtractorMessageError("Extractor returned no responses and no messages")
|
|
@@ -7,6 +7,7 @@ from pathlib import PurePosixPath
|
|
|
7
7
|
from typing import get_origin, Optional
|
|
8
8
|
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
|
+
from pydantic.fields import FieldInfo
|
|
10
11
|
|
|
11
12
|
logger = logging.getLogger(__name__)
|
|
12
13
|
|
|
@@ -19,30 +20,104 @@ class LLMBaseModel(BaseModel, abc.ABC):
|
|
|
19
20
|
raise NotImplementedError("LLM String has to be implemented.")
|
|
20
21
|
|
|
21
22
|
@classmethod
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
def _is_field_hidden(cls, fvalue: FieldInfo) -> bool:
|
|
24
|
+
if fvalue.exclude:
|
|
25
|
+
return True
|
|
26
|
+
extra = fvalue.json_schema_extra
|
|
27
|
+
if isinstance(extra, dict):
|
|
28
|
+
return bool(extra.get("hidden"))
|
|
29
|
+
return False
|
|
30
|
+
|
|
31
|
+
@classmethod
|
|
32
|
+
def _excluded_fields(cls, include_hidden: bool = False) -> set[str]:
|
|
33
|
+
if include_hidden:
|
|
34
|
+
return set()
|
|
35
|
+
names: set[str] = set()
|
|
36
|
+
for klass in cls.__mro__:
|
|
37
|
+
if hasattr(klass, "model_fields"):
|
|
38
|
+
for fname, finfo in klass.model_fields.items():
|
|
39
|
+
if cls._is_field_hidden(finfo):
|
|
40
|
+
names.add(fname)
|
|
41
|
+
return names
|
|
42
|
+
|
|
43
|
+
@classmethod
|
|
44
|
+
def _resolve_excluded_by_title(cls, include_hidden: bool = False) -> dict[str, set[str]]:
|
|
45
|
+
seen: set[type] = set()
|
|
46
|
+
result: dict[str, set[str]] = {}
|
|
47
|
+
|
|
48
|
+
def walk(model: type) -> None:
|
|
49
|
+
if model in seen or not hasattr(model, "model_fields"):
|
|
50
|
+
return
|
|
51
|
+
seen.add(model)
|
|
52
|
+
title = getattr(model, "__name__", "")
|
|
53
|
+
excluded = model._excluded_fields(include_hidden) # type: ignore[attr-defined]
|
|
54
|
+
if excluded:
|
|
55
|
+
result[title] = excluded
|
|
56
|
+
for finfo in getattr(model, "model_fields", {}).values():
|
|
57
|
+
ann = finfo.annotation
|
|
58
|
+
for candidate in getattr(ann, "__args__", [ann]):
|
|
59
|
+
if isinstance(candidate, type) and issubclass(candidate, LLMBaseModel):
|
|
60
|
+
walk(candidate) # type: ignore[arg-type]
|
|
61
|
+
|
|
62
|
+
walk(cls)
|
|
63
|
+
return result
|
|
64
|
+
|
|
65
|
+
@classmethod
|
|
66
|
+
def _extractor_fields(cls, indent: str = " ", include_hidden: bool = False) -> str:
|
|
67
|
+
parts: list[str] = []
|
|
25
68
|
for fname, fvalue in cls.model_fields.items():
|
|
26
|
-
if
|
|
69
|
+
if cls._is_field_hidden(fvalue) and not include_hidden:
|
|
27
70
|
continue
|
|
28
|
-
# check if the field type is Optional
|
|
29
71
|
ftype = fvalue.annotation
|
|
30
|
-
# Check if the type is a typing.List (e.g., typing.List[SomeType])
|
|
31
72
|
if get_origin(ftype) is list:
|
|
32
|
-
# get the type of the list:
|
|
33
73
|
if ftype is not None and hasattr(ftype, "__args__"):
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
74
|
+
inner = ftype.__args__[0]
|
|
75
|
+
if isinstance(inner, type) and issubclass(inner, LLMBaseModel):
|
|
76
|
+
parts.append(
|
|
77
|
+
f"{indent}- {fname}: a list, where each item has:\n{inner._extractor_fields(indent + ' ', include_hidden)}"
|
|
78
|
+
)
|
|
79
|
+
continue
|
|
80
|
+
parts.append(f"{indent}- {fname}: {fvalue.description}")
|
|
81
|
+
elif isinstance(ftype, type) and issubclass(ftype, LLMBaseModel):
|
|
82
|
+
parts.append(ftype._extractor_fields(indent, include_hidden))
|
|
41
83
|
else:
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
84
|
+
parts.append(f"{indent}- {fname}: {fvalue.description}")
|
|
85
|
+
return "\n".join(parts)
|
|
86
|
+
|
|
87
|
+
@classmethod
|
|
88
|
+
def extractor_str(cls, include_hidden: bool = False) -> str:
|
|
89
|
+
title = cls.__name__
|
|
90
|
+
fields = cls._extractor_fields(include_hidden=include_hidden)
|
|
91
|
+
return (
|
|
92
|
+
f"You are a JSON extraction expert. "
|
|
93
|
+
f"Extract a valid JSON object of type `{title}` from the text below.\n"
|
|
94
|
+
f"The JSON must have these fields:\n{fields}\n\n"
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
@classmethod
|
|
98
|
+
def model_json_schema(
|
|
99
|
+
cls,
|
|
100
|
+
by_alias: bool = True,
|
|
101
|
+
ref_template: str = "#/$defs/{model}",
|
|
102
|
+
schema_generator: type | None = None,
|
|
103
|
+
mode: str = "validation",
|
|
104
|
+
include_hidden: bool = False,
|
|
105
|
+
**kwargs,
|
|
106
|
+
) -> dict:
|
|
107
|
+
call_kwargs: dict = {"by_alias": by_alias, "ref_template": ref_template, "mode": mode}
|
|
108
|
+
if schema_generator is not None:
|
|
109
|
+
call_kwargs["schema_generator"] = schema_generator
|
|
110
|
+
call_kwargs.update(kwargs)
|
|
111
|
+
schema = super().model_json_schema(**call_kwargs)
|
|
112
|
+
excluded_by_title = cls._resolve_excluded_by_title(include_hidden)
|
|
113
|
+
for title, excluded in excluded_by_title.items():
|
|
114
|
+
defn = schema.get("$defs", {}).get(title)
|
|
115
|
+
if isinstance(defn, dict) and "properties" in defn:
|
|
116
|
+
defn["properties"] = {k: v for k, v in defn["properties"].items() if k not in excluded}
|
|
117
|
+
own_excluded = cls._excluded_fields(include_hidden)
|
|
118
|
+
if "properties" in schema:
|
|
119
|
+
schema["properties"] = {k: v for k, v in schema["properties"].items() if k not in own_excluded}
|
|
120
|
+
return schema
|
|
46
121
|
|
|
47
122
|
|
|
48
123
|
class SourceCodeReference(LLMBaseModel):
|
|
@@ -114,6 +189,39 @@ class ClustersComponent(LLMBaseModel):
|
|
|
114
189
|
description: str = Field(
|
|
115
190
|
description="Explanation of what this component does, its main flow, WHY these clusters are grouped together, how it interacts with other cluster groups, and the most important classes/methods (by their exact qualified names from the clusters)"
|
|
116
191
|
)
|
|
192
|
+
existing_component_id: str | None = Field(
|
|
193
|
+
default=None,
|
|
194
|
+
description=(
|
|
195
|
+
"Incremental routing: the exact component_id of the existing component "
|
|
196
|
+
"this entry is routing clusters into (e.g. '1.3'). Set to null to create "
|
|
197
|
+
"a brand-new component. Identity is by ID, not name — leaving this null "
|
|
198
|
+
"while reusing an existing component's name forks a duplicate component. "
|
|
199
|
+
"Ignored by the full-analysis flow."
|
|
200
|
+
),
|
|
201
|
+
json_schema_extra={"hidden": True},
|
|
202
|
+
)
|
|
203
|
+
parent_id: str | None = Field(
|
|
204
|
+
default=None,
|
|
205
|
+
description=(
|
|
206
|
+
"Incremental routing: when ``existing_component_id`` is null (brand-new "
|
|
207
|
+
"component), the existing component_id under which the new component "
|
|
208
|
+
"should attach (or null to attach at root). Ignored when "
|
|
209
|
+
"``existing_component_id`` is set, and ignored by the full-analysis flow."
|
|
210
|
+
),
|
|
211
|
+
json_schema_extra={"hidden": True},
|
|
212
|
+
)
|
|
213
|
+
redetail_needed: bool = Field(
|
|
214
|
+
default=True,
|
|
215
|
+
description=(
|
|
216
|
+
"Incremental routing only: when routing clusters into an existing component "
|
|
217
|
+
"(``existing_component_id`` is set), set False if the cluster delta is "
|
|
218
|
+
"cosmetic (refactor, internal rename, small bug fix) and the component's "
|
|
219
|
+
"high-level purpose is unchanged — the existing description stays. Default "
|
|
220
|
+
"True forces a full redetail. Ignored for brand-new components (always "
|
|
221
|
+
"redetailed) and by the full-analysis flow."
|
|
222
|
+
),
|
|
223
|
+
json_schema_extra={"hidden": True},
|
|
224
|
+
)
|
|
117
225
|
|
|
118
226
|
def llm_str(self):
|
|
119
227
|
ids_str = ", ".join(str(cid) for cid in self.cluster_ids)
|
|
@@ -151,15 +259,6 @@ class MethodEntry(BaseModel):
|
|
|
151
259
|
return NotImplemented
|
|
152
260
|
return self.qualified_name == other.qualified_name
|
|
153
261
|
|
|
154
|
-
@classmethod
|
|
155
|
-
def from_method_change(cls, method_change) -> MethodEntry:
|
|
156
|
-
return cls(
|
|
157
|
-
qualified_name=method_change.qualified_name,
|
|
158
|
-
start_line=method_change.start_line,
|
|
159
|
-
end_line=method_change.end_line,
|
|
160
|
-
node_type=method_change.node_type,
|
|
161
|
-
)
|
|
162
|
-
|
|
163
262
|
@classmethod
|
|
164
263
|
def from_node(cls, node) -> MethodEntry:
|
|
165
264
|
"""Build from a ``static_analyzer.Node``. Accepts ``Any`` to avoid a hard dep."""
|
|
@@ -210,18 +309,21 @@ class Component(LLMBaseModel):
|
|
|
210
309
|
description="List of cluster IDs from CFG analysis that this component encompasses (populated deterministically from source_group_names).",
|
|
211
310
|
default_factory=list,
|
|
212
311
|
exclude=True,
|
|
312
|
+
json_schema_extra={"hidden": True},
|
|
213
313
|
)
|
|
214
314
|
|
|
215
315
|
file_methods: list[FileMethodGroup] = Field(
|
|
216
316
|
description="All methods/functions belonging to this component, grouped by file (populated deterministically from cluster results).",
|
|
217
317
|
default_factory=list,
|
|
218
318
|
exclude=True,
|
|
319
|
+
json_schema_extra={"hidden": True},
|
|
219
320
|
)
|
|
220
321
|
|
|
221
322
|
component_id: str = Field(
|
|
222
323
|
default="",
|
|
223
324
|
description="Deterministic unique identifier for this component.",
|
|
224
325
|
exclude=True,
|
|
326
|
+
json_schema_extra={"hidden": True},
|
|
225
327
|
)
|
|
226
328
|
|
|
227
329
|
def llm_str(self):
|
|
@@ -247,6 +349,7 @@ class AnalysisInsights(LLMBaseModel):
|
|
|
247
349
|
default_factory=dict,
|
|
248
350
|
description="Top-level file index keyed by relative file path. Contains all methods and statuses.",
|
|
249
351
|
exclude=True,
|
|
352
|
+
json_schema_extra={"hidden": True},
|
|
250
353
|
)
|
|
251
354
|
components: list[Component] = Field(description="List of the components identified in the project.")
|
|
252
355
|
components_relations: list[Relation] = Field(description="List of relations among the components.")
|
|
@@ -264,7 +367,7 @@ class AnalysisInsights(LLMBaseModel):
|
|
|
264
367
|
return {str(PurePosixPath(fg.file_path)): c.component_id for c in self.components for fg in c.file_methods}
|
|
265
368
|
|
|
266
369
|
|
|
267
|
-
def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "") -> None:
|
|
370
|
+
def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "", only_new: bool = False) -> None:
|
|
268
371
|
"""Assign hierarchical component IDs based on sibling index.
|
|
269
372
|
|
|
270
373
|
IDs encode structural position in the component tree:
|
|
@@ -272,11 +375,28 @@ def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "") -> Non
|
|
|
272
375
|
- Under "1" (parent_id="1"): "1.1", "1.2"
|
|
273
376
|
- Under "1.2" (parent_id="1.2"): "1.2.1", "1.2.2"
|
|
274
377
|
|
|
275
|
-
|
|
276
|
-
|
|
378
|
+
With ``only_new=True`` (incremental path), components that already carry a
|
|
379
|
+
populated ``component_id`` are preserved verbatim and only siblings with an
|
|
380
|
+
empty id are assigned a fresh slot — used when stitching new components into
|
|
381
|
+
an existing tree without renumbering survivors.
|
|
277
382
|
"""
|
|
278
|
-
|
|
279
|
-
|
|
383
|
+
if only_new:
|
|
384
|
+
used_indices: set[int] = set()
|
|
385
|
+
for component in analysis.components:
|
|
386
|
+
if not component.component_id:
|
|
387
|
+
continue
|
|
388
|
+
tail = component.component_id.split(".")[-1]
|
|
389
|
+
if tail.isdigit():
|
|
390
|
+
used_indices.add(int(tail))
|
|
391
|
+
next_idx = max(used_indices, default=0) + 1
|
|
392
|
+
for component in analysis.components:
|
|
393
|
+
if component.component_id:
|
|
394
|
+
continue
|
|
395
|
+
component.component_id = f"{parent_id}.{next_idx}" if parent_id else str(next_idx)
|
|
396
|
+
next_idx += 1
|
|
397
|
+
else:
|
|
398
|
+
for idx, component in enumerate(analysis.components, start=1):
|
|
399
|
+
component.component_id = f"{parent_id}.{idx}" if parent_id else str(idx)
|
|
280
400
|
|
|
281
401
|
# Assign relation IDs by looking up component names (first occurrence wins for duplicates)
|
|
282
402
|
name_to_id: dict[str, str] = {}
|
|
@@ -293,6 +413,29 @@ def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "") -> Non
|
|
|
293
413
|
relation.dst_id = name_to_id.get(relation.dst_name, "")
|
|
294
414
|
|
|
295
415
|
|
|
416
|
+
def iter_components(
|
|
417
|
+
root_analysis: AnalysisInsights,
|
|
418
|
+
sub_analyses: dict[str, AnalysisInsights],
|
|
419
|
+
) -> list[Component]:
|
|
420
|
+
"""Return every component across the root and all sub-analyses, in tree order."""
|
|
421
|
+
components = list(root_analysis.components)
|
|
422
|
+
for sub in sub_analyses.values():
|
|
423
|
+
components.extend(sub.components)
|
|
424
|
+
return components
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def index_components_by_id(
|
|
428
|
+
root_analysis: AnalysisInsights,
|
|
429
|
+
sub_analyses: dict[str, AnalysisInsights],
|
|
430
|
+
) -> dict[str, Component]:
|
|
431
|
+
"""Build a ``component_id -> Component`` lookup across the full tree.
|
|
432
|
+
|
|
433
|
+
Components without a ``component_id`` are skipped. Later occurrences of
|
|
434
|
+
the same id silently override earlier ones (sub-analyses win over root).
|
|
435
|
+
"""
|
|
436
|
+
return {c.component_id: c for c in iter_components(root_analysis, sub_analyses) if c.component_id}
|
|
437
|
+
|
|
438
|
+
|
|
296
439
|
class CFGComponent(LLMBaseModel):
|
|
297
440
|
"""A component derived from control flow graph analysis."""
|
|
298
441
|
|
|
@@ -416,6 +559,17 @@ class ComponentFiles(LLMBaseModel):
|
|
|
416
559
|
return title + body
|
|
417
560
|
|
|
418
561
|
|
|
562
|
+
class ScopeRelations(LLMBaseModel):
|
|
563
|
+
"""Relations between components within a single scope."""
|
|
564
|
+
|
|
565
|
+
components_relations: list[Relation] = Field(description="Inter-component relationships within this scope.")
|
|
566
|
+
|
|
567
|
+
def llm_str(self):
|
|
568
|
+
if not self.components_relations:
|
|
569
|
+
return "No relations found."
|
|
570
|
+
return "\n".join(r.llm_str() for r in self.components_relations)
|
|
571
|
+
|
|
572
|
+
|
|
419
573
|
class FilePath(LLMBaseModel):
|
|
420
574
|
"""File path with optional line range reference."""
|
|
421
575
|
|
|
@@ -32,7 +32,7 @@ from static_analyzer.cluster_relations import (
|
|
|
32
32
|
build_node_to_component_map,
|
|
33
33
|
merge_relations,
|
|
34
34
|
)
|
|
35
|
-
from static_analyzer.constants import CALLABLE_TYPES, CLASS_TYPES, NodeType
|
|
35
|
+
from static_analyzer.constants import CALLABLE_TYPES, CLASS_TYPES, Language, NodeType
|
|
36
36
|
from static_analyzer.graph import CallGraph, ClusterResult
|
|
37
37
|
from static_analyzer.node import Node
|
|
38
38
|
|
|
@@ -70,7 +70,7 @@ class ClusterMethodsMixin:
|
|
|
70
70
|
|
|
71
71
|
def _build_cluster_string(
|
|
72
72
|
self,
|
|
73
|
-
programming_langs: list[
|
|
73
|
+
programming_langs: list[Language],
|
|
74
74
|
cluster_results: dict[str, ClusterResult],
|
|
75
75
|
cluster_ids: set[int] | None = None,
|
|
76
76
|
prompt_overhead_chars: int = 0,
|
|
@@ -110,7 +110,7 @@ class ClusterMethodsMixin:
|
|
|
110
110
|
|
|
111
111
|
def _render_cluster_string(
|
|
112
112
|
self,
|
|
113
|
-
programming_langs: list[
|
|
113
|
+
programming_langs: list[Language],
|
|
114
114
|
cluster_results: dict[str, ClusterResult],
|
|
115
115
|
cluster_ids: set[int] | None,
|
|
116
116
|
skip_sets: dict[str, set[str]],
|
|
@@ -146,7 +146,7 @@ class ClusterMethodsMixin:
|
|
|
146
146
|
|
|
147
147
|
def _plan_skip_sets(
|
|
148
148
|
self,
|
|
149
|
-
programming_langs: list[
|
|
149
|
+
programming_langs: list[Language],
|
|
150
150
|
cluster_results: dict[str, ClusterResult],
|
|
151
151
|
prompt_overhead_chars: int,
|
|
152
152
|
) -> dict[str, set[str]]:
|
|
@@ -472,7 +472,9 @@ class ClusterMethodsMixin:
|
|
|
472
472
|
"""
|
|
473
473
|
all_nodes: dict[str, Node] = {}
|
|
474
474
|
for lang in cluster_results:
|
|
475
|
-
cfg =
|
|
475
|
+
cfg = (
|
|
476
|
+
cfg_graphs[lang] if cfg_graphs and lang in cfg_graphs else self.static_analysis.get_cfg(Language(lang))
|
|
477
|
+
)
|
|
476
478
|
all_nodes.update(cfg.nodes)
|
|
477
479
|
return all_nodes
|
|
478
480
|
|
|
@@ -492,7 +494,9 @@ class ClusterMethodsMixin:
|
|
|
492
494
|
"""
|
|
493
495
|
graphs: dict[str, nx.Graph] = {}
|
|
494
496
|
for lang in cluster_results:
|
|
495
|
-
cfg =
|
|
497
|
+
cfg = (
|
|
498
|
+
cfg_graphs[lang] if cfg_graphs and lang in cfg_graphs else self.static_analysis.get_cfg(Language(lang))
|
|
499
|
+
)
|
|
496
500
|
graphs[lang] = cfg.to_networkx().to_undirected()
|
|
497
501
|
return graphs
|
|
498
502
|
|
|
@@ -703,7 +707,7 @@ class ClusterMethodsMixin:
|
|
|
703
707
|
pct = (assigned_nodes / total_nodes * 100) if total_nodes else 0
|
|
704
708
|
logger.info(f"Node coverage: {assigned_nodes}/{total_nodes} ({pct:.1f}%) nodes assigned to components")
|
|
705
709
|
|
|
706
|
-
def
|
|
710
|
+
def build_files_index(self, analysis: AnalysisInsights) -> dict[str, FileEntry]:
|
|
707
711
|
files: dict[str, FileEntry] = {}
|
|
708
712
|
for component in analysis.components:
|
|
709
713
|
for fmg in component.file_methods:
|
|
@@ -761,7 +765,7 @@ class ClusterMethodsMixin:
|
|
|
761
765
|
for comp in analysis.components:
|
|
762
766
|
comp.file_methods = self._build_file_methods_from_nodes(component_nodes.get(comp.component_id, []))
|
|
763
767
|
|
|
764
|
-
analysis.files = self.
|
|
768
|
+
analysis.files = self.build_files_index(analysis)
|
|
765
769
|
|
|
766
770
|
self._log_node_coverage(analysis, len(all_nodes))
|
|
767
771
|
|
|
@@ -784,3 +788,44 @@ class ClusterMethodsMixin:
|
|
|
784
788
|
node_to_component = build_node_to_component_map(analysis)
|
|
785
789
|
static_relations = build_component_relations(node_to_component, cfg_graphs)
|
|
786
790
|
analysis.components_relations = merge_relations(analysis.components_relations, static_relations, analysis)
|
|
791
|
+
|
|
792
|
+
def build_scope_cfg_string(self, analysis: AnalysisInsights) -> str:
|
|
793
|
+
"""Render cross-component communication edges as a human-readable string for the LLM.
|
|
794
|
+
|
|
795
|
+
For every CFG edge where src belongs to component A and dst belongs to
|
|
796
|
+
component B (A != B), this produces a grouped summary like:
|
|
797
|
+
|
|
798
|
+
ComponentA -> ComponentB (3 edges):
|
|
799
|
+
src_pkg.MethodX -> dst_pkg.MethodY
|
|
800
|
+
src_pkg.MethodZ -> dst_pkg.MethodW
|
|
801
|
+
"""
|
|
802
|
+
node_to_component = build_node_to_component_map(analysis)
|
|
803
|
+
id_to_name = {c.component_id: c.name for c in analysis.components}
|
|
804
|
+
cfg_graphs = {lang: self.static_analysis.get_cfg(lang) for lang in self.static_analysis.get_languages()}
|
|
805
|
+
|
|
806
|
+
cross_edges: dict[tuple[str, str], list[tuple[str, str]]] = defaultdict(list)
|
|
807
|
+
for cfg in cfg_graphs.values():
|
|
808
|
+
for edge in cfg.edges:
|
|
809
|
+
src_name = edge.get_source()
|
|
810
|
+
dst_name = edge.get_destination()
|
|
811
|
+
src_comp = node_to_component.get(src_name)
|
|
812
|
+
dst_comp = node_to_component.get(dst_name)
|
|
813
|
+
if src_comp and dst_comp and src_comp != dst_comp:
|
|
814
|
+
cross_edges[(src_comp, dst_comp)].append((src_name, dst_name))
|
|
815
|
+
|
|
816
|
+
if not cross_edges:
|
|
817
|
+
return "No cross-component communication edges found."
|
|
818
|
+
|
|
819
|
+
lines: list[str] = []
|
|
820
|
+
for (src_id, dst_id), edges in sorted(cross_edges.items()):
|
|
821
|
+
src_label = id_to_name.get(src_id, src_id)
|
|
822
|
+
dst_label = id_to_name.get(dst_id, dst_id)
|
|
823
|
+
lines.append(f"\n{src_label} -> {dst_label} ({len(edges)} edge{'s' if len(edges) != 1 else ''}):")
|
|
824
|
+
for s, d in edges[:10]:
|
|
825
|
+
short_s = s.split(".")[-1]
|
|
826
|
+
short_d = d.split(".")[-1]
|
|
827
|
+
lines.append(f" {short_s} -> {short_d}")
|
|
828
|
+
if len(edges) > 10:
|
|
829
|
+
lines.append(f" ... and {len(edges) - 10} more")
|
|
830
|
+
|
|
831
|
+
return "\n".join(lines)
|