codeboarding 0.11.0__tar.gz → 0.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. {codeboarding-0.11.0/codeboarding.egg-info → codeboarding-0.12.0}/PKG-INFO +2 -2
  2. {codeboarding-0.11.0 → codeboarding-0.12.0}/README.md +1 -1
  3. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/agent.py +72 -51
  4. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/agent_responses.py +186 -32
  5. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/cluster_methods_mixin.py +53 -8
  6. codeboarding-0.12.0/agents/incremental_agent.py +787 -0
  7. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/llm_config.py +6 -6
  8. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/__init__.py +4 -0
  9. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/abstract_prompt_factory.py +2 -2
  10. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/claude_prompts.py +82 -62
  11. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/deepseek_prompts.py +66 -42
  12. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/gemini_flash_prompts.py +53 -54
  13. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/glm_prompts.py +69 -36
  14. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/gpt_prompts.py +57 -54
  15. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/kimi_prompts.py +60 -50
  16. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/prompts/prompt_factory.py +4 -4
  17. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/validation.py +66 -1
  18. {codeboarding-0.11.0 → codeboarding-0.12.0/codeboarding.egg-info}/PKG-INFO +2 -2
  19. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding.egg-info/SOURCES.txt +7 -16
  20. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding.egg-info/requires.txt +1 -1
  21. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding.egg-info/top_level.txt +0 -1
  22. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_cli/commands/full_analysis.py +9 -0
  23. codeboarding-0.12.0/codeboarding_cli/commands/incremental_analysis.py +139 -0
  24. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_cli/commands/partial_analysis.py +0 -1
  25. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/__init__.py +4 -3
  26. codeboarding-0.12.0/codeboarding_workflows/analysis.py +238 -0
  27. {codeboarding-0.11.0 → codeboarding-0.12.0}/core/__init__.py +2 -1
  28. {codeboarding-0.11.0 → codeboarding-0.12.0}/core/protocols.py +2 -1
  29. {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/analysis_json.py +6 -1
  30. codeboarding-0.12.0/diagram_analysis/cluster_delta.py +455 -0
  31. codeboarding-0.12.0/diagram_analysis/cluster_snapshot.py +101 -0
  32. codeboarding-0.12.0/diagram_analysis/diagram_generator.py +735 -0
  33. codeboarding-0.12.0/diagram_analysis/exceptions.py +43 -0
  34. {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/io_utils.py +143 -33
  35. codeboarding-0.12.0/diagram_analysis/run_mode.py +10 -0
  36. {codeboarding-0.11.0 → codeboarding-0.12.0}/github_action.py +2 -1
  37. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/cohesion.py +6 -0
  38. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/runner.py +9 -3
  39. {codeboarding-0.11.0 → codeboarding-0.12.0}/main.py +0 -6
  40. {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/paths.py +4 -0
  41. {codeboarding-0.11.0 → codeboarding-0.12.0}/pyproject.toml +2 -4
  42. {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/change_detector.py +4 -0
  43. {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/diff_parser.py +16 -3
  44. {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/git_ops.py +79 -43
  45. {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/ignore.py +33 -0
  46. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/__init__.py +308 -266
  47. codeboarding-0.12.0/static_analyzer/analysis_cache.py +475 -0
  48. codeboarding-0.12.0/static_analyzer/analysis_result.py +273 -0
  49. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/cluster_helpers.py +52 -30
  50. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/constants.py +2 -4
  51. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/graph.py +110 -22
  52. codeboarding-0.12.0/static_analyzer/incremental_orchestrator.py +125 -0
  53. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/java_utils.py +8 -10
  54. codeboarding-0.12.0/static_analyzer/language_results.py +128 -0
  55. codeboarding-0.12.0/static_analyzer/leiden_utils.py +103 -0
  56. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/reference_resolve_mixin.py +8 -4
  57. codeboarding-0.12.0/static_analyzer/typescript_config_scanner.py +235 -0
  58. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_github_action.py +18 -6
  59. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_main.py +89 -16
  60. {codeboarding-0.11.0 → codeboarding-0.12.0}/user_config.py +2 -2
  61. {codeboarding-0.11.0 → codeboarding-0.12.0}/utils.py +17 -0
  62. codeboarding-0.11.0/agents/analysis_patcher.py +0 -206
  63. codeboarding-0.11.0/codeboarding_cli/commands/incremental_analysis.py +0 -137
  64. codeboarding-0.11.0/codeboarding_workflows/analysis.py +0 -144
  65. codeboarding-0.11.0/diagram_analysis/diagram_generator.py +0 -679
  66. codeboarding-0.11.0/diagram_analysis/ease.py +0 -68
  67. codeboarding-0.11.0/diagram_analysis/incremental/delta.py +0 -84
  68. codeboarding-0.11.0/diagram_analysis/incremental/models.py +0 -220
  69. codeboarding-0.11.0/diagram_analysis/incremental/payload.py +0 -129
  70. codeboarding-0.11.0/diagram_analysis/incremental/pipeline.py +0 -264
  71. codeboarding-0.11.0/diagram_analysis/incremental/semantic_diff.py +0 -557
  72. codeboarding-0.11.0/diagram_analysis/incremental/trace_planner.py +0 -435
  73. codeboarding-0.11.0/diagram_analysis/incremental/tracer.py +0 -458
  74. codeboarding-0.11.0/diagram_analysis/incremental/updater.py +0 -460
  75. codeboarding-0.11.0/diagram_analysis/run_metadata.py +0 -146
  76. codeboarding-0.11.0/duckdb_crud.py +0 -125
  77. codeboarding-0.11.0/health/constants.py +0 -19
  78. codeboarding-0.11.0/health_main.py +0 -151
  79. codeboarding-0.11.0/output_generators/__init__.py +0 -0
  80. codeboarding-0.11.0/static_analyzer/analysis_cache.py +0 -761
  81. codeboarding-0.11.0/static_analyzer/analysis_result.py +0 -488
  82. codeboarding-0.11.0/static_analyzer/cluster_change_analyzer.py +0 -391
  83. codeboarding-0.11.0/static_analyzer/incremental_orchestrator.py +0 -644
  84. codeboarding-0.11.0/static_analyzer/typescript_config_scanner.py +0 -54
  85. {codeboarding-0.11.0 → codeboarding-0.12.0}/LICENSE +0 -0
  86. {codeboarding-0.11.0 → codeboarding-0.12.0}/PYPI.md +0 -0
  87. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/__init__.py +0 -0
  88. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/abstraction_agent.py +0 -0
  89. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/change_status.py +0 -0
  90. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/cluster_budget.py +0 -0
  91. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/constants.py +0 -0
  92. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/dependency_discovery.py +0 -0
  93. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/details_agent.py +0 -0
  94. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/meta_agent.py +0 -0
  95. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/model_capabilities.py +0 -0
  96. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/planner_agent.py +0 -0
  97. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/retry.py +0 -0
  98. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/__init__.py +0 -0
  99. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/base.py +0 -0
  100. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/get_external_deps.py +0 -0
  101. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/get_method_invocations.py +0 -0
  102. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_cfg.py +0 -0
  103. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_docs.py +0 -0
  104. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_file.py +0 -0
  105. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_file_structure.py +0 -0
  106. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_packages.py +0 -0
  107. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_source.py +0 -0
  108. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/read_structure.py +0 -0
  109. {codeboarding-0.11.0 → codeboarding-0.12.0}/agents/tools/toolkit.py +0 -0
  110. {codeboarding-0.11.0 → codeboarding-0.12.0}/caching/__init__.py +0 -0
  111. {codeboarding-0.11.0 → codeboarding-0.12.0}/caching/cache.py +0 -0
  112. {codeboarding-0.11.0 → codeboarding-0.12.0}/caching/details_cache.py +0 -0
  113. {codeboarding-0.11.0 → codeboarding-0.12.0}/caching/meta_cache.py +0 -0
  114. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding.egg-info/dependency_links.txt +0 -0
  115. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding.egg-info/entry_points.txt +0 -0
  116. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_cli/__init__.py +0 -0
  117. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_cli/bootstrap.py +0 -0
  118. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_cli/commands/__init__.py +0 -0
  119. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/orchestration.py +0 -0
  120. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/rendering.py +0 -0
  121. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/sources/__init__.py +0 -0
  122. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/sources/local.py +0 -0
  123. {codeboarding-0.11.0 → codeboarding-0.12.0}/codeboarding_workflows/sources/remote.py +0 -0
  124. {codeboarding-0.11.0 → codeboarding-0.12.0}/constants.py +0 -0
  125. {codeboarding-0.11.0 → codeboarding-0.12.0}/core/plugin_loader.py +0 -0
  126. {codeboarding-0.11.0 → codeboarding-0.12.0}/core/registry.py +0 -0
  127. {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/__init__.py +0 -0
  128. {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/file_coverage.py +0 -0
  129. {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/run_context.py +0 -0
  130. {codeboarding-0.11.0 → codeboarding-0.12.0}/diagram_analysis/version.py +0 -0
  131. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/__init__.py +0 -0
  132. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/__init__.py +0 -0
  133. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/circular_deps.py +0 -0
  134. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/coupling.py +0 -0
  135. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/function_size.py +0 -0
  136. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/god_class.py +0 -0
  137. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/inheritance.py +0 -0
  138. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/instability.py +0 -0
  139. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/checks/unused_code_diagnostics.py +0 -0
  140. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/config.py +0 -0
  141. {codeboarding-0.11.0 → codeboarding-0.12.0}/health/models.py +0 -0
  142. {codeboarding-0.11.0 → codeboarding-0.12.0}/install.py +0 -0
  143. {codeboarding-0.11.0 → codeboarding-0.12.0}/logging_config.py +0 -0
  144. {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/__init__.py +0 -0
  145. {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/callbacks.py +0 -0
  146. {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/context.py +0 -0
  147. {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/mixin.py +0 -0
  148. {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/stats.py +0 -0
  149. {codeboarding-0.11.0 → codeboarding-0.12.0}/monitoring/writers.py +0 -0
  150. {codeboarding-0.11.0/diagram_analysis/incremental → codeboarding-0.12.0/output_generators}/__init__.py +0 -0
  151. {codeboarding-0.11.0 → codeboarding-0.12.0}/output_generators/html.py +0 -0
  152. {codeboarding-0.11.0 → codeboarding-0.12.0}/output_generators/html_template.py +0 -0
  153. {codeboarding-0.11.0 → codeboarding-0.12.0}/output_generators/markdown.py +0 -0
  154. {codeboarding-0.11.0 → codeboarding-0.12.0}/output_generators/mdx.py +0 -0
  155. {codeboarding-0.11.0 → codeboarding-0.12.0}/output_generators/sphinx.py +0 -0
  156. {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/__init__.py +0 -0
  157. {codeboarding-0.11.0 → codeboarding-0.12.0}/repo_utils/errors.py +0 -0
  158. {codeboarding-0.11.0 → codeboarding-0.12.0}/setup.cfg +0 -0
  159. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/cfg_skip_planner.py +0 -0
  160. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/cluster_relations.py +0 -0
  161. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/csharp_config_scanner.py +0 -0
  162. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/__init__.py +0 -0
  163. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/__init__.py +0 -0
  164. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/csharp_adapter.py +0 -0
  165. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/go_adapter.py +0 -0
  166. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/java_adapter.py +0 -0
  167. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/php_adapter.py +0 -0
  168. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/python_adapter.py +0 -0
  169. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/rust_adapter.py +0 -0
  170. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/adapters/typescript_adapter.py +0 -0
  171. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/call_graph_builder.py +0 -0
  172. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/edge_build_context.py +0 -0
  173. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/edge_builder.py +0 -0
  174. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/hierarchy_builder.py +0 -0
  175. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/language_adapter.py +0 -0
  176. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/lsp_client.py +0 -0
  177. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/lsp_constants.py +0 -0
  178. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/models.py +0 -0
  179. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/progress.py +0 -0
  180. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/protocols.py +0 -0
  181. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/result_converter.py +0 -0
  182. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/source_inspector.py +0 -0
  183. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/symbol_table.py +0 -0
  184. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/engine/utils.py +0 -0
  185. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/java_config_scanner.py +0 -0
  186. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/lsp_client/__init__.py +0 -0
  187. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/lsp_client/diagnostics.py +0 -0
  188. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/node.py +0 -0
  189. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/programming_language.py +0 -0
  190. {codeboarding-0.11.0 → codeboarding-0.12.0}/static_analyzer/scanner.py +0 -0
  191. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_cli_parser.py +0 -0
  192. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_install.py +0 -0
  193. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_logging_config.py +0 -0
  194. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_pyproject_packages.py +0 -0
  195. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_registry_coverage.py +0 -0
  196. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_tool_registry.py +0 -0
  197. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_user_config.py +0 -0
  198. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_vscode_constants.py +0 -0
  199. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_windows_compatibility.py +0 -0
  200. {codeboarding-0.11.0 → codeboarding-0.12.0}/tests/test_windows_encoding.py +0 -0
  201. {codeboarding-0.11.0 → codeboarding-0.12.0}/tool_registry/__init__.py +0 -0
  202. {codeboarding-0.11.0 → codeboarding-0.12.0}/tool_registry/installers.py +0 -0
  203. {codeboarding-0.11.0 → codeboarding-0.12.0}/tool_registry/manifest.py +0 -0
  204. {codeboarding-0.11.0 → codeboarding-0.12.0}/tool_registry/paths.py +0 -0
  205. {codeboarding-0.11.0 → codeboarding-0.12.0}/tool_registry/registry.py +0 -0
  206. {codeboarding-0.11.0 → codeboarding-0.12.0}/vscode_constants.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeboarding
3
- Version: 0.11.0
3
+ Version: 0.12.0
4
4
  Summary: Interactive Diagrams for Code
5
5
  Author: CodeBoarding Team
6
6
  License-Expression: MIT
@@ -18,7 +18,6 @@ Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: docker>=7.1
20
20
  Requires-Dist: dotenv>=0.9
21
- Requires-Dist: duckdb>=1.3
22
21
  Requires-Dist: dulwich>=0.22
23
22
  Requires-Dist: fastapi>=0.115
24
23
  Requires-Dist: filelock>=3.12
@@ -34,6 +33,7 @@ Requires-Dist: langchain-community>=0.4
34
33
  Requires-Dist: langchain-google-genai>=3.1
35
34
  Requires-Dist: langchain-ollama>=1.0
36
35
  Requires-Dist: langchain-openai>=1.1
36
+ Requires-Dist: leidenalg>=0.10
37
37
  Requires-Dist: markdown>=3.8
38
38
  Requires-Dist: markdown-it-py>=3.0
39
39
  Requires-Dist: markitdown>=0.1
@@ -143,7 +143,7 @@ python main.py full https://github.com/pytorch/pytorch
143
143
 
144
144
  ## Supported stack
145
145
 
146
- - Languages: Python, TypeScript, JavaScript, Java, Go, PHP, Rust.
146
+ - Languages: Python, TypeScript, JavaScript, Java, Go, PHP, Rust, C#.
147
147
  - LLM providers: OpenAI, Anthropic, Google, Vercel AI Gateway, AWS Bedrock, Ollama, OpenRouter, and more.
148
148
 
149
149
  ## Examples
@@ -5,7 +5,7 @@ from pathlib import Path
5
5
  from google.api_core.exceptions import ResourceExhausted
6
6
  from langchain_core.exceptions import OutputParserException
7
7
  from langchain_core.language_models import BaseChatModel
8
- from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
8
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
9
9
  from langchain_core.output_parsers import PydanticOutputParser
10
10
  from langchain_core.prompts import PromptTemplate
11
11
  from langchain.agents import create_agent
@@ -20,6 +20,7 @@ from agents.tools.toolkit import CodeBoardingToolkit
20
20
  from agents.validation import ValidationResult, score_validation_results, VALIDATOR_WEIGHTS, DEFAULT_VALIDATOR_WEIGHT
21
21
  from monitoring.mixin import MonitoringMixin
22
22
  from repo_utils.ignore import RepoIgnoreManager
23
+ from agents.agent_responses import LLMBaseModel
23
24
  from agents.llm_config import MONITORING_CALLBACK
24
25
  from static_analyzer.analysis_result import StaticAnalysisResults
25
26
  from static_analyzer.reference_resolve_mixin import ReferenceResolverMixin
@@ -43,10 +44,10 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
43
44
  ReferenceResolverMixin.__init__(self, repo_dir, static_analysis)
44
45
  MonitoringMixin.__init__(self)
45
46
  self.parsing_llm = parsing_llm
47
+ self.agent_llm = agent_llm
46
48
  self.repo_dir = repo_dir
47
49
  self.ignore_manager = RepoIgnoreManager(repo_dir)
48
50
 
49
- # Initialize the professional toolkit
50
51
  context = RepoContext(repo_dir=repo_dir, ignore_manager=self.ignore_manager, static_analysis=static_analysis)
51
52
  self.toolkit = CodeBoardingToolkit(context=context)
52
53
 
@@ -200,10 +201,10 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
200
201
  except Empty:
201
202
  raise RuntimeError("Agent invocation completed but no result was returned")
202
203
 
203
- def _parse_invoke(self, prompt: str, type: type):
204
+ def _parse_invoke(self, prompt: str, type: type, include_hidden: bool = False):
204
205
  response = self._invoke(prompt)
205
206
  assert isinstance(response, str), f"Expected a string as response type got {response}"
206
- return self._parse_response(prompt, response, type)
207
+ return self._parse_response(prompt, response, type, include_hidden=include_hidden)
207
208
 
208
209
  def _score_result(self, result, validators: list, context) -> tuple[float, list[tuple[float, str]]]:
209
210
  """Run all validators on a result and return (score, prioritized_feedback).
@@ -233,7 +234,13 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
233
234
  return score, weighted_feedback
234
235
 
235
236
  def _validation_invoke(
236
- self, prompt: str, return_type: type, validators: list, context, max_validation_attempts: int = 1
237
+ self,
238
+ prompt: str,
239
+ return_type: type,
240
+ validators: list,
241
+ context,
242
+ max_validation_attempts: int = 1,
243
+ include_hidden: bool = False,
237
244
  ):
238
245
  """
239
246
  Invoke LLM with validation, feedback loop, and best-of-N selection.
@@ -261,7 +268,12 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
261
268
  # Compute the maximum possible score so we can detect a perfect result
262
269
  max_possible_score = sum(VALIDATOR_WEIGHTS.get(v.__name__, DEFAULT_VALIDATOR_WEIGHT) for v in validators)
263
270
 
264
- result = self._parse_invoke(prompt, return_type)
271
+ result = self._parse_invoke(prompt, return_type, include_hidden=include_hidden)
272
+ logger.info(
273
+ "[Validation] Parsed %s: %s",
274
+ return_type.__name__,
275
+ result.llm_str()[:500],
276
+ )
265
277
 
266
278
  # Track the best candidate across all attempts
267
279
  best_result = result
@@ -314,42 +326,33 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
314
326
  f"[Validation] Preparing attempt {attempt + 1}/{max_validation_attempts} "
315
327
  f"with {len(weighted_feedback)} feedback items"
316
328
  )
317
- result = self._parse_invoke(feedback_prompt, return_type)
329
+ result = self._parse_invoke(feedback_prompt, return_type, include_hidden=include_hidden)
318
330
 
319
331
  return best_result
320
332
 
321
- def _parse_response(self, prompt, response, return_type, max_retries=5, attempt=0):
333
+ def _parse_response(self, prompt, response, return_type, max_retries=5, attempt=0, include_hidden: bool = False):
322
334
  if response is None or response.strip() == "":
323
335
  logger.error(f"Empty response for prompt: {prompt}")
324
336
 
337
+ if include_hidden and issubclass(return_type, LLMBaseModel):
338
+ schema = return_type.model_json_schema(include_hidden=True)
339
+ parser = PydanticOutputParser(pydantic_object=return_type)
340
+ format_instructions = (
341
+ f"The output should be formatted as a JSON instance that conforms to the JSON schema below.\n"
342
+ f"Here is the output schema:\n```json\n{json.dumps(schema, indent=2)}\n```"
343
+ )
344
+ else:
345
+ parser = PydanticOutputParser(pydantic_object=return_type)
346
+ format_instructions = parser.get_format_instructions()
347
+
325
348
  def call_once():
326
- # Extractor is rebuilt on every attempt — previous trustcall state
327
- # may have corrupted attributes (see the tool_call_id bug below).
328
- extractor = create_extractor(self.parsing_llm, tools=[return_type], tool_choice=return_type.__name__)
329
349
  try:
330
- result = extractor.invoke(
331
- return_type.extractor_str() + response,
332
- config={"callbacks": [MONITORING_CALLBACK, self.agent_monitoring_callback]},
333
- )
334
- except AttributeError as e:
335
- # Trustcall bug: https://github.com/hinthornw/trustcall/issues/47
336
- # 'ExtractionState' object has no attribute 'tool_call_id' during validation retry.
337
- # Treat as a non-retriable fallback to the Pydantic parser.
338
- if "tool_call_id" in str(e):
339
- logger.warning(f"Trustcall bug encountered, falling back to Pydantic parser: {e}")
340
- parser = PydanticOutputParser(pydantic_object=return_type)
341
- return self._try_parse(response, parser)
342
- raise
343
- if "responses" in result and len(result["responses"]) != 0:
344
- return return_type.model_validate(result["responses"][0])
345
- if "messages" in result and len(result["messages"]) != 0:
346
- message = result["messages"][0].content
347
- parser = PydanticOutputParser(pydantic_object=return_type)
348
- if not message:
349
- raise EmptyExtractorMessageError("Extractor returned empty message content")
350
- return self._try_parse(message, parser)
351
- parser = PydanticOutputParser(pydantic_object=return_type)
352
- return self._try_parse(response, parser)
350
+ result = self._structured_parse(response, parser, format_instructions=format_instructions)
351
+ logger.debug("[parse_response] structured_parse succeeded for %s", return_type.__name__)
352
+ return result
353
+ except Exception as e:
354
+ logger.warning("[parse_response] structured_parse failed for %s: %s", return_type.__name__, e)
355
+ return self._extractor_parse(response, return_type, parser, include_hidden=include_hidden)
353
356
 
354
357
  def classify(exc: Exception, attempt: int) -> RetryDecision:
355
358
  if isinstance(exc, ResourceExhausted):
@@ -359,20 +362,15 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
359
362
  )
360
363
  if isinstance(exc, (EmptyExtractorMessageError, IndexError, json.JSONDecodeError, ValueError)):
361
364
  return RetryDecision(action=RetryAction.RETRY_NOW)
362
- # AttributeError (non-tool_call_id) and any other exception: give up.
363
365
  return RetryDecision(action=RetryAction.GIVE_UP)
364
366
 
365
367
  def on_exhausted(exc: Exception):
366
- # Preserve historic shape: ResourceExhausted surfaces the original exception;
367
- # parse-error exhaustion wraps with a descriptive message naming the response.
368
368
  if isinstance(exc, ResourceExhausted):
369
369
  logger.error(f"Resource exhausted on final parsing attempt: {exc}")
370
370
  raise exc
371
371
  logger.error(f"Max retries ({max_retries}) reached for parsing response: {response}")
372
372
  raise Exception(f"Max retries reached for parsing response: {response}")
373
373
 
374
- # ``attempt`` kwarg kept for backwards-compat with callers that passed it;
375
- # the effective attempt count is ``max_retries - attempt``.
376
374
  return with_retries(
377
375
  call_once,
378
376
  max_attempts=max(1, max_retries - attempt),
@@ -381,19 +379,21 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
381
379
  log_prefix="Parse response",
382
380
  )
383
381
 
384
- def _try_parse(self, message_content, parser):
385
- try:
386
- prompt_template = """You are an JSON expert. Here you need to extract information in the following json format: {format_instructions}
382
+ def _structured_parse(self, message_content, parser, format_instructions: str | None = None):
383
+ if format_instructions is None:
384
+ format_instructions = parser.get_format_instructions()
385
+ prompt_template = """You are a JSON expert. Here you need to extract information in the following json format: {format_instructions}
387
386
 
388
- Here is the content to parse and fix: {adjective}
387
+ Here is the content to parse and fix: {adjective}
389
388
 
390
- Please provide only the JSON output without any additional text."""
391
- prompt = PromptTemplate(
392
- template=prompt_template,
393
- input_variables=["adjective"],
394
- partial_variables={"format_instructions": parser.get_format_instructions()},
395
- )
396
- chain = prompt | self.parsing_llm | parser
389
+ Please provide only the JSON output without any additional text."""
390
+ prompt = PromptTemplate(
391
+ template=prompt_template,
392
+ input_variables=["adjective"],
393
+ partial_variables={"format_instructions": format_instructions},
394
+ )
395
+ chain = prompt | self.parsing_llm | parser
396
+ try:
397
397
  return chain.invoke(
398
398
  {"adjective": message_content},
399
399
  config={"callbacks": [MONITORING_CALLBACK, self.agent_monitoring_callback]},
@@ -401,7 +401,28 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
401
401
  except (ValidationError, OutputParserException):
402
402
  for _, v in json.loads(message_content).items():
403
403
  try:
404
- return self._try_parse(json.dumps(v), parser)
404
+ return self._structured_parse(json.dumps(v), parser)
405
405
  except:
406
406
  pass
407
407
  raise ValueError(f"Couldn't parse {message_content}")
408
+
409
+ def _extractor_parse(self, response, return_type, parser, include_hidden: bool = False):
410
+ extractor = create_extractor(self.parsing_llm, tools=[return_type], tool_choice=return_type.__name__)
411
+ try:
412
+ result = extractor.invoke(
413
+ return_type.extractor_str(include_hidden=include_hidden) + response,
414
+ config={"callbacks": [MONITORING_CALLBACK, self.agent_monitoring_callback]},
415
+ )
416
+ except AttributeError as e:
417
+ if "tool_call_id" in str(e):
418
+ logger.warning(f"Trustcall bug encountered: {e}")
419
+ raise
420
+ raise
421
+ if "responses" in result and len(result["responses"]) != 0:
422
+ return return_type.model_validate(result["responses"][0])
423
+ if "messages" in result and len(result["messages"]) != 0:
424
+ message = result["messages"][0].content
425
+ if not message:
426
+ raise EmptyExtractorMessageError("Extractor returned empty message content")
427
+ return self._structured_parse(message, parser)
428
+ raise EmptyExtractorMessageError("Extractor returned no responses and no messages")
@@ -7,6 +7,7 @@ from pathlib import PurePosixPath
7
7
  from typing import get_origin, Optional
8
8
 
9
9
  from pydantic import BaseModel, Field
10
+ from pydantic.fields import FieldInfo
10
11
 
11
12
  logger = logging.getLogger(__name__)
12
13
 
@@ -19,30 +20,104 @@ class LLMBaseModel(BaseModel, abc.ABC):
19
20
  raise NotImplementedError("LLM String has to be implemented.")
20
21
 
21
22
@classmethod
def _is_field_hidden(cls, fvalue: FieldInfo) -> bool:
    """Tell whether a field is marked as hidden.

    A field is hidden when its ``exclude`` flag is truthy, or when its
    ``json_schema_extra`` is a dict carrying a truthy ``"hidden"`` entry.
    Any other ``json_schema_extra`` value (``None``, a callable, ...) means
    the field is not hidden.
    """
    if not fvalue.exclude:
        schema_extra = fvalue.json_schema_extra
        return isinstance(schema_extra, dict) and bool(schema_extra.get("hidden"))
    return True
30
+
31
@classmethod
def _excluded_fields(cls, include_hidden: bool = False) -> set[str]:
    """Gather the names of hidden fields declared anywhere in the class hierarchy.

    Each class in ``cls.__mro__`` that exposes ``model_fields`` is scanned, and
    every field for which ``_is_field_hidden`` answers true contributes its name.
    With ``include_hidden=True`` nothing is excluded, so the set is empty.
    """
    if include_hidden:
        return set()
    return {
        field_name
        for base in cls.__mro__
        if hasattr(base, "model_fields")
        for field_name, field_info in base.model_fields.items()
        if cls._is_field_hidden(field_info)
    }
42
+
43
@classmethod
def _resolve_excluded_by_title(cls, include_hidden: bool = False) -> dict[str, set[str]]:
    """Map every reachable model's class name to its set of hidden field names.

    Starting at ``cls``, field annotations are followed to each
    ``LLMBaseModel`` subclass they mention. This includes models buried inside
    several layers of generics, e.g. ``dict[str, list[Model]]`` or
    ``Optional[list[Model]]``. For each model, the result of
    ``_excluded_fields(include_hidden)`` is recorded when it is non-empty.

    Each model is visited at most once, so self-referential model graphs
    terminate.

    Returns:
        ``{model.__name__: {hidden field names}}`` for models that have any.
    """
    seen: set[type] = set()
    result: dict[str, set[str]] = {}

    def nested_models(annotation):
        """Yield each LLMBaseModel subclass found in ``annotation`` or its type arguments."""
        if get_origin(annotation) is None and isinstance(annotation, type):
            # Only a plain class may be handed to issubclass(); parameterized
            # generics such as list[int] pass isinstance(..., type) on
            # Python 3.9/3.10 yet make issubclass() raise TypeError.
            if issubclass(annotation, LLMBaseModel):
                yield annotation
            return
        # Parameterized generic or union: look through every type argument,
        # however deeply nested.
        for arg in getattr(annotation, "__args__", ()):
            yield from nested_models(arg)

    def walk(model: type) -> None:
        if model in seen or not hasattr(model, "model_fields"):
            return
        seen.add(model)
        excluded = model._excluded_fields(include_hidden)  # type: ignore[attr-defined]
        if excluded:
            result[getattr(model, "__name__", "")] = excluded
        for finfo in getattr(model, "model_fields", {}).values():
            for candidate in nested_models(finfo.annotation):
                walk(candidate)

    walk(cls)
    return result
64
+
65
@classmethod
def _extractor_fields(cls, indent: str = " ", include_hidden: bool = False) -> str:
    """Render the model's fields as a bullet list, one ``- name: description`` line each.

    Hidden fields are skipped unless ``include_hidden`` is true. Three shapes
    are distinguished by the field annotation:

    * a list whose item type is an ``LLMBaseModel`` subclass becomes a header
      line followed by the item model's own fields, indented one step deeper;
    * a field that is itself an ``LLMBaseModel`` subclass is replaced by that
      model's fields at the current indent;
    * anything else is a single bullet using the field's description.

    Returns:
        The rendered lines joined with newlines.
    """
    rendered: list[str] = []
    for fname, fvalue in cls.model_fields.items():
        if cls._is_field_hidden(fvalue) and not include_hidden:
            continue

        plain_bullet = f"{indent}- {fname}: {fvalue.description}"
        ftype = fvalue.annotation

        if get_origin(ftype) is list:
            item_type = ftype.__args__[0] if ftype is not None and hasattr(ftype, "__args__") else None
            if isinstance(item_type, type) and issubclass(item_type, LLMBaseModel):
                nested = item_type._extractor_fields(indent + " ", include_hidden)
                rendered.append(f"{indent}- {fname}: a list, where each item has:\n{nested}")
            else:
                rendered.append(plain_bullet)
        elif isinstance(ftype, type) and issubclass(ftype, LLMBaseModel):
            rendered.append(ftype._extractor_fields(indent, include_hidden))
        else:
            rendered.append(plain_bullet)
    return "\n".join(rendered)
86
+
87
@classmethod
def extractor_str(cls, include_hidden: bool = False) -> str:
    """Build the instruction preamble that asks for a JSON object of this model's type.

    The text names the class, lists its fields as produced by
    ``_extractor_fields`` (hidden ones only when ``include_hidden`` is true)
    and ends with a blank line so the text to extract from can be appended
    directly.
    """
    model_name = cls.__name__
    field_listing = cls._extractor_fields(include_hidden=include_hidden)
    segments = [
        "You are a JSON extraction expert. ",
        f"Extract a valid JSON object of type `{model_name}` from the text below.\n",
        f"The JSON must have these fields:\n{field_listing}\n\n",
    ]
    return "".join(segments)
96
+
97
@classmethod
def model_json_schema(
    cls,
    by_alias: bool = True,
    ref_template: str = "#/$defs/{model}",
    schema_generator: type | None = None,
    mode: str = "validation",
    include_hidden: bool = False,
    **kwargs,
) -> dict:
    """Return the model's JSON schema with hidden fields removed.

    The schema is produced by the parent implementation and then pruned: for
    this model and for every nested model found by
    ``_resolve_excluded_by_title``, hidden fields are dropped from
    ``properties`` and from ``required``.

    Args:
        by_alias, ref_template, mode: forwarded to the parent implementation.
        schema_generator: forwarded only when not ``None``, so the parent's
            own default generator applies otherwise.
        include_hidden: when true, nothing is pruned.
        **kwargs: extra keyword arguments forwarded to the parent; they take
            precedence over the named arguments above.

    Returns:
        The pruned schema dictionary.
    """

    def strip_hidden(node, hidden: set[str]) -> None:
        """Remove ``hidden`` names from a schema object's properties/required in place."""
        if not hidden or not isinstance(node, dict):
            return
        if "properties" in node:
            node["properties"] = {k: v for k, v in node["properties"].items() if k not in hidden}
        required = node.get("required")
        if isinstance(required, list):
            # A `required` entry naming a removed property would leave the
            # schema demanding a field it no longer describes.
            kept = [name for name in required if name not in hidden]
            if kept:
                node["required"] = kept
            else:
                del node["required"]

    call_kwargs: dict = {"by_alias": by_alias, "ref_template": ref_template, "mode": mode}
    if schema_generator is not None:
        call_kwargs["schema_generator"] = schema_generator
    call_kwargs.update(kwargs)
    schema = super().model_json_schema(**call_kwargs)

    # Nested models live under "$defs", keyed here by class name.
    definitions = schema.get("$defs", {})
    for title, excluded in cls._resolve_excluded_by_title(include_hidden).items():
        strip_hidden(definitions.get(title), excluded)

    # The root model's own properties sit at the top level of the schema.
    strip_hidden(schema, cls._excluded_fields(include_hidden))
    return schema
46
121
 
47
122
 
48
123
  class SourceCodeReference(LLMBaseModel):
@@ -114,6 +189,39 @@ class ClustersComponent(LLMBaseModel):
114
189
  description: str = Field(
115
190
  description="Explanation of what this component does, its main flow, WHY these clusters are grouped together, how it interacts with other cluster groups, and the most important classes/methods (by their exact qualified names from the clusters)"
116
191
  )
192
+ existing_component_id: str | None = Field(
193
+ default=None,
194
+ description=(
195
+ "Incremental routing: the exact component_id of the existing component "
196
+ "this entry is routing clusters into (e.g. '1.3'). Set to null to create "
197
+ "a brand-new component. Identity is by ID, not name — leaving this null "
198
+ "while reusing an existing component's name forks a duplicate component. "
199
+ "Ignored by the full-analysis flow."
200
+ ),
201
+ json_schema_extra={"hidden": True},
202
+ )
203
+ parent_id: str | None = Field(
204
+ default=None,
205
+ description=(
206
+ "Incremental routing: when ``existing_component_id`` is null (brand-new "
207
+ "component), the existing component_id under which the new component "
208
+ "should attach (or null to attach at root). Ignored when "
209
+ "``existing_component_id`` is set, and ignored by the full-analysis flow."
210
+ ),
211
+ json_schema_extra={"hidden": True},
212
+ )
213
+ redetail_needed: bool = Field(
214
+ default=True,
215
+ description=(
216
+ "Incremental routing only: when routing clusters into an existing component "
217
+ "(``existing_component_id`` is set), set False if the cluster delta is "
218
+ "cosmetic (refactor, internal rename, small bug fix) and the component's "
219
+ "high-level purpose is unchanged — the existing description stays. Default "
220
+ "True forces a full redetail. Ignored for brand-new components (always "
221
+ "redetailed) and by the full-analysis flow."
222
+ ),
223
+ json_schema_extra={"hidden": True},
224
+ )
117
225
 
118
226
  def llm_str(self):
119
227
  ids_str = ", ".join(str(cid) for cid in self.cluster_ids)
@@ -151,15 +259,6 @@ class MethodEntry(BaseModel):
151
259
  return NotImplemented
152
260
  return self.qualified_name == other.qualified_name
153
261
 
154
- @classmethod
155
- def from_method_change(cls, method_change) -> MethodEntry:
156
- return cls(
157
- qualified_name=method_change.qualified_name,
158
- start_line=method_change.start_line,
159
- end_line=method_change.end_line,
160
- node_type=method_change.node_type,
161
- )
162
-
163
262
  @classmethod
164
263
  def from_node(cls, node) -> MethodEntry:
165
264
  """Build from a ``static_analyzer.Node``. Accepts ``Any`` to avoid a hard dep."""
@@ -210,18 +309,21 @@ class Component(LLMBaseModel):
210
309
  description="List of cluster IDs from CFG analysis that this component encompasses (populated deterministically from source_group_names).",
211
310
  default_factory=list,
212
311
  exclude=True,
312
+ json_schema_extra={"hidden": True},
213
313
  )
214
314
 
215
315
  file_methods: list[FileMethodGroup] = Field(
216
316
  description="All methods/functions belonging to this component, grouped by file (populated deterministically from cluster results).",
217
317
  default_factory=list,
218
318
  exclude=True,
319
+ json_schema_extra={"hidden": True},
219
320
  )
220
321
 
221
322
  component_id: str = Field(
222
323
  default="",
223
324
  description="Deterministic unique identifier for this component.",
224
325
  exclude=True,
326
+ json_schema_extra={"hidden": True},
225
327
  )
226
328
 
227
329
  def llm_str(self):
@@ -247,6 +349,7 @@ class AnalysisInsights(LLMBaseModel):
247
349
  default_factory=dict,
248
350
  description="Top-level file index keyed by relative file path. Contains all methods and statuses.",
249
351
  exclude=True,
352
+ json_schema_extra={"hidden": True},
250
353
  )
251
354
  components: list[Component] = Field(description="List of the components identified in the project.")
252
355
  components_relations: list[Relation] = Field(description="List of relations among the components.")
@@ -264,7 +367,7 @@ class AnalysisInsights(LLMBaseModel):
264
367
  return {str(PurePosixPath(fg.file_path)): c.component_id for c in self.components for fg in c.file_methods}
265
368
 
266
369
 
267
- def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "") -> None:
370
+ def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "", only_new: bool = False) -> None:
268
371
  """Assign hierarchical component IDs based on sibling index.
269
372
 
270
373
  IDs encode structural position in the component tree:
@@ -272,11 +375,28 @@ def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "") -> Non
272
375
  - Under "1" (parent_id="1"): "1.1", "1.2"
273
376
  - Under "1.2" (parent_id="1.2"): "1.2.1", "1.2.2"
274
377
 
275
- These IDs serve as both component identifiers and cluster IDs,
276
- enabling hierarchical relationship generalization.
378
+ With ``only_new=True`` (incremental path), components that already carry a
379
+ populated ``component_id`` are preserved verbatim and only siblings with an
380
+ empty id are assigned a fresh slot — used when stitching new components into
381
+ an existing tree without renumbering survivors.
277
382
  """
278
- for idx, component in enumerate(analysis.components, start=1):
279
- component.component_id = f"{parent_id}.{idx}" if parent_id else str(idx)
383
+ if only_new:
384
+ used_indices: set[int] = set()
385
+ for component in analysis.components:
386
+ if not component.component_id:
387
+ continue
388
+ tail = component.component_id.split(".")[-1]
389
+ if tail.isdigit():
390
+ used_indices.add(int(tail))
391
+ next_idx = max(used_indices, default=0) + 1
392
+ for component in analysis.components:
393
+ if component.component_id:
394
+ continue
395
+ component.component_id = f"{parent_id}.{next_idx}" if parent_id else str(next_idx)
396
+ next_idx += 1
397
+ else:
398
+ for idx, component in enumerate(analysis.components, start=1):
399
+ component.component_id = f"{parent_id}.{idx}" if parent_id else str(idx)
280
400
 
281
401
  # Assign relation IDs by looking up component names (first occurrence wins for duplicates)
282
402
  name_to_id: dict[str, str] = {}
@@ -293,6 +413,29 @@ def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "") -> Non
293
413
  relation.dst_id = name_to_id.get(relation.dst_name, "")
294
414
 
295
415
 
416
def iter_components(
    root_analysis: AnalysisInsights,
    sub_analyses: dict[str, AnalysisInsights],
) -> list[Component]:
    """Collect the root analysis' components followed by those of every sub-analysis.

    Root components come first; sub-analysis components follow in the
    iteration order of ``sub_analyses``. The result is a new list, so the
    component lists held by the analyses are left untouched.
    """
    collected = [*root_analysis.components]
    collected += (component for sub in sub_analyses.values() for component in sub.components)
    return collected
425
+
426
+
427
def index_components_by_id(
    root_analysis: AnalysisInsights,
    sub_analyses: dict[str, AnalysisInsights],
) -> dict[str, Component]:
    """Index every component of the root and sub-analyses by its ``component_id``.

    Components whose ``component_id`` is empty are left out. When the same id
    occurs more than once, the component encountered last replaces the earlier
    one; since root components are visited first, a sub-analysis entry wins
    over a root entry with the same id.
    """
    lookup: dict[str, Component] = {}
    for component in iter_components(root_analysis, sub_analyses):
        if component.component_id:
            lookup[component.component_id] = component
    return lookup
437
+
438
+
296
439
  class CFGComponent(LLMBaseModel):
297
440
  """A component derived from control flow graph analysis."""
298
441
 
@@ -416,6 +559,17 @@ class ComponentFiles(LLMBaseModel):
416
559
  return title + body
417
560
 
418
561
 
562
class ScopeRelations(LLMBaseModel):
    """Container for the relations that link components of one scope."""

    components_relations: list[Relation] = Field(description="Inter-component relationships within this scope.")

    def llm_str(self):
        """Return one rendered relation per line, or a placeholder when there are none."""
        rendered = [relation.llm_str() for relation in self.components_relations]
        return "\n".join(rendered) if rendered else "No relations found."
571
+
572
+
419
573
  class FilePath(LLMBaseModel):
420
574
  """File path with optional line range reference."""
421
575
 
@@ -32,7 +32,7 @@ from static_analyzer.cluster_relations import (
32
32
  build_node_to_component_map,
33
33
  merge_relations,
34
34
  )
35
- from static_analyzer.constants import CALLABLE_TYPES, CLASS_TYPES, NodeType
35
+ from static_analyzer.constants import CALLABLE_TYPES, CLASS_TYPES, Language, NodeType
36
36
  from static_analyzer.graph import CallGraph, ClusterResult
37
37
  from static_analyzer.node import Node
38
38
 
@@ -70,7 +70,7 @@ class ClusterMethodsMixin:
70
70
 
71
71
  def _build_cluster_string(
72
72
  self,
73
- programming_langs: list[str],
73
+ programming_langs: list[Language],
74
74
  cluster_results: dict[str, ClusterResult],
75
75
  cluster_ids: set[int] | None = None,
76
76
  prompt_overhead_chars: int = 0,
@@ -110,7 +110,7 @@ class ClusterMethodsMixin:
110
110
 
111
111
  def _render_cluster_string(
112
112
  self,
113
- programming_langs: list[str],
113
+ programming_langs: list[Language],
114
114
  cluster_results: dict[str, ClusterResult],
115
115
  cluster_ids: set[int] | None,
116
116
  skip_sets: dict[str, set[str]],
@@ -146,7 +146,7 @@ class ClusterMethodsMixin:
146
146
 
147
147
  def _plan_skip_sets(
148
148
  self,
149
- programming_langs: list[str],
149
+ programming_langs: list[Language],
150
150
  cluster_results: dict[str, ClusterResult],
151
151
  prompt_overhead_chars: int,
152
152
  ) -> dict[str, set[str]]:
@@ -472,7 +472,9 @@ class ClusterMethodsMixin:
472
472
  """
473
473
  all_nodes: dict[str, Node] = {}
474
474
  for lang in cluster_results:
475
- cfg = cfg_graphs[lang] if cfg_graphs and lang in cfg_graphs else self.static_analysis.get_cfg(lang)
475
+ cfg = (
476
+ cfg_graphs[lang] if cfg_graphs and lang in cfg_graphs else self.static_analysis.get_cfg(Language(lang))
477
+ )
476
478
  all_nodes.update(cfg.nodes)
477
479
  return all_nodes
478
480
 
@@ -492,7 +494,9 @@ class ClusterMethodsMixin:
492
494
  """
493
495
  graphs: dict[str, nx.Graph] = {}
494
496
  for lang in cluster_results:
495
- cfg = cfg_graphs[lang] if cfg_graphs and lang in cfg_graphs else self.static_analysis.get_cfg(lang)
497
+ cfg = (
498
+ cfg_graphs[lang] if cfg_graphs and lang in cfg_graphs else self.static_analysis.get_cfg(Language(lang))
499
+ )
496
500
  graphs[lang] = cfg.to_networkx().to_undirected()
497
501
  return graphs
498
502
 
@@ -703,7 +707,7 @@ class ClusterMethodsMixin:
703
707
  pct = (assigned_nodes / total_nodes * 100) if total_nodes else 0
704
708
  logger.info(f"Node coverage: {assigned_nodes}/{total_nodes} ({pct:.1f}%) nodes assigned to components")
705
709
 
706
- def _build_files_index(self, analysis: AnalysisInsights) -> dict[str, FileEntry]:
710
+ def build_files_index(self, analysis: AnalysisInsights) -> dict[str, FileEntry]:
707
711
  files: dict[str, FileEntry] = {}
708
712
  for component in analysis.components:
709
713
  for fmg in component.file_methods:
@@ -761,7 +765,7 @@ class ClusterMethodsMixin:
761
765
  for comp in analysis.components:
762
766
  comp.file_methods = self._build_file_methods_from_nodes(component_nodes.get(comp.component_id, []))
763
767
 
764
- analysis.files = self._build_files_index(analysis)
768
+ analysis.files = self.build_files_index(analysis)
765
769
 
766
770
  self._log_node_coverage(analysis, len(all_nodes))
767
771
 
@@ -784,3 +788,44 @@ class ClusterMethodsMixin:
784
788
  node_to_component = build_node_to_component_map(analysis)
785
789
  static_relations = build_component_relations(node_to_component, cfg_graphs)
786
790
  analysis.components_relations = merge_relations(analysis.components_relations, static_relations, analysis)
791
+
792
def build_scope_cfg_string(self, analysis: AnalysisInsights) -> str:
    """Summarise CFG edges that cross component boundaries as text for the LLM.

    Every edge of every language's CFG is looked up in the node-to-component
    map; edges whose two endpoints belong to different components are grouped
    by (source component, destination component). Groups are emitted in sorted
    key order, each as a header line followed by at most ten edges, with node
    names shortened to the segment after their last dot:

        ComponentA -> ComponentB (3 edges):
        MethodX -> MethodY
        MethodZ -> MethodW

    Component ids are shown as component names when the id is known to
    ``analysis``; otherwise the raw id is printed.

    Returns:
        The rendered summary, or a fixed sentence when no cross-component
        edge exists.
    """
    node_to_component = build_node_to_component_map(analysis)
    id_to_name = {component.component_id: component.name for component in analysis.components}
    cfg_graphs = {lang: self.static_analysis.get_cfg(lang) for lang in self.static_analysis.get_languages()}

    grouped: dict[tuple[str, str], list[tuple[str, str]]] = defaultdict(list)
    for graph in cfg_graphs.values():
        for edge in graph.edges:
            source = edge.get_source()
            target = edge.get_destination()
            source_owner = node_to_component.get(source)
            target_owner = node_to_component.get(target)
            # Skip edges with an unassigned endpoint and edges internal to one component.
            if not source_owner or not target_owner or source_owner == target_owner:
                continue
            grouped[(source_owner, target_owner)].append((source, target))

    if not grouped:
        return "No cross-component communication edges found."

    output: list[str] = []
    for pair in sorted(grouped):
        src_id, dst_id = pair
        pair_edges = grouped[pair]
        total = len(pair_edges)
        src_label = id_to_name.get(src_id, src_id)
        dst_label = id_to_name.get(dst_id, dst_id)
        plural = "" if total == 1 else "s"
        output.append(f"\n{src_label} -> {dst_label} ({total} edge{plural}):")
        for source, target in pair_edges[:10]:
            short_source = source.rpartition(".")[2]
            short_target = target.rpartition(".")[2]
            output.append(f" {short_source} -> {short_target}")
        if total > 10:
            output.append(f" ... and {total - 10} more")

    return "\n".join(output)