codeboarding 0.10.3__tar.gz → 0.10.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. {codeboarding-0.10.3/codeboarding.egg-info → codeboarding-0.10.4}/PKG-INFO +1 -1
  2. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/abstraction_agent.py +14 -2
  3. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/agent_responses.py +3 -16
  4. codeboarding-0.10.4/agents/cluster_budget.py +21 -0
  5. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/cluster_methods_mixin.py +170 -17
  6. codeboarding-0.10.4/agents/constants.py +38 -0
  7. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/details_agent.py +12 -2
  8. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/llm_config.py +67 -40
  9. codeboarding-0.10.4/agents/model_capabilities.py +217 -0
  10. {codeboarding-0.10.3 → codeboarding-0.10.4/codeboarding.egg-info}/PKG-INFO +1 -1
  11. {codeboarding-0.10.3 → codeboarding-0.10.4}/codeboarding.egg-info/SOURCES.txt +3 -0
  12. {codeboarding-0.10.3 → codeboarding-0.10.4}/diagram_analysis/analysis_json.py +146 -115
  13. {codeboarding-0.10.3 → codeboarding-0.10.4}/diagram_analysis/diagram_generator.py +0 -59
  14. {codeboarding-0.10.3 → codeboarding-0.10.4}/diagram_analysis/incremental_updater.py +50 -61
  15. {codeboarding-0.10.3 → codeboarding-0.10.4}/diagram_analysis/run_context.py +3 -0
  16. {codeboarding-0.10.3 → codeboarding-0.10.4}/pyproject.toml +1 -1
  17. {codeboarding-0.10.3 → codeboarding-0.10.4}/repo_utils/method_diff.py +53 -68
  18. codeboarding-0.10.4/static_analyzer/cfg_skip_planner.py +207 -0
  19. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/graph.py +85 -27
  20. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_tool_registry.py +10 -2
  21. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_user_config.py +34 -1
  22. {codeboarding-0.10.3 → codeboarding-0.10.4}/tool_registry/registry.py +4 -3
  23. {codeboarding-0.10.3 → codeboarding-0.10.4}/user_config.py +23 -6
  24. {codeboarding-0.10.3 → codeboarding-0.10.4}/vscode_constants.py +2 -2
  25. codeboarding-0.10.3/agents/constants.py +0 -13
  26. {codeboarding-0.10.3 → codeboarding-0.10.4}/LICENSE +0 -0
  27. {codeboarding-0.10.3 → codeboarding-0.10.4}/PYPI.md +0 -0
  28. {codeboarding-0.10.3 → codeboarding-0.10.4}/README.md +0 -0
  29. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/__init__.py +0 -0
  30. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/agent.py +0 -0
  31. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/change_status.py +0 -0
  32. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/dependency_discovery.py +0 -0
  33. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/meta_agent.py +0 -0
  34. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/planner_agent.py +0 -0
  35. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/prompts/__init__.py +0 -0
  36. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/prompts/abstract_prompt_factory.py +0 -0
  37. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/prompts/claude_prompts.py +0 -0
  38. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/prompts/deepseek_prompts.py +0 -0
  39. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/prompts/gemini_flash_prompts.py +0 -0
  40. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/prompts/glm_prompts.py +0 -0
  41. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/prompts/gpt_prompts.py +0 -0
  42. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/prompts/kimi_prompts.py +0 -0
  43. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/prompts/prompt_factory.py +0 -0
  44. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/__init__.py +0 -0
  45. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/base.py +0 -0
  46. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/get_external_deps.py +0 -0
  47. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/get_method_invocations.py +0 -0
  48. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/read_cfg.py +0 -0
  49. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/read_docs.py +0 -0
  50. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/read_file.py +0 -0
  51. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/read_file_structure.py +0 -0
  52. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/read_git_diff.py +0 -0
  53. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/read_packages.py +0 -0
  54. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/read_source.py +0 -0
  55. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/read_structure.py +0 -0
  56. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/tools/toolkit.py +0 -0
  57. {codeboarding-0.10.3 → codeboarding-0.10.4}/agents/validation.py +0 -0
  58. {codeboarding-0.10.3 → codeboarding-0.10.4}/caching/__init__.py +0 -0
  59. {codeboarding-0.10.3 → codeboarding-0.10.4}/caching/cache.py +0 -0
  60. {codeboarding-0.10.3 → codeboarding-0.10.4}/caching/details_cache.py +0 -0
  61. {codeboarding-0.10.3 → codeboarding-0.10.4}/caching/meta_cache.py +0 -0
  62. {codeboarding-0.10.3 → codeboarding-0.10.4}/codeboarding.egg-info/dependency_links.txt +0 -0
  63. {codeboarding-0.10.3 → codeboarding-0.10.4}/codeboarding.egg-info/entry_points.txt +0 -0
  64. {codeboarding-0.10.3 → codeboarding-0.10.4}/codeboarding.egg-info/requires.txt +0 -0
  65. {codeboarding-0.10.3 → codeboarding-0.10.4}/codeboarding.egg-info/top_level.txt +0 -0
  66. {codeboarding-0.10.3 → codeboarding-0.10.4}/constants.py +0 -0
  67. {codeboarding-0.10.3 → codeboarding-0.10.4}/core/__init__.py +0 -0
  68. {codeboarding-0.10.3 → codeboarding-0.10.4}/core/plugin_loader.py +0 -0
  69. {codeboarding-0.10.3 → codeboarding-0.10.4}/core/protocols.py +0 -0
  70. {codeboarding-0.10.3 → codeboarding-0.10.4}/core/registry.py +0 -0
  71. {codeboarding-0.10.3 → codeboarding-0.10.4}/diagram_analysis/__init__.py +0 -0
  72. {codeboarding-0.10.3 → codeboarding-0.10.4}/diagram_analysis/file_coverage.py +0 -0
  73. {codeboarding-0.10.3 → codeboarding-0.10.4}/diagram_analysis/incremental_types.py +0 -0
  74. {codeboarding-0.10.3 → codeboarding-0.10.4}/diagram_analysis/io_utils.py +0 -0
  75. {codeboarding-0.10.3 → codeboarding-0.10.4}/diagram_analysis/version.py +0 -0
  76. {codeboarding-0.10.3 → codeboarding-0.10.4}/duckdb_crud.py +0 -0
  77. {codeboarding-0.10.3 → codeboarding-0.10.4}/github_action.py +0 -0
  78. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/__init__.py +0 -0
  79. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/checks/__init__.py +0 -0
  80. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/checks/circular_deps.py +0 -0
  81. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/checks/cohesion.py +0 -0
  82. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/checks/coupling.py +0 -0
  83. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/checks/function_size.py +0 -0
  84. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/checks/god_class.py +0 -0
  85. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/checks/inheritance.py +0 -0
  86. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/checks/instability.py +0 -0
  87. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/checks/unused_code_diagnostics.py +0 -0
  88. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/config.py +0 -0
  89. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/constants.py +0 -0
  90. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/models.py +0 -0
  91. {codeboarding-0.10.3 → codeboarding-0.10.4}/health/runner.py +0 -0
  92. {codeboarding-0.10.3 → codeboarding-0.10.4}/health_main.py +0 -0
  93. {codeboarding-0.10.3 → codeboarding-0.10.4}/install.py +0 -0
  94. {codeboarding-0.10.3 → codeboarding-0.10.4}/logging_config.py +0 -0
  95. {codeboarding-0.10.3 → codeboarding-0.10.4}/main.py +0 -0
  96. {codeboarding-0.10.3 → codeboarding-0.10.4}/monitoring/__init__.py +0 -0
  97. {codeboarding-0.10.3 → codeboarding-0.10.4}/monitoring/callbacks.py +0 -0
  98. {codeboarding-0.10.3 → codeboarding-0.10.4}/monitoring/context.py +0 -0
  99. {codeboarding-0.10.3 → codeboarding-0.10.4}/monitoring/mixin.py +0 -0
  100. {codeboarding-0.10.3 → codeboarding-0.10.4}/monitoring/paths.py +0 -0
  101. {codeboarding-0.10.3 → codeboarding-0.10.4}/monitoring/stats.py +0 -0
  102. {codeboarding-0.10.3 → codeboarding-0.10.4}/monitoring/writers.py +0 -0
  103. {codeboarding-0.10.3 → codeboarding-0.10.4}/output_generators/__init__.py +0 -0
  104. {codeboarding-0.10.3 → codeboarding-0.10.4}/output_generators/html.py +0 -0
  105. {codeboarding-0.10.3 → codeboarding-0.10.4}/output_generators/html_template.py +0 -0
  106. {codeboarding-0.10.3 → codeboarding-0.10.4}/output_generators/markdown.py +0 -0
  107. {codeboarding-0.10.3 → codeboarding-0.10.4}/output_generators/mdx.py +0 -0
  108. {codeboarding-0.10.3 → codeboarding-0.10.4}/output_generators/sphinx.py +0 -0
  109. {codeboarding-0.10.3 → codeboarding-0.10.4}/repo_utils/__init__.py +0 -0
  110. {codeboarding-0.10.3 → codeboarding-0.10.4}/repo_utils/change_detector.py +0 -0
  111. {codeboarding-0.10.3 → codeboarding-0.10.4}/repo_utils/errors.py +0 -0
  112. {codeboarding-0.10.3 → codeboarding-0.10.4}/repo_utils/git_diff.py +0 -0
  113. {codeboarding-0.10.3 → codeboarding-0.10.4}/repo_utils/ignore.py +0 -0
  114. {codeboarding-0.10.3 → codeboarding-0.10.4}/setup.cfg +0 -0
  115. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/__init__.py +0 -0
  116. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/analysis_cache.py +0 -0
  117. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/analysis_result.py +0 -0
  118. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/cluster_change_analyzer.py +0 -0
  119. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/cluster_helpers.py +0 -0
  120. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/cluster_relations.py +0 -0
  121. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/constants.py +0 -0
  122. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/csharp_config_scanner.py +0 -0
  123. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/__init__.py +0 -0
  124. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/adapters/__init__.py +0 -0
  125. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/adapters/csharp_adapter.py +0 -0
  126. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/adapters/go_adapter.py +0 -0
  127. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/adapters/java_adapter.py +0 -0
  128. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/adapters/php_adapter.py +0 -0
  129. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/adapters/python_adapter.py +0 -0
  130. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/adapters/rust_adapter.py +0 -0
  131. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/adapters/typescript_adapter.py +0 -0
  132. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/call_graph_builder.py +0 -0
  133. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/edge_build_context.py +0 -0
  134. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/edge_builder.py +0 -0
  135. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/hierarchy_builder.py +0 -0
  136. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/language_adapter.py +0 -0
  137. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/lsp_client.py +0 -0
  138. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/lsp_constants.py +0 -0
  139. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/models.py +0 -0
  140. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/progress.py +0 -0
  141. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/protocols.py +0 -0
  142. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/result_converter.py +0 -0
  143. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/source_inspector.py +0 -0
  144. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/symbol_table.py +0 -0
  145. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/engine/utils.py +0 -0
  146. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/git_diff_analyzer.py +0 -0
  147. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/incremental_orchestrator.py +0 -0
  148. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/java_config_scanner.py +0 -0
  149. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/java_utils.py +0 -0
  150. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/lsp_client/__init__.py +0 -0
  151. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/lsp_client/diagnostics.py +0 -0
  152. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/node.py +0 -0
  153. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/programming_language.py +0 -0
  154. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/reference_resolve_mixin.py +0 -0
  155. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/scanner.py +0 -0
  156. {codeboarding-0.10.3 → codeboarding-0.10.4}/static_analyzer/typescript_config_scanner.py +0 -0
  157. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_github_action.py +0 -0
  158. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_install.py +0 -0
  159. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_logging_config.py +0 -0
  160. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_main.py +0 -0
  161. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_pyproject_packages.py +0 -0
  162. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_registry_coverage.py +0 -0
  163. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_vscode_constants.py +0 -0
  164. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_windows_compatibility.py +0 -0
  165. {codeboarding-0.10.3 → codeboarding-0.10.4}/tests/test_windows_encoding.py +0 -0
  166. {codeboarding-0.10.3 → codeboarding-0.10.4}/tool_registry/__init__.py +0 -0
  167. {codeboarding-0.10.3 → codeboarding-0.10.4}/tool_registry/installers.py +0 -0
  168. {codeboarding-0.10.3 → codeboarding-0.10.4}/tool_registry/manifest.py +0 -0
  169. {codeboarding-0.10.3 → codeboarding-0.10.4}/tool_registry/paths.py +0 -0
  170. {codeboarding-0.10.3 → codeboarding-0.10.4}/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeboarding
3
- Version: 0.10.3
3
+ Version: 0.10.4
4
4
  Summary: Interactive Diagrams for Code
5
5
  Author: CodeBoarding Team
6
6
  License-Expression: MIT
@@ -70,8 +70,20 @@ class AbstractionAgent(ClusterMethodsMixin, CodeBoardingAgent):
70
70
 
71
71
  programming_langs = self.static_analysis.get_languages()
72
72
 
73
- # Build cluster string using the pre-computed cluster results
74
- cluster_str = self._build_cluster_string(programming_langs, cluster_results)
73
+ # Measure everything that wraps cfg_clusters (system message + rendered
74
+ # template with an empty slot) so the skip planner can back it out of
75
+ # the input window before budgeting the cluster string.
76
+ overhead_chars = len(str(self.system_message.content)) + len(
77
+ self.prompts["group_clusters"].format(
78
+ project_name=self.project_name,
79
+ cfg_clusters="",
80
+ meta_context=meta_context_str,
81
+ project_type=project_type,
82
+ )
83
+ )
84
+ cluster_str = self._build_cluster_string(
85
+ programming_langs, cluster_results, prompt_overhead_chars=overhead_chars
86
+ )
75
87
 
76
88
  prompt = self.prompts["group_clusters"].format(
77
89
  project_name=self.project_name,
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import abc
2
4
  import logging
3
5
  from abc import abstractmethod
@@ -6,8 +8,6 @@ from typing import get_origin, Optional
6
8
 
7
9
  from pydantic import BaseModel, Field
8
10
 
9
- from agents.change_status import ChangeStatus
10
-
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
13
 
@@ -142,10 +142,6 @@ class MethodEntry(BaseModel):
142
142
  start_line: int = Field(description="Starting line number in the file.")
143
143
  end_line: int = Field(description="Ending line number in the file.")
144
144
  node_type: str = Field(description="Node type name matching NodeType enum (e.g. METHOD, FUNCTION, CLASS).")
145
- status: ChangeStatus = Field(
146
- default=ChangeStatus.UNCHANGED,
147
- description="Diff status of this method: added, modified, deleted, or unchanged.",
148
- )
149
145
 
150
146
  def __hash__(self) -> int:
151
147
  return hash(self.qualified_name)
@@ -156,13 +152,12 @@ class MethodEntry(BaseModel):
156
152
  return self.qualified_name == other.qualified_name
157
153
 
158
154
  @classmethod
159
- def from_method_change(cls, method_change, *, status_override: ChangeStatus | None = None) -> "MethodEntry":
155
+ def from_method_change(cls, method_change) -> MethodEntry:
160
156
  return cls(
161
157
  qualified_name=method_change.qualified_name,
162
158
  start_line=method_change.start_line,
163
159
  end_line=method_change.end_line,
164
160
  node_type=method_change.node_type,
165
- status=status_override or method_change.change_type,
166
161
  )
167
162
 
168
163
 
@@ -170,10 +165,6 @@ class FileMethodGroup(BaseModel):
170
165
  """All methods/functions belonging to a component within a single file."""
171
166
 
172
167
  file_path: str = Field(description="Relative path to the source file.")
173
- file_status: ChangeStatus = Field(
174
- default=ChangeStatus.UNCHANGED,
175
- description="Diff status of this file: added, modified, deleted, renamed, or unchanged.",
176
- )
177
168
  methods: list[MethodEntry] = Field(
178
169
  default_factory=list,
179
170
  description="Methods and functions in this file that belong to the component, sorted by start_line.",
@@ -183,10 +174,6 @@ class FileMethodGroup(BaseModel):
183
174
  class FileEntry(BaseModel):
184
175
  """Single source of truth for methods in one file."""
185
176
 
186
- file_status: ChangeStatus = Field(
187
- default=ChangeStatus.UNCHANGED,
188
- description="Diff status of this file: added, modified, deleted, renamed, or unchanged.",
189
- )
190
177
  methods: list[MethodEntry] = Field(
191
178
  default_factory=list,
192
179
  description="Methods and functions in this file, sorted by start line.",
@@ -0,0 +1,21 @@
1
+ from dataclasses import dataclass
2
+
3
+ from agents.constants import ModelCapabilities
4
+
5
+ OUTPUT_HEADROOM_TOKENS = 8_000
6
+ CONTEXT_MARGIN = 0.9
7
+
8
+
9
+ @dataclass(frozen=True)
10
+ class ClusterPromptBudget:
11
+ """Character budget for the full rendered ``cfg_clusters`` prompt slot."""
12
+
13
+ input_tokens: int
14
+ output_headroom_tokens: int = OUTPUT_HEADROOM_TOKENS
15
+ chars_per_token: float = ModelCapabilities.CHARS_PER_TOKEN
16
+ margin: float = CONTEXT_MARGIN
17
+
18
+ def available_chars(self, prompt_overhead_chars: int) -> int:
19
+ prompt_overhead_tokens = prompt_overhead_chars / self.chars_per_token
20
+ available_tokens = (self.input_tokens - self.output_headroom_tokens - prompt_overhead_tokens) * self.margin
21
+ return int(available_tokens * self.chars_per_token)
@@ -1,7 +1,9 @@
1
1
  import logging
2
2
  import os
3
3
  from collections import defaultdict
4
+ from dataclasses import dataclass
4
5
  from pathlib import Path
6
+ from typing import NoReturn
5
7
 
6
8
  import networkx as nx
7
9
 
@@ -13,8 +15,11 @@ from agents.agent_responses import (
13
15
  FileMethodGroup,
14
16
  MethodEntry,
15
17
  )
18
+ from agents.cluster_budget import ClusterPromptBudget
19
+ from agents.llm_config import get_current_agent_context_window
16
20
  from constants import MIN_CLUSTERS_THRESHOLD
17
21
  from static_analyzer.analysis_result import StaticAnalysisResults
22
+ from static_analyzer.cfg_skip_planner import ContextBudgetExceededError, plan_skip_set
18
23
  from static_analyzer.cluster_helpers import (
19
24
  MAX_LLM_CLUSTERS,
20
25
  enforce_cross_language_budget,
@@ -34,6 +39,13 @@ from static_analyzer.node import Node
34
39
  logger = logging.getLogger(__name__)
35
40
 
36
41
 
42
+ @dataclass(frozen=True)
43
+ class _RenderedClusterString:
44
+ text: str
45
+ by_language: dict[str, str]
46
+ cluster_ids: set[int]
47
+
48
+
37
49
  class ClusterMethodsMixin:
38
50
  """
39
51
  Mixin providing shared cluster-related functionality for agents.
@@ -61,6 +73,7 @@ class ClusterMethodsMixin:
61
73
  programming_langs: list[str],
62
74
  cluster_results: dict[str, ClusterResult],
63
75
  cluster_ids: set[int] | None = None,
76
+ prompt_overhead_chars: int = 0,
64
77
  ) -> str:
65
78
  """
66
79
  Build a cluster string for LLM consumption using pre-computed cluster results.
@@ -69,29 +82,59 @@ class ClusterMethodsMixin:
69
82
  programming_langs: List of languages to include
70
83
  cluster_results: Pre-computed cluster results mapping language -> ClusterResult
71
84
  cluster_ids: Optional set of cluster IDs to filter by
85
+ prompt_overhead_chars: Characters used by everything else in the
86
+ prompt (system message + rendered template with an empty
87
+ ``cfg_clusters`` slot). The skip planner subtracts this from
88
+ the model's input window before computing the char budget for
89
+ the cluster string.
72
90
 
73
91
  Returns:
74
92
  Formatted cluster string with headers per language
75
93
  """
76
- cluster_lines = []
94
+ rendered = self._render_cluster_string(programming_langs, cluster_results, cluster_ids, {})
95
+ if cluster_ids:
96
+ return rendered.text
97
+
98
+ char_budget = self._cluster_prompt_budget(prompt_overhead_chars)
99
+ if len(rendered.text) <= char_budget:
100
+ return rendered.text
101
+
102
+ per_lang_skip = self._plan_skip_sets(programming_langs, cluster_results, prompt_overhead_chars)
103
+ rendered_with_skips = self._render_cluster_string(
104
+ programming_langs, cluster_results, cluster_ids, per_lang_skip
105
+ )
106
+ if len(rendered_with_skips.text) > char_budget:
107
+ self._raise_cluster_budget_error(char_budget, rendered_with_skips, per_lang_skip)
108
+
109
+ return rendered_with_skips.text
110
+
111
+ def _render_cluster_string(
112
+ self,
113
+ programming_langs: list[str],
114
+ cluster_results: dict[str, ClusterResult],
115
+ cluster_ids: set[int] | None,
116
+ skip_sets: dict[str, set[str]],
117
+ ) -> _RenderedClusterString:
118
+ cluster_lines: list[str] = []
119
+ by_language: dict[str, str] = {}
77
120
  all_cluster_ids: set[int] = set()
78
121
 
79
122
  for lang in programming_langs:
80
123
  cfg = self.static_analysis.get_cfg(lang)
81
- # Get cluster result for this language
82
124
  cluster_result = cluster_results.get(lang)
83
- cluster_str = cfg.to_cluster_string(cluster_ids, cluster_result)
125
+ cluster_str = cfg.to_cluster_string(
126
+ cluster_ids or set(), cluster_result, skip_nodes=skip_sets.get(lang, set())
127
+ )
84
128
 
85
129
  if cluster_str.strip() and cluster_str not in ("empty", "none", "No clusters found."):
86
130
  header = "Component CFG" if cluster_ids else "Clusters"
87
- cluster_lines.append(f"\n## {lang.capitalize()} - {header}\n")
88
- cluster_lines.append(cluster_str)
89
- cluster_lines.append("\n")
131
+ lang_text = f"\n## {lang.capitalize()} - {header}\n{cluster_str}\n"
132
+ cluster_lines.append(lang_text)
133
+ by_language[lang] = lang_text
90
134
  if cluster_result:
91
135
  lang_ids = cluster_ids if cluster_ids else cluster_result.get_cluster_ids()
92
136
  all_cluster_ids.update(lang_ids)
93
137
 
94
- # Add explicit ID checklist so the LLM knows exactly which IDs to assign
95
138
  if all_cluster_ids and not cluster_ids:
96
139
  sorted_cluster_ids = sorted(all_cluster_ids)
97
140
  cluster_lines.append(
@@ -99,7 +142,117 @@ class ClusterMethodsMixin:
99
142
  f"Every one of these IDs: {sorted_cluster_ids} must appear in exactly one group."
100
143
  )
101
144
 
102
- return "".join(cluster_lines)
145
+ return _RenderedClusterString(text="".join(cluster_lines), by_language=by_language, cluster_ids=all_cluster_ids)
146
+
147
+ def _plan_skip_sets(
148
+ self,
149
+ programming_langs: list[str],
150
+ cluster_results: dict[str, ClusterResult],
151
+ prompt_overhead_chars: int,
152
+ ) -> dict[str, set[str]]:
153
+ """Compute per-language skip sets so the final combined cluster string fits."""
154
+ char_budget = self._cluster_prompt_budget(prompt_overhead_chars)
155
+ if char_budget <= 0:
156
+ ctx = get_current_agent_context_window()
157
+ msg = (
158
+ f"Prompt overhead ({prompt_overhead_chars} chars) consumes the entire agent input "
159
+ f"window ({ctx.input_tokens} tokens); no room for cluster renderings."
160
+ )
161
+ logger.error("[CFG skip planner] %s", msg)
162
+ raise ContextBudgetExceededError(msg)
163
+
164
+ langs_with_clusters = [l for l in programming_langs if cluster_results.get(l)]
165
+ if not langs_with_clusters:
166
+ return {}
167
+
168
+ skip_sets: dict[str, set[str]] = {}
169
+ rendered = self._render_cluster_string(programming_langs, cluster_results, None, skip_sets)
170
+ if len(rendered.text) <= char_budget:
171
+ return skip_sets
172
+
173
+ max_iterations = max(1, len(langs_with_clusters) * 5)
174
+ for _ in range(max_iterations):
175
+ deficit = len(rendered.text) - char_budget
176
+ ordered_langs = sorted(
177
+ langs_with_clusters,
178
+ key=lambda lang: len(rendered.by_language.get(lang, "")),
179
+ reverse=True,
180
+ )
181
+ progressed = False
182
+
183
+ for lang in ordered_langs:
184
+ lang_text = rendered.by_language.get(lang, "")
185
+ current_len = len(lang_text)
186
+ if current_len == 0:
187
+ continue
188
+
189
+ for target in self._language_budget_targets(current_len, deficit):
190
+ try:
191
+ skip = plan_skip_set(self.static_analysis.get_cfg(lang), cluster_results[lang], target)
192
+ except ContextBudgetExceededError:
193
+ continue
194
+
195
+ if skip == skip_sets.get(lang, set()):
196
+ continue
197
+
198
+ trial_skip_sets = dict(skip_sets)
199
+ if skip:
200
+ trial_skip_sets[lang] = skip
201
+ else:
202
+ trial_skip_sets.pop(lang, None)
203
+
204
+ trial_rendered = self._render_cluster_string(
205
+ programming_langs, cluster_results, None, trial_skip_sets
206
+ )
207
+ if len(trial_rendered.text) >= len(rendered.text):
208
+ continue
209
+
210
+ skip_sets = trial_skip_sets
211
+ rendered = trial_rendered
212
+ progressed = True
213
+ break
214
+
215
+ if progressed:
216
+ break
217
+
218
+ if len(rendered.text) <= char_budget:
219
+ return skip_sets
220
+ if not progressed:
221
+ break
222
+
223
+ self._raise_cluster_budget_error(char_budget, rendered, skip_sets)
224
+
225
+ @staticmethod
226
+ def _language_budget_targets(current_len: int, deficit: int) -> list[int]:
227
+ exact_target = max(0, current_len - deficit)
228
+ targets = {
229
+ exact_target,
230
+ int(current_len * 0.9),
231
+ int(current_len * 0.75),
232
+ int(current_len * 0.5),
233
+ 0,
234
+ }
235
+ return sorted((target for target in targets if target < current_len), reverse=True)
236
+
237
+ @staticmethod
238
+ def _raise_cluster_budget_error(
239
+ char_budget: int,
240
+ rendered: _RenderedClusterString,
241
+ skip_sets: dict[str, set[str]],
242
+ ) -> NoReturn:
243
+ per_lang_sizes = {lang: len(text) for lang, text in rendered.by_language.items()}
244
+ skipped_counts = {lang: len(skip) for lang, skip in skip_sets.items() if skip}
245
+ msg = (
246
+ f"Cluster render {len(rendered.text)} chars exceeds budget {char_budget}. "
247
+ f"Per-language sizes: {per_lang_sizes}; skipped nodes: {skipped_counts}."
248
+ )
249
+ logger.error("[CFG skip planner] %s", msg)
250
+ raise ContextBudgetExceededError(msg)
251
+
252
+ @staticmethod
253
+ def _cluster_prompt_budget(prompt_overhead_chars: int) -> int:
254
+ ctx = get_current_agent_context_window()
255
+ return ClusterPromptBudget(input_tokens=ctx.input_tokens).available_chars(prompt_overhead_chars)
103
256
 
104
257
  def _ensure_unique_key_entities(self, analysis: AnalysisInsights):
105
258
  """
@@ -247,11 +400,11 @@ class ClusterMethodsMixin:
247
400
  logger.warning(f"Component {component.name} has no assigned files")
248
401
  return "No assigned files found for this component.", {}, {}
249
402
 
250
- # Convert files to absolute paths for comparison
251
- assigned_file_set = set()
252
- for f in component_files:
253
- abs_path = os.path.join(self.repo_dir, f) if not os.path.isabs(f) else f
254
- assigned_file_set.add(abs_path)
403
+ # Collect qualified names for method-level filtering
404
+ assigned_qnames: set[str] = set()
405
+ for group in component.file_methods:
406
+ for method in group.methods:
407
+ assigned_qnames.add(method.qualified_name)
255
408
 
256
409
  cluster_results: dict[str, ClusterResult] = {}
257
410
  subgraph_cfgs: dict[str, CallGraph] = {}
@@ -259,8 +412,8 @@ class ClusterMethodsMixin:
259
412
  for lang in self.static_analysis.get_languages():
260
413
  cfg = self.static_analysis.get_cfg(lang)
261
414
 
262
- # Use strict filtering logic
263
- sub_cfg = cfg.filter_by_files(assigned_file_set)
415
+ # Filter by exact method set to prevent scope leakage
416
+ sub_cfg = cfg.filter_by_nodes(assigned_qnames)
264
417
 
265
418
  if sub_cfg.nodes:
266
419
  subgraph_cfgs[lang] = sub_cfg
@@ -299,7 +452,7 @@ class ClusterMethodsMixin:
299
452
  result = "".join(result_parts)
300
453
 
301
454
  if not result.strip():
302
- logger.warning(f"No CFG found for component {component.name} with {len(component_files)} files")
455
+ logger.warning(f"No CFG found for component {component.name} with {len(assigned_qnames)} methods")
303
456
  return "No relevant CFG clusters found for this component.", cluster_results, subgraph_cfgs
304
457
 
305
458
  return result, cluster_results, subgraph_cfgs
@@ -556,7 +709,7 @@ class ClusterMethodsMixin:
556
709
  for fmg in component.file_methods:
557
710
  entry = files.get(fmg.file_path)
558
711
  if entry is None:
559
- entry = FileEntry(file_status=fmg.file_status, methods=[])
712
+ entry = FileEntry(methods=[])
560
713
  files[fmg.file_path] = entry
561
714
 
562
715
  methods_by_qname = {m.qualified_name: m for m in entry.methods}
@@ -0,0 +1,38 @@
1
+ """Constants for the agents module."""
2
+
3
+
4
+ class LLMDefaults:
5
+ DEFAULT_AGENT_TEMPERATURE = 0
6
+ DEFAULT_PARSING_TEMPERATURE = 0
7
+ AWS_MAX_TOKENS = 4096
8
+
9
+
10
+ class FileStructureConfig:
11
+ MAX_LINES = 500
12
+ DEFAULT_MAX_DEPTH = 10
13
+ FALLBACK_MAX_LINES = 50000
14
+
15
+
16
+ class ModelCapabilities:
17
+ FALLBACK_INPUT = 256_000
18
+ FALLBACK_OUTPUT = 64_000
19
+ CACHE_TTL_SECONDS = 24 * 3600
20
+ CHARS_PER_TOKEN = 3.5 # community consensus conversion is around 3 or 4 chars/token.
21
+
22
+ SOURCES = {
23
+ "litellm": "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json",
24
+ "modelsdev": "https://models.dev/api.json",
25
+ "openrouter": "https://openrouter.ai/api/v1/models",
26
+ }
27
+
28
+ # models.dev uses slugs that diverge from our internal provider names.
29
+ MODELSDEV_SLUG = {
30
+ "aws": "amazon-bedrock",
31
+ "kimi": "moonshotai",
32
+ "glm": "zai",
33
+ }
34
+
35
+ OPENROUTER_PREFIX = {
36
+ "kimi": "moonshotai",
37
+ "glm": "z-ai",
38
+ }
@@ -84,8 +84,18 @@ class DetailsAgent(ClusterMethodsMixin, CodeBoardingAgent):
84
84
 
85
85
  programming_langs = self.static_analysis.get_languages()
86
86
 
87
- # Build cluster string using the pre-computed cluster results (same as AbstractionAgent)
88
- cluster_str = self._build_cluster_string(programming_langs, subgraph_cluster_results)
87
+ overhead_chars = len(str(self.system_message.content)) + len(
88
+ self.prompts["group_clusters"].format(
89
+ project_name=self.project_name,
90
+ cfg_clusters="",
91
+ component=component.llm_str(),
92
+ meta_context=meta_context_str,
93
+ project_type=project_type,
94
+ )
95
+ )
96
+ cluster_str = self._build_cluster_string(
97
+ programming_langs, subgraph_cluster_results, prompt_overhead_chars=overhead_chars
98
+ )
89
99
 
90
100
  prompt = self.prompts["group_clusters"].format(
91
101
  project_name=self.project_name,
@@ -11,7 +11,8 @@ from langchain_google_genai import ChatGoogleGenerativeAI
11
11
  from langchain_ollama import ChatOllama
12
12
  from langchain_openai import ChatOpenAI
13
13
 
14
- from agents.constants import LLMDefaults
14
+ from agents.constants import LLMDefaults, ModelCapabilities
15
+ from agents.model_capabilities import ContextWindow, get_context_window
15
16
  from agents.prompts.prompt_factory import LLMType, initialize_global_factory
16
17
  from monitoring.callbacks import MonitoringCallback
17
18
 
@@ -123,7 +124,7 @@ LLM_PROVIDERS = {
123
124
  chat_class=ChatOpenAI,
124
125
  api_key_env="VERCEL_API_KEY",
125
126
  agent_model="google/gemini-3-flash",
126
- parsing_model="openai/gpt-5-mini", # Use OpenAI model for parsing to avoid trustcall compatibility issues with Gemini
127
+ parsing_model="openai/gpt-5-mini",
127
128
  llm_type=LLMType.GEMINI_FLASH,
128
129
  alt_env_vars=["VERCEL_BASE_URL"],
129
130
  extra_args={
@@ -136,8 +137,8 @@ LLM_PROVIDERS = {
136
137
  "anthropic": LLMConfig(
137
138
  chat_class=ChatAnthropic,
138
139
  api_key_env="ANTHROPIC_API_KEY",
139
- agent_model="claude-sonnet-4-5-20250929",
140
- parsing_model="claude-3-haiku-20240307",
140
+ agent_model="claude-sonnet-4-6",
141
+ parsing_model="claude-haiku-4-5",
141
142
  llm_type=LLMType.CLAUDE,
142
143
  extra_args={
143
144
  "max_tokens": 8192,
@@ -149,7 +150,7 @@ LLM_PROVIDERS = {
149
150
  chat_class=ChatGoogleGenerativeAI,
150
151
  api_key_env="GOOGLE_API_KEY",
151
152
  agent_model="gemini-3-flash",
152
- parsing_model="gemini-3-flash",
153
+ parsing_model="gemini-3.1-flash-lite-preview",
153
154
  llm_type=LLMType.GEMINI_FLASH,
154
155
  extra_args={
155
156
  "max_tokens": None,
@@ -160,9 +161,9 @@ LLM_PROVIDERS = {
160
161
  "aws": LLMConfig(
161
162
  chat_class=ChatBedrockConverse,
162
163
  api_key_env="AWS_BEARER_TOKEN_BEDROCK", # Used for existence check
163
- agent_model="us.anthropic.claude-sonnet-4-5-20250929-v1:0",
164
- parsing_model="us.anthropic.claude-3-haiku-20240307-v1:0",
165
- llm_type=LLMType.CLAUDE,
164
+ agent_model="anthropic.claude-sonnet-4-6",
165
+ parsing_model="claude-haiku-4-5",
166
+ llm_type=LLMType.CLAUDE_SONNET,
166
167
  extra_args={
167
168
  "max_tokens": 4096,
168
169
  "region_name": lambda: os.getenv("AWS_DEFAULT_REGION", "us-east-1"),
@@ -172,9 +173,9 @@ LLM_PROVIDERS = {
172
173
  "cerebras": LLMConfig(
173
174
  chat_class=ChatCerebras,
174
175
  api_key_env="CEREBRAS_API_KEY",
175
- agent_model="gpt-oss-120b",
176
- parsing_model="llama3.1-8b",
177
- llm_type=LLMType.GPT4,
176
+ agent_model="zai-glm-4.7",
177
+ parsing_model="gpt-oss-120b",
178
+ llm_type=LLMType.KIMI,
178
179
  extra_args={
179
180
  "max_tokens": None,
180
181
  "timeout": None,
@@ -238,8 +239,8 @@ LLM_PROVIDERS = {
238
239
  "openrouter": LLMConfig(
239
240
  chat_class=ChatOpenAI,
240
241
  api_key_env="OPENROUTER_API_KEY",
241
- agent_model="google/gemini-2.5-flash",
242
- parsing_model="google/gemini-2.5-flash",
242
+ agent_model="google/gemini-3-flash",
243
+ parsing_model="google/gemini-3.1-flash-lite-preview",
243
244
  llm_type=LLMType.GEMINI_FLASH,
244
245
  extra_args={
245
246
  "base_url": lambda: os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"),
@@ -258,41 +259,53 @@ def _initialize_llm(
258
259
  log_prefix: str,
259
260
  init_factory: bool = False,
260
261
  ) -> tuple[BaseChatModel, str]:
261
- for name, config in LLM_PROVIDERS.items():
262
- if not config.is_active():
263
- continue
262
+ resolved = _resolve_active_provider(model_override, model_attr)
263
+ if resolved is None:
264
+ required_vars = []
265
+ for config in LLM_PROVIDERS.values():
266
+ required_vars.append(config.api_key_env)
267
+ required_vars.extend(config.alt_env_vars)
264
268
 
265
- model_name = model_override or getattr(config, model_attr)
269
+ raise ValueError(
270
+ f"No valid LLM configuration found. Please set one of: {', '.join(sorted(set(required_vars)))}"
271
+ )
266
272
 
267
- if init_factory:
268
- detected_llm_type = LLMType.from_model_name(model_name)
269
- initialize_global_factory(detected_llm_type)
270
- logger.info(
271
- f"Initialized prompt factory for {name} provider with model '{model_name}' "
272
- f"-> {detected_llm_type.value} prompt factory"
273
- )
273
+ name, config, model_name = resolved
274
274
 
275
- logger.info(f"Using {name.title()} {log_prefix}LLM with model: {model_name}")
275
+ if init_factory:
276
+ detected_llm_type = LLMType.from_model_name(model_name)
277
+ initialize_global_factory(detected_llm_type)
278
+ logger.info(
279
+ f"Initialized prompt factory for {name} provider with model '{model_name}' "
280
+ f"-> {detected_llm_type.value} prompt factory"
281
+ )
276
282
 
277
- kwargs = {
278
- "model": model_name,
279
- "temperature": getattr(config, temperature_attr),
280
- }
281
- kwargs.update(config.get_resolved_extra_args())
283
+ logger.info(f"Using {name.title()} {log_prefix}LLM with model: {model_name}")
282
284
 
283
- if name not in ["aws", "ollama"]:
284
- api_key = config.get_api_key()
285
- kwargs["api_key"] = api_key or "no-key-required"
285
+ kwargs = {
286
+ "model": model_name,
287
+ "temperature": getattr(config, temperature_attr),
288
+ }
289
+ kwargs.update(config.get_resolved_extra_args())
286
290
 
287
- model = config.chat_class(**kwargs) # type: ignore[call-arg, arg-type]
288
- return model, model_name
291
+ if name not in ["aws", "ollama"]:
292
+ api_key = config.get_api_key()
293
+ kwargs["api_key"] = api_key or "no-key-required"
289
294
 
290
- required_vars = []
291
- for config in LLM_PROVIDERS.values():
292
- required_vars.append(config.api_key_env)
293
- required_vars.extend(config.alt_env_vars)
295
+ model = config.chat_class(**kwargs) # type: ignore[call-arg, arg-type]
296
+ return model, model_name
294
297
 
295
- raise ValueError(f"No valid LLM configuration found. Please set one of: {', '.join(sorted(set(required_vars)))}")
298
+
299
+ def _resolve_active_provider(
300
+ model_override: str | None,
301
+ model_attr: str,
302
+ ) -> tuple[str, LLMConfig, str] | None:
303
+ """Return the active provider, config, and resolved model name."""
304
+ for name, config in LLM_PROVIDERS.items():
305
+ if not config.is_active():
306
+ continue
307
+ return name, config, model_override or getattr(config, model_attr)
308
+ return None
296
309
 
297
310
 
298
311
  def validate_api_key_provided() -> None:
@@ -311,6 +324,20 @@ def initialize_agent_llm(model_override: str | None = None) -> BaseChatModel:
311
324
  return model
312
325
 
313
326
 
327
+ def get_current_agent_context_window() -> ContextWindow:
328
+ """Context window for the currently active agent provider/model.
329
+
330
+ Resolves the first active provider (same rule as ``_initialize_llm``) on
331
+ every call. ``get_context_window`` handles its own caching, so this is
332
+ cheap enough to call without a module-level cache.
333
+ """
334
+ resolved = _resolve_active_provider(_agent_model_override or os.getenv("AGENT_MODEL"), "agent_model")
335
+ if resolved is not None:
336
+ name, _config, model_name = resolved
337
+ return get_context_window(name, model_name)
338
+ return ContextWindow(ModelCapabilities.FALLBACK_INPUT, ModelCapabilities.FALLBACK_OUTPUT)
339
+
340
+
314
341
  def initialize_parsing_llm(model_override: str | None = None) -> BaseChatModel:
315
342
  model, _ = _initialize_llm(model_override, "parsing_model", "parsing_temperature", "Extractor ")
316
343
  return model