codeboarding 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +0 -0
- agents/abstraction_agent.py +150 -0
- agents/agent.py +467 -0
- agents/agent_responses.py +363 -0
- agents/cluster_methods_mixin.py +281 -0
- agents/constants.py +13 -0
- agents/dependency_discovery.py +159 -0
- agents/details_agent.py +174 -0
- agents/llm_config.py +309 -0
- agents/meta_agent.py +105 -0
- agents/planner_agent.py +105 -0
- agents/prompts/__init__.py +85 -0
- agents/prompts/abstract_prompt_factory.py +63 -0
- agents/prompts/claude_prompts.py +381 -0
- agents/prompts/deepseek_prompts.py +389 -0
- agents/prompts/gemini_flash_prompts.py +362 -0
- agents/prompts/glm_prompts.py +407 -0
- agents/prompts/gpt_prompts.py +470 -0
- agents/prompts/kimi_prompts.py +400 -0
- agents/prompts/prompt_factory.py +179 -0
- agents/tools/__init__.py +8 -0
- agents/tools/base.py +96 -0
- agents/tools/get_external_deps.py +47 -0
- agents/tools/get_method_invocations.py +47 -0
- agents/tools/read_cfg.py +60 -0
- agents/tools/read_docs.py +132 -0
- agents/tools/read_file.py +90 -0
- agents/tools/read_file_structure.py +156 -0
- agents/tools/read_git_diff.py +131 -0
- agents/tools/read_packages.py +60 -0
- agents/tools/read_source.py +105 -0
- agents/tools/read_structure.py +49 -0
- agents/tools/toolkit.py +119 -0
- agents/validation.py +383 -0
- caching/__init__.py +4 -0
- caching/cache.py +29 -0
- caching/meta_cache.py +227 -0
- codeboarding-0.9.0.dist-info/METADATA +223 -0
- codeboarding-0.9.0.dist-info/RECORD +126 -0
- codeboarding-0.9.0.dist-info/WHEEL +5 -0
- codeboarding-0.9.0.dist-info/entry_points.txt +3 -0
- codeboarding-0.9.0.dist-info/licenses/LICENSE +21 -0
- codeboarding-0.9.0.dist-info/top_level.txt +18 -0
- core/__init__.py +101 -0
- core/plugin_loader.py +46 -0
- core/protocols.py +27 -0
- core/registry.py +46 -0
- diagram_analysis/__init__.py +4 -0
- diagram_analysis/analysis_json.py +346 -0
- diagram_analysis/diagram_generator.py +486 -0
- diagram_analysis/file_coverage.py +212 -0
- diagram_analysis/incremental/__init__.py +63 -0
- diagram_analysis/incremental/component_checker.py +236 -0
- diagram_analysis/incremental/file_manager.py +217 -0
- diagram_analysis/incremental/impact_analyzer.py +238 -0
- diagram_analysis/incremental/io_utils.py +281 -0
- diagram_analysis/incremental/models.py +72 -0
- diagram_analysis/incremental/path_patching.py +164 -0
- diagram_analysis/incremental/reexpansion.py +166 -0
- diagram_analysis/incremental/scoped_analysis.py +227 -0
- diagram_analysis/incremental/updater.py +464 -0
- diagram_analysis/incremental/validation.py +48 -0
- diagram_analysis/manifest.py +152 -0
- diagram_analysis/version.py +6 -0
- duckdb_crud.py +125 -0
- github_action.py +172 -0
- health/__init__.py +3 -0
- health/checks/__init__.py +11 -0
- health/checks/circular_deps.py +48 -0
- health/checks/cohesion.py +93 -0
- health/checks/coupling.py +140 -0
- health/checks/function_size.py +85 -0
- health/checks/god_class.py +167 -0
- health/checks/inheritance.py +104 -0
- health/checks/instability.py +77 -0
- health/checks/unused_code_diagnostics.py +338 -0
- health/config.py +172 -0
- health/constants.py +19 -0
- health/models.py +186 -0
- health/runner.py +236 -0
- install.py +518 -0
- logging_config.py +105 -0
- main.py +529 -0
- monitoring/__init__.py +12 -0
- monitoring/callbacks.py +163 -0
- monitoring/context.py +158 -0
- monitoring/mixin.py +16 -0
- monitoring/paths.py +47 -0
- monitoring/stats.py +50 -0
- monitoring/writers.py +172 -0
- output_generators/__init__.py +0 -0
- output_generators/html.py +163 -0
- output_generators/html_template.py +382 -0
- output_generators/markdown.py +140 -0
- output_generators/mdx.py +171 -0
- output_generators/sphinx.py +175 -0
- repo_utils/__init__.py +277 -0
- repo_utils/change_detector.py +289 -0
- repo_utils/errors.py +6 -0
- repo_utils/git_diff.py +74 -0
- repo_utils/ignore.py +341 -0
- static_analyzer/__init__.py +335 -0
- static_analyzer/analysis_cache.py +699 -0
- static_analyzer/analysis_result.py +269 -0
- static_analyzer/cluster_change_analyzer.py +391 -0
- static_analyzer/cluster_helpers.py +79 -0
- static_analyzer/constants.py +166 -0
- static_analyzer/git_diff_analyzer.py +224 -0
- static_analyzer/graph.py +746 -0
- static_analyzer/incremental_orchestrator.py +671 -0
- static_analyzer/java_config_scanner.py +232 -0
- static_analyzer/java_utils.py +227 -0
- static_analyzer/lsp_client/__init__.py +12 -0
- static_analyzer/lsp_client/client.py +1642 -0
- static_analyzer/lsp_client/diagnostics.py +62 -0
- static_analyzer/lsp_client/java_client.py +517 -0
- static_analyzer/lsp_client/language_settings.py +97 -0
- static_analyzer/lsp_client/typescript_client.py +235 -0
- static_analyzer/programming_language.py +152 -0
- static_analyzer/reference_resolve_mixin.py +166 -0
- static_analyzer/scanner.py +95 -0
- static_analyzer/typescript_config_scanner.py +54 -0
- tool_registry.py +433 -0
- user_config.py +134 -0
- utils.py +56 -0
- vscode_constants.py +124 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections.abc import Set as AbstractSet
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import StrEnum
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from repo_utils.ignore import RepoIgnoreManager
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Ecosystem(StrEnum):
    """Package ecosystem a dependency file belongs to.

    StrEnum members compare equal to their string values, so these double
    as human-readable identifiers in logs and serialized output.
    """

    PYTHON = "python"
    NODE = "node"
    GO = "go"
    JAVA = "java"
    PHP = "php"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FileRole(StrEnum):
    """Role a dependency file plays within its ecosystem.

    MANIFEST declares dependencies, LOCK pins resolved versions, and
    CONFIG covers auxiliary build/tool configuration (e.g. tsconfig.json).
    """

    MANIFEST = "manifest"
    LOCK = "lock"
    CONFIG = "config"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True, slots=True)
class DependencyFileSpec:
    """Immutable registry entry describing one well-known dependency filename."""

    # Exact filename to match (no glob patterns; matched via dict lookup).
    filename: str
    # Ecosystem the file belongs to (python, node, go, java, php).
    ecosystem: Ecosystem
    # Whether the file is a manifest, a lockfile, or auxiliary config.
    role: FileRole
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Canonical registry of well-known dependency filenames across the supported
# ecosystems. Order is for readability only; lookups go through the
# filename-keyed dict built below. NOTE(review): filenames are assumed unique
# across ecosystems — a duplicate entry would silently shadow an earlier one
# in that dict.
DEPENDENCY_REGISTRY: tuple[DependencyFileSpec, ...] = (
    # ── Python ──
    DependencyFileSpec("requirements.txt", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("requirements-dev.txt", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("requirements-test.txt", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("dev-requirements.txt", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("test-requirements.txt", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("setup.py", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("setup.cfg", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("Pipfile", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("Pipfile.lock", Ecosystem.PYTHON, FileRole.LOCK),
    DependencyFileSpec("pyproject.toml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("poetry.lock", Ecosystem.PYTHON, FileRole.LOCK),
    DependencyFileSpec("pdm.lock", Ecosystem.PYTHON, FileRole.LOCK),
    DependencyFileSpec("uv.lock", Ecosystem.PYTHON, FileRole.LOCK),
    DependencyFileSpec("environment.yml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("environment.yaml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("conda.yml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("conda.yaml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("pixi.toml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("requirements.in", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("pixi.lock", Ecosystem.PYTHON, FileRole.LOCK),
    # ── Node / TypeScript / JavaScript ──
    DependencyFileSpec("package.json", Ecosystem.NODE, FileRole.MANIFEST),
    DependencyFileSpec("package-lock.json", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("yarn.lock", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("pnpm-lock.yaml", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("bun.lockb", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("tsconfig.json", Ecosystem.NODE, FileRole.CONFIG),
    DependencyFileSpec("jsconfig.json", Ecosystem.NODE, FileRole.CONFIG),
    DependencyFileSpec("bun.lock", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("deno.json", Ecosystem.NODE, FileRole.MANIFEST),
    DependencyFileSpec("deno.jsonc", Ecosystem.NODE, FileRole.MANIFEST),
    DependencyFileSpec("deno.lock", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("lerna.json", Ecosystem.NODE, FileRole.CONFIG),
    # ── Go ──
    DependencyFileSpec("go.mod", Ecosystem.GO, FileRole.MANIFEST),
    DependencyFileSpec("go.sum", Ecosystem.GO, FileRole.LOCK),
    DependencyFileSpec("go.work", Ecosystem.GO, FileRole.CONFIG),
    DependencyFileSpec("go.work.sum", Ecosystem.GO, FileRole.LOCK),
    # ── Java / JVM ──
    DependencyFileSpec("pom.xml", Ecosystem.JAVA, FileRole.MANIFEST),
    DependencyFileSpec("pom.properties", Ecosystem.JAVA, FileRole.CONFIG),
    DependencyFileSpec("build.gradle", Ecosystem.JAVA, FileRole.MANIFEST),
    DependencyFileSpec("build.gradle.kts", Ecosystem.JAVA, FileRole.MANIFEST),
    DependencyFileSpec("settings.gradle", Ecosystem.JAVA, FileRole.CONFIG),
    DependencyFileSpec("settings.gradle.kts", Ecosystem.JAVA, FileRole.CONFIG),
    DependencyFileSpec("gradle.properties", Ecosystem.JAVA, FileRole.CONFIG),
    DependencyFileSpec("build.sbt", Ecosystem.JAVA, FileRole.MANIFEST),
    DependencyFileSpec("gradle.lockfile", Ecosystem.JAVA, FileRole.LOCK),
    DependencyFileSpec("verification-metadata.xml", Ecosystem.JAVA, FileRole.LOCK),
    # ── PHP ──
    DependencyFileSpec("composer.json", Ecosystem.PHP, FileRole.MANIFEST),
    DependencyFileSpec("composer.lock", Ecosystem.PHP, FileRole.LOCK),
    DependencyFileSpec("symfony.lock", Ecosystem.PHP, FileRole.LOCK),
    DependencyFileSpec("phive.xml", Ecosystem.PHP, FileRole.MANIFEST),
    DependencyFileSpec("package.xml", Ecosystem.PHP, FileRole.MANIFEST),
)
|
|
91
|
+
|
|
92
|
+
# Flat tuple of every known dependency filename, preserving registry order.
DEPENDENCY_FILES: tuple[str, ...] = tuple(spec.filename for spec in DEPENDENCY_REGISTRY)

# O(1) filename -> spec lookup used by the directory walk in
# discover_dependency_files(); later duplicates would win silently.
_FILENAME_TO_SPEC: dict[str, DependencyFileSpec] = {spec.filename: spec for spec in DEPENDENCY_REGISTRY}
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass
class DiscoveredDependencyFile:
    """A dependency file found on disk, paired with its registry metadata."""

    # Path to the file as yielded by the walk (rooted at the repo dir).
    path: Path
    # Registry entry describing the file's ecosystem and role.
    spec: DependencyFileSpec
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def discover_dependency_files(
    repo_dir: Path,
    ignore_manager: RepoIgnoreManager,
    *,
    max_depth: int = 3,
    roles: AbstractSet[FileRole] | None = None,
    ecosystems: AbstractSet[Ecosystem] | None = None,
) -> list[DiscoveredDependencyFile]:
    """Discover dependency files with full ecosystem / role metadata.

    Walks the repository tree up to *max_depth* directories deep,
    matching filenames against the known dependency registry in O(1)
    per file. The *ignore_manager* prunes entire subtrees early.

    Args:
        repo_dir: Repository root.
        ignore_manager: Ignore-rule evaluator (gitignore, codeboardingignore).
        max_depth: Maximum directory depth to descend (0 = root only).
        roles: If given (and non-empty), only return files whose role is in this set.
        ecosystems: If given (and non-empty), only return files whose ecosystem is in this set.

    Returns:
        Discovered files in deterministic (sorted, depth-first) order.
    """
    found: list[DiscoveredDependencyFile] = []
    seen: set[Path] = set()

    def _walk(directory: Path, depth: int) -> None:
        if depth > max_depth:
            return
        # Never prune the root itself; any other ignored directory cuts off
        # its whole subtree before we list it.
        if directory != repo_dir and ignore_manager.should_ignore(directory):
            return
        try:
            # Sorted for deterministic output across runs/platforms.
            entries = sorted(directory.iterdir())
        except OSError:
            # Broadened from PermissionError: also skip directories that
            # vanish mid-walk or raise other filesystem errors instead of
            # aborting the entire discovery.
            return

        for entry in entries:
            if entry.is_file():
                spec = _FILENAME_TO_SPEC.get(entry.name)
                if spec is None:
                    continue
                # Truthiness on purpose: an empty filter set means "no filtering".
                if roles and spec.role not in roles:
                    continue
                if ecosystems and spec.ecosystem not in ecosystems:
                    continue
                if not ignore_manager.should_ignore(entry) and entry not in seen:
                    found.append(DiscoveredDependencyFile(path=entry, spec=spec))
                    seen.add(entry)
            elif entry.is_dir() and depth < max_depth:
                _walk(entry, depth + 1)

    _walk(repo_dir, 0)

    logger.debug(
        "[Dependency Discovery] Found %d dependency files: %s",
        len(found),
        ", ".join(d.path.relative_to(repo_dir).as_posix() for d in found),
    )
    return found
|
agents/details_agent.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from langchain_core.prompts import PromptTemplate
|
|
5
|
+
from langchain_core.language_models import BaseChatModel
|
|
6
|
+
|
|
7
|
+
from agents.agent import CodeBoardingAgent
|
|
8
|
+
from agents.agent_responses import (
|
|
9
|
+
AnalysisInsights,
|
|
10
|
+
ClusterAnalysis,
|
|
11
|
+
Component,
|
|
12
|
+
MetaAnalysisInsights,
|
|
13
|
+
assign_component_ids,
|
|
14
|
+
)
|
|
15
|
+
from agents.prompts import get_system_details_message, get_cfg_details_message, get_details_message
|
|
16
|
+
from agents.cluster_methods_mixin import ClusterMethodsMixin
|
|
17
|
+
from agents.validation import (
|
|
18
|
+
ValidationContext,
|
|
19
|
+
validate_cluster_coverage,
|
|
20
|
+
validate_component_relationships,
|
|
21
|
+
validate_key_entities,
|
|
22
|
+
validate_relation_component_names,
|
|
23
|
+
)
|
|
24
|
+
from monitoring import trace
|
|
25
|
+
from static_analyzer.analysis_result import StaticAnalysisResults
|
|
26
|
+
from static_analyzer.cluster_helpers import get_all_cluster_ids
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class DetailsAgent(ClusterMethodsMixin, CodeBoardingAgent):
    """Agent that produces a detailed, validated analysis of one component.

    Given a high-level ``Component``, it builds a strict subgraph of the
    component's assigned files, groups the subgraph's clusters into logical
    sub-components via one LLM call, then generates the final
    ``AnalysisInsights`` via a second validated LLM call.
    """

    def __init__(
        self,
        repo_dir: Path,
        static_analysis: StaticAnalysisResults,
        project_name: str,
        meta_context: MetaAnalysisInsights,
        agent_llm: BaseChatModel,
        parsing_llm: BaseChatModel,
    ):
        super().__init__(repo_dir, static_analysis, get_system_details_message(), agent_llm, parsing_llm)
        self.project_name = project_name
        self.meta_context = meta_context

        # Prompt templates for the two LLM steps; variables are filled per component.
        self.prompts = {
            "group_clusters": PromptTemplate(
                template=get_cfg_details_message(),
                input_variables=["project_name", "cfg_str", "component", "meta_context", "project_type"],
            ),
            "final_analysis": PromptTemplate(
                template=get_details_message(),
                input_variables=["insight_so_far", "component", "meta_context", "project_type"],
            ),
        }

    @trace
    def step_cluster_grouping(
        self, component: Component, subgraph_cluster_str: str, subgraph_cluster_results: dict
    ) -> ClusterAnalysis:
        """
        Group clusters within the component's subgraph into logical sub-components.

        Args:
            component: The component being analyzed
            subgraph_cluster_str: String representation of the component's CFG subgraph
            subgraph_cluster_results: Cluster results for the subgraph (from _create_strict_component_subgraph)

        Returns:
            ClusterAnalysis with grouped clusters for this component
        """
        logger.info(f"[DetailsAgent] Grouping clusters for component: {component.name}")
        # Fall back to neutral placeholders when no meta context was supplied.
        meta_context_str = self.meta_context.llm_str() if self.meta_context else "No project context available."
        project_type = self.meta_context.project_type if self.meta_context else "unknown"

        prompt = self.prompts["group_clusters"].format(
            project_name=self.project_name,
            cfg_str=subgraph_cluster_str,
            component=component.llm_str(),
            meta_context=meta_context_str,
            project_type=project_type,
        )

        # Build validation context using subgraph cluster results
        context = ValidationContext(
            cluster_results=subgraph_cluster_results,
            expected_cluster_ids=get_all_cluster_ids(subgraph_cluster_results),
        )

        cluster_analysis = self._validation_invoke(
            prompt, ClusterAnalysis, validators=[validate_cluster_coverage], context=context
        )
        return cluster_analysis

    @trace
    def step_final_analysis(
        self, component: Component, cluster_analysis: ClusterAnalysis, subgraph_cluster_results: dict
    ) -> AnalysisInsights:
        """
        Generate detailed final analysis from grouped clusters.

        Args:
            component: The component being analyzed
            cluster_analysis: The clustered structure from step_cluster_grouping
            subgraph_cluster_results: Cluster results for the subgraph (for validation)

        Returns:
            AnalysisInsights with detailed component information
        """
        logger.info(f"[DetailsAgent] Generating final detailed analysis for: {component.name}")
        meta_context_str = self.meta_context.llm_str() if self.meta_context else "No project context available."
        project_type = self.meta_context.project_type if self.meta_context else "unknown"

        cluster_str = cluster_analysis.llm_str() if cluster_analysis else "No cluster analysis available."

        prompt = self.prompts["final_analysis"].format(
            insight_so_far=cluster_str,
            component=component.llm_str(),
            meta_context=meta_context_str,
            project_type=project_type,
        )

        # Build validation context with subgraph CFG graphs for edge checking
        context = ValidationContext(
            cluster_results=subgraph_cluster_results,
            cfg_graphs={lang: self.static_analysis.get_cfg(lang) for lang in self.static_analysis.get_languages()},
        )

        return self._validation_invoke(
            prompt,
            AnalysisInsights,
            validators=[validate_relation_component_names, validate_component_relationships, validate_key_entities],
            context=context,
        )

    def run(self, component: Component):
        """
        Analyze a component in detail by creating a subgraph and analyzing its structure.

        This follows the same pattern as AbstractionAgent but operates on a component-level
        subgraph instead of the full codebase.

        Args:
            component: Component to analyze in detail

        Returns:
            Tuple of (AnalysisInsights, cluster_results dict) with detailed component information
        """
        logger.info(f"[DetailsAgent] Processing component: {component.name}")

        # Step 1: Create subgraph from component's assigned files using strict filtering
        subgraph_str, subgraph_cluster_results = self._create_strict_component_subgraph(component)

        # Step 2: Group clusters within the subgraph
        cluster_analysis = self.step_cluster_grouping(component, subgraph_str, subgraph_cluster_results)

        # Step 3: Generate detailed analysis from grouped clusters
        analysis = self.step_final_analysis(component, cluster_analysis, subgraph_cluster_results)

        # Step 4: Sanitize cluster IDs (remove invalid ones) - use subgraph's cluster results
        self._sanitize_component_cluster_ids(analysis, cluster_results=subgraph_cluster_results)

        # Step 5: Assign files to components (deterministic + LLM-based with validation)
        # Pass component's assigned files as scope to limit classification to this component
        self.classify_files(analysis, subgraph_cluster_results, component.assigned_files)

        # Step 6: Fix source code reference lines (resolves reference_file paths)
        analysis = self.fix_source_code_reference_lines(analysis)

        # Step 7: Ensure unique key entities across components
        self._ensure_unique_key_entities(analysis)
        # Step 8: Assign deterministic component IDs based on parent
        assign_component_ids(analysis, parent_id=component.component_id)

        return analysis, subgraph_cluster_results
|
agents/llm_config.py
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any, Type
|
|
5
|
+
|
|
6
|
+
from langchain_anthropic import ChatAnthropic
|
|
7
|
+
from langchain_aws import ChatBedrockConverse
|
|
8
|
+
from langchain_cerebras import ChatCerebras
|
|
9
|
+
from langchain_core.language_models import BaseChatModel
|
|
10
|
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
11
|
+
from langchain_ollama import ChatOllama
|
|
12
|
+
from langchain_openai import ChatOpenAI
|
|
13
|
+
|
|
14
|
+
from agents.constants import LLMDefaults
|
|
15
|
+
from agents.prompts.prompt_factory import LLMType, initialize_global_factory
|
|
16
|
+
from monitoring.callbacks import MonitoringCallback
|
|
17
|
+
|
|
18
|
+
# Initialize global monitoring callback with its own stats container to avoid ContextVar dependency
|
|
19
|
+
from monitoring.stats import RunStats
|
|
20
|
+
|
|
21
|
+
MONITORING_CALLBACK = MonitoringCallback(stats_container=RunStats())
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# Module-level model overrides – set once by the orchestrator (main.py) and
|
|
27
|
+
# consumed by initialize_llms() without needing to thread the values through
|
|
28
|
+
# every intermediate function signature.
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
_agent_model_override: str | None = None
|
|
31
|
+
_parsing_model_override: str | None = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def configure_models(
|
|
35
|
+
agent_model: str | None = None,
|
|
36
|
+
parsing_model: str | None = None,
|
|
37
|
+
api_keys: dict[str, str] | None = None,
|
|
38
|
+
) -> None:
|
|
39
|
+
"""Set process-wide model and provider overrides. Call this once at startup.
|
|
40
|
+
|
|
41
|
+
``api_keys`` maps provider env-var names to values, e.g.::
|
|
42
|
+
|
|
43
|
+
configure_models(api_keys={"OPENAI_API_KEY": "sk-..."})
|
|
44
|
+
|
|
45
|
+
Keys already present in the shell environment are never overwritten, so
|
|
46
|
+
CI/CD pipelines that export keys directly retain full control.
|
|
47
|
+
|
|
48
|
+
Priority (highest to lowest):
|
|
49
|
+
1. Shell environment variables (set before the process starts)
|
|
50
|
+
2. ``api_keys`` passed here / values from ~/.codeboarding/config.toml
|
|
51
|
+
3. AGENT_MODEL / PARSING_MODEL environment variables (for model names)
|
|
52
|
+
4. Provider defaults defined in LLM_PROVIDERS
|
|
53
|
+
"""
|
|
54
|
+
global _agent_model_override, _parsing_model_override
|
|
55
|
+
_agent_model_override = agent_model
|
|
56
|
+
_parsing_model_override = parsing_model
|
|
57
|
+
if api_keys:
|
|
58
|
+
for env_var, value in api_keys.items():
|
|
59
|
+
if value and not os.environ.get(env_var):
|
|
60
|
+
os.environ[env_var] = value
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class LLMConfig:
    """
    Configuration for a single LLM provider.

    Attributes:
        chat_class: LangChain chat-model class used to instantiate the provider.
        api_key_env: Primary environment variable holding the API key (or the
            variable whose mere presence activates the provider).
        agent_model: The "agent" model used for complex reasoning and agentic tasks.
        parsing_model: The "parsing" model used for fast, cost-effective extraction and parsing tasks.
        llm_type: The LLMType enum value for prompt factory selection.
        agent_temperature: Temperature for the agent model. Defaults to 0 for
            deterministic behavior, crucial for code understanding and reasoning.
        parsing_temperature: Temperature for the parsing model. Defaults to 0 for
            deterministic behavior, crucial for structured output extraction.
        extra_args: Extra constructor kwargs; values may be zero-arg callables
            resolved lazily (e.g. environment lookups).
        alt_env_vars: Alternate environment variables that also activate the provider.
    """

    chat_class: Type[BaseChatModel]
    api_key_env: str
    agent_model: str
    parsing_model: str
    llm_type: LLMType
    agent_temperature: float = LLMDefaults.DEFAULT_AGENT_TEMPERATURE
    parsing_temperature: float = LLMDefaults.DEFAULT_PARSING_TEMPERATURE
    extra_args: dict[str, Any] = field(default_factory=dict)
    alt_env_vars: list[str] = field(default_factory=list)

    def get_api_key(self) -> str | None:
        """Return the primary API key from the environment, or None if unset."""
        return os.getenv(self.api_key_env)

    def is_active(self) -> bool:
        """Check if any of the environment variables (primary or alternate) are set."""
        candidates = (self.api_key_env, *self.alt_env_vars)
        return any(os.getenv(env_var) for env_var in candidates)

    def get_resolved_extra_args(self) -> dict[str, Any]:
        """Evaluate callable extra_args lazily and drop entries that resolve to None."""
        pairs = ((key, raw() if callable(raw) else raw) for key, raw in self.extra_args.items())
        return {key: resolved for key, resolved in pairs if resolved is not None}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Define supported providers in priority order — the first active entry
# (per LLMConfig.is_active) wins in _initialize_llm().
LLM_PROVIDERS = {
    "openai": LLMConfig(
        chat_class=ChatOpenAI,
        api_key_env="OPENAI_API_KEY",
        agent_model="gpt-4o",
        parsing_model="gpt-4o-mini",
        llm_type=LLMType.GPT4,
        alt_env_vars=["OPENAI_BASE_URL"],
        extra_args={
            "base_url": lambda: os.getenv("OPENAI_BASE_URL"),
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "vercel": LLMConfig(
        chat_class=ChatOpenAI,
        api_key_env="VERCEL_API_KEY",
        agent_model="google/gemini-3-flash",
        parsing_model="openai/gpt-oss-120b",  # Use OpenAI model for parsing to avoid trustcall compatibility issues with Gemini
        llm_type=LLMType.GEMINI_FLASH,
        alt_env_vars=["VERCEL_BASE_URL"],
        extra_args={
            # Fixed: was an f-string with no placeholders (ruff F541).
            "base_url": lambda: os.getenv("VERCEL_BASE_URL", "https://ai-gateway.vercel.sh/v1"),
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "anthropic": LLMConfig(
        chat_class=ChatAnthropic,
        api_key_env="ANTHROPIC_API_KEY",
        agent_model="claude-3-7-sonnet-20250219",
        parsing_model="claude-3-haiku-20240307",
        llm_type=LLMType.CLAUDE,
        extra_args={
            "max_tokens": 8192,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "google": LLMConfig(
        chat_class=ChatGoogleGenerativeAI,
        api_key_env="GOOGLE_API_KEY",
        agent_model="gemini-3-flash",
        parsing_model="gemini-3-flash",
        llm_type=LLMType.GEMINI_FLASH,
        extra_args={
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "aws": LLMConfig(
        chat_class=ChatBedrockConverse,
        api_key_env="AWS_BEARER_TOKEN_BEDROCK",  # Used for existence check
        agent_model="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
        parsing_model="us.anthropic.claude-3-haiku-20240307-v1:0",
        llm_type=LLMType.CLAUDE,
        extra_args={
            "max_tokens": 4096,
            "region_name": lambda: os.getenv("AWS_DEFAULT_REGION", "us-east-1"),
            "credentials_profile_name": None,
        },
    ),
    "cerebras": LLMConfig(
        chat_class=ChatCerebras,
        api_key_env="CEREBRAS_API_KEY",
        agent_model="gpt-oss-120b",
        parsing_model="llama3.1-8b",
        llm_type=LLMType.GPT4,
        extra_args={
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "ollama": LLMConfig(
        chat_class=ChatOllama,
        api_key_env="OLLAMA_BASE_URL",  # Used for existence check
        agent_model="qwen3:30b",
        parsing_model="qwen2.5:7b",
        llm_type=LLMType.GEMINI_FLASH,
        agent_temperature=0.1,
        parsing_temperature=0.1,
        extra_args={
            "base_url": lambda: os.getenv("OLLAMA_BASE_URL"),
        },
    ),
    "deepseek": LLMConfig(
        chat_class=ChatOpenAI,
        api_key_env="DEEPSEEK_API_KEY",
        agent_model="deepseek-chat",
        parsing_model="deepseek-chat",
        llm_type=LLMType.DEEPSEEK,
        alt_env_vars=["DEEPSEEK_BASE_URL"],
        extra_args={
            "base_url": lambda: os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1"),
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "glm": LLMConfig(
        chat_class=ChatOpenAI,
        api_key_env="GLM_API_KEY",
        agent_model="glm-4-flash",
        parsing_model="glm-4-flash",
        llm_type=LLMType.GLM,
        alt_env_vars=["GLM_BASE_URL"],
        extra_args={
            "base_url": lambda: os.getenv("GLM_BASE_URL", "https://open.bigmodel.cn/api/paas/v4"),
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "kimi": LLMConfig(
        chat_class=ChatOpenAI,
        api_key_env="KIMI_API_KEY",
        agent_model="kimi-k2.5",
        parsing_model="kimi-k2.5",
        llm_type=LLMType.KIMI,
        alt_env_vars=["KIMI_BASE_URL"],
        extra_args={
            "base_url": lambda: os.getenv("KIMI_BASE_URL", "https://api.moonshot.cn/v1"),
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
}
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _initialize_llm(
    model_override: str | None,
    model_attr: str,
    temperature_attr: str,
    log_prefix: str,
    init_factory: bool = False,
) -> tuple[BaseChatModel, str]:
    """Instantiate a chat model from the first provider with credentials configured.

    Args:
        model_override: Explicit model name that takes precedence over the
            provider default.
        model_attr: LLMConfig attribute name holding the default model.
        temperature_attr: LLMConfig attribute name holding the temperature.
        log_prefix: Human-readable tag inserted into the startup log line.
        init_factory: When True, also initialize the global prompt factory
            from the resolved model name.

    Returns:
        The instantiated chat model together with the model name used.

    Raises:
        ValueError: If no provider has any of its environment variables set.
    """
    for provider_name, provider_cfg in LLM_PROVIDERS.items():
        if not provider_cfg.is_active():
            continue

        model_name = model_override or getattr(provider_cfg, model_attr)

        if init_factory:
            detected_llm_type = LLMType.from_model_name(model_name)
            initialize_global_factory(detected_llm_type)
            logger.info(
                f"Initialized prompt factory for {provider_name} provider with model '{model_name}' "
                f"-> {detected_llm_type.value} prompt factory"
            )

        logger.info(f"Using {provider_name.title()} {log_prefix}LLM with model: {model_name}")

        kwargs: dict = {"model": model_name, "temperature": getattr(provider_cfg, temperature_attr)}
        kwargs.update(provider_cfg.get_resolved_extra_args())

        # AWS and Ollama authenticate through their own mechanisms rather
        # than an api_key constructor argument.
        if provider_name not in ("aws", "ollama"):
            kwargs["api_key"] = provider_cfg.get_api_key() or "no-key-required"

        model = provider_cfg.chat_class(**kwargs)  # type: ignore[call-arg, arg-type]
        return model, model_name

    # No provider active: report every env var that could have activated one.
    required_vars = [
        env_var
        for provider_cfg in LLM_PROVIDERS.values()
        for env_var in (provider_cfg.api_key_env, *provider_cfg.alt_env_vars)
    ]
    raise ValueError(f"No valid LLM configuration found. Please set one of: {', '.join(sorted(set(required_vars)))}")
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def validate_api_key_provided() -> None:
    """Raise ValueError if zero or more than one LLM provider key is configured."""
    active_names = [provider for provider, cfg in LLM_PROVIDERS.items() if cfg.is_active()]
    if not active_names:
        required = sorted({cfg.api_key_env for cfg in LLM_PROVIDERS.values()})
        raise ValueError(f"No LLM provider API key found. Set one of: {', '.join(required)}")
    if len(active_names) > 1:
        raise ValueError(f"Multiple LLM provider keys detected ({', '.join(active_names)}); please set only one.")
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def initialize_agent_llm(model_override: str | None = None) -> BaseChatModel:
    """Create the reasoning ("agent") LLM and record its model name on the monitoring callback."""
    model, resolved_name = _initialize_llm(
        model_override, "agent_model", "agent_temperature", "", init_factory=True
    )
    MONITORING_CALLBACK.model_name = resolved_name
    return model


def initialize_parsing_llm(model_override: str | None = None) -> BaseChatModel:
    """Create the fast extraction/parsing LLM."""
    parsing_model, _unused_name = _initialize_llm(
        model_override, "parsing_model", "parsing_temperature", "Extractor "
    )
    return parsing_model


def initialize_llms() -> tuple[BaseChatModel, BaseChatModel]:
    """Create both LLMs, honoring configure_models() overrides, then env vars, then defaults."""
    agent_override = _agent_model_override or os.getenv("AGENT_MODEL")
    parsing_override = _parsing_model_override or os.getenv("PARSING_MODEL")
    return initialize_agent_llm(agent_override), initialize_parsing_llm(parsing_override)
|