codeboarding 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. agents/__init__.py +0 -0
  2. agents/abstraction_agent.py +150 -0
  3. agents/agent.py +467 -0
  4. agents/agent_responses.py +363 -0
  5. agents/cluster_methods_mixin.py +281 -0
  6. agents/constants.py +13 -0
  7. agents/dependency_discovery.py +159 -0
  8. agents/details_agent.py +174 -0
  9. agents/llm_config.py +309 -0
  10. agents/meta_agent.py +105 -0
  11. agents/planner_agent.py +105 -0
  12. agents/prompts/__init__.py +85 -0
  13. agents/prompts/abstract_prompt_factory.py +63 -0
  14. agents/prompts/claude_prompts.py +381 -0
  15. agents/prompts/deepseek_prompts.py +389 -0
  16. agents/prompts/gemini_flash_prompts.py +362 -0
  17. agents/prompts/glm_prompts.py +407 -0
  18. agents/prompts/gpt_prompts.py +470 -0
  19. agents/prompts/kimi_prompts.py +400 -0
  20. agents/prompts/prompt_factory.py +179 -0
  21. agents/tools/__init__.py +8 -0
  22. agents/tools/base.py +96 -0
  23. agents/tools/get_external_deps.py +47 -0
  24. agents/tools/get_method_invocations.py +47 -0
  25. agents/tools/read_cfg.py +60 -0
  26. agents/tools/read_docs.py +132 -0
  27. agents/tools/read_file.py +90 -0
  28. agents/tools/read_file_structure.py +156 -0
  29. agents/tools/read_git_diff.py +131 -0
  30. agents/tools/read_packages.py +60 -0
  31. agents/tools/read_source.py +105 -0
  32. agents/tools/read_structure.py +49 -0
  33. agents/tools/toolkit.py +119 -0
  34. agents/validation.py +383 -0
  35. caching/__init__.py +4 -0
  36. caching/cache.py +29 -0
  37. caching/meta_cache.py +227 -0
  38. codeboarding-0.9.0.dist-info/METADATA +223 -0
  39. codeboarding-0.9.0.dist-info/RECORD +126 -0
  40. codeboarding-0.9.0.dist-info/WHEEL +5 -0
  41. codeboarding-0.9.0.dist-info/entry_points.txt +3 -0
  42. codeboarding-0.9.0.dist-info/licenses/LICENSE +21 -0
  43. codeboarding-0.9.0.dist-info/top_level.txt +18 -0
  44. core/__init__.py +101 -0
  45. core/plugin_loader.py +46 -0
  46. core/protocols.py +27 -0
  47. core/registry.py +46 -0
  48. diagram_analysis/__init__.py +4 -0
  49. diagram_analysis/analysis_json.py +346 -0
  50. diagram_analysis/diagram_generator.py +486 -0
  51. diagram_analysis/file_coverage.py +212 -0
  52. diagram_analysis/incremental/__init__.py +63 -0
  53. diagram_analysis/incremental/component_checker.py +236 -0
  54. diagram_analysis/incremental/file_manager.py +217 -0
  55. diagram_analysis/incremental/impact_analyzer.py +238 -0
  56. diagram_analysis/incremental/io_utils.py +281 -0
  57. diagram_analysis/incremental/models.py +72 -0
  58. diagram_analysis/incremental/path_patching.py +164 -0
  59. diagram_analysis/incremental/reexpansion.py +166 -0
  60. diagram_analysis/incremental/scoped_analysis.py +227 -0
  61. diagram_analysis/incremental/updater.py +464 -0
  62. diagram_analysis/incremental/validation.py +48 -0
  63. diagram_analysis/manifest.py +152 -0
  64. diagram_analysis/version.py +6 -0
  65. duckdb_crud.py +125 -0
  66. github_action.py +172 -0
  67. health/__init__.py +3 -0
  68. health/checks/__init__.py +11 -0
  69. health/checks/circular_deps.py +48 -0
  70. health/checks/cohesion.py +93 -0
  71. health/checks/coupling.py +140 -0
  72. health/checks/function_size.py +85 -0
  73. health/checks/god_class.py +167 -0
  74. health/checks/inheritance.py +104 -0
  75. health/checks/instability.py +77 -0
  76. health/checks/unused_code_diagnostics.py +338 -0
  77. health/config.py +172 -0
  78. health/constants.py +19 -0
  79. health/models.py +186 -0
  80. health/runner.py +236 -0
  81. install.py +518 -0
  82. logging_config.py +105 -0
  83. main.py +529 -0
  84. monitoring/__init__.py +12 -0
  85. monitoring/callbacks.py +163 -0
  86. monitoring/context.py +158 -0
  87. monitoring/mixin.py +16 -0
  88. monitoring/paths.py +47 -0
  89. monitoring/stats.py +50 -0
  90. monitoring/writers.py +172 -0
  91. output_generators/__init__.py +0 -0
  92. output_generators/html.py +163 -0
  93. output_generators/html_template.py +382 -0
  94. output_generators/markdown.py +140 -0
  95. output_generators/mdx.py +171 -0
  96. output_generators/sphinx.py +175 -0
  97. repo_utils/__init__.py +277 -0
  98. repo_utils/change_detector.py +289 -0
  99. repo_utils/errors.py +6 -0
  100. repo_utils/git_diff.py +74 -0
  101. repo_utils/ignore.py +341 -0
  102. static_analyzer/__init__.py +335 -0
  103. static_analyzer/analysis_cache.py +699 -0
  104. static_analyzer/analysis_result.py +269 -0
  105. static_analyzer/cluster_change_analyzer.py +391 -0
  106. static_analyzer/cluster_helpers.py +79 -0
  107. static_analyzer/constants.py +166 -0
  108. static_analyzer/git_diff_analyzer.py +224 -0
  109. static_analyzer/graph.py +746 -0
  110. static_analyzer/incremental_orchestrator.py +671 -0
  111. static_analyzer/java_config_scanner.py +232 -0
  112. static_analyzer/java_utils.py +227 -0
  113. static_analyzer/lsp_client/__init__.py +12 -0
  114. static_analyzer/lsp_client/client.py +1642 -0
  115. static_analyzer/lsp_client/diagnostics.py +62 -0
  116. static_analyzer/lsp_client/java_client.py +517 -0
  117. static_analyzer/lsp_client/language_settings.py +97 -0
  118. static_analyzer/lsp_client/typescript_client.py +235 -0
  119. static_analyzer/programming_language.py +152 -0
  120. static_analyzer/reference_resolve_mixin.py +166 -0
  121. static_analyzer/scanner.py +95 -0
  122. static_analyzer/typescript_config_scanner.py +54 -0
  123. tool_registry.py +433 -0
  124. user_config.py +134 -0
  125. utils.py +56 -0
  126. vscode_constants.py +124 -0
@@ -0,0 +1,159 @@
1
+ import logging
2
+ from collections.abc import Set as AbstractSet
3
+ from dataclasses import dataclass
4
+ from enum import StrEnum
5
+ from pathlib import Path
6
+
7
+ from repo_utils.ignore import RepoIgnoreManager
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class Ecosystem(StrEnum):
    """Package ecosystems whose dependency files the discovery walk recognizes."""

    PYTHON = "python"
    NODE = "node"
    GO = "go"
    JAVA = "java"
    PHP = "php"
18
+
19
+
20
class FileRole(StrEnum):
    """Role a dependency file plays within its ecosystem."""

    MANIFEST = "manifest"  # declares direct dependencies (e.g. pyproject.toml, package.json)
    LOCK = "lock"  # pins exact resolved versions (e.g. poetry.lock, go.sum)
    CONFIG = "config"  # build/tooling configuration (e.g. tsconfig.json, go.work)
24
+
25
+
26
@dataclass(frozen=True, slots=True)
class DependencyFileSpec:
    """Registry entry describing a well-known dependency filename."""

    filename: str  # exact basename matched during discovery (no glob/patterns)
    ecosystem: Ecosystem
    role: FileRole
31
+
32
+
33
# Known dependency files, matched by exact basename during discovery
# (see _FILENAME_TO_SPEC for the O(1) lookup built from this registry).
DEPENDENCY_REGISTRY: tuple[DependencyFileSpec, ...] = (
    # ── Python ──
    DependencyFileSpec("requirements.txt", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("requirements-dev.txt", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("requirements-test.txt", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("dev-requirements.txt", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("test-requirements.txt", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("setup.py", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("setup.cfg", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("Pipfile", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("Pipfile.lock", Ecosystem.PYTHON, FileRole.LOCK),
    DependencyFileSpec("pyproject.toml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("poetry.lock", Ecosystem.PYTHON, FileRole.LOCK),
    DependencyFileSpec("pdm.lock", Ecosystem.PYTHON, FileRole.LOCK),
    DependencyFileSpec("uv.lock", Ecosystem.PYTHON, FileRole.LOCK),
    DependencyFileSpec("environment.yml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("environment.yaml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("conda.yml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("conda.yaml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("pixi.toml", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("requirements.in", Ecosystem.PYTHON, FileRole.MANIFEST),
    DependencyFileSpec("pixi.lock", Ecosystem.PYTHON, FileRole.LOCK),
    # ── Node / TypeScript / JavaScript ──
    DependencyFileSpec("package.json", Ecosystem.NODE, FileRole.MANIFEST),
    DependencyFileSpec("package-lock.json", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("yarn.lock", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("pnpm-lock.yaml", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("bun.lockb", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("tsconfig.json", Ecosystem.NODE, FileRole.CONFIG),
    DependencyFileSpec("jsconfig.json", Ecosystem.NODE, FileRole.CONFIG),
    DependencyFileSpec("bun.lock", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("deno.json", Ecosystem.NODE, FileRole.MANIFEST),
    DependencyFileSpec("deno.jsonc", Ecosystem.NODE, FileRole.MANIFEST),
    DependencyFileSpec("deno.lock", Ecosystem.NODE, FileRole.LOCK),
    DependencyFileSpec("lerna.json", Ecosystem.NODE, FileRole.CONFIG),
    # ── Go ──
    DependencyFileSpec("go.mod", Ecosystem.GO, FileRole.MANIFEST),
    DependencyFileSpec("go.sum", Ecosystem.GO, FileRole.LOCK),
    DependencyFileSpec("go.work", Ecosystem.GO, FileRole.CONFIG),
    DependencyFileSpec("go.work.sum", Ecosystem.GO, FileRole.LOCK),
    # ── Java / JVM ──
    DependencyFileSpec("pom.xml", Ecosystem.JAVA, FileRole.MANIFEST),
    DependencyFileSpec("pom.properties", Ecosystem.JAVA, FileRole.CONFIG),
    DependencyFileSpec("build.gradle", Ecosystem.JAVA, FileRole.MANIFEST),
    DependencyFileSpec("build.gradle.kts", Ecosystem.JAVA, FileRole.MANIFEST),
    DependencyFileSpec("settings.gradle", Ecosystem.JAVA, FileRole.CONFIG),
    DependencyFileSpec("settings.gradle.kts", Ecosystem.JAVA, FileRole.CONFIG),
    DependencyFileSpec("gradle.properties", Ecosystem.JAVA, FileRole.CONFIG),
    DependencyFileSpec("build.sbt", Ecosystem.JAVA, FileRole.MANIFEST),
    DependencyFileSpec("gradle.lockfile", Ecosystem.JAVA, FileRole.LOCK),
    DependencyFileSpec("verification-metadata.xml", Ecosystem.JAVA, FileRole.LOCK),
    # ── PHP ──
    DependencyFileSpec("composer.json", Ecosystem.PHP, FileRole.MANIFEST),
    DependencyFileSpec("composer.lock", Ecosystem.PHP, FileRole.LOCK),
    DependencyFileSpec("symfony.lock", Ecosystem.PHP, FileRole.LOCK),
    DependencyFileSpec("phive.xml", Ecosystem.PHP, FileRole.MANIFEST),
    DependencyFileSpec("package.xml", Ecosystem.PHP, FileRole.MANIFEST),
)
91
+
92
# Flat tuple of every known dependency filename, in registry order.
DEPENDENCY_FILES: tuple[str, ...] = tuple(spec.filename for spec in DEPENDENCY_REGISTRY)

# Basename -> spec lookup table enabling O(1) matching per file during the walk.
_FILENAME_TO_SPEC: dict[str, DependencyFileSpec] = {spec.filename: spec for spec in DEPENDENCY_REGISTRY}
95
+
96
+
97
@dataclass
class DiscoveredDependencyFile:
    """A dependency file found on disk, paired with its registry metadata."""

    path: Path  # path as discovered under the repository root
    spec: DependencyFileSpec
101
+
102
+
103
def discover_dependency_files(
    repo_dir: Path,
    ignore_manager: RepoIgnoreManager,
    *,
    max_depth: int = 3,
    roles: AbstractSet[FileRole] | None = None,
    ecosystems: AbstractSet[Ecosystem] | None = None,
) -> list[DiscoveredDependencyFile]:
    """Discover dependency files with full ecosystem / role metadata.

    Walks the repository tree up to *max_depth* directories deep,
    matching filenames against the known dependency registry in O(1)
    per file. The *ignore_manager* prunes entire subtrees early.

    Args:
        repo_dir: Repository root.
        ignore_manager: Ignore-rule evaluator (gitignore, codeboardingignore).
        max_depth: Maximum directory depth to descend (0 = root only).
        roles: If given, only return files whose role is in this set.
            An explicitly empty set matches nothing.
        ecosystems: If given, only return files whose ecosystem is in this set.
            An explicitly empty set matches nothing.
    """
    found: list[DiscoveredDependencyFile] = []
    seen: set[Path] = set()

    def _walk(directory: Path, depth: int) -> None:
        if depth > max_depth:
            return
        # Never prune the repo root itself; ignored subtrees are skipped whole.
        if directory != repo_dir and ignore_manager.should_ignore(directory):
            return
        try:
            # Sorted for deterministic output order across platforms.
            entries = sorted(directory.iterdir())
        except OSError as exc:
            # Covers PermissionError plus races (directory removed mid-walk,
            # symlink to a non-directory, etc.) — skip just this subtree.
            logger.debug("[Dependency Discovery] Skipping %s: %s", directory, exc)
            return

        for entry in entries:
            if entry.is_file():
                spec = _FILENAME_TO_SPEC.get(entry.name)
                if spec is None:
                    continue
                # `is not None` rather than truthiness: an explicitly empty
                # filter set must exclude everything, not disable filtering.
                if roles is not None and spec.role not in roles:
                    continue
                if ecosystems is not None and spec.ecosystem not in ecosystems:
                    continue
                if not ignore_manager.should_ignore(entry) and entry not in seen:
                    found.append(DiscoveredDependencyFile(path=entry, spec=spec))
                    seen.add(entry)
            elif entry.is_dir() and depth < max_depth:
                _walk(entry, depth + 1)

    _walk(repo_dir, 0)

    logger.debug(
        "[Dependency Discovery] Found %d dependency files: %s",
        len(found),
        ", ".join(d.path.relative_to(repo_dir).as_posix() for d in found),
    )
    return found
@@ -0,0 +1,174 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from langchain_core.prompts import PromptTemplate
5
+ from langchain_core.language_models import BaseChatModel
6
+
7
+ from agents.agent import CodeBoardingAgent
8
+ from agents.agent_responses import (
9
+ AnalysisInsights,
10
+ ClusterAnalysis,
11
+ Component,
12
+ MetaAnalysisInsights,
13
+ assign_component_ids,
14
+ )
15
+ from agents.prompts import get_system_details_message, get_cfg_details_message, get_details_message
16
+ from agents.cluster_methods_mixin import ClusterMethodsMixin
17
+ from agents.validation import (
18
+ ValidationContext,
19
+ validate_cluster_coverage,
20
+ validate_component_relationships,
21
+ validate_key_entities,
22
+ validate_relation_component_names,
23
+ )
24
+ from monitoring import trace
25
+ from static_analyzer.analysis_result import StaticAnalysisResults
26
+ from static_analyzer.cluster_helpers import get_all_cluster_ids
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class DetailsAgent(ClusterMethodsMixin, CodeBoardingAgent):
    """Agent producing a detailed, component-scoped analysis.

    Mirrors the abstraction pipeline but restricted to one component: it
    builds a strict CFG subgraph from the component's assigned files,
    groups that subgraph's clusters into sub-components, and generates a
    validated ``AnalysisInsights`` for the component.
    """

    def __init__(
        self,
        repo_dir: Path,
        static_analysis: StaticAnalysisResults,
        project_name: str,
        meta_context: MetaAnalysisInsights,
        agent_llm: BaseChatModel,
        parsing_llm: BaseChatModel,
    ):
        super().__init__(repo_dir, static_analysis, get_system_details_message(), agent_llm, parsing_llm)
        self.project_name = project_name
        self.meta_context = meta_context

        # Prompt templates for the two LLM steps; variables are filled per component.
        self.prompts = {
            "group_clusters": PromptTemplate(
                template=get_cfg_details_message(),
                input_variables=["project_name", "cfg_str", "component", "meta_context", "project_type"],
            ),
            "final_analysis": PromptTemplate(
                template=get_details_message(),
                input_variables=["insight_so_far", "component", "meta_context", "project_type"],
            ),
        }

    @trace
    def step_cluster_grouping(
        self, component: Component, subgraph_cluster_str: str, subgraph_cluster_results: dict
    ) -> ClusterAnalysis:
        """
        Group clusters within the component's subgraph into logical sub-components.

        Args:
            component: The component being analyzed
            subgraph_cluster_str: String representation of the component's CFG subgraph
            subgraph_cluster_results: Cluster results for the subgraph (from _create_strict_component_subgraph)

        Returns:
            ClusterAnalysis with grouped clusters for this component
        """
        logger.info(f"[DetailsAgent] Grouping clusters for component: {component.name}")
        # meta_context is optional; fall back to neutral placeholders when absent.
        meta_context_str = self.meta_context.llm_str() if self.meta_context else "No project context available."
        project_type = self.meta_context.project_type if self.meta_context else "unknown"

        prompt = self.prompts["group_clusters"].format(
            project_name=self.project_name,
            cfg_str=subgraph_cluster_str,
            component=component.llm_str(),
            meta_context=meta_context_str,
            project_type=project_type,
        )

        # Build validation context using subgraph cluster results
        context = ValidationContext(
            cluster_results=subgraph_cluster_results,
            expected_cluster_ids=get_all_cluster_ids(subgraph_cluster_results),
        )

        cluster_analysis = self._validation_invoke(
            prompt, ClusterAnalysis, validators=[validate_cluster_coverage], context=context
        )
        return cluster_analysis

    @trace
    def step_final_analysis(
        self, component: Component, cluster_analysis: ClusterAnalysis, subgraph_cluster_results: dict
    ) -> AnalysisInsights:
        """
        Generate detailed final analysis from grouped clusters.

        Args:
            component: The component being analyzed
            cluster_analysis: The clustered structure from step_cluster_grouping
            subgraph_cluster_results: Cluster results for the subgraph (for validation)

        Returns:
            AnalysisInsights with detailed component information
        """
        logger.info(f"[DetailsAgent] Generating final detailed analysis for: {component.name}")
        meta_context_str = self.meta_context.llm_str() if self.meta_context else "No project context available."
        project_type = self.meta_context.project_type if self.meta_context else "unknown"

        cluster_str = cluster_analysis.llm_str() if cluster_analysis else "No cluster analysis available."

        prompt = self.prompts["final_analysis"].format(
            insight_so_far=cluster_str,
            component=component.llm_str(),
            meta_context=meta_context_str,
            project_type=project_type,
        )

        # Build validation context with subgraph CFG graphs for edge checking
        context = ValidationContext(
            cluster_results=subgraph_cluster_results,
            cfg_graphs={lang: self.static_analysis.get_cfg(lang) for lang in self.static_analysis.get_languages()},
        )

        return self._validation_invoke(
            prompt,
            AnalysisInsights,
            validators=[validate_relation_component_names, validate_component_relationships, validate_key_entities],
            context=context,
        )

    def run(self, component: Component) -> tuple[AnalysisInsights, dict]:
        """
        Analyze a component in detail by creating a subgraph and analyzing its structure.

        This follows the same pattern as AbstractionAgent but operates on a component-level
        subgraph instead of the full codebase.

        Args:
            component: Component to analyze in detail

        Returns:
            Tuple of (AnalysisInsights, cluster_results dict) with detailed component information
        """
        logger.info(f"[DetailsAgent] Processing component: {component.name}")

        # Step 1: Create subgraph from component's assigned files using strict filtering
        subgraph_str, subgraph_cluster_results = self._create_strict_component_subgraph(component)

        # Step 2: Group clusters within the subgraph
        cluster_analysis = self.step_cluster_grouping(component, subgraph_str, subgraph_cluster_results)

        # Step 3: Generate detailed analysis from grouped clusters
        analysis = self.step_final_analysis(component, cluster_analysis, subgraph_cluster_results)

        # Step 4: Sanitize cluster IDs (remove invalid ones) - use subgraph's cluster results
        self._sanitize_component_cluster_ids(analysis, cluster_results=subgraph_cluster_results)

        # Step 5: Assign files to components (deterministic + LLM-based with validation)
        # Pass component's assigned files as scope to limit classification to this component
        self.classify_files(analysis, subgraph_cluster_results, component.assigned_files)

        # Step 6: Fix source code reference lines (resolves reference_file paths)
        analysis = self.fix_source_code_reference_lines(analysis)

        # Step 7: Ensure unique key entities across components
        self._ensure_unique_key_entities(analysis)
        # Step 8: Assign deterministic component IDs based on parent
        assign_component_ids(analysis, parent_id=component.component_id)

        return analysis, subgraph_cluster_results
agents/llm_config.py ADDED
@@ -0,0 +1,309 @@
1
+ import logging
2
+ import os
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Type
5
+
6
+ from langchain_anthropic import ChatAnthropic
7
+ from langchain_aws import ChatBedrockConverse
8
+ from langchain_cerebras import ChatCerebras
9
+ from langchain_core.language_models import BaseChatModel
10
+ from langchain_google_genai import ChatGoogleGenerativeAI
11
+ from langchain_ollama import ChatOllama
12
+ from langchain_openai import ChatOpenAI
13
+
14
+ from agents.constants import LLMDefaults
15
+ from agents.prompts.prompt_factory import LLMType, initialize_global_factory
16
+ from monitoring.callbacks import MonitoringCallback
17
+
18
# Initialize global monitoring callback with its own stats container to avoid ContextVar dependency
from monitoring.stats import RunStats

# Shared across all LLM initializations; `model_name` is filled in later by
# initialize_agent_llm() once a provider/model has been selected.
MONITORING_CALLBACK = MonitoringCallback(stats_container=RunStats())

logger = logging.getLogger(__name__)
24
+
25
# ---------------------------------------------------------------------------
# Module-level model overrides – set once by the orchestrator (main.py) and
# consumed by initialize_llms() without needing to thread the values through
# every intermediate function signature.
# ---------------------------------------------------------------------------
_agent_model_override: str | None = None  # set via configure_models()
_parsing_model_override: str | None = None  # set via configure_models()
32
+
33
+
34
+ def configure_models(
35
+ agent_model: str | None = None,
36
+ parsing_model: str | None = None,
37
+ api_keys: dict[str, str] | None = None,
38
+ ) -> None:
39
+ """Set process-wide model and provider overrides. Call this once at startup.
40
+
41
+ ``api_keys`` maps provider env-var names to values, e.g.::
42
+
43
+ configure_models(api_keys={"OPENAI_API_KEY": "sk-..."})
44
+
45
+ Keys already present in the shell environment are never overwritten, so
46
+ CI/CD pipelines that export keys directly retain full control.
47
+
48
+ Priority (highest to lowest):
49
+ 1. Shell environment variables (set before the process starts)
50
+ 2. ``api_keys`` passed here / values from ~/.codeboarding/config.toml
51
+ 3. AGENT_MODEL / PARSING_MODEL environment variables (for model names)
52
+ 4. Provider defaults defined in LLM_PROVIDERS
53
+ """
54
+ global _agent_model_override, _parsing_model_override
55
+ _agent_model_override = agent_model
56
+ _parsing_model_override = parsing_model
57
+ if api_keys:
58
+ for env_var, value in api_keys.items():
59
+ if value and not os.environ.get(env_var):
60
+ os.environ[env_var] = value
61
+
62
+
63
@dataclass
class LLMConfig:
    """Static description of one LLM provider.

    Attributes:
        chat_class: LangChain chat-model class used to instantiate the provider.
        api_key_env: Primary environment variable holding the API key.
        agent_model: Default model for complex reasoning / agentic work.
        parsing_model: Default fast, cost-effective model for extraction/parsing.
        llm_type: LLMType enum value used for prompt-factory selection.
        agent_temperature: Agent-model temperature; 0 by default for the
            deterministic behavior code understanding relies on.
        parsing_temperature: Parsing-model temperature; 0 by default for
            deterministic structured-output extraction.
        extra_args: Extra constructor kwargs; callable values are resolved
            lazily and ``None`` results are dropped.
        alt_env_vars: Alternate env vars that also mark this provider active.
    """

    chat_class: Type[BaseChatModel]
    api_key_env: str
    agent_model: str
    parsing_model: str
    llm_type: LLMType
    agent_temperature: float = LLMDefaults.DEFAULT_AGENT_TEMPERATURE
    parsing_temperature: float = LLMDefaults.DEFAULT_PARSING_TEMPERATURE
    extra_args: dict[str, Any] = field(default_factory=dict)
    alt_env_vars: list[str] = field(default_factory=list)

    def get_api_key(self) -> str | None:
        """Return the primary API key from the environment, if set."""
        return os.getenv(self.api_key_env)

    def is_active(self) -> bool:
        """Check if any of the environment variables (primary or alternate) are set."""
        return any(os.getenv(var) for var in (self.api_key_env, *self.alt_env_vars))

    def get_resolved_extra_args(self) -> dict[str, Any]:
        """Materialize extra_args: call lazy (callable) entries, drop None results."""
        return {
            key: resolved
            for key, raw in self.extra_args.items()
            if (resolved := raw() if callable(raw) else raw) is not None
        }
104
+
105
+
106
# Define supported providers in priority order: the first active entry
# (per LLMConfig.is_active) wins in _initialize_llm().
LLM_PROVIDERS = {
    "openai": LLMConfig(
        chat_class=ChatOpenAI,
        api_key_env="OPENAI_API_KEY",
        agent_model="gpt-4o",
        parsing_model="gpt-4o-mini",
        llm_type=LLMType.GPT4,
        alt_env_vars=["OPENAI_BASE_URL"],
        extra_args={
            "base_url": lambda: os.getenv("OPENAI_BASE_URL"),
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "vercel": LLMConfig(
        chat_class=ChatOpenAI,
        api_key_env="VERCEL_API_KEY",
        agent_model="google/gemini-3-flash",
        parsing_model="openai/gpt-oss-120b",  # Use OpenAI model for parsing to avoid trustcall compatibility issues with Gemini
        llm_type=LLMType.GEMINI_FLASH,
        alt_env_vars=["VERCEL_BASE_URL"],
        extra_args={
            # Fix: plain string (was an f-string with no placeholders).
            "base_url": lambda: os.getenv("VERCEL_BASE_URL", "https://ai-gateway.vercel.sh/v1"),
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "anthropic": LLMConfig(
        chat_class=ChatAnthropic,
        api_key_env="ANTHROPIC_API_KEY",
        agent_model="claude-3-7-sonnet-20250219",
        parsing_model="claude-3-haiku-20240307",
        llm_type=LLMType.CLAUDE,
        extra_args={
            "max_tokens": 8192,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "google": LLMConfig(
        chat_class=ChatGoogleGenerativeAI,
        api_key_env="GOOGLE_API_KEY",
        agent_model="gemini-3-flash",
        parsing_model="gemini-3-flash",
        llm_type=LLMType.GEMINI_FLASH,
        extra_args={
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "aws": LLMConfig(
        chat_class=ChatBedrockConverse,
        api_key_env="AWS_BEARER_TOKEN_BEDROCK",  # Used for existence check
        agent_model="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
        parsing_model="us.anthropic.claude-3-haiku-20240307-v1:0",
        llm_type=LLMType.CLAUDE,
        extra_args={
            "max_tokens": 4096,
            "region_name": lambda: os.getenv("AWS_DEFAULT_REGION", "us-east-1"),
            "credentials_profile_name": None,
        },
    ),
    "cerebras": LLMConfig(
        chat_class=ChatCerebras,
        api_key_env="CEREBRAS_API_KEY",
        agent_model="gpt-oss-120b",
        parsing_model="llama3.1-8b",
        llm_type=LLMType.GPT4,
        extra_args={
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "ollama": LLMConfig(
        chat_class=ChatOllama,
        api_key_env="OLLAMA_BASE_URL",  # Used for existence check
        agent_model="qwen3:30b",
        parsing_model="qwen2.5:7b",
        llm_type=LLMType.GEMINI_FLASH,
        agent_temperature=0.1,
        parsing_temperature=0.1,
        extra_args={
            "base_url": lambda: os.getenv("OLLAMA_BASE_URL"),
        },
    ),
    "deepseek": LLMConfig(
        chat_class=ChatOpenAI,
        api_key_env="DEEPSEEK_API_KEY",
        agent_model="deepseek-chat",
        parsing_model="deepseek-chat",
        llm_type=LLMType.DEEPSEEK,
        alt_env_vars=["DEEPSEEK_BASE_URL"],
        extra_args={
            "base_url": lambda: os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1"),
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "glm": LLMConfig(
        chat_class=ChatOpenAI,
        api_key_env="GLM_API_KEY",
        agent_model="glm-4-flash",
        parsing_model="glm-4-flash",
        llm_type=LLMType.GLM,
        alt_env_vars=["GLM_BASE_URL"],
        extra_args={
            "base_url": lambda: os.getenv("GLM_BASE_URL", "https://open.bigmodel.cn/api/paas/v4"),
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
    "kimi": LLMConfig(
        chat_class=ChatOpenAI,
        api_key_env="KIMI_API_KEY",
        agent_model="kimi-k2.5",
        parsing_model="kimi-k2.5",
        llm_type=LLMType.KIMI,
        alt_env_vars=["KIMI_BASE_URL"],
        extra_args={
            "base_url": lambda: os.getenv("KIMI_BASE_URL", "https://api.moonshot.cn/v1"),
            "max_tokens": None,
            "timeout": None,
            "max_retries": 0,
        },
    ),
}
239
+
240
+
241
def _initialize_llm(
    model_override: str | None,
    model_attr: str,
    temperature_attr: str,
    log_prefix: str,
    init_factory: bool = False,
) -> tuple[BaseChatModel, str]:
    """Build a chat model from the first active provider in LLM_PROVIDERS.

    Args:
        model_override: Explicit model name; falls back to the provider default.
        model_attr: LLMConfig attribute holding the default model name.
        temperature_attr: LLMConfig attribute holding the temperature.
        log_prefix: Short label inserted into the "Using ..." log line.
        init_factory: When True, also initialize the global prompt factory
            from the resolved model name.

    Raises:
        ValueError: If no provider is active in the environment.
    """
    chosen = next(((name, cfg) for name, cfg in LLM_PROVIDERS.items() if cfg.is_active()), None)

    if chosen is None:
        # Collect every env var that could have activated a provider.
        required_vars: list[str] = []
        for config in LLM_PROVIDERS.values():
            required_vars.append(config.api_key_env)
            required_vars.extend(config.alt_env_vars)
        raise ValueError(f"No valid LLM configuration found. Please set one of: {', '.join(sorted(set(required_vars)))}")

    name, config = chosen
    model_name = model_override or getattr(config, model_attr)

    if init_factory:
        detected_llm_type = LLMType.from_model_name(model_name)
        initialize_global_factory(detected_llm_type)
        logger.info(
            f"Initialized prompt factory for {name} provider with model '{model_name}' "
            f"-> {detected_llm_type.value} prompt factory"
        )

    logger.info(f"Using {name.title()} {log_prefix}LLM with model: {model_name}")

    kwargs: dict[str, Any] = {"model": model_name, "temperature": getattr(config, temperature_attr)}
    kwargs.update(config.get_resolved_extra_args())

    # AWS and Ollama authenticate via their own mechanisms, not an api_key kwarg.
    if name not in ("aws", "ollama"):
        kwargs["api_key"] = config.get_api_key() or "no-key-required"

    model = config.chat_class(**kwargs)  # type: ignore[call-arg, arg-type]
    return model, model_name
283
+
284
+
285
def validate_api_key_provided() -> None:
    """Ensure exactly one LLM provider is configured.

    Raises:
        ValueError: If no provider key is present, or if several are set at once.
    """
    active = [name for name, cfg in LLM_PROVIDERS.items() if cfg.is_active()]
    if len(active) > 1:
        raise ValueError(f"Multiple LLM provider keys detected ({', '.join(active)}); please set only one.")
    if not active:
        required = sorted({cfg.api_key_env for cfg in LLM_PROVIDERS.values()})
        raise ValueError(f"No LLM provider API key found. Set one of: {', '.join(required)}")
293
+
294
+
295
def initialize_agent_llm(model_override: str | None = None) -> BaseChatModel:
    """Create the agent (reasoning) LLM and record its model name for monitoring."""
    llm, chosen_name = _initialize_llm(
        model_override,
        "agent_model",
        "agent_temperature",
        "",
        init_factory=True,
    )
    MONITORING_CALLBACK.model_name = chosen_name
    return llm
299
+
300
+
301
def initialize_parsing_llm(model_override: str | None = None) -> BaseChatModel:
    """Create the fast parsing/extraction LLM."""
    llm, _name = _initialize_llm(model_override, "parsing_model", "parsing_temperature", "Extractor ")
    return llm
304
+
305
+
306
def initialize_llms() -> tuple[BaseChatModel, BaseChatModel]:
    """Build the (agent, parsing) LLM pair, honoring overrides then env vars."""
    agent = initialize_agent_llm(_agent_model_override or os.getenv("AGENT_MODEL"))
    parsing = initialize_parsing_llm(_parsing_model_override or os.getenv("PARSING_MODEL"))
    return agent, parsing