codeboarding-0.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. agents/__init__.py +0 -0
  2. agents/abstraction_agent.py +150 -0
  3. agents/agent.py +467 -0
  4. agents/agent_responses.py +363 -0
  5. agents/cluster_methods_mixin.py +281 -0
  6. agents/constants.py +13 -0
  7. agents/dependency_discovery.py +159 -0
  8. agents/details_agent.py +174 -0
  9. agents/llm_config.py +309 -0
  10. agents/meta_agent.py +105 -0
  11. agents/planner_agent.py +105 -0
  12. agents/prompts/__init__.py +85 -0
  13. agents/prompts/abstract_prompt_factory.py +63 -0
  14. agents/prompts/claude_prompts.py +381 -0
  15. agents/prompts/deepseek_prompts.py +389 -0
  16. agents/prompts/gemini_flash_prompts.py +362 -0
  17. agents/prompts/glm_prompts.py +407 -0
  18. agents/prompts/gpt_prompts.py +470 -0
  19. agents/prompts/kimi_prompts.py +400 -0
  20. agents/prompts/prompt_factory.py +179 -0
  21. agents/tools/__init__.py +8 -0
  22. agents/tools/base.py +96 -0
  23. agents/tools/get_external_deps.py +47 -0
  24. agents/tools/get_method_invocations.py +47 -0
  25. agents/tools/read_cfg.py +60 -0
  26. agents/tools/read_docs.py +132 -0
  27. agents/tools/read_file.py +90 -0
  28. agents/tools/read_file_structure.py +156 -0
  29. agents/tools/read_git_diff.py +131 -0
  30. agents/tools/read_packages.py +60 -0
  31. agents/tools/read_source.py +105 -0
  32. agents/tools/read_structure.py +49 -0
  33. agents/tools/toolkit.py +119 -0
  34. agents/validation.py +383 -0
  35. caching/__init__.py +4 -0
  36. caching/cache.py +29 -0
  37. caching/meta_cache.py +227 -0
  38. codeboarding-0.9.0.dist-info/METADATA +223 -0
  39. codeboarding-0.9.0.dist-info/RECORD +126 -0
  40. codeboarding-0.9.0.dist-info/WHEEL +5 -0
  41. codeboarding-0.9.0.dist-info/entry_points.txt +3 -0
  42. codeboarding-0.9.0.dist-info/licenses/LICENSE +21 -0
  43. codeboarding-0.9.0.dist-info/top_level.txt +18 -0
  44. core/__init__.py +101 -0
  45. core/plugin_loader.py +46 -0
  46. core/protocols.py +27 -0
  47. core/registry.py +46 -0
  48. diagram_analysis/__init__.py +4 -0
  49. diagram_analysis/analysis_json.py +346 -0
  50. diagram_analysis/diagram_generator.py +486 -0
  51. diagram_analysis/file_coverage.py +212 -0
  52. diagram_analysis/incremental/__init__.py +63 -0
  53. diagram_analysis/incremental/component_checker.py +236 -0
  54. diagram_analysis/incremental/file_manager.py +217 -0
  55. diagram_analysis/incremental/impact_analyzer.py +238 -0
  56. diagram_analysis/incremental/io_utils.py +281 -0
  57. diagram_analysis/incremental/models.py +72 -0
  58. diagram_analysis/incremental/path_patching.py +164 -0
  59. diagram_analysis/incremental/reexpansion.py +166 -0
  60. diagram_analysis/incremental/scoped_analysis.py +227 -0
  61. diagram_analysis/incremental/updater.py +464 -0
  62. diagram_analysis/incremental/validation.py +48 -0
  63. diagram_analysis/manifest.py +152 -0
  64. diagram_analysis/version.py +6 -0
  65. duckdb_crud.py +125 -0
  66. github_action.py +172 -0
  67. health/__init__.py +3 -0
  68. health/checks/__init__.py +11 -0
  69. health/checks/circular_deps.py +48 -0
  70. health/checks/cohesion.py +93 -0
  71. health/checks/coupling.py +140 -0
  72. health/checks/function_size.py +85 -0
  73. health/checks/god_class.py +167 -0
  74. health/checks/inheritance.py +104 -0
  75. health/checks/instability.py +77 -0
  76. health/checks/unused_code_diagnostics.py +338 -0
  77. health/config.py +172 -0
  78. health/constants.py +19 -0
  79. health/models.py +186 -0
  80. health/runner.py +236 -0
  81. install.py +518 -0
  82. logging_config.py +105 -0
  83. main.py +529 -0
  84. monitoring/__init__.py +12 -0
  85. monitoring/callbacks.py +163 -0
  86. monitoring/context.py +158 -0
  87. monitoring/mixin.py +16 -0
  88. monitoring/paths.py +47 -0
  89. monitoring/stats.py +50 -0
  90. monitoring/writers.py +172 -0
  91. output_generators/__init__.py +0 -0
  92. output_generators/html.py +163 -0
  93. output_generators/html_template.py +382 -0
  94. output_generators/markdown.py +140 -0
  95. output_generators/mdx.py +171 -0
  96. output_generators/sphinx.py +175 -0
  97. repo_utils/__init__.py +277 -0
  98. repo_utils/change_detector.py +289 -0
  99. repo_utils/errors.py +6 -0
  100. repo_utils/git_diff.py +74 -0
  101. repo_utils/ignore.py +341 -0
  102. static_analyzer/__init__.py +335 -0
  103. static_analyzer/analysis_cache.py +699 -0
  104. static_analyzer/analysis_result.py +269 -0
  105. static_analyzer/cluster_change_analyzer.py +391 -0
  106. static_analyzer/cluster_helpers.py +79 -0
  107. static_analyzer/constants.py +166 -0
  108. static_analyzer/git_diff_analyzer.py +224 -0
  109. static_analyzer/graph.py +746 -0
  110. static_analyzer/incremental_orchestrator.py +671 -0
  111. static_analyzer/java_config_scanner.py +232 -0
  112. static_analyzer/java_utils.py +227 -0
  113. static_analyzer/lsp_client/__init__.py +12 -0
  114. static_analyzer/lsp_client/client.py +1642 -0
  115. static_analyzer/lsp_client/diagnostics.py +62 -0
  116. static_analyzer/lsp_client/java_client.py +517 -0
  117. static_analyzer/lsp_client/language_settings.py +97 -0
  118. static_analyzer/lsp_client/typescript_client.py +235 -0
  119. static_analyzer/programming_language.py +152 -0
  120. static_analyzer/reference_resolve_mixin.py +166 -0
  121. static_analyzer/scanner.py +95 -0
  122. static_analyzer/typescript_config_scanner.py +54 -0
  123. tool_registry.py +433 -0
  124. user_config.py +134 -0
  125. utils.py +56 -0
  126. vscode_constants.py +124 -0
duckdb_crud.py ADDED
@@ -0,0 +1,125 @@
+ from filelock import FileLock
+ import duckdb
+ from typing import Optional
+ import os
+
+ DB_PATH = os.getenv("JOB_DB", "jobs.duckdb")
+ LOCK_PATH = DB_PATH + ".lock"
+
+
+ # -- DuckDB Connection Helper --
+ def _connect():
+     return duckdb.connect(DB_PATH)
+
+
+ # Initialize DB on startup
+ def init_db():
+     # ensure directory exists
+     dir_path = os.path.dirname(DB_PATH)
+     if dir_path and not os.path.exists(dir_path):
+         os.makedirs(dir_path, exist_ok=True)
+     # wipe existing DB and lock files
+     if os.path.exists(DB_PATH):
+         try:
+             os.remove(DB_PATH)
+             os.remove(LOCK_PATH)
+         except OSError:
+             pass
+     # create fresh table
+     with FileLock(LOCK_PATH):
+         conn = _connect()
+         conn.execute(
+             """
+             CREATE TABLE IF NOT EXISTS jobs (
+                 id TEXT PRIMARY KEY,
+                 repo_url TEXT,
+                 status TEXT,
+                 result TEXT,
+                 error TEXT,
+                 created_at TIMESTAMP,
+                 started_at TIMESTAMP,
+                 finished_at TIMESTAMP
+             )
+             """
+         )
+         conn.close()
+
+
+ # -- CRUD operations --
+ def insert_job(job: dict):
+     with FileLock(LOCK_PATH):
+         conn = _connect()
+         conn.execute(
+             "INSERT INTO jobs VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+             [
+                 job["id"],
+                 job["repo_url"],
+                 job["status"],
+                 job["result"],
+                 job["error"],
+                 job["created_at"],
+                 job["started_at"],
+                 job["finished_at"],
+             ],
+         )
+         conn.close()
+
+
+ def update_job(job_id: str, **fields):
+     cols, vals = zip(*fields.items())
+     set_clause = ", ".join(f"{c} = ?" for c in cols)
+     with FileLock(LOCK_PATH):
+         conn = _connect()
+         conn.execute(
+             f"UPDATE jobs SET {set_clause} WHERE id = ?",
+             list(vals) + [job_id],
+         )
+         conn.close()
+
+
+ def fetch_job(job_id: str) -> Optional[dict]:
+     conn = _connect()
+     res = conn.execute(
+         "SELECT id, repo_url, status, result, error, created_at, started_at, finished_at" " FROM jobs WHERE id = ?",
+         [job_id],
+     ).fetchall()
+     conn.close()
+     if not res:
+         return None
+     id_, repo_url, status, result, error, created_at, started_at, finished_at = res[0]
+     return {
+         "id": id_,
+         "repo_url": repo_url,
+         "status": status,
+         "result": result,
+         "error": error,
+         "created_at": created_at.isoformat() if created_at else None,
+         "started_at": started_at.isoformat() if started_at else None,
+         "finished_at": finished_at.isoformat() if finished_at else None,
+     }
+
+
+ def fetch_all_jobs() -> list[dict]:
+     conn = _connect()
+     res = conn.execute(
+         "SELECT id, repo_url, status, result, error, created_at, started_at, finished_at"
+         " FROM jobs ORDER BY created_at DESC"
+     ).fetchall()
+     conn.close()
+
+     jobs = []
+     for row in res:
+         id_, repo_url, status, result, error, created_at, started_at, finished_at = row
+         jobs.append(
+             {
+                 "id": id_,
+                 "repo_url": repo_url,
+                 "status": status,
+                 "result": result,
+                 "error": error,
+                 "created_at": created_at.isoformat() if created_at else None,
+                 "started_at": started_at.isoformat() if started_at else None,
+                 "finished_at": finished_at.isoformat() if finished_at else None,
+             }
+         )
+     return jobs
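For orientation, a minimal usage sketch of the job-store helpers above; this snippet is not part of the package, and the job id and field values are invented for illustration.

# Hypothetical usage of duckdb_crud; "job-001" and all field values are made up.
from datetime import datetime

import duckdb_crud

duckdb_crud.init_db()
duckdb_crud.insert_job({
    "id": "job-001",
    "repo_url": "https://github.com/example/repo",
    "status": "queued",
    "result": None,
    "error": None,
    "created_at": datetime.now(),
    "started_at": None,
    "finished_at": None,
})
duckdb_crud.update_job("job-001", status="running", started_at=datetime.now())
print(duckdb_crud.fetch_job("job-001")["status"])  # "running"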
github_action.py ADDED
@@ -0,0 +1,172 @@
+ import json
+ import logging
+ import os
+ import shutil
+ from pathlib import Path
+
+ from agents.agent_responses import AnalysisInsights
+ from diagram_analysis import DiagramGenerator
+ from diagram_analysis.analysis_json import build_id_to_name_map, parse_unified_analysis
+ from output_generators.html import generate_html_file
+ from output_generators.markdown import generate_markdown_file
+ from output_generators.mdx import generate_mdx_file
+ from output_generators.sphinx import generate_rst_file
+ from repo_utils import checkout_repo, clone_repository
+ from utils import create_temp_repo_folder, sanitize
+
+ logger = logging.getLogger(__name__)
+
+
+ def _load_all_analyses(analysis_path: Path) -> list[tuple[str, AnalysisInsights, set[str]]]:
+     """Load the unified analysis.json and return a list of (file_name, analysis, expanded_components) tuples.
+
+     Returns the root analysis as 'overview' plus one entry per expanded component.
+     """
+     with open(analysis_path, "r") as f:
+         data = json.load(f)
+
+     root_analysis, sub_analyses = parse_unified_analysis(data)
+
+     # Build a complete id-to-name mapping across all levels
+     id_to_name = build_id_to_name_map(root_analysis, sub_analyses)
+
+     # Root analysis: expanded components are those that have sub-analyses
+     root_expanded = set(sub_analyses.keys())
+     entries: list[tuple[str, AnalysisInsights, set[str]]] = [("overview", root_analysis, root_expanded)]
+
+     # Sub-analyses: determine which of their components are further expanded
+     for comp_id, sub_analysis in sub_analyses.items():
+         sub_expanded = {c.component_id for c in sub_analysis.components if c.component_id in sub_analyses}
+         comp_name = id_to_name.get(comp_id, comp_id)
+         fname = sanitize(comp_name)
+         entries.append((fname, sub_analysis, sub_expanded))
+
+     return entries
+
+
+ def generate_markdown(
+     analysis_path: Path, repo_name: str, repo_url: str, target_branch: str, temp_repo_folder: Path, output_dir: str
+ ):
+     entries = _load_all_analyses(analysis_path)
+     for fname, analysis, expanded_components in entries:
+         logger.info(f"Generating markdown for: {fname}")
+         generate_markdown_file(
+             fname,
+             analysis,
+             repo_name,
+             repo_ref=f"{repo_url}/blob/{target_branch}/{output_dir}",
+             expanded_components=expanded_components,
+             temp_dir=temp_repo_folder,
+         )
+
+
+ def generate_html(analysis_path: Path, repo_name: str, repo_url: str, target_branch: str, temp_repo_folder: Path):
+     entries = _load_all_analyses(analysis_path)
+     for fname, analysis, expanded_components in entries:
+         logger.info(f"Generating HTML for: {fname}")
+         generate_html_file(
+             fname,
+             analysis,
+             repo_name,
+             repo_ref=f"{repo_url}/blob/{target_branch}",
+             expanded_components=expanded_components,
+             temp_dir=temp_repo_folder,
+         )
+
+
+ def generate_mdx(
+     analysis_path: Path, repo_name: str, repo_url: str, target_branch: str, temp_repo_folder: Path, output_dir: str
+ ):
+     entries = _load_all_analyses(analysis_path)
+     for fname, analysis, expanded_components in entries:
+         logger.info(f"Generating MDX for: {fname}")
+         generate_mdx_file(
+             fname,
+             analysis,
+             repo_name,
+             repo_ref=f"{repo_url}/blob/{target_branch}/{output_dir}",
+             expanded_components=expanded_components,
+             temp_dir=temp_repo_folder,
+         )
+
+
+ def generate_rst(
+     analysis_path: Path, repo_name: str, repo_url: str, target_branch: str, temp_repo_folder: Path, output_dir: str
+ ):
+     entries = _load_all_analyses(analysis_path)
+     for fname, analysis, expanded_components in entries:
+         logger.info(f"Generating RST for: {fname}")
+         generate_rst_file(
+             fname,
+             analysis,
+             repo_name,
+             repo_ref=f"{repo_url}/blob/{target_branch}/{output_dir}",
+             expanded_components=expanded_components,
+             temp_dir=temp_repo_folder,
+         )
+
+
+ def _seed_existing_analysis(existing_analysis_dir: Path, temp_repo_folder: Path) -> None:
+     """Copy existing analysis files into the temp folder so incremental analysis can use them."""
+     for filename in ("analysis.json", "analysis_manifest.json"):
+         src = existing_analysis_dir / filename
+         if src.is_file():
+             shutil.copy2(src, temp_repo_folder / filename)
+             logger.info(f"Seeded existing {filename} for incremental analysis")
+
+
+ def generate_analysis(
+     repo_url: str,
+     source_branch: str,
+     target_branch: str,
+     extension: str,
+     output_dir: str = ".codeboarding",
+     existing_analysis_dir: str | None = None,
+ ):
+     """
+     Generate analysis for a GitHub repository URL.
+     This function is intended to be used in a GitHub Action context.
+
+     Args:
+         existing_analysis_dir: Path to a directory containing a previous analysis.json
+             and analysis_manifest.json. When provided, incremental analysis is attempted
+             before falling back to a full analysis.
+     """
+     repo_root = Path(os.getenv("REPO_ROOT", "repos"))
+     repo_name = clone_repository(repo_url, repo_root)
+     repo_dir = repo_root / repo_name
+     checkout_repo(repo_dir, source_branch)
+     temp_repo_folder = create_temp_repo_folder()
+
+     # Seed previous analysis files so incremental update can detect changes
+     if existing_analysis_dir:
+         _seed_existing_analysis(Path(existing_analysis_dir), temp_repo_folder)
+
+     generator = DiagramGenerator(
+         repo_location=repo_dir,
+         temp_folder=temp_repo_folder,
+         repo_name=repo_name,
+         output_dir=temp_repo_folder,
+         depth_level=int(os.getenv("DIAGRAM_DEPTH_LEVEL", "1")),
+     )
+
+     # Use smart analysis: tries incremental first, falls back to full
+     analysis_files = generator.generate_analysis_smart()
+
+     # The generator now returns a single analysis.json path
+     analysis_path = Path(analysis_files[0])
+
+     # Now generate the output docs:
+     match extension:
+         case ".md":
+             generate_markdown(analysis_path, repo_name, repo_url, target_branch, temp_repo_folder, output_dir)
+         case ".html":
+             generate_html(analysis_path, repo_name, repo_url, target_branch, temp_repo_folder)
+         case ".mdx":
+             generate_mdx(analysis_path, repo_name, repo_url, target_branch, temp_repo_folder, output_dir)
+         case ".rst":
+             generate_rst(analysis_path, repo_name, repo_url, target_branch, temp_repo_folder, output_dir)
+         case _:
+             raise ValueError(f"Unsupported extension: {extension}")
+
+     return temp_repo_folder
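A rough sketch of how this GitHub Action entry point might be invoked; the repository URL and branch names below are placeholders, and the call also assumes the environment (API keys, REPO_ROOT, etc.) is already configured. This snippet is not shipped in the package.

# Illustrative invocation only; URL and branches are placeholders.
from github_action import generate_analysis

temp_folder = generate_analysis(
    repo_url="https://github.com/example/repo",
    source_branch="feature/docs",
    target_branch="main",
    extension=".md",
    output_dir=".codeboarding",
)
print(f"Generated docs in {temp_folder}")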
health/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from health.runner import run_health_checks
+
+ __all__ = ["run_health_checks"]
health/checks/__init__.py ADDED
@@ -0,0 +1,11 @@
+ """Health checks module."""
+
+ from health.checks.unused_code_diagnostics import (
+     LSPDiagnosticsCollector,
+     check_unused_code_diagnostics,
+ )
+
+ __all__ = [
+     "LSPDiagnosticsCollector",
+     "check_unused_code_diagnostics",
+ ]
health/checks/circular_deps.py ADDED
@@ -0,0 +1,48 @@
+ import logging
+
+ import networkx as nx
+
+ from health.models import CircularDependencyCheck, HealthCheckConfig
+
+ logger = logging.getLogger(__name__)
+
+
+ def check_circular_dependencies(package_dependencies: dict, config: HealthCheckConfig) -> CircularDependencyCheck:
+     """E6: Detect circular dependencies at the package level.
+
+     Circular dependencies make the system rigid, hard to modify, and
+     difficult to test in isolation.
+     """
+     cycles: list[str] = []
+
+     graph = nx.DiGraph()
+     for package, info in package_dependencies.items():
+         graph.add_node(package)
+         # Prefer import_deps (text-based imports only) over the combined imports
+         # key which may include LSP reference-based deps that inflate edges.
+         imports = info.get("import_deps", info.get("imports", []))
+         if isinstance(imports, dict):
+             imports = list(imports.keys())
+         for imported in imports:
+             if imported in package_dependencies:
+                 graph.add_edge(package, imported)
+
+     total_packages = graph.number_of_nodes()
+     packages_in_cycles: set[str] = set()
+
+     try:
+         for cycle in nx.simple_cycles(graph):
+             if len(cycles) >= config.max_cycles_reported:
+                 break
+             packages_in_cycles.update(cycle)
+             cycles.append(" -> ".join(cycle + [cycle[0]]))
+     except nx.NetworkXError:
+         logger.warning("Error while detecting cycles in package dependency graph")
+
+     return CircularDependencyCheck(
+         check_name="circular_dependencies",
+         description="Detects circular dependencies between packages",
+         cycles=cycles,
+         packages_checked=total_packages,
+         packages_in_cycles=len(packages_in_cycles),
+     )
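To make the expected input shape concrete, a toy invocation follows; the package names are invented, and HealthCheckConfig() is assumed to carry a default max_cycles_reported. This snippet is illustrative, not part of the package.

# Toy input showing the expected shape of package_dependencies.
from health.checks.circular_deps import check_circular_dependencies
from health.models import HealthCheckConfig

package_dependencies = {
    "pkg_a": {"import_deps": ["pkg_b"]},
    "pkg_b": {"import_deps": ["pkg_a"]},  # closes the cycle pkg_a -> pkg_b -> pkg_a
    "pkg_c": {"import_deps": []},
}

result = check_circular_dependencies(package_dependencies, HealthCheckConfig())
print(result.cycles)  # e.g. ['pkg_a -> pkg_b -> pkg_a']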
health/checks/cohesion.py ADDED
@@ -0,0 +1,93 @@
+ import logging
+
+ from health.models import FindingEntity, FindingGroup, HealthCheckConfig, Severity, StandardCheckSummary
+ from static_analyzer.graph import CallGraph
+
+ logger = logging.getLogger(__name__)
+
+
+ def check_component_cohesion(call_graph: CallGraph, config: HealthCheckConfig) -> StandardCheckSummary:
+     """E10: Measure component cohesion via internal vs external edge ratio per cluster.
+
+     For each cluster identified by the call graph clustering, compute:
+         cohesion = internal_edges / total_edges
+
+     Low cohesion means the cluster's nodes talk more to nodes outside the
+     cluster than inside it, suggesting the grouping may not reflect
+     actual code organization.
+     """
+     warning_entities: list[FindingEntity] = []
+
+     cluster_result = call_graph.cluster()
+     if not cluster_result.clusters:
+         return StandardCheckSummary(
+             check_name="component_cohesion",
+             description="Measures internal vs external edge ratio per component/cluster",
+             total_entities_checked=0,
+             findings_count=0,
+             score=1.0,
+             finding_groups=[],
+         )
+
+     total_checked = 0
+
+     for cluster_id, node_names in cluster_result.clusters.items():
+         internal_edges = 0
+         external_edges = 0
+
+         for node_name in node_names:
+             node = call_graph.nodes.get(node_name)
+             if not node:
+                 continue
+             for called_fqn in node.methods_called_by_me:
+                 if called_fqn in node_names:
+                     internal_edges += 1
+                 else:
+                     external_edges += 1
+
+         total_edges = internal_edges + external_edges
+         if total_edges == 0:
+             continue
+
+         total_checked += 1
+         cohesion = internal_edges / total_edges
+
+         # Get representative file for the cluster
+         cluster_files = cluster_result.get_files_for_cluster(cluster_id)
+         representative_file = next(iter(cluster_files), None) if cluster_files else None
+
+         if cohesion <= config.cohesion_low:
+             warning_entities.append(
+                 FindingEntity(
+                     entity_name=f"cluster_{cluster_id}",
+                     file_path=representative_file,
+                     line_start=None,
+                     line_end=None,
+                     metric_value=round(cohesion, 3),
+                 )
+             )
+
+     finding_groups: list[FindingGroup] = []
+     if warning_entities:
+         finding_groups.append(
+             FindingGroup(
+                 severity=Severity.WARNING,
+                 threshold=config.cohesion_low,
+                 description=f"Components with low cohesion (below {config.cohesion_low})",
+                 entities=sorted(warning_entities, key=lambda e: e.metric_value),
+             )
+         )
+
+     total_findings = len(warning_entities)
+     passing = total_checked - total_findings
+     score = passing / total_checked if total_checked > 0 else 1.0
+
+     return StandardCheckSummary(
+         check_name="component_cohesion",
+         description="Measures internal vs external edge ratio per component/cluster",
+         total_entities_checked=total_checked,
+         findings_count=total_findings,
+         warning_count=len(warning_entities),
+         score=score,
+         finding_groups=finding_groups,
+     )
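Worked numbers for the cohesion formula in the docstring above; the 0.3 threshold is an assumed cohesion_low value for illustration, not necessarily the package default.

# Worked example of the cohesion metric with made-up edge counts.
internal_edges, external_edges = 3, 9
cohesion = internal_edges / (internal_edges + external_edges)  # 0.25
flagged = cohesion <= 0.3  # True -> reported as a WARNING finding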
health/checks/coupling.py ADDED
@@ -0,0 +1,140 @@
+ import logging
+
+ from health.models import (
+     FindingEntity,
+     FindingGroup,
+     HealthCheckConfig,
+     Severity,
+     StandardCheckSummary,
+ )
+ from static_analyzer.graph import CallGraph
+
+ logger = logging.getLogger(__name__)
+
+
+ def collect_coupling_values(call_graph: CallGraph) -> tuple[list[float], list[float]]:
+     """Collect fan-out and fan-in values for all callable entities.
+
+     Returns:
+         A tuple of (fan_out_values, fan_in_values).
+     """
+     nx_graph = call_graph.to_networkx()
+     fan_out_values: list[float] = []
+     fan_in_values: list[float] = []
+
+     for node_name in nx_graph.nodes:
+         node = call_graph.nodes.get(node_name)
+         if node and (node.is_class() or node.is_data()):
+             continue
+         fan_out_values.append(float(nx_graph.out_degree(node_name)))
+         fan_in_values.append(float(nx_graph.in_degree(node_name)))
+
+     return fan_out_values, fan_in_values
+
+
+ def check_fan_out(call_graph: CallGraph, config: HealthCheckConfig) -> StandardCheckSummary:
+     """E2: Check efferent coupling (fan-out) per function.
+
+     Fan-out measures how many other functions a given function calls.
+     High fan-out indicates a function that does too much or orchestrates
+     too many dependencies.
+     """
+     findings: list[FindingEntity] = []
+     total_checked = 0
+     threshold = config.fan_out_max
+
+     for fqn, node in call_graph.nodes.items():
+         if node.is_class() or node.is_data():
+             continue
+
+         fan_out = len(node.methods_called_by_me)
+         total_checked += 1
+
+         if fan_out >= threshold:
+             findings.append(
+                 FindingEntity(
+                     entity_name=fqn,
+                     file_path=node.file_path,
+                     line_start=node.line_start,
+                     line_end=node.line_end,
+                     metric_value=fan_out,
+                 )
+             )
+
+     finding_groups: list[FindingGroup] = []
+     if findings:
+         finding_groups.append(
+             FindingGroup(
+                 severity=Severity.WARNING,
+                 threshold=threshold,
+                 description=f"Functions calling more than {threshold:.1f} other functions",
+                 entities=sorted(findings, key=lambda e: e.metric_value, reverse=True),
+             )
+         )
+
+     score = (total_checked - len(findings)) / total_checked if total_checked > 0 else 1.0
+
+     return StandardCheckSummary(
+         check_name="fan_out",
+         description="Checks efferent coupling: how many other functions each function calls",
+         total_entities_checked=total_checked,
+         findings_count=len(findings),
+         warning_count=len(findings),
+         score=score,
+         finding_groups=finding_groups,
+     )
+
+
+ def check_fan_in(call_graph: CallGraph, config: HealthCheckConfig) -> StandardCheckSummary:
+     """E3: Check afferent coupling (fan-in) per function.
+
+     Fan-in measures how many other functions call a given function.
+     High fan-in means the function is a critical dependency — changes
+     to it are high-risk and affect many callers.
+     """
+     findings: list[FindingEntity] = []
+     total_checked = 0
+     threshold = config.fan_in_max
+
+     nx_graph = call_graph.to_networkx()
+     for node_name in nx_graph.nodes:
+         node = call_graph.nodes.get(node_name)
+         if node and (node.is_class() or node.is_data()):
+             continue
+
+         fan_in = nx_graph.in_degree(node_name)
+         total_checked += 1
+
+         if fan_in >= threshold:
+             findings.append(
+                 FindingEntity(
+                     entity_name=node_name,
+                     file_path=node.file_path if node else None,
+                     line_start=node.line_start if node else None,
+                     line_end=node.line_end if node else None,
+                     metric_value=fan_in,
+                 )
+             )
+
+     finding_groups: list[FindingGroup] = []
+     if findings:
+         finding_groups.append(
+             FindingGroup(
+                 severity=Severity.WARNING,
+                 threshold=threshold,
+                 description=f"Functions called by more than {threshold:.1f} other functions",
+                 entities=sorted(findings, key=lambda e: e.metric_value, reverse=True),
+             )
+         )
+
+     score = (total_checked - len(findings)) / total_checked if total_checked > 0 else 1.0
+
+     return StandardCheckSummary(
+         check_name="fan_in",
+         description="Checks afferent coupling: how many other functions call each function",
+         total_entities_checked=total_checked,
+         findings_count=len(findings),
+         warning_count=len(findings),
+         score=score,
+         finding_groups=finding_groups,
+     )
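The score in both coupling checks is simply the share of checked functions that stay under the threshold; a worked example with made-up counts follows.

# Made-up counts illustrating how the fan-out/fan-in score is computed.
total_checked, findings_count = 200, 8
score = (total_checked - findings_count) / total_checked  # 0.96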
health/checks/function_size.py ADDED
@@ -0,0 +1,85 @@
+ import logging
+
+ from health.models import (
+     FindingEntity,
+     FindingGroup,
+     HealthCheckConfig,
+     Severity,
+     StandardCheckSummary,
+ )
+ from repo_utils.ignore import is_test_or_infrastructure_file
+ from static_analyzer.graph import CallGraph
+
+ logger = logging.getLogger(__name__)
+
+
+ def collect_function_sizes(call_graph: CallGraph) -> list[float]:
+     """Collect function sizes (line counts) for all callable entities in the graph."""
+     sizes: list[float] = []
+     for node in call_graph.nodes.values():
+         if node.is_class() or node.is_data():
+             continue
+         size = node.line_end - node.line_start
+         if size > 0:
+             sizes.append(float(size))
+     return sizes
+
+
+ def check_function_size(call_graph: CallGraph, config: HealthCheckConfig) -> StandardCheckSummary:
+     """E1: Check function/method sizes across the call graph.
+
+     Flags functions that exceed line count thresholds. Large functions are
+     harder to understand, test, and maintain.
+
+     Excludes test and infrastructure files as they have different size norms.
+     """
+     findings: list[FindingEntity] = []
+     total_checked = 0
+     threshold = config.function_size_max
+
+     for fqn, node in call_graph.nodes.items():
+         if node.is_class() or node.is_data():
+             continue
+
+         # Skip test/infrastructure files
+         if is_test_or_infrastructure_file(node.file_path):
+             continue
+
+         size = node.line_end - node.line_start
+         if size <= 0:
+             continue
+         total_checked += 1
+
+         if size >= threshold:
+             findings.append(
+                 FindingEntity(
+                     entity_name=fqn,
+                     file_path=node.file_path,
+                     line_start=node.line_start,
+                     line_end=node.line_end,
+                     metric_value=size,
+                 )
+             )
+
+     finding_groups: list[FindingGroup] = []
+     if findings:
+         finding_groups.append(
+             FindingGroup(
+                 severity=Severity.WARNING,
+                 threshold=threshold,
+                 description=f"Functions exceeding {threshold:.1f} lines",
+                 entities=sorted(findings, key=lambda e: e.metric_value, reverse=True),
+             )
+         )
+
+     score = (total_checked - len(findings)) / total_checked if total_checked > 0 else 1.0
+
+     return StandardCheckSummary(
+         check_name="function_size",
+         description="Checks that functions/methods do not exceed line count thresholds",
+         total_entities_checked=total_checked,
+         findings_count=len(findings),
+         warning_count=len(findings),
+         score=score,
+         finding_groups=finding_groups,
+     )