codeboarding-0.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. agents/__init__.py +0 -0
  2. agents/abstraction_agent.py +150 -0
  3. agents/agent.py +467 -0
  4. agents/agent_responses.py +363 -0
  5. agents/cluster_methods_mixin.py +281 -0
  6. agents/constants.py +13 -0
  7. agents/dependency_discovery.py +159 -0
  8. agents/details_agent.py +174 -0
  9. agents/llm_config.py +309 -0
  10. agents/meta_agent.py +105 -0
  11. agents/planner_agent.py +105 -0
  12. agents/prompts/__init__.py +85 -0
  13. agents/prompts/abstract_prompt_factory.py +63 -0
  14. agents/prompts/claude_prompts.py +381 -0
  15. agents/prompts/deepseek_prompts.py +389 -0
  16. agents/prompts/gemini_flash_prompts.py +362 -0
  17. agents/prompts/glm_prompts.py +407 -0
  18. agents/prompts/gpt_prompts.py +470 -0
  19. agents/prompts/kimi_prompts.py +400 -0
  20. agents/prompts/prompt_factory.py +179 -0
  21. agents/tools/__init__.py +8 -0
  22. agents/tools/base.py +96 -0
  23. agents/tools/get_external_deps.py +47 -0
  24. agents/tools/get_method_invocations.py +47 -0
  25. agents/tools/read_cfg.py +60 -0
  26. agents/tools/read_docs.py +132 -0
  27. agents/tools/read_file.py +90 -0
  28. agents/tools/read_file_structure.py +156 -0
  29. agents/tools/read_git_diff.py +131 -0
  30. agents/tools/read_packages.py +60 -0
  31. agents/tools/read_source.py +105 -0
  32. agents/tools/read_structure.py +49 -0
  33. agents/tools/toolkit.py +119 -0
  34. agents/validation.py +383 -0
  35. caching/__init__.py +4 -0
  36. caching/cache.py +29 -0
  37. caching/meta_cache.py +227 -0
  38. codeboarding-0.9.0.dist-info/METADATA +223 -0
  39. codeboarding-0.9.0.dist-info/RECORD +126 -0
  40. codeboarding-0.9.0.dist-info/WHEEL +5 -0
  41. codeboarding-0.9.0.dist-info/entry_points.txt +3 -0
  42. codeboarding-0.9.0.dist-info/licenses/LICENSE +21 -0
  43. codeboarding-0.9.0.dist-info/top_level.txt +18 -0
  44. core/__init__.py +101 -0
  45. core/plugin_loader.py +46 -0
  46. core/protocols.py +27 -0
  47. core/registry.py +46 -0
  48. diagram_analysis/__init__.py +4 -0
  49. diagram_analysis/analysis_json.py +346 -0
  50. diagram_analysis/diagram_generator.py +486 -0
  51. diagram_analysis/file_coverage.py +212 -0
  52. diagram_analysis/incremental/__init__.py +63 -0
  53. diagram_analysis/incremental/component_checker.py +236 -0
  54. diagram_analysis/incremental/file_manager.py +217 -0
  55. diagram_analysis/incremental/impact_analyzer.py +238 -0
  56. diagram_analysis/incremental/io_utils.py +281 -0
  57. diagram_analysis/incremental/models.py +72 -0
  58. diagram_analysis/incremental/path_patching.py +164 -0
  59. diagram_analysis/incremental/reexpansion.py +166 -0
  60. diagram_analysis/incremental/scoped_analysis.py +227 -0
  61. diagram_analysis/incremental/updater.py +464 -0
  62. diagram_analysis/incremental/validation.py +48 -0
  63. diagram_analysis/manifest.py +152 -0
  64. diagram_analysis/version.py +6 -0
  65. duckdb_crud.py +125 -0
  66. github_action.py +172 -0
  67. health/__init__.py +3 -0
  68. health/checks/__init__.py +11 -0
  69. health/checks/circular_deps.py +48 -0
  70. health/checks/cohesion.py +93 -0
  71. health/checks/coupling.py +140 -0
  72. health/checks/function_size.py +85 -0
  73. health/checks/god_class.py +167 -0
  74. health/checks/inheritance.py +104 -0
  75. health/checks/instability.py +77 -0
  76. health/checks/unused_code_diagnostics.py +338 -0
  77. health/config.py +172 -0
  78. health/constants.py +19 -0
  79. health/models.py +186 -0
  80. health/runner.py +236 -0
  81. install.py +518 -0
  82. logging_config.py +105 -0
  83. main.py +529 -0
  84. monitoring/__init__.py +12 -0
  85. monitoring/callbacks.py +163 -0
  86. monitoring/context.py +158 -0
  87. monitoring/mixin.py +16 -0
  88. monitoring/paths.py +47 -0
  89. monitoring/stats.py +50 -0
  90. monitoring/writers.py +172 -0
  91. output_generators/__init__.py +0 -0
  92. output_generators/html.py +163 -0
  93. output_generators/html_template.py +382 -0
  94. output_generators/markdown.py +140 -0
  95. output_generators/mdx.py +171 -0
  96. output_generators/sphinx.py +175 -0
  97. repo_utils/__init__.py +277 -0
  98. repo_utils/change_detector.py +289 -0
  99. repo_utils/errors.py +6 -0
  100. repo_utils/git_diff.py +74 -0
  101. repo_utils/ignore.py +341 -0
  102. static_analyzer/__init__.py +335 -0
  103. static_analyzer/analysis_cache.py +699 -0
  104. static_analyzer/analysis_result.py +269 -0
  105. static_analyzer/cluster_change_analyzer.py +391 -0
  106. static_analyzer/cluster_helpers.py +79 -0
  107. static_analyzer/constants.py +166 -0
  108. static_analyzer/git_diff_analyzer.py +224 -0
  109. static_analyzer/graph.py +746 -0
  110. static_analyzer/incremental_orchestrator.py +671 -0
  111. static_analyzer/java_config_scanner.py +232 -0
  112. static_analyzer/java_utils.py +227 -0
  113. static_analyzer/lsp_client/__init__.py +12 -0
  114. static_analyzer/lsp_client/client.py +1642 -0
  115. static_analyzer/lsp_client/diagnostics.py +62 -0
  116. static_analyzer/lsp_client/java_client.py +517 -0
  117. static_analyzer/lsp_client/language_settings.py +97 -0
  118. static_analyzer/lsp_client/typescript_client.py +235 -0
  119. static_analyzer/programming_language.py +152 -0
  120. static_analyzer/reference_resolve_mixin.py +166 -0
  121. static_analyzer/scanner.py +95 -0
  122. static_analyzer/typescript_config_scanner.py +54 -0
  123. tool_registry.py +433 -0
  124. user_config.py +134 -0
  125. utils.py +56 -0
  126. vscode_constants.py +124 -0
duckdb_crud.py ADDED
@@ -0,0 +1,125 @@
+ from filelock import FileLock
+ import duckdb
+ from typing import Optional
+ import os
+
+ DB_PATH = os.getenv("JOB_DB", "jobs.duckdb")
+ LOCK_PATH = DB_PATH + ".lock"
+
+
+ # -- DuckDB Connection Helper --
+ def _connect():
+     return duckdb.connect(DB_PATH)
+
+
+ # Initialize DB on startup
+ def init_db():
+     # ensure directory exists
+     dir_path = os.path.dirname(DB_PATH)
+     if dir_path and not os.path.exists(dir_path):
+         os.makedirs(dir_path, exist_ok=True)
+     # wipe existing DB and lock files
+     if os.path.exists(DB_PATH):
+         try:
+             os.remove(DB_PATH)
+             os.remove(LOCK_PATH)
+         except OSError:
+             pass
+     # create fresh table
+     with FileLock(LOCK_PATH):
+         conn = _connect()
+         conn.execute(
+             """
+             CREATE TABLE IF NOT EXISTS jobs (
+                 id TEXT PRIMARY KEY,
+                 repo_url TEXT,
+                 status TEXT,
+                 result TEXT,
+                 error TEXT,
+                 created_at TIMESTAMP,
+                 started_at TIMESTAMP,
+                 finished_at TIMESTAMP
+             )
+             """
+         )
+         conn.close()
+
+
+ # -- CRUD operations --
+ def insert_job(job: dict):
+     with FileLock(LOCK_PATH):
+         conn = _connect()
+         conn.execute(
+             "INSERT INTO jobs VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+             [
+                 job["id"],
+                 job["repo_url"],
+                 job["status"],
+                 job["result"],
+                 job["error"],
+                 job["created_at"],
+                 job["started_at"],
+                 job["finished_at"],
+             ],
+         )
+         conn.close()
+
+
+ def update_job(job_id: str, **fields):
+     cols, vals = zip(*fields.items())
+     set_clause = ", ".join(f"{c} = ?" for c in cols)
+     with FileLock(LOCK_PATH):
+         conn = _connect()
+         conn.execute(
+             f"UPDATE jobs SET {set_clause} WHERE id = ?",
+             list(vals) + [job_id],
+         )
+         conn.close()
+
+
+ def fetch_job(job_id: str) -> Optional[dict]:
+     conn = _connect()
+     res = conn.execute(
+         "SELECT id, repo_url, status, result, error, created_at, started_at, finished_at" " FROM jobs WHERE id = ?",
+         [job_id],
+     ).fetchall()
+     conn.close()
+     if not res:
+         return None
+     id_, repo_url, status, result, error, created_at, started_at, finished_at = res[0]
+     return {
+         "id": id_,
+         "repo_url": repo_url,
+         "status": status,
+         "result": result,
+         "error": error,
+         "created_at": created_at.isoformat() if created_at else None,
+         "started_at": started_at.isoformat() if started_at else None,
+         "finished_at": finished_at.isoformat() if finished_at else None,
+     }
+
+
+ def fetch_all_jobs() -> list[dict]:
+     conn = _connect()
+     res = conn.execute(
+         "SELECT id, repo_url, status, result, error, created_at, started_at, finished_at"
+         " FROM jobs ORDER BY created_at DESC"
+     ).fetchall()
+     conn.close()
+
+     jobs = []
+     for row in res:
+         id_, repo_url, status, result, error, created_at, started_at, finished_at = row
+         jobs.append(
+             {
+                 "id": id_,
+                 "repo_url": repo_url,
+                 "status": status,
+                 "result": result,
+                 "error": error,
+                 "created_at": created_at.isoformat() if created_at else None,
+                 "started_at": started_at.isoformat() if started_at else None,
+                 "finished_at": finished_at.isoformat() if finished_at else None,
+             }
+         )
+     return jobs
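For orientation, a minimal usage sketch of the job-store helpers above; this snippet is not part of the package, and the job id and field values are invented for illustration.

# Hypothetical usage of duckdb_crud; "job-001" and all field values are made up.
from datetime import datetime

import duckdb_crud

duckdb_crud.init_db()
duckdb_crud.insert_job({
    "id": "job-001",
    "repo_url": "https://github.com/example/repo",
    "status": "queued",
    "result": None,
    "error": None,
    "created_at": datetime.now(),
    "started_at": None,
    "finished_at": None,
})
duckdb_crud.update_job("job-001", status="running", started_at=datetime.now())
print(duckdb_crud.fetch_job("job-001")["status"])  # "running"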
github_action.py ADDED
@@ -0,0 +1,172 @@
+ import json
+ import logging
+ import os
+ import shutil
+ from pathlib import Path
+
+ from agents.agent_responses import AnalysisInsights
+ from diagram_analysis import DiagramGenerator
+ from diagram_analysis.analysis_json import build_id_to_name_map, parse_unified_analysis
+ from output_generators.html import generate_html_file
+ from output_generators.markdown import generate_markdown_file
+ from output_generators.mdx import generate_mdx_file
+ from output_generators.sphinx import generate_rst_file
+ from repo_utils import checkout_repo, clone_repository
+ from utils import create_temp_repo_folder, sanitize
+
+ logger = logging.getLogger(__name__)
+
+
+ def _load_all_analyses(analysis_path: Path) -> list[tuple[str, AnalysisInsights, set[str]]]:
+     """Load the unified analysis.json and return a list of (file_name, analysis, expanded_components) tuples.
+
+     Returns the root analysis as 'overview' plus one entry per expanded component.
+     """
+     with open(analysis_path, "r") as f:
+         data = json.load(f)
+
+     root_analysis, sub_analyses = parse_unified_analysis(data)
+
+     # Build a complete id-to-name mapping across all levels
+     id_to_name = build_id_to_name_map(root_analysis, sub_analyses)
+
+     # Root analysis: expanded components are those that have sub-analyses
+     root_expanded = set(sub_analyses.keys())
+     entries: list[tuple[str, AnalysisInsights, set[str]]] = [("overview", root_analysis, root_expanded)]
+
+     # Sub-analyses: determine which of their components are further expanded
+     for comp_id, sub_analysis in sub_analyses.items():
+         sub_expanded = {c.component_id for c in sub_analysis.components if c.component_id in sub_analyses}
+         comp_name = id_to_name.get(comp_id, comp_id)
+         fname = sanitize(comp_name)
+         entries.append((fname, sub_analysis, sub_expanded))
+
+     return entries
+
+
+ def generate_markdown(
+     analysis_path: Path, repo_name: str, repo_url: str, target_branch: str, temp_repo_folder: Path, output_dir: str
+ ):
+     entries = _load_all_analyses(analysis_path)
+     for fname, analysis, expanded_components in entries:
+         logger.info(f"Generating markdown for: {fname}")
+         generate_markdown_file(
+             fname,
+             analysis,
+             repo_name,
+             repo_ref=f"{repo_url}/blob/{target_branch}/{output_dir}",
+             expanded_components=expanded_components,
+             temp_dir=temp_repo_folder,
+         )
+
+
+ def generate_html(analysis_path: Path, repo_name: str, repo_url: str, target_branch: str, temp_repo_folder: Path):
+     entries = _load_all_analyses(analysis_path)
+     for fname, analysis, expanded_components in entries:
+         logger.info(f"Generating HTML for: {fname}")
+         generate_html_file(
+             fname,
+             analysis,
+             repo_name,
+             repo_ref=f"{repo_url}/blob/{target_branch}",
+             expanded_components=expanded_components,
+             temp_dir=temp_repo_folder,
+         )
+
+
+ def generate_mdx(
+     analysis_path: Path, repo_name: str, repo_url: str, target_branch: str, temp_repo_folder: Path, output_dir: str
+ ):
+     entries = _load_all_analyses(analysis_path)
+     for fname, analysis, expanded_components in entries:
+         logger.info(f"Generating MDX for: {fname}")
+         generate_mdx_file(
+             fname,
+             analysis,
+             repo_name,
+             repo_ref=f"{repo_url}/blob/{target_branch}/{output_dir}",
+             expanded_components=expanded_components,
+             temp_dir=temp_repo_folder,
+         )
+
+
+ def generate_rst(
+     analysis_path: Path, repo_name: str, repo_url: str, target_branch: str, temp_repo_folder: Path, output_dir: str
+ ):
+     entries = _load_all_analyses(analysis_path)
+     for fname, analysis, expanded_components in entries:
+         logger.info(f"Generating RST for: {fname}")
+         generate_rst_file(
+             fname,
+             analysis,
+             repo_name,
+             repo_ref=f"{repo_url}/blob/{target_branch}/{output_dir}",
+             expanded_components=expanded_components,
+             temp_dir=temp_repo_folder,
+         )
+
+
+ def _seed_existing_analysis(existing_analysis_dir: Path, temp_repo_folder: Path) -> None:
+     """Copy existing analysis files into the temp folder so incremental analysis can use them."""
+     for filename in ("analysis.json", "analysis_manifest.json"):
+         src = existing_analysis_dir / filename
+         if src.is_file():
+             shutil.copy2(src, temp_repo_folder / filename)
+             logger.info(f"Seeded existing {filename} for incremental analysis")
+
+
+ def generate_analysis(
+     repo_url: str,
+     source_branch: str,
+     target_branch: str,
+     extension: str,
+     output_dir: str = ".codeboarding",
+     existing_analysis_dir: str | None = None,
+ ):
+     """
+     Generate analysis for a GitHub repository URL.
+     This function is intended to be used in a GitHub Action context.
+
+     Args:
+         existing_analysis_dir: Path to a directory containing a previous analysis.json
+             and analysis_manifest.json. When provided, incremental analysis is attempted
+             before falling back to a full analysis.
+     """
+     repo_root = Path(os.getenv("REPO_ROOT", "repos"))
+     repo_name = clone_repository(repo_url, repo_root)
+     repo_dir = repo_root / repo_name
+     checkout_repo(repo_dir, source_branch)
+     temp_repo_folder = create_temp_repo_folder()
+
+     # Seed previous analysis files so incremental update can detect changes
+     if existing_analysis_dir:
+         _seed_existing_analysis(Path(existing_analysis_dir), temp_repo_folder)
+
+     generator = DiagramGenerator(
+         repo_location=repo_dir,
+         temp_folder=temp_repo_folder,
+         repo_name=repo_name,
+         output_dir=temp_repo_folder,
+         depth_level=int(os.getenv("DIAGRAM_DEPTH_LEVEL", "1")),
+     )
+
+     # Use smart analysis: tries incremental first, falls back to full
+     analysis_files = generator.generate_analysis_smart()
+
+     # The generator now returns a single analysis.json path
+     analysis_path = Path(analysis_files[0])
+
+     # Now generate the output docs:
+     match extension:
+         case ".md":
+             generate_markdown(analysis_path, repo_name, repo_url, target_branch, temp_repo_folder, output_dir)
+         case ".html":
+             generate_html(analysis_path, repo_name, repo_url, target_branch, temp_repo_folder)
+         case ".mdx":
+             generate_mdx(analysis_path, repo_name, repo_url, target_branch, temp_repo_folder, output_dir)
+         case ".rst":
+             generate_rst(analysis_path, repo_name, repo_url, target_branch, temp_repo_folder, output_dir)
+         case _:
+             raise ValueError(f"Unsupported extension: {extension}")
+
+     return temp_repo_folder
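A rough sketch of how this GitHub Action entry point might be invoked; the repository URL and branch names below are placeholders, and the call also assumes the environment (API keys, REPO_ROOT, etc.) is already configured. This snippet is not shipped in the package.

# Illustrative invocation only; URL and branches are placeholders.
from github_action import generate_analysis

temp_folder = generate_analysis(
    repo_url="https://github.com/example/repo",
    source_branch="feature/docs",
    target_branch="main",
    extension=".md",
    output_dir=".codeboarding",
)
print(f"Generated docs in {temp_folder}")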
health/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from health.runner import run_health_checks
+
+ __all__ = ["run_health_checks"]
health/checks/__init__.py ADDED
@@ -0,0 +1,11 @@
+ """Health checks module."""
+
+ from health.checks.unused_code_diagnostics import (
+     LSPDiagnosticsCollector,
+     check_unused_code_diagnostics,
+ )
+
+ __all__ = [
+     "LSPDiagnosticsCollector",
+     "check_unused_code_diagnostics",
+ ]
health/checks/circular_deps.py ADDED
@@ -0,0 +1,48 @@
+ import logging
+
+ import networkx as nx
+
+ from health.models import CircularDependencyCheck, HealthCheckConfig
+
+ logger = logging.getLogger(__name__)
+
+
+ def check_circular_dependencies(package_dependencies: dict, config: HealthCheckConfig) -> CircularDependencyCheck:
+     """E6: Detect circular dependencies at the package level.
+
+     Circular dependencies make the system rigid, hard to modify, and
+     difficult to test in isolation.
+     """
+     cycles: list[str] = []
+
+     graph = nx.DiGraph()
+     for package, info in package_dependencies.items():
+         graph.add_node(package)
+         # Prefer import_deps (text-based imports only) over the combined imports
+         # key which may include LSP reference-based deps that inflate edges.
+         imports = info.get("import_deps", info.get("imports", []))
+         if isinstance(imports, dict):
+             imports = list(imports.keys())
+         for imported in imports:
+             if imported in package_dependencies:
+                 graph.add_edge(package, imported)
+
+     total_packages = graph.number_of_nodes()
+     packages_in_cycles: set[str] = set()
+
+     try:
+         for cycle in nx.simple_cycles(graph):
+             if len(cycles) >= config.max_cycles_reported:
+                 break
+             packages_in_cycles.update(cycle)
+             cycles.append(" -> ".join(cycle + [cycle[0]]))
+     except nx.NetworkXError:
+         logger.warning("Error while detecting cycles in package dependency graph")
+
+     return CircularDependencyCheck(
+         check_name="circular_dependencies",
+         description="Detects circular dependencies between packages",
+         cycles=cycles,
+         packages_checked=total_packages,
+         packages_in_cycles=len(packages_in_cycles),
+     )
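To make the expected input shape concrete, a toy invocation follows; the package names are invented, and HealthCheckConfig() is assumed to carry a default max_cycles_reported. This snippet is illustrative, not part of the package.

# Toy input showing the expected shape of package_dependencies.
from health.checks.circular_deps import check_circular_dependencies
from health.models import HealthCheckConfig

package_dependencies = {
    "pkg_a": {"import_deps": ["pkg_b"]},
    "pkg_b": {"import_deps": ["pkg_a"]},  # closes the cycle pkg_a -> pkg_b -> pkg_a
    "pkg_c": {"import_deps": []},
}

result = check_circular_dependencies(package_dependencies, HealthCheckConfig())
print(result.cycles)  # e.g. ['pkg_a -> pkg_b -> pkg_a']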
health/checks/cohesion.py ADDED
@@ -0,0 +1,93 @@
+ import logging
+
+ from health.models import FindingEntity, FindingGroup, HealthCheckConfig, Severity, StandardCheckSummary
+ from static_analyzer.graph import CallGraph
+
+ logger = logging.getLogger(__name__)
+
+
+ def check_component_cohesion(call_graph: CallGraph, config: HealthCheckConfig) -> StandardCheckSummary:
+     """E10: Measure component cohesion via internal vs external edge ratio per cluster.
+
+     For each cluster identified by the call graph clustering, compute:
+         cohesion = internal_edges / total_edges
+
+     Low cohesion means the cluster's nodes talk more to nodes outside the
+     cluster than inside it, suggesting the grouping may not reflect
+     actual code organization.
+     """
+     warning_entities: list[FindingEntity] = []
+
+     cluster_result = call_graph.cluster()
+     if not cluster_result.clusters:
+         return StandardCheckSummary(
+             check_name="component_cohesion",
+             description="Measures internal vs external edge ratio per component/cluster",
+             total_entities_checked=0,
+             findings_count=0,
+             score=1.0,
+             finding_groups=[],
+         )
+
+     total_checked = 0
+
+     for cluster_id, node_names in cluster_result.clusters.items():
+         internal_edges = 0
+         external_edges = 0
+
+         for node_name in node_names:
+             node = call_graph.nodes.get(node_name)
+             if not node:
+                 continue
+             for called_fqn in node.methods_called_by_me:
+                 if called_fqn in node_names:
+                     internal_edges += 1
+                 else:
+                     external_edges += 1
+
+         total_edges = internal_edges + external_edges
+         if total_edges == 0:
+             continue
+
+         total_checked += 1
+         cohesion = internal_edges / total_edges
+
+         # Get representative file for the cluster
+         cluster_files = cluster_result.get_files_for_cluster(cluster_id)
+         representative_file = next(iter(cluster_files), None) if cluster_files else None
+
+         if cohesion <= config.cohesion_low:
+             warning_entities.append(
+                 FindingEntity(
+                     entity_name=f"cluster_{cluster_id}",
+                     file_path=representative_file,
+                     line_start=None,
+                     line_end=None,
+                     metric_value=round(cohesion, 3),
+                 )
+             )
+
+     finding_groups: list[FindingGroup] = []
+     if warning_entities:
+         finding_groups.append(
+             FindingGroup(
+                 severity=Severity.WARNING,
+                 threshold=config.cohesion_low,
+                 description=f"Components with low cohesion (below {config.cohesion_low})",
+                 entities=sorted(warning_entities, key=lambda e: e.metric_value),
+             )
+         )
+
+     total_findings = len(warning_entities)
+     passing = total_checked - total_findings
+     score = passing / total_checked if total_checked > 0 else 1.0
+
+     return StandardCheckSummary(
+         check_name="component_cohesion",
+         description="Measures internal vs external edge ratio per component/cluster",
+         total_entities_checked=total_checked,
+         findings_count=total_findings,
+         warning_count=len(warning_entities),
+         score=score,
+         finding_groups=finding_groups,
+     )
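Worked numbers for the cohesion formula in the docstring above; the 0.3 threshold is an assumed cohesion_low value for illustration, not necessarily the package default.

# Worked example of the cohesion metric with made-up edge counts.
internal_edges, external_edges = 3, 9
cohesion = internal_edges / (internal_edges + external_edges)  # 0.25
flagged = cohesion <= 0.3  # True -> reported as a WARNING finding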
health/checks/coupling.py ADDED
@@ -0,0 +1,140 @@
+ import logging
+
+ from health.models import (
+     FindingEntity,
+     FindingGroup,
+     HealthCheckConfig,
+     Severity,
+     StandardCheckSummary,
+ )
+ from static_analyzer.graph import CallGraph
+
+ logger = logging.getLogger(__name__)
+
+
+ def collect_coupling_values(call_graph: CallGraph) -> tuple[list[float], list[float]]:
+     """Collect fan-out and fan-in values for all callable entities.
+
+     Returns:
+         A tuple of (fan_out_values, fan_in_values).
+     """
+     nx_graph = call_graph.to_networkx()
+     fan_out_values: list[float] = []
+     fan_in_values: list[float] = []
+
+     for node_name in nx_graph.nodes:
+         node = call_graph.nodes.get(node_name)
+         if node and (node.is_class() or node.is_data()):
+             continue
+         fan_out_values.append(float(nx_graph.out_degree(node_name)))
+         fan_in_values.append(float(nx_graph.in_degree(node_name)))
+
+     return fan_out_values, fan_in_values
+
+
+ def check_fan_out(call_graph: CallGraph, config: HealthCheckConfig) -> StandardCheckSummary:
+     """E2: Check efferent coupling (fan-out) per function.
+
+     Fan-out measures how many other functions a given function calls.
+     High fan-out indicates a function that does too much or orchestrates
+     too many dependencies.
+     """
+     findings: list[FindingEntity] = []
+     total_checked = 0
+     threshold = config.fan_out_max
+
+     for fqn, node in call_graph.nodes.items():
+         if node.is_class() or node.is_data():
+             continue
+
+         fan_out = len(node.methods_called_by_me)
+         total_checked += 1
+
+         if fan_out >= threshold:
+             findings.append(
+                 FindingEntity(
+                     entity_name=fqn,
+                     file_path=node.file_path,
+                     line_start=node.line_start,
+                     line_end=node.line_end,
+                     metric_value=fan_out,
+                 )
+             )
+
+     finding_groups: list[FindingGroup] = []
+     if findings:
+         finding_groups.append(
+             FindingGroup(
+                 severity=Severity.WARNING,
+                 threshold=threshold,
+                 description=f"Functions calling more than {threshold:.1f} other functions",
+                 entities=sorted(findings, key=lambda e: e.metric_value, reverse=True),
+             )
+         )
+
+     score = (total_checked - len(findings)) / total_checked if total_checked > 0 else 1.0
+
+     return StandardCheckSummary(
+         check_name="fan_out",
+         description="Checks efferent coupling: how many other functions each function calls",
+         total_entities_checked=total_checked,
+         findings_count=len(findings),
+         warning_count=len(findings),
+         score=score,
+         finding_groups=finding_groups,
+     )
+
+
+ def check_fan_in(call_graph: CallGraph, config: HealthCheckConfig) -> StandardCheckSummary:
+     """E3: Check afferent coupling (fan-in) per function.
+
+     Fan-in measures how many other functions call a given function.
+     High fan-in means the function is a critical dependency — changes
+     to it are high-risk and affect many callers.
+     """
+     findings: list[FindingEntity] = []
+     total_checked = 0
+     threshold = config.fan_in_max
+
+     nx_graph = call_graph.to_networkx()
+     for node_name in nx_graph.nodes:
+         node = call_graph.nodes.get(node_name)
+         if node and (node.is_class() or node.is_data()):
+             continue
+
+         fan_in = nx_graph.in_degree(node_name)
+         total_checked += 1
+
+         if fan_in >= threshold:
+             findings.append(
+                 FindingEntity(
+                     entity_name=node_name,
+                     file_path=node.file_path if node else None,
+                     line_start=node.line_start if node else None,
+                     line_end=node.line_end if node else None,
+                     metric_value=fan_in,
+                 )
+             )
+
+     finding_groups: list[FindingGroup] = []
+     if findings:
+         finding_groups.append(
+             FindingGroup(
+                 severity=Severity.WARNING,
+                 threshold=threshold,
+                 description=f"Functions called by more than {threshold:.1f} other functions",
+                 entities=sorted(findings, key=lambda e: e.metric_value, reverse=True),
+             )
+         )
+
+     score = (total_checked - len(findings)) / total_checked if total_checked > 0 else 1.0
+
+     return StandardCheckSummary(
+         check_name="fan_in",
+         description="Checks afferent coupling: how many other functions call each function",
+         total_entities_checked=total_checked,
+         findings_count=len(findings),
+         warning_count=len(findings),
+         score=score,
+         finding_groups=finding_groups,
+     )
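The score in both coupling checks is simply the share of checked functions that stay under the threshold; a worked example with made-up counts follows.

# Made-up counts illustrating how the fan-out/fan-in score is computed.
total_checked, findings_count = 200, 8
score = (total_checked - findings_count) / total_checked  # 0.96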
health/checks/function_size.py ADDED
@@ -0,0 +1,85 @@
+ import logging
+
+ from health.models import (
+     FindingEntity,
+     FindingGroup,
+     HealthCheckConfig,
+     Severity,
+     StandardCheckSummary,
+ )
+ from repo_utils.ignore import is_test_or_infrastructure_file
+ from static_analyzer.graph import CallGraph
+
+ logger = logging.getLogger(__name__)
+
+
+ def collect_function_sizes(call_graph: CallGraph) -> list[float]:
+     """Collect function sizes (line counts) for all callable entities in the graph."""
+     sizes: list[float] = []
+     for node in call_graph.nodes.values():
+         if node.is_class() or node.is_data():
+             continue
+         size = node.line_end - node.line_start
+         if size > 0:
+             sizes.append(float(size))
+     return sizes
+
+
+ def check_function_size(call_graph: CallGraph, config: HealthCheckConfig) -> StandardCheckSummary:
+     """E1: Check function/method sizes across the call graph.
+
+     Flags functions that exceed line count thresholds. Large functions are
+     harder to understand, test, and maintain.
+
+     Excludes test and infrastructure files as they have different size norms.
+     """
+     findings: list[FindingEntity] = []
+     total_checked = 0
+     threshold = config.function_size_max
+
+     for fqn, node in call_graph.nodes.items():
+         if node.is_class() or node.is_data():
+             continue
+
+         # Skip test/infrastructure files
+         if is_test_or_infrastructure_file(node.file_path):
+             continue
+
+         size = node.line_end - node.line_start
+         if size <= 0:
+             continue
+         total_checked += 1
+
+         if size >= threshold:
+             findings.append(
+                 FindingEntity(
+                     entity_name=fqn,
+                     file_path=node.file_path,
+                     line_start=node.line_start,
+                     line_end=node.line_end,
+                     metric_value=size,
+                 )
+             )
+
+     finding_groups: list[FindingGroup] = []
+     if findings:
+         finding_groups.append(
+             FindingGroup(
+                 severity=Severity.WARNING,
+                 threshold=threshold,
+                 description=f"Functions exceeding {threshold:.1f} lines",
+                 entities=sorted(findings, key=lambda e: e.metric_value, reverse=True),
+             )
+         )
+
+     score = (total_checked - len(findings)) / total_checked if total_checked > 0 else 1.0
+
+     return StandardCheckSummary(
+         check_name="function_size",
+         description="Checks that functions/methods do not exceed line count thresholds",
+         total_entities_checked=total_checked,
+         findings_count=len(findings),
+         warning_count=len(findings),
+         score=score,
+         finding_groups=finding_groups,
+     )