PyPI - code-maat-python - Versions diffs - 0.1.0__py3-none-any.whl - Mend

code-maat-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

code_maat_python/__init__.py +12 -0
code_maat_python/__main__.py +5 -0
code_maat_python/analyses/__init__.py +39 -0
code_maat_python/analyses/age.py +101 -0
code_maat_python/analyses/authors.py +60 -0
code_maat_python/analyses/churn.py +353 -0
code_maat_python/analyses/communication.py +151 -0
code_maat_python/analyses/coupling.py +136 -0
code_maat_python/analyses/effort.py +210 -0
code_maat_python/analyses/entities.py +51 -0
code_maat_python/analyses/revisions.py +56 -0
code_maat_python/analyses/soc.py +90 -0
code_maat_python/analyses/summary.py +61 -0
code_maat_python/cli.py +822 -0
code_maat_python/output/__init__.py +0 -0
code_maat_python/parser.py +232 -0
code_maat_python/pipeline.py +112 -0
code_maat_python/transformers/__init__.py +0 -0
code_maat_python/transformers/grouper.py +204 -0
code_maat_python/transformers/team_mapper.py +132 -0
code_maat_python/transformers/time_grouper.py +146 -0
code_maat_python/utils/__init__.py +0 -0
code_maat_python/utils/math.py +105 -0
code_maat_python-0.1.0.dist-info/METADATA +545 -0
code_maat_python-0.1.0.dist-info/RECORD +28 -0
code_maat_python-0.1.0.dist-info/WHEEL +4 -0
code_maat_python-0.1.0.dist-info/entry_points.txt +3 -0
code_maat_python-0.1.0.dist-info/licenses/LICENSE +674 -0

code_maat_python/cli.py ADDED Viewed

@@ -0,0 +1,822 @@
+"""Command-line interface for code-maat-python.
+Modern CLI using click framework with subcommands for each analysis type.
+"""
+import sys
+from collections.abc import Callable
+from functools import wraps
+from pathlib import Path
+from typing import Any
+import click
+import pandas as pd
+from code_maat_python.analyses import (
+    analyze_authors,
+    analyze_coupling,
+    analyze_entities,
+    analyze_revisions,
+    analyze_soc,
+    analyze_summary,
+)
+from code_maat_python.analyses.age import code_age
+from code_maat_python.analyses.churn import (
+    abs_churn,
+    author_churn,
+    entity_churn,
+    entity_ownership,
+    main_dev,
+    refactoring_main_dev,
+)
+from code_maat_python.analyses.communication import communication
+from code_maat_python.analyses.effort import (
+    entity_effort,
+    fragmentation,
+    main_dev_by_revs,
+)
+from code_maat_python.parser import parse_git_log
+def validate_logfile(ctx: click.Context, param: click.Parameter, value: str) -> Path:
+    """Validate that the logfile exists and is readable.
+    Supports regular files, stdin, pipes, and process substitution (e.g., <(git log ...)).
+    Args:
+        ctx: Click context
+        param: Click parameter
+        value: Path string
+    Returns:
+        Path object if valid
+    Raises:
+        click.BadParameter: If file doesn't exist or isn't readable
+    """
+    path = Path(value)
+    # Check if path exists (regular files, symlinks, etc.)
+    # or if it's a special file (pipes, character devices like /dev/fd/N)
+    if not path.exists() and not (path.is_fifo() or path.is_char_device()):
+        raise click.BadParameter(f"File not found: {value}")
+    # Try to open and read to verify it's actually readable
+    try:
+        with open(path, 'r') as f:
+            # Just verify we can open it; don't read anything yet
+            pass
+    except (OSError, IOError) as e:
+        raise click.BadParameter(f"Cannot read file: {value} ({e})")
+    return path
+def common_options(func: Callable[..., Any]) -> Callable[..., Any]:
+    """Add common options to all analysis commands.
+    Adds --group, --team-map-file, --rows, and --output options.
+    """
+    @click.option(
+        "--group",
+        "-g",
+        type=str,
+        help="Architectural grouping specification file",
+    )
+    @click.option(
+        "--team-map-file",
+        "-p",
+        type=str,
+        help="Team mapping CSV file",
+    )
+    @click.option(
+        "--rows",
+        "-r",
+        type=int,
+        help="Maximum number of rows to output",
+    )
+    @click.option(
+        "--output",
+        "-o",
+        type=str,
+        help="Output CSV file (default: stdout)",
+    )
+    @wraps(func)
+    def wrapper(*args: Any, **kwargs: Any) -> Any:
+        return func(*args, **kwargs)
+    return wrapper
+def apply_transformers(
+    df: pd.DataFrame, group_file: str | None, team_map_file: str | None
+) -> pd.DataFrame:
+    """Apply transformers in correct order.
+    Args:
+        df: DataFrame with parsed git log data
+        group_file: Path to architectural grouping specification file
+        team_map_file: Path to team mapping CSV file
+    Returns:
+        Transformed DataFrame
+    """
+    result = df
+    # Apply architectural grouping first (reduces entity granularity)
+    if group_file:
+        from code_maat_python.transformers.grouper import (
+            load_group_specification_file,
+            map_entities_to_groups,
+        )
+        patterns = load_group_specification_file(group_file)
+        result = map_entities_to_groups(result, patterns)
+    # Apply team mapping second (reduces author granularity)
+    if team_map_file:
+        from code_maat_python.transformers.team_mapper import (
+            load_team_mapping_file,
+            map_authors_to_teams,
+        )
+        mapping = load_team_mapping_file(team_map_file)
+        result = map_authors_to_teams(result, mapping)
+    return result
+def output_results(df: pd.DataFrame, output: str | None, max_rows: int | None = None) -> None:
+    """Output analysis results to stdout or file.
+    Args:
+        df: DataFrame with analysis results
+        output: Output file path or None for stdout
+        max_rows: Maximum number of rows to output (None for all)
+    """
+    # Limit rows if specified
+    if max_rows and len(df) > max_rows:
+        df = df.head(max_rows)
+    if output:
+        df.to_csv(output, index=False)
+        click.echo(f"Results written to {output}", err=True)
+    else:
+        # Output to stdout
+        click.echo(df.to_csv(index=False))
+def handle_analysis_error(e: Exception) -> None:
+    """Handle analysis errors with helpful messages.
+    Args:
+        e: Exception that occurred
+    """
+    click.echo(f"Error during analysis: {str(e)}", err=True)
+    sys.exit(1)
+# Main command group
+@click.group()
+@click.version_option(version="0.1.0", prog_name="code-maat-python")
+def main() -> None:
+    """Code Maat Pandas - Modern Python tool for mining version control data.
+    Analyzes git repository logs to identify patterns, coupling, churn,
+    and other metrics useful for understanding code evolution.
+    \b
+    Example workflow:
+        1. Generate a git log:
+           git log --all -M -C --numstat --date=short \\
+               --pretty=format:'--%h--%cd--%cn' > git.log
+        2. Run an analysis:
+           code-maat-python coupling git.log --min-coupling 50
+        3. Save results to CSV:
+           code-maat-python revisions git.log --output results.csv
+    """
+    pass
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def authors(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Count distinct authors per entity with revision counts.
+    Shows how many authors have worked on each file and the number
+    of revisions, useful for identifying knowledge distribution.
+    \b
+    Example:
+        code-maat-python authors git.log
+        code-maat-python authors git.log --output authors.csv
+        code-maat-python authors git.log --group layers.txt --rows 20
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = analyze_authors(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def revisions(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Sort entities by revision frequency.
+    Lists all files sorted by number of revisions, useful for
+    identifying hotspots and frequently changed code.
+    \b
+    Example:
+        code-maat-python revisions git.log
+        code-maat-python revisions git.log --output revisions.csv
+        code-maat-python revisions git.log --group layers.txt --rows 10
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = analyze_revisions(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def entities(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """List all entities with basic statistics.
+    Shows all files in the repository with commit counts,
+    useful for understanding the scope of the codebase.
+    \b
+    Example:
+        code-maat-python entities git.log
+        code-maat-python entities git.log --output entities.csv
+        code-maat-python entities git.log --group layers.txt
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = analyze_entities(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def summary(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Generate overview statistics for the repository.
+    Provides high-level statistics including number of commits,
+    entities, authors, and date range of the repository history.
+    \b
+    Example:
+        code-maat-python summary git.log
+        code-maat-python summary git.log --output summary.csv
+        code-maat-python summary git.log --group layers.txt
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = analyze_summary(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@click.option(
+    "--min-revs",
+    type=int,
+    default=5,
+    show_default=True,
+    help="Minimum number of revisions for a module to be included",
+)
+@click.option(
+    "--min-shared-revs",
+    type=int,
+    default=5,
+    show_default=True,
+    help="Minimum number of shared revisions between modules",
+)
+@click.option(
+    "--min-coupling",
+    type=int,
+    default=30,
+    show_default=True,
+    help="Minimum coupling percentage (0-100)",
+)
+@click.option(
+    "--max-coupling",
+    type=int,
+    default=100,
+    show_default=True,
+    help="Maximum coupling percentage (0-100)",
+)
+@click.option(
+    "--max-changeset-size",
+    type=int,
+    default=30,
+    show_default=True,
+    help="Maximum number of files in a commit to consider",
+)
+@common_options
+def coupling(
+    logfile: Path,
+    min_revs: int,
+    min_shared_revs: int,
+    min_coupling: int,
+    max_coupling: int,
+    max_changeset_size: int,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Calculate logical coupling between files.
+    Identifies files that frequently change together, which may
+    indicate hidden dependencies or architectural issues.
+    Logical coupling is calculated as:
+    (shared_revisions / average_revisions) * 100
+    Large commits (> max-changeset-size) are filtered out to avoid
+    noise from bulk refactorings or automated changes.
+    \b
+    Example:
+        code-maat-python coupling git.log
+        code-maat-python coupling git.log --min-coupling 50
+        code-maat-python coupling git.log --min-revs 10 --min-shared-revs 5
+        code-maat-python coupling git.log --group layers.txt --rows 20
+        code-maat-python coupling git.log --output coupling.csv
+    """
+    try:
+        # Validate parameters
+        if not (0 <= min_coupling <= 100):
+            raise click.BadParameter("min-coupling must be between 0 and 100")
+        if not (0 <= max_coupling <= 100):
+            raise click.BadParameter("max-coupling must be between 0 and 100")
+        if min_coupling > max_coupling:
+            raise click.BadParameter("min-coupling must be <= max-coupling")
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = analyze_coupling(
+            df,
+            min_revs=min_revs,
+            min_shared_revs=min_shared_revs,
+            min_coupling=min_coupling,
+            max_coupling=max_coupling,
+            max_changeset_size=max_changeset_size,
+        )
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@click.option(
+    "--max-changeset-size",
+    type=int,
+    default=30,
+    show_default=True,
+    help="Maximum number of files in a commit to consider",
+)
+@common_options
+def soc(
+    logfile: Path,
+    max_changeset_size: int,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Calculate sum of coupling (SOC) for each entity.
+    A simpler metric than full logical coupling. For each commit
+    with m files, each file gets a SOC score of (m-1).
+    This provides a quick way to identify files that are often
+    changed together with other files.
+    \b
+    Example:
+        code-maat-python soc git.log
+        code-maat-python soc git.log --max-changeset-size 50
+        code-maat-python soc git.log --group layers.txt --rows 10
+        code-maat-python soc git.log --output soc.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = analyze_soc(df, max_changeset_size=max_changeset_size)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def abs_churn_cmd(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Calculate absolute code churn per date.
+    Shows total lines added and deleted for each date in the
+    commit history, useful for identifying periods of high activity.
+    \b
+    Example:
+        code-maat-python abs-churn git.log
+        code-maat-python abs-churn git.log --output churn.csv
+        code-maat-python abs-churn git.log --rows 30
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = abs_churn(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def author_churn_cmd(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Calculate total churn per author.
+    Shows total lines added and deleted by each author across
+    all entities, useful for understanding individual contributions.
+    \b
+    Example:
+        code-maat-python author-churn git.log
+        code-maat-python author-churn git.log --team-map-file teams.csv
+        code-maat-python author-churn git.log --output author-churn.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = author_churn(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def entity_churn_cmd(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Calculate absolute churn per entity.
+    Shows total lines added and deleted for each file, sorted by
+    lines added (a better predictor of post-release defects).
+    \b
+    Example:
+        code-maat-python entity-churn git.log
+        code-maat-python entity-churn git.log --group layers.txt --rows 20
+        code-maat-python entity-churn git.log --output entity-churn.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = entity_churn(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def entity_ownership_cmd(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Calculate ownership of each entity by author based on churn.
+    Shows how much each author has contributed to each entity
+    in terms of lines added and deleted, identifying code ownership.
+    \b
+    Example:
+        code-maat-python entity-ownership git.log
+        code-maat-python entity-ownership git.log --group layers.txt
+        code-maat-python entity-ownership git.log --output ownership.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = entity_ownership(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def main_dev_cmd(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Identify the main developer of each entity by lines added.
+    The main developer is the author who contributed the most lines
+    of code to each entity. Returns ownership percentage.
+    \b
+    Example:
+        code-maat-python main-dev git.log
+        code-maat-python main-dev git.log --group layers.txt --rows 15
+        code-maat-python main-dev git.log --output main-dev.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = main_dev(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def refactoring_main_dev_cmd(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Identify the main developer of each entity by lines removed.
+    Alternative calculation identifying main developer as the author
+    who removed the most lines (representing refactoring effort).
+    \b
+    Example:
+        code-maat-python refactoring-main-dev git.log
+        code-maat-python refactoring-main-dev git.log --rows 10
+        code-maat-python refactoring-main-dev git.log --output refactoring.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = refactoring_main_dev(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def entity_effort_cmd(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Calculate author contribution to each entity by revision count.
+    Identifies how many revisions each author contributed to each
+    entity, providing a measure of effort.
+    \b
+    Example:
+        code-maat-python entity-effort git.log
+        code-maat-python entity-effort git.log --group layers.txt
+        code-maat-python entity-effort git.log --output effort.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = entity_effort(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def main_dev_by_revs_cmd(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Identify the main developer of each entity by revision count.
+    The main developer is the author who contributed the most
+    revisions to each entity. Returns ownership percentage.
+    \b
+    Example:
+        code-maat-python main-dev-by-revs git.log
+        code-maat-python main-dev-by-revs git.log --rows 15
+        code-maat-python main-dev-by-revs git.log --output main-dev-revs.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = main_dev_by_revs(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@common_options
+def fragmentation_cmd(
+    logfile: Path,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Calculate fragmentation for each entity using fractal value.
+    The fractal value measures how fragmented contributions are
+    across authors. 0 = single author, approaching 1 = highly fragmented.
+    \b
+    Example:
+        code-maat-python fragmentation git.log
+        code-maat-python fragmentation git.log --group layers.txt --rows 20
+        code-maat-python fragmentation git.log --output fragmentation.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = fragmentation(df)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@click.option(
+    "--reference-date",
+    "-d",
+    type=str,
+    help="Reference date for age calculation (default: today, format: YYYY-MM-DD)",
+)
+@common_options
+def age(
+    logfile: Path,
+    reference_date: str | None,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Calculate age of entities in months since last modification.
+    Identifies stale code that hasn't been modified recently, which
+    may indicate technical debt, abandoned features, or stable components.
+    \b
+    Example:
+        code-maat-python age git.log
+        code-maat-python age git.log --reference-date 2023-12-31
+        code-maat-python age git.log --group layers.txt --rows 25
+        code-maat-python age git.log --output age.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        # Parse reference date if provided
+        ref_date = None
+        if reference_date:
+            ref_date = pd.Timestamp(reference_date)
+        result = code_age(df, reference_date=ref_date)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+@main.command()
+@click.argument("logfile", type=str, callback=validate_logfile)
+@click.option(
+    "--min-shared",
+    type=int,
+    default=5,
+    show_default=True,
+    help="Minimum number of shared entities between developers",
+)
+@click.option(
+    "--min-coupling",
+    type=int,
+    default=30,
+    show_default=True,
+    help="Minimum coupling strength percentage (0-100)",
+)
+@common_options
+def communication_cmd(
+    logfile: Path,
+    min_shared: int,
+    min_coupling: int,
+    group: str | None,
+    team_map_file: str | None,
+    rows: int | None,
+    output: str | None,
+) -> None:
+    """Calculate communication needs between developers.
+    Identifies developers who work on the same code files, indicating
+    a need for communication and coordination. Strength is normalized
+    by each developer's total workload.
+    \b
+    Example:
+        code-maat-python communication git.log
+        code-maat-python communication git.log --min-shared 10 --min-coupling 50
+        code-maat-python communication git.log --team-map-file teams.csv --rows 15
+        code-maat-python communication git.log --output communication.csv
+    """
+    try:
+        df = parse_git_log(logfile)
+        df = apply_transformers(df, group, team_map_file)
+        result = communication(df, min_shared=min_shared, min_coupling=min_coupling)
+        output_results(result, output, rows)
+    except Exception as e:
+        handle_analysis_error(e)
+# Run the click command group when this module is executed as a script.
+if __name__ == "__main__":
+    main()