gitflow-analytics 3.11.1__py3-none-any.whl → 3.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/cli.py +8 -0
- gitflow_analytics/config/loader.py +1 -0
- gitflow_analytics/config/schema.py +1 -0
- gitflow_analytics/core/analyzer.py +26 -4
- gitflow_analytics/core/data_fetcher.py +98 -17
- gitflow_analytics/types/__init__.py +5 -0
- gitflow_analytics/types/commit_types.py +50 -0
- gitflow_analytics/utils/__init__.py +5 -0
- gitflow_analytics/utils/commit_utils.py +53 -0
- {gitflow_analytics-3.11.1.dist-info → gitflow_analytics-3.12.6.dist-info}/METADATA +74 -1
- {gitflow_analytics-3.11.1.dist-info → gitflow_analytics-3.12.6.dist-info}/RECORD +16 -12
- {gitflow_analytics-3.11.1.dist-info → gitflow_analytics-3.12.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-3.11.1.dist-info → gitflow_analytics-3.12.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-3.11.1.dist-info → gitflow_analytics-3.12.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-3.11.1.dist-info → gitflow_analytics-3.12.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/_version.py
CHANGED
gitflow_analytics/cli.py
CHANGED
|
@@ -967,6 +967,7 @@ def analyze(
|
|
|
967
967
|
branch_mapping_rules=cfg.analysis.branch_mapping_rules,
|
|
968
968
|
allowed_ticket_platforms=cfg.get_effective_ticket_platforms(),
|
|
969
969
|
exclude_paths=cfg.analysis.exclude_paths,
|
|
970
|
+
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
|
|
970
971
|
)
|
|
971
972
|
|
|
972
973
|
# Get commits from all repositories
|
|
@@ -1162,6 +1163,7 @@ def analyze(
|
|
|
1162
1163
|
ml_categorization_config=ml_config,
|
|
1163
1164
|
llm_config=llm_config,
|
|
1164
1165
|
branch_analysis_config=branch_analysis_config,
|
|
1166
|
+
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
|
|
1165
1167
|
)
|
|
1166
1168
|
orchestrator = IntegrationOrchestrator(cfg, cache)
|
|
1167
1169
|
|
|
@@ -1289,6 +1291,7 @@ def analyze(
|
|
|
1289
1291
|
"enable_qualitative": enable_qualitative,
|
|
1290
1292
|
"enable_pm": enable_pm,
|
|
1291
1293
|
"pm_platforms": list(pm_platform) if pm_platform else [],
|
|
1294
|
+
"exclude_merge_commits": cfg.analysis.exclude_merge_commits,
|
|
1292
1295
|
},
|
|
1293
1296
|
)
|
|
1294
1297
|
|
|
@@ -1386,6 +1389,7 @@ def analyze(
|
|
|
1386
1389
|
cfg.analysis, "ticket_platforms", ["jira", "github", "clickup", "linear"]
|
|
1387
1390
|
),
|
|
1388
1391
|
exclude_paths=getattr(cfg.analysis, "exclude_paths", None),
|
|
1392
|
+
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
|
|
1389
1393
|
)
|
|
1390
1394
|
|
|
1391
1395
|
# Initialize integrations for ticket fetching
|
|
@@ -1780,6 +1784,7 @@ def analyze(
|
|
|
1780
1784
|
["jira", "github", "clickup", "linear"],
|
|
1781
1785
|
),
|
|
1782
1786
|
exclude_paths=getattr(cfg.analysis, "exclude_paths", None),
|
|
1787
|
+
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
|
|
1783
1788
|
)
|
|
1784
1789
|
|
|
1785
1790
|
# Initialize integrations for ticket fetching
|
|
@@ -4321,6 +4326,7 @@ def fetch(
|
|
|
4321
4326
|
branch_mapping_rules=getattr(cfg.analysis, "branch_mapping_rules", {}),
|
|
4322
4327
|
allowed_ticket_platforms=cfg.get_effective_ticket_platforms(),
|
|
4323
4328
|
exclude_paths=getattr(cfg.analysis, "exclude_paths", None),
|
|
4329
|
+
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
|
|
4324
4330
|
)
|
|
4325
4331
|
|
|
4326
4332
|
# Initialize integrations for ticket fetching
|
|
@@ -5015,6 +5021,7 @@ def identities(config: Path, weeks: int, apply: bool) -> None:
|
|
|
5015
5021
|
ml_categorization_config=ml_config,
|
|
5016
5022
|
llm_config=llm_config,
|
|
5017
5023
|
branch_analysis_config=branch_analysis_config,
|
|
5024
|
+
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
|
|
5018
5025
|
)
|
|
5019
5026
|
|
|
5020
5027
|
click.echo("🔍 Analyzing repositories for developer identities...")
|
|
@@ -5252,6 +5259,7 @@ def aliases_command(
|
|
|
5252
5259
|
ml_categorization_config=ml_config,
|
|
5253
5260
|
llm_config=llm_config,
|
|
5254
5261
|
branch_analysis_config=branch_analysis_config,
|
|
5262
|
+
exclude_merge_commits=cfg.analysis.exclude_merge_commits,
|
|
5255
5263
|
)
|
|
5256
5264
|
|
|
5257
5265
|
all_commits = []
|
|
@@ -593,6 +593,7 @@ class ConfigLoader:
|
|
|
593
593
|
),
|
|
594
594
|
exclude_message_patterns=analysis_data.get("exclude", {}).get("message_patterns", []),
|
|
595
595
|
exclude_paths=exclude_paths,
|
|
596
|
+
exclude_merge_commits=analysis_data.get("exclude_merge_commits", False),
|
|
596
597
|
similarity_threshold=analysis_data.get("identity", {}).get(
|
|
597
598
|
"similarity_threshold", 0.85
|
|
598
599
|
),
|
|
@@ -299,6 +299,7 @@ class AnalysisConfig:
|
|
|
299
299
|
exclude_authors: list[str] = field(default_factory=list)
|
|
300
300
|
exclude_message_patterns: list[str] = field(default_factory=list)
|
|
301
301
|
exclude_paths: list[str] = field(default_factory=list)
|
|
302
|
+
exclude_merge_commits: bool = False # Exclude merge commits from filtered line counts
|
|
302
303
|
similarity_threshold: float = 0.85
|
|
303
304
|
manual_identity_mappings: list[dict[str, Any]] = field(default_factory=list)
|
|
304
305
|
aliases_file: Optional[Path] = None # Path to shared aliases.yaml file
|
|
@@ -14,6 +14,8 @@ from git import Repo
|
|
|
14
14
|
|
|
15
15
|
from ..extractors.story_points import StoryPointExtractor
|
|
16
16
|
from ..extractors.tickets import TicketExtractor
|
|
17
|
+
from ..types import FilteredCommitStats
|
|
18
|
+
from ..utils.commit_utils import is_merge_commit
|
|
17
19
|
from .branch_mapper import BranchToProjectMapper
|
|
18
20
|
from .cache import GitAnalysisCache
|
|
19
21
|
from .progress import get_progress_service
|
|
@@ -45,6 +47,7 @@ class GitAnalyzer:
|
|
|
45
47
|
llm_config: Optional[dict[str, Any]] = None,
|
|
46
48
|
classification_config: Optional[dict[str, Any]] = None,
|
|
47
49
|
branch_analysis_config: Optional[dict[str, Any]] = None,
|
|
50
|
+
exclude_merge_commits: bool = False,
|
|
48
51
|
):
|
|
49
52
|
"""Initialize analyzer with cache and optional ML categorization and commit classification.
|
|
50
53
|
|
|
@@ -59,9 +62,11 @@ class GitAnalyzer:
|
|
|
59
62
|
llm_config: Configuration for LLM-based commit classification
|
|
60
63
|
classification_config: Configuration for commit classification
|
|
61
64
|
branch_analysis_config: Configuration for branch analysis optimization
|
|
65
|
+
exclude_merge_commits: Exclude merge commits from filtered line count calculations
|
|
62
66
|
"""
|
|
63
67
|
self.cache = cache
|
|
64
68
|
self.batch_size = batch_size
|
|
69
|
+
self.exclude_merge_commits = exclude_merge_commits
|
|
65
70
|
self.story_point_extractor = StoryPointExtractor(patterns=story_point_patterns)
|
|
66
71
|
|
|
67
72
|
# Initialize ticket extractor (ML or standard based on config and availability)
|
|
@@ -1149,7 +1154,11 @@ class GitAnalyzer:
|
|
|
1149
1154
|
return dir_name in path.parts
|
|
1150
1155
|
else:
|
|
1151
1156
|
# File pattern like *.min.js
|
|
1152
|
-
|
|
1157
|
+
# Check both filename AND full path to handle patterns like **/pnpm-lock.yaml
|
|
1158
|
+
# matching root-level files (e.g., pnpm-lock.yaml)
|
|
1159
|
+
return fnmatch.fnmatch(path.name, suffix_pattern) or fnmatch.fnmatch(
|
|
1160
|
+
filepath, suffix_pattern
|
|
1161
|
+
)
|
|
1153
1162
|
|
|
1154
1163
|
elif pattern.endswith("/**"):
|
|
1155
1164
|
# Pattern like vendor/** or docs/build/** - matches files inside directory at root level
|
|
@@ -1224,9 +1233,22 @@ class GitAnalyzer:
|
|
|
1224
1233
|
|
|
1225
1234
|
return False
|
|
1226
1235
|
|
|
1227
|
-
def _calculate_filtered_stats(self, commit: git.Commit) ->
|
|
1228
|
-
"""Calculate commit statistics excluding boilerplate/generated files using git diff --numstat.
|
|
1229
|
-
|
|
1236
|
+
def _calculate_filtered_stats(self, commit: git.Commit) -> FilteredCommitStats:
|
|
1237
|
+
"""Calculate commit statistics excluding boilerplate/generated files using git diff --numstat.
|
|
1238
|
+
|
|
1239
|
+
When exclude_merge_commits is enabled, merge commits (commits with 2+ parents) will have
|
|
1240
|
+
their filtered line counts set to 0 to exclude them from productivity metrics.
|
|
1241
|
+
"""
|
|
1242
|
+
filtered_stats: FilteredCommitStats = {"files": 0, "insertions": 0, "deletions": 0}
|
|
1243
|
+
|
|
1244
|
+
# Check if this is a merge commit and we should exclude it from filtered counts
|
|
1245
|
+
is_merge = is_merge_commit(commit)
|
|
1246
|
+
if self.exclude_merge_commits and is_merge:
|
|
1247
|
+
logger.debug(
|
|
1248
|
+
f"Excluding merge commit {commit.hexsha[:8]} from filtered line counts "
|
|
1249
|
+
f"(has {len(commit.parents)} parents)"
|
|
1250
|
+
)
|
|
1251
|
+
return filtered_stats # Return zeros for merge commits
|
|
1230
1252
|
|
|
1231
1253
|
# For initial commits or commits without parents
|
|
1232
1254
|
parent = commit.parents[0] if commit.parents else None
|
|
@@ -15,6 +15,7 @@ from datetime import datetime, timedelta, timezone
|
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
from typing import Any, Optional
|
|
17
17
|
|
|
18
|
+
import git
|
|
18
19
|
from sqlalchemy import func
|
|
19
20
|
from sqlalchemy.orm import Session
|
|
20
21
|
|
|
@@ -28,6 +29,8 @@ from ..models.database import (
|
|
|
28
29
|
DailyCommitBatch,
|
|
29
30
|
DetailedTicketData,
|
|
30
31
|
)
|
|
32
|
+
from ..types import CommitStats
|
|
33
|
+
from ..utils.commit_utils import is_merge_commit
|
|
31
34
|
from .branch_mapper import BranchToProjectMapper
|
|
32
35
|
from .cache import GitAnalysisCache
|
|
33
36
|
from .git_timeout_wrapper import GitOperationTimeout, GitTimeoutWrapper, HeartbeatLogger
|
|
@@ -60,6 +63,7 @@ class GitDataFetcher:
|
|
|
60
63
|
allowed_ticket_platforms: Optional[list[str]] = None,
|
|
61
64
|
exclude_paths: Optional[list[str]] = None,
|
|
62
65
|
skip_remote_fetch: bool = False,
|
|
66
|
+
exclude_merge_commits: bool = False,
|
|
63
67
|
) -> None:
|
|
64
68
|
"""Initialize the data fetcher.
|
|
65
69
|
|
|
@@ -69,9 +73,11 @@ class GitDataFetcher:
|
|
|
69
73
|
allowed_ticket_platforms: List of allowed ticket platforms
|
|
70
74
|
exclude_paths: List of file paths to exclude from analysis
|
|
71
75
|
skip_remote_fetch: If True, skip git fetch/pull operations
|
|
76
|
+
exclude_merge_commits: Exclude merge commits from filtered line count calculations
|
|
72
77
|
"""
|
|
73
78
|
self.cache = cache
|
|
74
79
|
self.skip_remote_fetch = skip_remote_fetch
|
|
80
|
+
self.exclude_merge_commits = exclude_merge_commits
|
|
75
81
|
self.repository_status = {} # Track status of each repository
|
|
76
82
|
# CRITICAL FIX: Use the same database instance as the cache to avoid session conflicts
|
|
77
83
|
self.database = cache.db
|
|
@@ -646,7 +652,7 @@ class GitDataFetcher:
|
|
|
646
652
|
return daily_commits
|
|
647
653
|
|
|
648
654
|
def _extract_commit_data(
|
|
649
|
-
self, commit:
|
|
655
|
+
self, commit: git.Commit, branch_name: str, project_key: str, repo_path: Path
|
|
650
656
|
) -> Optional[dict[str, Any]]:
|
|
651
657
|
"""Extract comprehensive data from a Git commit.
|
|
652
658
|
|
|
@@ -1792,13 +1798,16 @@ class GitDataFetcher:
|
|
|
1792
1798
|
finally:
|
|
1793
1799
|
session.close()
|
|
1794
1800
|
|
|
1795
|
-
def _calculate_commit_stats(self, commit:
|
|
1801
|
+
def _calculate_commit_stats(self, commit: git.Commit) -> CommitStats:
|
|
1796
1802
|
"""Calculate commit statistics using reliable git diff --numstat with exclude_paths filtering.
|
|
1797
1803
|
|
|
1804
|
+
When exclude_merge_commits is enabled, merge commits (commits with 2+ parents) will have
|
|
1805
|
+
their filtered line counts set to 0 to exclude them from productivity metrics.
|
|
1806
|
+
|
|
1798
1807
|
Returns:
|
|
1799
|
-
|
|
1800
|
-
- 'files', 'insertions', 'deletions': filtered counts
|
|
1801
|
-
- 'raw_insertions', 'raw_deletions': unfiltered counts
|
|
1808
|
+
CommitStats dictionary with both raw and filtered statistics:
|
|
1809
|
+
- 'files', 'insertions', 'deletions': filtered counts (0 for merge commits if excluded)
|
|
1810
|
+
- 'raw_insertions', 'raw_deletions': unfiltered counts (always calculated)
|
|
1802
1811
|
|
|
1803
1812
|
THREAD SAFETY: This method is thread-safe as it works with commit objects
|
|
1804
1813
|
that have their own repo references.
|
|
@@ -1809,6 +1818,16 @@ class GitDataFetcher:
|
|
|
1809
1818
|
raw_stats = {"files": 0, "insertions": 0, "deletions": 0}
|
|
1810
1819
|
excluded_stats = {"files": 0, "insertions": 0, "deletions": 0}
|
|
1811
1820
|
|
|
1821
|
+
# Check if this is a merge commit and we should exclude it from filtered counts
|
|
1822
|
+
is_merge = is_merge_commit(commit)
|
|
1823
|
+
if self.exclude_merge_commits and is_merge:
|
|
1824
|
+
logger.debug(
|
|
1825
|
+
f"Excluding merge commit {commit.hexsha[:8]} from filtered line counts "
|
|
1826
|
+
f"(has {len(commit.parents)} parents)"
|
|
1827
|
+
)
|
|
1828
|
+
# Still need to calculate raw stats for the commit, but filtered stats will be 0
|
|
1829
|
+
# Continue with calculation but will return zeros for filtered stats at the end
|
|
1830
|
+
|
|
1812
1831
|
# For initial commits or commits without parents
|
|
1813
1832
|
parent = commit.parents[0] if commit.parents else None
|
|
1814
1833
|
|
|
@@ -1839,7 +1858,14 @@ class GitDataFetcher:
|
|
|
1839
1858
|
)
|
|
1840
1859
|
except GitOperationTimeout:
|
|
1841
1860
|
logger.warning(f"⏱️ Timeout calculating stats for commit {commit.hexsha[:8]}")
|
|
1842
|
-
|
|
1861
|
+
timeout_result: CommitStats = {
|
|
1862
|
+
"files": 0,
|
|
1863
|
+
"insertions": 0,
|
|
1864
|
+
"deletions": 0,
|
|
1865
|
+
"raw_insertions": 0,
|
|
1866
|
+
"raw_deletions": 0,
|
|
1867
|
+
}
|
|
1868
|
+
return timeout_result
|
|
1843
1869
|
|
|
1844
1870
|
# Parse the numstat output: insertions\tdeletions\tfilename
|
|
1845
1871
|
for line in diff_output.strip().split("\n"):
|
|
@@ -1902,10 +1928,27 @@ class GitDataFetcher:
|
|
|
1902
1928
|
# Log the error for debugging but don't crash
|
|
1903
1929
|
logger.warning(f"Error calculating commit stats for {commit.hexsha[:8]}: {e}")
|
|
1904
1930
|
|
|
1931
|
+
# If this is a merge commit and we're excluding them, return zeros for filtered stats
|
|
1932
|
+
# but keep the raw stats
|
|
1933
|
+
if self.exclude_merge_commits and is_merge:
|
|
1934
|
+
result: CommitStats = {
|
|
1935
|
+
"files": 0,
|
|
1936
|
+
"insertions": 0,
|
|
1937
|
+
"deletions": 0,
|
|
1938
|
+
"raw_insertions": raw_stats["insertions"],
|
|
1939
|
+
"raw_deletions": raw_stats["deletions"],
|
|
1940
|
+
}
|
|
1941
|
+
return result
|
|
1942
|
+
|
|
1905
1943
|
# Return both raw and filtered stats
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1944
|
+
result: CommitStats = {
|
|
1945
|
+
"files": stats["files"],
|
|
1946
|
+
"insertions": stats["insertions"],
|
|
1947
|
+
"deletions": stats["deletions"],
|
|
1948
|
+
"raw_insertions": raw_stats["insertions"],
|
|
1949
|
+
"raw_deletions": raw_stats["deletions"],
|
|
1950
|
+
}
|
|
1951
|
+
return result
|
|
1909
1952
|
|
|
1910
1953
|
def _store_day_commits_incremental(
|
|
1911
1954
|
self, repo_path: Path, date_str: str, commits: list[dict[str, Any]], project_key: str
|
|
@@ -1922,9 +1965,36 @@ class GitDataFetcher:
|
|
|
1922
1965
|
project_key: Project identifier
|
|
1923
1966
|
"""
|
|
1924
1967
|
try:
|
|
1968
|
+
# Collect summary statistics for INFO-level logging
|
|
1969
|
+
merge_count = 0
|
|
1970
|
+
excluded_file_count = 0
|
|
1971
|
+
total_excluded_insertions = 0
|
|
1972
|
+
total_excluded_deletions = 0
|
|
1973
|
+
|
|
1925
1974
|
# Transform commits to cache format
|
|
1926
1975
|
cache_format_commits = []
|
|
1927
1976
|
for commit in commits:
|
|
1977
|
+
# Track merge commits for summary logging
|
|
1978
|
+
if commit.get("is_merge", False):
|
|
1979
|
+
merge_count += 1
|
|
1980
|
+
|
|
1981
|
+
# Track excluded file statistics
|
|
1982
|
+
raw_insertions = commit.get("raw_insertions", commit.get("lines_added", 0))
|
|
1983
|
+
raw_deletions = commit.get("raw_deletions", commit.get("lines_deleted", 0))
|
|
1984
|
+
filtered_insertions = commit.get(
|
|
1985
|
+
"filtered_insertions", commit.get("lines_added", 0)
|
|
1986
|
+
)
|
|
1987
|
+
filtered_deletions = commit.get(
|
|
1988
|
+
"filtered_deletions", commit.get("lines_deleted", 0)
|
|
1989
|
+
)
|
|
1990
|
+
|
|
1991
|
+
excluded_insertions = raw_insertions - filtered_insertions
|
|
1992
|
+
excluded_deletions = raw_deletions - filtered_deletions
|
|
1993
|
+
if excluded_insertions > 0 or excluded_deletions > 0:
|
|
1994
|
+
excluded_file_count += 1
|
|
1995
|
+
total_excluded_insertions += excluded_insertions
|
|
1996
|
+
total_excluded_deletions += excluded_deletions
|
|
1997
|
+
|
|
1928
1998
|
cache_format_commit = {
|
|
1929
1999
|
"hash": commit["commit_hash"],
|
|
1930
2000
|
"author_name": commit.get("author_name", ""),
|
|
@@ -1935,15 +2005,11 @@ class GitDataFetcher:
|
|
|
1935
2005
|
"is_merge": commit.get("is_merge", False),
|
|
1936
2006
|
"files_changed_count": commit.get("files_changed_count", 0),
|
|
1937
2007
|
# Store raw unfiltered values
|
|
1938
|
-
"insertions":
|
|
1939
|
-
"deletions":
|
|
2008
|
+
"insertions": raw_insertions,
|
|
2009
|
+
"deletions": raw_deletions,
|
|
1940
2010
|
# Store filtered values
|
|
1941
|
-
"filtered_insertions":
|
|
1942
|
-
|
|
1943
|
-
),
|
|
1944
|
-
"filtered_deletions": commit.get(
|
|
1945
|
-
"filtered_deletions", commit.get("lines_deleted", 0)
|
|
1946
|
-
),
|
|
2011
|
+
"filtered_insertions": filtered_insertions,
|
|
2012
|
+
"filtered_deletions": filtered_deletions,
|
|
1947
2013
|
"story_points": commit.get("story_points"),
|
|
1948
2014
|
"ticket_references": commit.get("ticket_references", []),
|
|
1949
2015
|
}
|
|
@@ -1956,6 +2022,21 @@ class GitDataFetcher:
|
|
|
1956
2022
|
f"Incrementally stored {bulk_stats['inserted']} commits for {date_str} "
|
|
1957
2023
|
f"({bulk_stats['skipped']} already cached)"
|
|
1958
2024
|
)
|
|
2025
|
+
|
|
2026
|
+
# Summary logging at INFO level for user-facing visibility
|
|
2027
|
+
if self.exclude_merge_commits and merge_count > 0:
|
|
2028
|
+
logger.info(
|
|
2029
|
+
f"{date_str}: Excluded {merge_count} merge commits from filtered line counts "
|
|
2030
|
+
f"(exclude_merge_commits enabled)"
|
|
2031
|
+
)
|
|
2032
|
+
|
|
2033
|
+
if self.exclude_paths and excluded_file_count > 0:
|
|
2034
|
+
logger.info(
|
|
2035
|
+
f"{date_str}: Excluded changes from {excluded_file_count} commits "
|
|
2036
|
+
f"(+{total_excluded_insertions} -{total_excluded_deletions} lines) "
|
|
2037
|
+
f"due to path exclusions"
|
|
2038
|
+
)
|
|
2039
|
+
|
|
1959
2040
|
except Exception as e:
|
|
1960
2041
|
# Log error but don't fail - commits will be stored again in batch at the end
|
|
1961
2042
|
logger.warning(f"Failed to incrementally store commits for {date_str}: {e}")
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Type definitions for commit-related data structures."""
|
|
2
|
+
|
|
3
|
+
from typing import TypedDict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CommitStats(TypedDict):
|
|
7
|
+
"""Statistics for a single commit.
|
|
8
|
+
|
|
9
|
+
This type is used by DataFetcher._calculate_commit_stats() which provides
|
|
10
|
+
both filtered and raw (unfiltered) statistics. The filtered statistics
|
|
11
|
+
exclude files matching exclude_paths patterns, while raw statistics include
|
|
12
|
+
all changed files.
|
|
13
|
+
|
|
14
|
+
When exclude_merge_commits is enabled, merge commits will have filtered
|
|
15
|
+
counts set to 0 while raw counts reflect actual changes.
|
|
16
|
+
|
|
17
|
+
Attributes:
|
|
18
|
+
files: Number of files changed (filtered by exclude_paths)
|
|
19
|
+
insertions: Lines added (filtered by exclude_paths)
|
|
20
|
+
deletions: Lines removed (filtered by exclude_paths)
|
|
21
|
+
raw_insertions: Lines added (unfiltered, all files)
|
|
22
|
+
raw_deletions: Lines removed (unfiltered, all files)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
files: int
|
|
26
|
+
insertions: int
|
|
27
|
+
deletions: int
|
|
28
|
+
raw_insertions: int
|
|
29
|
+
raw_deletions: int
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class FilteredCommitStats(TypedDict):
|
|
33
|
+
"""Filtered statistics for a single commit.
|
|
34
|
+
|
|
35
|
+
This type is used by GitAnalyzer._calculate_filtered_stats() which only
|
|
36
|
+
provides filtered statistics (no raw counts). The filtered statistics
|
|
37
|
+
exclude files matching exclude_paths patterns.
|
|
38
|
+
|
|
39
|
+
When exclude_merge_commits is enabled, merge commits will have all
|
|
40
|
+
counts set to 0 to exclude them from productivity metrics.
|
|
41
|
+
|
|
42
|
+
Attributes:
|
|
43
|
+
files: Number of files changed (filtered by exclude_paths)
|
|
44
|
+
insertions: Lines added (filtered by exclude_paths)
|
|
45
|
+
deletions: Lines removed (filtered by exclude_paths)
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
files: int
|
|
49
|
+
insertions: int
|
|
50
|
+
deletions: int
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Utilities for working with Git commit objects."""
|
|
2
|
+
|
|
3
|
+
import git
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def is_merge_commit(commit: git.Commit) -> bool:
|
|
7
|
+
"""Determine if a commit is a merge commit.
|
|
8
|
+
|
|
9
|
+
A merge commit is one with 2 or more parent commits. This includes:
|
|
10
|
+
- Standard merges (2 parents)
|
|
11
|
+
- Octopus merges (3+ parents)
|
|
12
|
+
|
|
13
|
+
Args:
|
|
14
|
+
commit: GitPython Commit object to check
|
|
15
|
+
|
|
16
|
+
Returns:
|
|
17
|
+
True if commit has 2 or more parents, False otherwise
|
|
18
|
+
|
|
19
|
+
Examples:
|
|
20
|
+
>>> is_merge_commit(regular_commit) # 1 parent
|
|
21
|
+
False
|
|
22
|
+
>>> is_merge_commit(merge_commit) # 2 parents
|
|
23
|
+
True
|
|
24
|
+
>>> is_merge_commit(octopus_merge) # 3+ parents
|
|
25
|
+
True
|
|
26
|
+
>>> is_merge_commit(initial_commit) # 0 parents
|
|
27
|
+
False
|
|
28
|
+
"""
|
|
29
|
+
return len(commit.parents) > 1
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_parent_count(commit: git.Commit) -> int:
|
|
33
|
+
"""Get the number of parent commits.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
commit: GitPython Commit object
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
Number of parent commits (0 for initial commit, 1 for regular, 2+ for merge)
|
|
40
|
+
"""
|
|
41
|
+
return len(commit.parents)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def is_initial_commit(commit: git.Commit) -> bool:
|
|
45
|
+
"""Determine if a commit is an initial commit (has no parents).
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
commit: GitPython Commit object to check
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
True if commit has no parents, False otherwise
|
|
52
|
+
"""
|
|
53
|
+
return len(commit.parents) == 0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gitflow-analytics
|
|
3
|
-
Version: 3.11.1
|
|
3
|
+
Version: 3.12.6
|
|
4
4
|
Summary: Analyze Git repositories for developer productivity insights
|
|
5
5
|
Author-email: Bob Matyas <bobmatnyc@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -235,6 +235,79 @@ gitflow-analytics -c config.yaml --weeks 8 --output ./reports
|
|
|
235
235
|
|
|
236
236
|
> 💡 **Need more configuration options?** See the [Complete Configuration Guide](docs/guides/configuration.md) for advanced features, integrations, and customization.
|
|
237
237
|
|
|
238
|
+
## 🎯 Excluding Merge Commits from Metrics
|
|
239
|
+
|
|
240
|
+
GitFlow Analytics can exclude merge commits from filtered line count calculations, following DORA metrics best practices.
|
|
241
|
+
|
|
242
|
+
### Why Exclude Merge Commits?
|
|
243
|
+
|
|
244
|
+
Merge commits represent repository management, not original development work:
|
|
245
|
+
- **Average merge commit**: 236.6 filtered lines vs 30.8 for regular commits (7.7x higher)
|
|
246
|
+
- Merge commits can **skew productivity metrics** and velocity calculations
|
|
247
|
+
- **DORA metrics best practice**: Focus on original development work, not repository management
|
|
248
|
+
|
|
249
|
+
### Configuration
|
|
250
|
+
|
|
251
|
+
Add this setting to your analysis configuration:
|
|
252
|
+
|
|
253
|
+
```yaml
|
|
254
|
+
analysis:
|
|
255
|
+
# Exclude merge commits from filtered line counts (DORA metrics best practice)
|
|
256
|
+
exclude_merge_commits: true # Default: false
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### Impact Example
|
|
260
|
+
|
|
261
|
+
Real metrics from EWTN dataset analysis:
|
|
262
|
+
|
|
263
|
+
| Metric | With Merge Commits | Without Merge Commits | Change |
|
|
264
|
+
|--------|-------------------|----------------------|--------|
|
|
265
|
+
| **Total Filtered Lines** | 138,730 | 54,808 | -60% |
|
|
266
|
+
| **Merge Commits** | 355 commits | 355 commits | (excluded from line counts) |
|
|
267
|
+
| **Regular Commits** | 1,426 commits | 1,426 commits | (unchanged) |
|
|
268
|
+
|
|
269
|
+
### What Gets Excluded?
|
|
270
|
+
|
|
271
|
+
When `exclude_merge_commits: true`:
|
|
272
|
+
|
|
273
|
+
✅ **Filtered Stats**: Merge commits (2+ parents) have `filtered_insertions = 0` and `filtered_deletions = 0`
|
|
274
|
+
✅ **Raw Stats**: Raw insertion and deletion counts are always preserved for every commit, including merge commits
|
|
275
|
+
✅ **Reports**: Line count metrics reflect only original development work
|
|
276
|
+
|
|
277
|
+
❌ **Not affected**: Commit counts, developer activity tracking, ticket references
|
|
278
|
+
|
|
279
|
+
### When to Use
|
|
280
|
+
|
|
281
|
+
**✅ Enable when:**
|
|
282
|
+
- You want DORA-compliant metrics for productivity tracking
|
|
283
|
+
- Your workflow uses merge commits for pull requests
|
|
284
|
+
- You need accurate developer velocity without repository overhead
|
|
285
|
+
- You're comparing metrics across teams with different merge strategies
|
|
286
|
+
|
|
287
|
+
**❌ Disable when:**
|
|
288
|
+
- You want to track all repository activity including management overhead
|
|
289
|
+
- Merge commits represent significant manual conflict resolution in your workflow
|
|
290
|
+
- You're analyzing repositories without merge-heavy workflows
|
|
291
|
+
- You need to measure total repository churn including merges
|
|
292
|
+
|
|
293
|
+
### Example Configuration
|
|
294
|
+
|
|
295
|
+
```yaml
|
|
296
|
+
# Full configuration example
|
|
297
|
+
analysis:
|
|
298
|
+
weeks_back: 8
|
|
299
|
+
include_weekends: true
|
|
300
|
+
|
|
301
|
+
# DORA-compliant metrics: exclude merge commits
|
|
302
|
+
exclude_merge_commits: true
|
|
303
|
+
|
|
304
|
+
# Analyze ALL branches to capture feature branch work
|
|
305
|
+
branch_patterns:
|
|
306
|
+
- "*" # Include all branches (feature, develop, hotfix, etc.)
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
> 💡 **Pro Tip**: Combine `exclude_merge_commits: true` with `branch_patterns: ["*"]` to analyze all development work without merge overhead.
|
|
310
|
+
|
|
238
311
|
## 📊 Generated Reports
|
|
239
312
|
|
|
240
313
|
GitFlow Analytics generates comprehensive reports for different audiences:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
gitflow_analytics/__init__.py,sha256=W3Jaey5wuT1nBPehVLTIRkVIyBa5jgYOlBKc_UFfh-4,773
|
|
2
|
-
gitflow_analytics/_version.py,sha256=
|
|
3
|
-
gitflow_analytics/cli.py,sha256=
|
|
2
|
+
gitflow_analytics/_version.py,sha256=pewC7Cj-Xe3Qm1VHbqoeHBpNrY0jP-bgcVEirV9kWuU,138
|
|
3
|
+
gitflow_analytics/cli.py,sha256=ElQH2MiFVeQuYv4fosF5KVgMLRwJ0ILz1n4Ezt32s0M,274948
|
|
4
4
|
gitflow_analytics/config.py,sha256=XRuxvzLWyn_ML7mDCcuZ9-YFNAEsnt33vIuWxQQ_jxg,1033
|
|
5
5
|
gitflow_analytics/constants.py,sha256=GXEncUJS9ijOI5KWtQCTANwdqxPfXpw-4lNjhaWTKC4,2488
|
|
6
6
|
gitflow_analytics/verify_activity.py,sha256=aRQnmypf5NDasXudf2iz_WdJnCWtwlbAiJ5go0DJLSU,27050
|
|
@@ -16,16 +16,16 @@ gitflow_analytics/cli_wizards/run_launcher.py,sha256=J6G_C7IqxPg7_GhAfbV99D1dIIW
|
|
|
16
16
|
gitflow_analytics/config/__init__.py,sha256=KziRIbBJctB5LOLcKLzELWA1rXwjS6-C2_DeM_hT9rM,1133
|
|
17
17
|
gitflow_analytics/config/aliases.py,sha256=z9F0X6qbbF544Tw7sHlOoBj5mpRSddMkCpoKLzvVzDU,10960
|
|
18
18
|
gitflow_analytics/config/errors.py,sha256=IBKhAIwJ4gscZFnLDyE3jEp03wn2stPR7JQJXNSIfok,10386
|
|
19
|
-
gitflow_analytics/config/loader.py,sha256=
|
|
19
|
+
gitflow_analytics/config/loader.py,sha256=LpXUDyhLnlLcyPkDgFr1ustVhQmSS8Wn2ZiEaw_iNnY,38036
|
|
20
20
|
gitflow_analytics/config/profiles.py,sha256=yUjFAWW6uzOUdi5qlPE-QV9681HigyrLiSJFpL8X9A0,7967
|
|
21
21
|
gitflow_analytics/config/repository.py,sha256=u7JHcKvqmXOl3i7EmNUfJ6wtjzElxPMyXRkATnVyQ0I,4685
|
|
22
|
-
gitflow_analytics/config/schema.py,sha256=
|
|
22
|
+
gitflow_analytics/config/schema.py,sha256=ETxxWUwpAAwMXiXFkawoYcwJvvSo9D6zK0uHU-JLyS0,17270
|
|
23
23
|
gitflow_analytics/config/validator.py,sha256=l7AHjXYJ8wEmyA1rn2WiItZXtAiRb9YBLjFCDl53qKM,5907
|
|
24
24
|
gitflow_analytics/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
-
gitflow_analytics/core/analyzer.py,sha256=
|
|
25
|
+
gitflow_analytics/core/analyzer.py,sha256=apLbRFAOGDPCNnBTNOG_eXaVXh_QglO07t6p5sINnKo,59924
|
|
26
26
|
gitflow_analytics/core/branch_mapper.py,sha256=1L1ctrhTEqMZ61eS1nZRkcyaarLipeQgotw4HdXcSmM,7407
|
|
27
27
|
gitflow_analytics/core/cache.py,sha256=2SBzry3FoLCJyhu-I-AgNTSzN_MkA-DunzOAxq_lyTw,69152
|
|
28
|
-
gitflow_analytics/core/data_fetcher.py,sha256=
|
|
28
|
+
gitflow_analytics/core/data_fetcher.py,sha256=Adj1EE2RaHxSC6xmcagQb_ak9M9RgUC2GgfPuSrITME,106946
|
|
29
29
|
gitflow_analytics/core/git_auth.py,sha256=QP7U5_Mi9J-hEtoEhdjoMBl61nCukOGlL8PYXYSyN3g,6369
|
|
30
30
|
gitflow_analytics/core/git_timeout_wrapper.py,sha256=14K8PHKSOonW4hJpLigB5XQNSWxmFbMFbrpu8cT1h-M,12534
|
|
31
31
|
gitflow_analytics/core/identity.py,sha256=k7i-vcRJ2eiTU0_kYGY5QOhxcqnitibTTx7DVONW0kg,31237
|
|
@@ -118,11 +118,15 @@ gitflow_analytics/security/extractors/vulnerability_scanner.py,sha256=TWK1fPMN5s
|
|
|
118
118
|
gitflow_analytics/training/__init__.py,sha256=YT5p7Wm4U8trzLnbS5FASJBWPMKhqp3rlAThjpxWnxo,143
|
|
119
119
|
gitflow_analytics/training/model_loader.py,sha256=xGZLSopGxDhC--2XN6ytRgi2CyjOKY4zS4fZ-ZlO6lM,13245
|
|
120
120
|
gitflow_analytics/training/pipeline.py,sha256=PQegTk_-OsPexVyRDfiy-3Df-7pcs25C4vPASr-HT9E,19951
|
|
121
|
+
gitflow_analytics/types/__init__.py,sha256=v31ysjqF7jgCUkqAKaj9gqV3RDjL74sJRzX3uh7NxZA,156
|
|
122
|
+
gitflow_analytics/types/commit_types.py,sha256=Ub7Nyh5ajGQW_YVoVV_iQ1Y05aEHZd-YGA4xjOSHElc,1684
|
|
121
123
|
gitflow_analytics/ui/__init__.py,sha256=UBhYhZMvwlSrCuGWjkIdoP2zNbiQxOHOli-I8mqIZUE,441
|
|
122
124
|
gitflow_analytics/ui/progress_display.py,sha256=3xJnCOSs1DRVAfS-rTu37EsLfWDFW5-mbv-bPS9NMm4,59182
|
|
123
|
-
gitflow_analytics
|
|
124
|
-
gitflow_analytics
|
|
125
|
-
gitflow_analytics-3.
|
|
126
|
-
gitflow_analytics-3.
|
|
127
|
-
gitflow_analytics-3.
|
|
128
|
-
gitflow_analytics-3.
|
|
125
|
+
gitflow_analytics/utils/__init__.py,sha256=YE3E5Mx-LmVRqLIgUUwDmbstm6gkpeavYHrQmVjwR3o,197
|
|
126
|
+
gitflow_analytics/utils/commit_utils.py,sha256=TBgrWW73EODGOegGCF79ch0L0e5R6gpydNWutiQOa14,1356
|
|
127
|
+
gitflow_analytics-3.12.6.dist-info/licenses/LICENSE,sha256=xwvSwY1GYXpRpmbnFvvnbmMwpobnrdN9T821sGvjOY0,1066
|
|
128
|
+
gitflow_analytics-3.12.6.dist-info/METADATA,sha256=xl10qO6jZm4fVUFzQlxNwas8F1KakCkc8W0jYhbwcR0,39531
|
|
129
|
+
gitflow_analytics-3.12.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
130
|
+
gitflow_analytics-3.12.6.dist-info/entry_points.txt,sha256=ZOsX0GLsnMysp5FWPOfP_qyoS7WJ8IgcaDFDxWBYl1g,98
|
|
131
|
+
gitflow_analytics-3.12.6.dist-info/top_level.txt,sha256=CQyxZXjKvpSB1kgqqtuE0PCRqfRsXZJL8JrYpJKtkrk,18
|
|
132
|
+
gitflow_analytics-3.12.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|