gitflow_analytics-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +22 -0
- gitflow_analytics/_version.py +4 -0
- gitflow_analytics/cli.py +441 -0
- gitflow_analytics/config.py +215 -0
- gitflow_analytics/core/__init__.py +0 -0
- gitflow_analytics/core/analyzer.py +195 -0
- gitflow_analytics/core/branch_mapper.py +221 -0
- gitflow_analytics/core/cache.py +275 -0
- gitflow_analytics/core/identity.py +402 -0
- gitflow_analytics/extractors/__init__.py +0 -0
- gitflow_analytics/extractors/base.py +41 -0
- gitflow_analytics/extractors/story_points.py +128 -0
- gitflow_analytics/extractors/tickets.py +157 -0
- gitflow_analytics/integrations/__init__.py +0 -0
- gitflow_analytics/integrations/github_integration.py +160 -0
- gitflow_analytics/integrations/orchestrator.py +119 -0
- gitflow_analytics/metrics/__init__.py +0 -0
- gitflow_analytics/metrics/dora.py +327 -0
- gitflow_analytics/models/__init__.py +0 -0
- gitflow_analytics/models/database.py +171 -0
- gitflow_analytics/reports/__init__.py +0 -0
- gitflow_analytics/reports/analytics_writer.py +454 -0
- gitflow_analytics/reports/csv_writer.py +311 -0
- gitflow_analytics/reports/narrative_writer.py +263 -0
- gitflow_analytics-1.0.0.dist-info/METADATA +201 -0
- gitflow_analytics-1.0.0.dist-info/RECORD +30 -0
- gitflow_analytics-1.0.0.dist-info/WHEEL +5 -0
- gitflow_analytics-1.0.0.dist-info/entry_points.txt +2 -0
- gitflow_analytics-1.0.0.dist-info/licenses/LICENSE +21 -0
- gitflow_analytics-1.0.0.dist-info/top_level.txt +1 -0
gitflow_analytics/core/analyzer.py
@@ -0,0 +1,195 @@
"""Git repository analyzer with batch processing support."""
import re
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple, Generator
from pathlib import Path
import git
from git import Repo
from tqdm import tqdm

from .cache import GitAnalysisCache
from ..extractors.story_points import StoryPointExtractor
from ..extractors.tickets import TicketExtractor
from .branch_mapper import BranchToProjectMapper


class GitAnalyzer:
    """Analyze Git repositories with caching and batch processing."""

    def __init__(self, cache: GitAnalysisCache, batch_size: int = 1000,
                 branch_mapping_rules: Optional[Dict[str, List[str]]] = None):
        """Initialize analyzer with cache."""
        self.cache = cache
        self.batch_size = batch_size
        self.story_point_extractor = StoryPointExtractor()
        self.ticket_extractor = TicketExtractor()
        self.branch_mapper = BranchToProjectMapper(branch_mapping_rules)

    def analyze_repository(self, repo_path: Path, since: datetime,
                           branch: Optional[str] = None) -> List[Dict[str, Any]]:
        """Analyze a Git repository with batch processing."""
        try:
            repo = Repo(repo_path)
        except Exception as e:
            raise ValueError(f"Failed to open repository at {repo_path}: {e}")

        # Get commits to analyze
        commits = self._get_commits(repo, since, branch)
        total_commits = len(commits)

        if total_commits == 0:
            return []

        analyzed_commits = []

        # Process in batches with progress bar
        with tqdm(total=total_commits, desc=f"Analyzing {repo_path.name}") as pbar:
            for batch in self._batch_commits(commits, self.batch_size):
                batch_results = self._process_batch(repo, repo_path, batch)
                analyzed_commits.extend(batch_results)

                # Cache the batch
                self.cache.cache_commits_batch(str(repo_path), batch_results)

                pbar.update(len(batch))

        return analyzed_commits

    def _get_commits(self, repo: Repo, since: datetime,
                     branch: Optional[str] = None) -> List[git.Commit]:
        """Get commits from repository."""
        if branch:
            try:
                commits = list(repo.iter_commits(branch, since=since))
            except git.GitCommandError:
                # Branch doesn't exist
                return []
        else:
            # Get commits from all branches
            commits = []
            for ref in repo.refs:
                if ref.name.startswith('origin/'):
                    continue  # Skip remote branches
                try:
                    branch_commits = list(repo.iter_commits(ref, since=since))
                    commits.extend(branch_commits)
                except git.GitCommandError:
                    continue

            # Remove duplicates while preserving order
            seen = set()
            unique_commits = []
            for commit in commits:
                if commit.hexsha not in seen:
                    seen.add(commit.hexsha)
                    unique_commits.append(commit)

            commits = unique_commits

        # Sort by date
        return sorted(commits, key=lambda c: c.committed_datetime)

    def _batch_commits(self, commits: List[git.Commit],
                       batch_size: int) -> Generator[List[git.Commit], None, None]:
        """Yield batches of commits."""
        for i in range(0, len(commits), batch_size):
            yield commits[i:i + batch_size]

    def _process_batch(self, repo: Repo, repo_path: Path,
                       commits: List[git.Commit]) -> List[Dict[str, Any]]:
        """Process a batch of commits."""
        results = []

        for commit in commits:
            # Check cache first
            cached = self.cache.get_cached_commit(str(repo_path), commit.hexsha)
            if cached:
                results.append(cached)
                continue

            # Analyze commit
            commit_data = self._analyze_commit(repo, commit, repo_path)
            results.append(commit_data)

        return results

    def _analyze_commit(self, repo: Repo, commit: git.Commit, repo_path: Path) -> Dict[str, Any]:
        """Analyze a single commit."""
        # Basic commit data
        commit_data = {
            'hash': commit.hexsha,
            'author_name': commit.author.name,
            'author_email': commit.author.email,
            'message': commit.message,
            'timestamp': commit.committed_datetime,
            'is_merge': len(commit.parents) > 1
        }

        # Get branch name
        commit_data['branch'] = self._get_commit_branch(repo, commit)

        # Map branch to project
        commit_data['inferred_project'] = self.branch_mapper.map_branch_to_project(
            commit_data['branch'], repo_path
        )

        # Calculate metrics
        stats = commit.stats.total
        commit_data['files_changed'] = stats.get('files', 0)
        commit_data['insertions'] = stats.get('insertions', 0)
        commit_data['deletions'] = stats.get('deletions', 0)

        # Extract story points
        commit_data['story_points'] = self.story_point_extractor.extract_from_text(
            commit.message
        )

        # Extract ticket references
        commit_data['ticket_references'] = self.ticket_extractor.extract_from_text(
            commit.message
        )

        # Calculate complexity delta
        commit_data['complexity_delta'] = self._calculate_complexity_delta(commit)

        return commit_data

    def _get_commit_branch(self, repo: Repo, commit: git.Commit) -> str:
        """Get the branch name for a commit."""
        # This is a simplified approach - getting the first branch that contains the commit
        for branch in repo.branches:
            if commit in repo.iter_commits(branch):
                return branch.name
        return 'unknown'

    def _calculate_complexity_delta(self, commit: git.Commit) -> float:
        """Calculate complexity change for a commit."""
        total_delta = 0.0

        for diff in commit.diff(commit.parents[0] if commit.parents else None):
            if not self._is_code_file(diff.b_path or diff.a_path or ''):
                continue

            # Simple complexity estimation based on diff size
            # In a real implementation, you'd parse the code and calculate cyclomatic complexity
            if diff.new_file:
                total_delta += diff.b_blob.size / 100 if diff.b_blob else 0
            elif diff.deleted_file:
                total_delta -= diff.a_blob.size / 100 if diff.a_blob else 0
            else:
                # Modified file - estimate based on change size
                added = len(diff.diff.decode('utf-8', errors='ignore').split('\n+')) if diff.diff else 0
                removed = len(diff.diff.decode('utf-8', errors='ignore').split('\n-')) if diff.diff else 0
                total_delta += (added - removed) / 10

        return total_delta

    def _is_code_file(self, filepath: str) -> bool:
        """Check if file is a code file."""
        code_extensions = {
            '.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.hpp',
            '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala',
            '.cs', '.vb', '.r', '.m', '.mm', '.f90', '.f95', '.lua'
        }

        return any(filepath.endswith(ext) for ext in code_extensions)
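The comment in _calculate_complexity_delta notes that a real implementation would parse the code and compute cyclomatic complexity rather than scale blob sizes. For Python files, a minimal sketch of that idea using only the standard library's ast module could look like the following; the function name and the chosen set of decision nodes are illustrative, not part of the package:

import ast

def estimate_cyclomatic_complexity(source: str) -> int:
    """Rough cyclomatic complexity: 1 + number of decision points in the module."""
    tree = ast.parse(source)
    decision_nodes = (ast.If, ast.For, ast.While, ast.ExceptHandler,
                      ast.BoolOp, ast.IfExp, ast.Assert)
    return 1 + sum(isinstance(node, decision_nodes) for node in ast.walk(tree))

# A per-commit delta would then be the complexity of each changed file after the
# commit minus its complexity before, summed over the code files in the diff.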
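Taken together with GitAnalysisCache (defined in core/cache.py below), the constructor and analyze_repository signature suggest roughly the following usage. A minimal sketch, assuming the classes are importable from the module paths in the file list above; the cache directory, repository path, and 90-day window are chosen purely for illustration:

from datetime import datetime, timedelta
from pathlib import Path

from gitflow_analytics.core.analyzer import GitAnalyzer
from gitflow_analytics.core.cache import GitAnalysisCache

cache_dir = Path(".gitflow-cache")
cache_dir.mkdir(exist_ok=True)  # assumption: the cache directory exists before the SQLite file is created

cache = GitAnalysisCache(cache_dir, ttl_hours=168)
analyzer = GitAnalyzer(cache, batch_size=500)

commits = analyzer.analyze_repository(
    repo_path=Path("/path/to/local/clone"),   # placeholder path
    since=datetime.now() - timedelta(days=90),
)
print(f"Analyzed {len(commits)} commits; cache stats: {cache.get_cache_stats()}")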
gitflow_analytics/core/branch_mapper.py
@@ -0,0 +1,221 @@
"""Map git branches to projects based on naming conventions."""
import re
from typing import Dict, List, Optional, Tuple
from pathlib import Path


class BranchToProjectMapper:
    """Maps git branches to project keys based on conventions."""

    def __init__(self, mapping_rules: Optional[Dict[str, List[str]]] = None):
        """
        Initialize with custom mapping rules.

        Args:
            mapping_rules: Dict mapping project keys to list of branch patterns
                e.g., {'FRONTEND': ['feature/fe-*', 'frontend/*']}
        """
        self.mapping_rules = mapping_rules or self._get_default_rules()
        self.compiled_rules = self._compile_patterns()

    def _get_default_rules(self) -> Dict[str, List[str]]:
        """Get default branch mapping rules."""
        return {
            'FRONTEND': [
                r'^feature/fe[-/_]',
                r'^feature/frontend[-/_]',
                r'^frontend/',
                r'^fe/',
                r'[-/_]frontend[-/_]',
                r'[-/_]fe[-/_]',
                r'[-/_]ui[-/_]',
                r'[-/_]web[-/_]'
            ],
            'BACKEND': [
                r'^feature/be[-/_]',
                r'^feature/backend[-/_]',
                r'^backend/',
                r'^be/',
                r'^api/',
                r'[-/_]backend[-/_]',
                r'[-/_]be[-/_]',
                r'[-/_]api[-/_]',
                r'[-/_]server[-/_]'
            ],
            'SERVICE': [
                r'^feature/service[-/_]',
                r'^feature/svc[-/_]',
                r'^service/',
                r'^svc/',
                r'[-/_]service[-/_]',
                r'[-/_]svc[-/_]',
                r'[-/_]microservice[-/_]'
            ],
            'MOBILE': [
                r'^feature/mobile[-/_]',
                r'^feature/app[-/_]',
                r'^mobile/',
                r'^app/',
                r'^ios/',
                r'^android/',
                r'[-/_]mobile[-/_]',
                r'[-/_]app[-/_]',
                r'[-/_]ios[-/_]',
                r'[-/_]android[-/_]'
            ],
            'DATA': [
                r'^feature/data[-/_]',
                r'^feature/etl[-/_]',
                r'^data/',
                r'^etl/',
                r'^pipeline/',
                r'[-/_]data[-/_]',
                r'[-/_]etl[-/_]',
                r'[-/_]pipeline[-/_]',
                r'[-/_]analytics[-/_]'
            ],
            'INFRA': [
                r'^feature/infra[-/_]',
                r'^feature/devops[-/_]',
                r'^infra/',
                r'^devops/',
                r'^ops/',
                r'[-/_]infra[-/_]',
                r'[-/_]devops[-/_]',
                r'[-/_]ops[-/_]',
                r'[-/_]deployment[-/_]'
            ],
            'SCRAPER': [
                r'^feature/scraper[-/_]',
                r'^feature/crawler[-/_]',
                r'^scraper/',
                r'^crawler/',
                r'[-/_]scraper[-/_]',
                r'[-/_]crawler[-/_]',
                r'[-/_]scraping[-/_]'
            ]
        }

    def _compile_patterns(self) -> Dict[str, List[re.Pattern]]:
        """Compile regex patterns for efficiency."""
        compiled = {}
        for project, patterns in self.mapping_rules.items():
            compiled[project] = [re.compile(pattern, re.IGNORECASE) for pattern in patterns]
        return compiled

    def map_branch_to_project(self, branch_name: str, repo_path: Optional[Path] = None) -> str:
        """
        Map a branch name to a project key.

        Args:
            branch_name: Git branch name
            repo_path: Optional repository path for context

        Returns:
            Project key or 'UNKNOWN'
        """
        if not branch_name or branch_name in ['main', 'master', 'develop', 'development']:
            # Try to infer from repo path if available
            if repo_path:
                return self._infer_from_repo_path(repo_path)
            return 'UNKNOWN'

        # Check against compiled patterns
        for project, patterns in self.compiled_rules.items():
            for pattern in patterns:
                if pattern.search(branch_name):
                    return project

        # Try to extract from ticket references in branch name
        ticket_project = self._extract_from_ticket(branch_name)
        if ticket_project:
            return ticket_project

        # Try to infer from repo path if available
        if repo_path:
            return self._infer_from_repo_path(repo_path)

        return 'UNKNOWN'

    def _extract_from_ticket(self, branch_name: str) -> Optional[str]:
        """Extract project from ticket reference in branch name."""
        # Common ticket patterns
        ticket_patterns = [
            r'([A-Z]{2,})-\d+',   # JIRA style: PROJ-123
            r'#([A-Z]{2,})\d+',   # Hash prefix: #PROJ123
            r'([A-Z]{2,})_\d+',   # Underscore: PROJ_123
        ]

        for pattern in ticket_patterns:
            match = re.search(pattern, branch_name, re.IGNORECASE)
            if match:
                prefix = match.group(1).upper()
                # Map common prefixes to projects
                prefix_map = {
                    'FE': 'FRONTEND',
                    'BE': 'BACKEND',
                    'SVC': 'SERVICE',
                    'MOB': 'MOBILE',
                    'DATA': 'DATA',
                    'ETL': 'DATA',
                    'INFRA': 'INFRA',
                    'OPS': 'INFRA',
                    'SCRAPE': 'SCRAPER',
                    'CRAWL': 'SCRAPER'
                }

                if prefix in prefix_map:
                    return prefix_map[prefix]

                # Check if prefix matches any project key
                for project in self.mapping_rules.keys():
                    if prefix == project or prefix in project:
                        return project

        return None

    def _infer_from_repo_path(self, repo_path: Path) -> str:
        """Infer project from repository path."""
        repo_name = repo_path.name.lower()

        # Direct mapping
        path_map = {
            'frontend': 'FRONTEND',
            'backend': 'BACKEND',
            'service': 'SERVICE',
            'service-ts': 'SERVICE_TS',
            'services': 'SERVICES',
            'mobile': 'MOBILE',
            'ios': 'MOBILE',
            'android': 'MOBILE',
            'data': 'DATA',
            'etl': 'DATA',
            'infra': 'INFRA',
            'infrastructure': 'INFRA',
            'scraper': 'SCRAPER',
            'crawler': 'SCRAPER',
            'scrapers': 'SCRAPER'
        }

        for key, project in path_map.items():
            if key in repo_name:
                return project

        # Check parent directory
        if repo_path.parent.name.lower() in path_map:
            return path_map[repo_path.parent.name.lower()]

        return 'UNKNOWN'

    def add_mapping_rule(self, project: str, patterns: List[str]):
        """Add custom mapping rules for a project."""
        if project not in self.mapping_rules:
            self.mapping_rules[project] = []

        self.mapping_rules[project].extend(patterns)

        # Recompile patterns
        self.compiled_rules[project] = [
            re.compile(pattern, re.IGNORECASE)
            for pattern in self.mapping_rules[project]
        ]
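A short sketch of how the mapper resolves names under the default rules above; the branch names and repository paths are made up, and the keys in the comments follow from the pattern, ticket-prefix, and path fallbacks applied in that order:

from pathlib import Path

from gitflow_analytics.core.branch_mapper import BranchToProjectMapper

mapper = BranchToProjectMapper()

# 1. Branch-name patterns are tried first.
print(mapper.map_branch_to_project("feature/fe-login-form"))             # FRONTEND
# 2. Ticket prefixes are the fallback when no pattern matches.
print(mapper.map_branch_to_project("MOB-77-push-notifications"))         # MOBILE
# 3. Trunk branches defer to the repository path.
print(mapper.map_branch_to_project("main", Path("/repos/backend-api")))  # BACKEND

# Custom rules are appended to the existing set and recompiled immediately.
mapper.add_mapping_rule("ML", [r"^feature/ml[-/_]"])
print(mapper.map_branch_to_project("feature/ml-churn-model"))            # ML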
gitflow_analytics/core/cache.py
@@ -0,0 +1,275 @@
"""Caching layer for Git analysis with SQLite backend."""
import hashlib
from datetime import datetime, timedelta
from typing import List, Optional, Dict, Any
from pathlib import Path
from contextlib import contextmanager

from sqlalchemy.orm import Session
from sqlalchemy import and_

from ..models.database import Database, CachedCommit, PullRequestCache, IssueCache


class GitAnalysisCache:
    """Cache for Git analysis results."""

    def __init__(self, cache_dir: Path, ttl_hours: int = 168):
        """Initialize cache with SQLite backend."""
        self.cache_dir = cache_dir
        self.ttl_hours = ttl_hours
        self.db = Database(cache_dir / 'gitflow_cache.db')

    @contextmanager
    def get_session(self):
        """Get database session context manager."""
        session = self.db.get_session()
        try:
            yield session
            session.commit()
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()

    def get_cached_commit(self, repo_path: str, commit_hash: str) -> Optional[Dict[str, Any]]:
        """Retrieve cached commit data if not stale."""
        with self.get_session() as session:
            cached = session.query(CachedCommit).filter(
                and_(
                    CachedCommit.repo_path == repo_path,
                    CachedCommit.commit_hash == commit_hash
                )
            ).first()

            if cached and not self._is_stale(cached.cached_at):
                return self._commit_to_dict(cached)

            return None

    def cache_commit(self, repo_path: str, commit_data: Dict[str, Any]):
        """Cache commit analysis results."""
        with self.get_session() as session:
            # Check if already exists
            existing = session.query(CachedCommit).filter(
                and_(
                    CachedCommit.repo_path == repo_path,
                    CachedCommit.commit_hash == commit_data['hash']
                )
            ).first()

            if existing:
                # Update existing
                for key, value in commit_data.items():
                    if hasattr(existing, key):
                        setattr(existing, key, value)
                existing.cached_at = datetime.utcnow()
            else:
                # Create new
                cached_commit = CachedCommit(
                    repo_path=repo_path,
                    commit_hash=commit_data['hash'],
                    author_name=commit_data.get('author_name'),
                    author_email=commit_data.get('author_email'),
                    message=commit_data.get('message'),
                    timestamp=commit_data.get('timestamp'),
                    branch=commit_data.get('branch'),
                    is_merge=commit_data.get('is_merge', False),
                    files_changed=commit_data.get('files_changed', 0),
                    insertions=commit_data.get('insertions', 0),
                    deletions=commit_data.get('deletions', 0),
                    complexity_delta=commit_data.get('complexity_delta', 0.0),
                    story_points=commit_data.get('story_points'),
                    ticket_references=commit_data.get('ticket_references', [])
                )
                session.add(cached_commit)

    def cache_commits_batch(self, repo_path: str, commits: List[Dict[str, Any]]):
        """Cache multiple commits in a single transaction."""
        with self.get_session() as session:
            for commit_data in commits:
                # Check if already exists
                existing = session.query(CachedCommit).filter(
                    and_(
                        CachedCommit.repo_path == repo_path,
                        CachedCommit.commit_hash == commit_data['hash']
                    )
                ).first()

                if existing:
                    # Update existing
                    for key, value in commit_data.items():
                        if key != 'hash' and hasattr(existing, key):
                            setattr(existing, key, value)
                    existing.cached_at = datetime.utcnow()
                else:
                    # Create new
                    cached_commit = CachedCommit(
                        repo_path=repo_path,
                        commit_hash=commit_data['hash'],
                        author_name=commit_data.get('author_name'),
                        author_email=commit_data.get('author_email'),
                        message=commit_data.get('message'),
                        timestamp=commit_data.get('timestamp'),
                        branch=commit_data.get('branch'),
                        is_merge=commit_data.get('is_merge', False),
                        files_changed=commit_data.get('files_changed', 0),
                        insertions=commit_data.get('insertions', 0),
                        deletions=commit_data.get('deletions', 0),
                        complexity_delta=commit_data.get('complexity_delta', 0.0),
                        story_points=commit_data.get('story_points'),
                        ticket_references=commit_data.get('ticket_references', [])
                    )
                    session.add(cached_commit)

    def get_cached_pr(self, repo_path: str, pr_number: int) -> Optional[Dict[str, Any]]:
        """Retrieve cached pull request data."""
        with self.get_session() as session:
            cached = session.query(PullRequestCache).filter(
                and_(
                    PullRequestCache.repo_path == repo_path,
                    PullRequestCache.pr_number == pr_number
                )
            ).first()

            if cached and not self._is_stale(cached.cached_at):
                return self._pr_to_dict(cached)

            return None

    def cache_pr(self, repo_path: str, pr_data: Dict[str, Any]):
        """Cache pull request data."""
        with self.get_session() as session:
            cached_pr = PullRequestCache(
                repo_path=repo_path,
                pr_number=pr_data['number'],
                title=pr_data.get('title'),
                description=pr_data.get('description'),
                author=pr_data.get('author'),
                created_at=pr_data.get('created_at'),
                merged_at=pr_data.get('merged_at'),
                story_points=pr_data.get('story_points'),
                labels=pr_data.get('labels', []),
                commit_hashes=pr_data.get('commit_hashes', [])
            )
            session.merge(cached_pr)

    def cache_issue(self, platform: str, issue_data: Dict[str, Any]):
        """Cache issue data from various platforms."""
        with self.get_session() as session:
            cached_issue = IssueCache(
                platform=platform,
                issue_id=str(issue_data['id']),
                project_key=issue_data['project_key'],
                title=issue_data.get('title'),
                description=issue_data.get('description'),
                status=issue_data.get('status'),
                assignee=issue_data.get('assignee'),
                created_at=issue_data.get('created_at'),
                updated_at=issue_data.get('updated_at'),
                resolved_at=issue_data.get('resolved_at'),
                story_points=issue_data.get('story_points'),
                labels=issue_data.get('labels', []),
                platform_data=issue_data.get('platform_data', {})
            )
            session.merge(cached_issue)

    def get_cached_issues(self, platform: str, project_key: str) -> List[Dict[str, Any]]:
        """Get all cached issues for a platform and project."""
        with self.get_session() as session:
            issues = session.query(IssueCache).filter(
                and_(
                    IssueCache.platform == platform,
                    IssueCache.project_key == project_key
                )
            ).all()

            return [self._issue_to_dict(issue) for issue in issues
                    if not self._is_stale(issue.cached_at)]

    def clear_stale_cache(self):
        """Remove stale cache entries."""
        cutoff_time = datetime.utcnow() - timedelta(hours=self.ttl_hours)

        with self.get_session() as session:
            session.query(CachedCommit).filter(
                CachedCommit.cached_at < cutoff_time
            ).delete()

            session.query(PullRequestCache).filter(
                PullRequestCache.cached_at < cutoff_time
            ).delete()

            session.query(IssueCache).filter(
                IssueCache.cached_at < cutoff_time
            ).delete()

    def get_cache_stats(self) -> Dict[str, int]:
        """Get cache statistics."""
        with self.get_session() as session:
            stats = {
                'cached_commits': session.query(CachedCommit).count(),
                'cached_prs': session.query(PullRequestCache).count(),
                'cached_issues': session.query(IssueCache).count(),
                'stale_commits': session.query(CachedCommit).filter(
                    CachedCommit.cached_at < datetime.utcnow() - timedelta(hours=self.ttl_hours)
                ).count()
            }
            return stats

    def _is_stale(self, cached_at: datetime) -> bool:
        """Check if cache entry is stale."""
        if self.ttl_hours == 0:  # No expiration
            return False
        return cached_at < datetime.utcnow() - timedelta(hours=self.ttl_hours)

    def _commit_to_dict(self, commit: CachedCommit) -> Dict[str, Any]:
        """Convert CachedCommit to dictionary."""
        return {
            'hash': commit.commit_hash,
            'author_name': commit.author_name,
            'author_email': commit.author_email,
            'message': commit.message,
            'timestamp': commit.timestamp,
            'branch': commit.branch,
            'is_merge': commit.is_merge,
            'files_changed': commit.files_changed,
            'insertions': commit.insertions,
            'deletions': commit.deletions,
            'complexity_delta': commit.complexity_delta,
            'story_points': commit.story_points,
            'ticket_references': commit.ticket_references or []
        }

    def _pr_to_dict(self, pr: PullRequestCache) -> Dict[str, Any]:
        """Convert PullRequestCache to dictionary."""
        return {
            'number': pr.pr_number,
            'title': pr.title,
            'description': pr.description,
            'author': pr.author,
            'created_at': pr.created_at,
            'merged_at': pr.merged_at,
            'story_points': pr.story_points,
            'labels': pr.labels or [],
            'commit_hashes': pr.commit_hashes or []
        }

    def _issue_to_dict(self, issue: IssueCache) -> Dict[str, Any]:
        """Convert IssueCache to dictionary."""
        return {
            'platform': issue.platform,
            'id': issue.issue_id,
            'project_key': issue.project_key,
            'title': issue.title,
            'description': issue.description,
            'status': issue.status,
            'assignee': issue.assignee,
            'created_at': issue.created_at,
            'updated_at': issue.updated_at,
            'resolved_at': issue.resolved_at,
            'story_points': issue.story_points,
            'labels': issue.labels or [],
            'platform_data': issue.platform_data or {}
        }
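Commit entries in this cache are keyed by (repo_path, commit_hash) and expire after ttl_hours (168 by default; 0 disables expiration). A minimal maintenance sketch along those lines, with the directory, repository path, and hash placeholder purely illustrative:

from pathlib import Path

from gitflow_analytics.core.cache import GitAnalysisCache

cache_dir = Path(".gitflow-cache")
cache_dir.mkdir(exist_ok=True)  # assumption: the directory exists before the SQLite file is created

cache = GitAnalysisCache(cache_dir, ttl_hours=24)

# Reads return None both for misses and for entries older than ttl_hours.
hit = cache.get_cached_commit("/repos/backend-api", "0123abc...")  # placeholder hash
print("cache hit" if hit else "cache miss")

# Drop everything past the TTL, then report what is left.
cache.clear_stale_cache()
print(cache.get_cache_stats())
# e.g. {'cached_commits': 0, 'cached_prs': 0, 'cached_issues': 0, 'stale_commits': 0}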