gitflow_analytics-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,195 @@
1
+ """Git repository analyzer with batch processing support."""
2
+ import re
3
+ from datetime import datetime
4
+ from typing import List, Dict, Any, Optional, Tuple, Generator
5
+ from pathlib import Path
6
+ import git
7
+ from git import Repo
8
+ from tqdm import tqdm
9
+
10
+ from .cache import GitAnalysisCache
11
+ from ..extractors.story_points import StoryPointExtractor
12
+ from ..extractors.tickets import TicketExtractor
13
+ from .branch_mapper import BranchToProjectMapper
14
+
15
+
16
+ class GitAnalyzer:
17
+ """Analyze Git repositories with caching and batch processing."""
18
+
19
+ def __init__(self, cache: GitAnalysisCache, batch_size: int = 1000,
20
+ branch_mapping_rules: Optional[Dict[str, List[str]]] = None):
21
+ """Initialize analyzer with cache."""
22
+ self.cache = cache
23
+ self.batch_size = batch_size
24
+ self.story_point_extractor = StoryPointExtractor()
25
+ self.ticket_extractor = TicketExtractor()
26
+ self.branch_mapper = BranchToProjectMapper(branch_mapping_rules)
27
+
28
+ def analyze_repository(self, repo_path: Path, since: datetime,
29
+ branch: Optional[str] = None) -> List[Dict[str, Any]]:
30
+ """Analyze a Git repository with batch processing."""
31
+ try:
32
+ repo = Repo(repo_path)
33
+ except Exception as e:
34
+ raise ValueError(f"Failed to open repository at {repo_path}: {e}")
35
+
36
+ # Get commits to analyze
37
+ commits = self._get_commits(repo, since, branch)
38
+ total_commits = len(commits)
39
+
40
+ if total_commits == 0:
41
+ return []
42
+
43
+ analyzed_commits = []
44
+
45
+ # Process in batches with progress bar
46
+ with tqdm(total=total_commits, desc=f"Analyzing {repo_path.name}") as pbar:
47
+ for batch in self._batch_commits(commits, self.batch_size):
48
+ batch_results = self._process_batch(repo, repo_path, batch)
49
+ analyzed_commits.extend(batch_results)
50
+
51
+ # Cache the batch
52
+ self.cache.cache_commits_batch(str(repo_path), batch_results)
53
+
54
+ pbar.update(len(batch))
55
+
56
+ return analyzed_commits
57
+
58
+ def _get_commits(self, repo: Repo, since: datetime,
59
+ branch: Optional[str] = None) -> List[git.Commit]:
60
+ """Get commits from repository."""
61
+ if branch:
62
+ try:
63
+ commits = list(repo.iter_commits(branch, since=since))
64
+ except git.GitCommandError:
65
+ # Branch doesn't exist
66
+ return []
67
+ else:
68
+ # Get commits from all branches
69
+ commits = []
70
+ for ref in repo.refs:
71
+ if isinstance(ref, git.RemoteReference):
72
+ continue # Skip remote-tracking branches from any remote, not just origin
73
+ try:
74
+ branch_commits = list(repo.iter_commits(ref, since=since))
75
+ commits.extend(branch_commits)
76
+ except git.GitCommandError:
77
+ continue
78
+
79
+ # Remove duplicates while preserving order
80
+ seen = set()
81
+ unique_commits = []
82
+ for commit in commits:
83
+ if commit.hexsha not in seen:
84
+ seen.add(commit.hexsha)
85
+ unique_commits.append(commit)
86
+
87
+ commits = unique_commits
88
+
89
+ # Sort by date
90
+ return sorted(commits, key=lambda c: c.committed_datetime)
91
+
92
+ def _batch_commits(self, commits: List[git.Commit],
93
+ batch_size: int) -> Generator[List[git.Commit], None, None]:
94
+ """Yield batches of commits."""
95
+ for i in range(0, len(commits), batch_size):
96
+ yield commits[i:i + batch_size]
97
+
98
+ def _process_batch(self, repo: Repo, repo_path: Path,
99
+ commits: List[git.Commit]) -> List[Dict[str, Any]]:
100
+ """Process a batch of commits."""
101
+ results = []
102
+
103
+ for commit in commits:
104
+ # Check cache first
105
+ cached = self.cache.get_cached_commit(str(repo_path), commit.hexsha)
106
+ if cached:
107
+ results.append(cached)
108
+ continue
109
+
110
+ # Analyze commit
111
+ commit_data = self._analyze_commit(repo, commit, repo_path)
112
+ results.append(commit_data)
113
+
114
+ return results
115
+
116
+ def _analyze_commit(self, repo: Repo, commit: git.Commit, repo_path: Path) -> Dict[str, Any]:
117
+ """Analyze a single commit."""
118
+ # Basic commit data
119
+ commit_data = {
120
+ 'hash': commit.hexsha,
121
+ 'author_name': commit.author.name,
122
+ 'author_email': commit.author.email,
123
+ 'message': commit.message,
124
+ 'timestamp': commit.committed_datetime,
125
+ 'is_merge': len(commit.parents) > 1
126
+ }
127
+
128
+ # Get branch name
129
+ commit_data['branch'] = self._get_commit_branch(repo, commit)
130
+
131
+ # Map branch to project
132
+ commit_data['inferred_project'] = self.branch_mapper.map_branch_to_project(
133
+ commit_data['branch'], repo_path
134
+ )
135
+
136
+ # Calculate metrics
137
+ stats = commit.stats.total
138
+ commit_data['files_changed'] = stats.get('files', 0)
139
+ commit_data['insertions'] = stats.get('insertions', 0)
140
+ commit_data['deletions'] = stats.get('deletions', 0)
141
+
142
+ # Extract story points
143
+ commit_data['story_points'] = self.story_point_extractor.extract_from_text(
144
+ commit.message
145
+ )
146
+
147
+ # Extract ticket references
148
+ commit_data['ticket_references'] = self.ticket_extractor.extract_from_text(
149
+ commit.message
150
+ )
151
+
152
+ # Calculate complexity delta
153
+ commit_data['complexity_delta'] = self._calculate_complexity_delta(commit)
154
+
155
+ return commit_data
156
+
157
+ def _get_commit_branch(self, repo: Repo, commit: git.Commit) -> str:
158
+ """Get the branch name for a commit."""
159
+ # Simplified approach: return the first local branch whose history contains the commit (can be slow on large repositories)
160
+ for branch in repo.branches:
161
+ if commit in repo.iter_commits(branch):
162
+ return branch.name
163
+ return 'unknown'
164
+
165
+ def _calculate_complexity_delta(self, commit: git.Commit) -> float:
166
+ """Calculate complexity change for a commit."""
167
+ total_delta = 0.0
168
+
169
+ # Diff from the first parent to this commit so new_file/deleted_file reflect the commit's own changes; create_patch=True populates diff.diff
+ for diff in (commit.parents[0].diff(commit, create_patch=True) if commit.parents else commit.diff(git.NULL_TREE, create_patch=True)):
170
+ if not self._is_code_file(diff.b_path or diff.a_path or ''):
171
+ continue
172
+
173
+ # Simple complexity estimation based on diff size
174
+ # In a real implementation, you'd parse the code and calculate cyclomatic complexity
175
+ if diff.new_file:
176
+ total_delta += diff.b_blob.size / 100 if diff.b_blob else 0
177
+ elif diff.deleted_file:
178
+ total_delta -= diff.a_blob.size / 100 if diff.a_blob else 0
179
+ else:
180
+ # Modified file - estimate based on change size
181
+ added = sum(1 for line in diff.diff.decode('utf-8', errors='ignore').splitlines() if line.startswith('+') and not line.startswith('+++')) if diff.diff else 0
182
+ removed = sum(1 for line in diff.diff.decode('utf-8', errors='ignore').splitlines() if line.startswith('-') and not line.startswith('---')) if diff.diff else 0
183
+ total_delta += (added - removed) / 10
184
+
185
+ return total_delta
186
+
187
+ def _is_code_file(self, filepath: str) -> bool:
188
+ """Check if file is a code file."""
189
+ code_extensions = {
190
+ '.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.hpp',
191
+ '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala',
192
+ '.cs', '.vb', '.r', '.m', '.mm', '.f90', '.f95', '.lua'
193
+ }
194
+
195
+ return any(filepath.endswith(ext) for ext in code_extensions)
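For orientation, here is a minimal usage sketch of the analyzer and cache defined in this hunk. The gitflow_analytics.core.* module paths and the repository path are assumptions inferred from the relative imports above, not something this diff confirms.

    # Minimal usage sketch; module paths below are assumed from the relative imports.
    from datetime import datetime, timedelta
    from pathlib import Path

    from gitflow_analytics.core.analyzer import GitAnalyzer      # assumed module path
    from gitflow_analytics.core.cache import GitAnalysisCache    # assumed module path

    cache_dir = Path('.gitflow-cache')
    cache_dir.mkdir(exist_ok=True)                 # ensure the cache directory exists
    cache = GitAnalysisCache(cache_dir, ttl_hours=168)

    analyzer = GitAnalyzer(cache, batch_size=500)
    commits = analyzer.analyze_repository(
        repo_path=Path('/path/to/repo'),           # placeholder repository path
        since=datetime.now() - timedelta(days=90),
    )
    print(f"Analyzed {len(commits)} commits")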
@@ -0,0 +1,221 @@
1
+ """Map git branches to projects based on naming conventions."""
2
+ import re
3
+ from typing import Dict, List, Optional, Tuple
4
+ from pathlib import Path
5
+
6
+
7
+ class BranchToProjectMapper:
8
+ """Maps git branches to project keys based on conventions."""
9
+
10
+ def __init__(self, mapping_rules: Optional[Dict[str, List[str]]] = None):
11
+ """
12
+ Initialize with custom mapping rules.
13
+
14
+ Args:
15
+ mapping_rules: Dict mapping project keys to list of branch patterns
16
+ e.g., {'FRONTEND': ['feature/fe-*', 'frontend/*']}
17
+ """
18
+ self.mapping_rules = mapping_rules or self._get_default_rules()
19
+ self.compiled_rules = self._compile_patterns()
20
+
21
+ def _get_default_rules(self) -> Dict[str, List[str]]:
22
+ """Get default branch mapping rules."""
23
+ return {
24
+ 'FRONTEND': [
25
+ r'^feature/fe[-/_]',
26
+ r'^feature/frontend[-/_]',
27
+ r'^frontend/',
28
+ r'^fe/',
29
+ r'[-/_]frontend[-/_]',
30
+ r'[-/_]fe[-/_]',
31
+ r'[-/_]ui[-/_]',
32
+ r'[-/_]web[-/_]'
33
+ ],
34
+ 'BACKEND': [
35
+ r'^feature/be[-/_]',
36
+ r'^feature/backend[-/_]',
37
+ r'^backend/',
38
+ r'^be/',
39
+ r'^api/',
40
+ r'[-/_]backend[-/_]',
41
+ r'[-/_]be[-/_]',
42
+ r'[-/_]api[-/_]',
43
+ r'[-/_]server[-/_]'
44
+ ],
45
+ 'SERVICE': [
46
+ r'^feature/service[-/_]',
47
+ r'^feature/svc[-/_]',
48
+ r'^service/',
49
+ r'^svc/',
50
+ r'[-/_]service[-/_]',
51
+ r'[-/_]svc[-/_]',
52
+ r'[-/_]microservice[-/_]'
53
+ ],
54
+ 'MOBILE': [
55
+ r'^feature/mobile[-/_]',
56
+ r'^feature/app[-/_]',
57
+ r'^mobile/',
58
+ r'^app/',
59
+ r'^ios/',
60
+ r'^android/',
61
+ r'[-/_]mobile[-/_]',
62
+ r'[-/_]app[-/_]',
63
+ r'[-/_]ios[-/_]',
64
+ r'[-/_]android[-/_]'
65
+ ],
66
+ 'DATA': [
67
+ r'^feature/data[-/_]',
68
+ r'^feature/etl[-/_]',
69
+ r'^data/',
70
+ r'^etl/',
71
+ r'^pipeline/',
72
+ r'[-/_]data[-/_]',
73
+ r'[-/_]etl[-/_]',
74
+ r'[-/_]pipeline[-/_]',
75
+ r'[-/_]analytics[-/_]'
76
+ ],
77
+ 'INFRA': [
78
+ r'^feature/infra[-/_]',
79
+ r'^feature/devops[-/_]',
80
+ r'^infra/',
81
+ r'^devops/',
82
+ r'^ops/',
83
+ r'[-/_]infra[-/_]',
84
+ r'[-/_]devops[-/_]',
85
+ r'[-/_]ops[-/_]',
86
+ r'[-/_]deployment[-/_]'
87
+ ],
88
+ 'SCRAPER': [
89
+ r'^feature/scraper[-/_]',
90
+ r'^feature/crawler[-/_]',
91
+ r'^scraper/',
92
+ r'^crawler/',
93
+ r'[-/_]scraper[-/_]',
94
+ r'[-/_]crawler[-/_]',
95
+ r'[-/_]scraping[-/_]'
96
+ ]
97
+ }
98
+
99
+ def _compile_patterns(self) -> Dict[str, List[re.Pattern]]:
100
+ """Compile regex patterns for efficiency."""
101
+ compiled = {}
102
+ for project, patterns in self.mapping_rules.items():
103
+ compiled[project] = [re.compile(pattern, re.IGNORECASE) for pattern in patterns]
104
+ return compiled
105
+
106
+ def map_branch_to_project(self, branch_name: str, repo_path: Optional[Path] = None) -> str:
107
+ """
108
+ Map a branch name to a project key.
109
+
110
+ Args:
111
+ branch_name: Git branch name
112
+ repo_path: Optional repository path for context
113
+
114
+ Returns:
115
+ Project key or 'UNKNOWN'
116
+ """
117
+ if not branch_name or branch_name in ['main', 'master', 'develop', 'development']:
118
+ # Try to infer from repo path if available
119
+ if repo_path:
120
+ return self._infer_from_repo_path(repo_path)
121
+ return 'UNKNOWN'
122
+
123
+ # Check against compiled patterns
124
+ for project, patterns in self.compiled_rules.items():
125
+ for pattern in patterns:
126
+ if pattern.search(branch_name):
127
+ return project
128
+
129
+ # Try to extract from ticket references in branch name
130
+ ticket_project = self._extract_from_ticket(branch_name)
131
+ if ticket_project:
132
+ return ticket_project
133
+
134
+ # Try to infer from repo path if available
135
+ if repo_path:
136
+ return self._infer_from_repo_path(repo_path)
137
+
138
+ return 'UNKNOWN'
139
+
140
+ def _extract_from_ticket(self, branch_name: str) -> Optional[str]:
141
+ """Extract project from ticket reference in branch name."""
142
+ # Common ticket patterns
143
+ ticket_patterns = [
144
+ r'([A-Z]{2,})-\d+', # JIRA style: PROJ-123
145
+ r'#([A-Z]{2,})\d+', # Hash prefix: #PROJ123
146
+ r'([A-Z]{2,})_\d+', # Underscore: PROJ_123
147
+ ]
148
+
149
+ for pattern in ticket_patterns:
150
+ match = re.search(pattern, branch_name, re.IGNORECASE)
151
+ if match:
152
+ prefix = match.group(1).upper()
153
+ # Map common prefixes to projects
154
+ prefix_map = {
155
+ 'FE': 'FRONTEND',
156
+ 'BE': 'BACKEND',
157
+ 'SVC': 'SERVICE',
158
+ 'MOB': 'MOBILE',
159
+ 'DATA': 'DATA',
160
+ 'ETL': 'DATA',
161
+ 'INFRA': 'INFRA',
162
+ 'OPS': 'INFRA',
163
+ 'SCRAPE': 'SCRAPER',
164
+ 'CRAWL': 'SCRAPER'
165
+ }
166
+
167
+ if prefix in prefix_map:
168
+ return prefix_map[prefix]
169
+
170
+ # Check if prefix matches any project key
171
+ for project in self.mapping_rules.keys():
172
+ if prefix == project or prefix in project:
173
+ return project
174
+
175
+ return None
176
+
177
+ def _infer_from_repo_path(self, repo_path: Path) -> str:
178
+ """Infer project from repository path."""
179
+ repo_name = repo_path.name.lower()
180
+
181
+ # Direct mapping
182
+ path_map = {
183
+ 'frontend': 'FRONTEND',
184
+ 'backend': 'BACKEND',
185
+ 'service': 'SERVICE',
186
+ 'service-ts': 'SERVICE_TS',
187
+ 'services': 'SERVICES',
188
+ 'mobile': 'MOBILE',
189
+ 'ios': 'MOBILE',
190
+ 'android': 'MOBILE',
191
+ 'data': 'DATA',
192
+ 'etl': 'DATA',
193
+ 'infra': 'INFRA',
194
+ 'infrastructure': 'INFRA',
195
+ 'scraper': 'SCRAPER',
196
+ 'crawler': 'SCRAPER',
197
+ 'scrapers': 'SCRAPER'
198
+ }
199
+
200
+ for key, project in path_map.items():
201
+ if key in repo_name:
202
+ return project
203
+
204
+ # Check parent directory
205
+ if repo_path.parent.name.lower() in path_map:
206
+ return path_map[repo_path.parent.name.lower()]
207
+
208
+ return 'UNKNOWN'
209
+
210
+ def add_mapping_rule(self, project: str, patterns: List[str]):
211
+ """Add custom mapping rules for a project."""
212
+ if project not in self.mapping_rules:
213
+ self.mapping_rules[project] = []
214
+
215
+ self.mapping_rules[project].extend(patterns)
216
+
217
+ # Recompile patterns
218
+ self.compiled_rules[project] = [
219
+ re.compile(pattern, re.IGNORECASE)
220
+ for pattern in self.mapping_rules[project]
221
+ ]
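A short illustration of how the mapper resolves branch names under the default rules above; the module path and the PAYMENTS rule are hypothetical, and the commented results follow from the patterns shown in this hunk.

    from pathlib import Path

    from gitflow_analytics.core.branch_mapper import BranchToProjectMapper  # assumed module path

    mapper = BranchToProjectMapper()
    mapper.map_branch_to_project('feature/fe-login-form')             # 'FRONTEND' via ^feature/fe[-/_]
    mapper.map_branch_to_project('main', Path('/repos/backend-api'))  # 'BACKEND', inferred from the repo name
    mapper.map_branch_to_project('hotfix/urgent')                     # 'UNKNOWN': no rule, ticket, or path hint

    # Custom rules layer on top of the defaults; the project key is hypothetical.
    mapper.add_mapping_rule('PAYMENTS', [r'^feature/pay[-/_]', r'[-/_]billing[-/_]'])
    mapper.map_branch_to_project('feature/pay-checkout')              # 'PAYMENTS'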
@@ -0,0 +1,275 @@
1
+ """Caching layer for Git analysis with SQLite backend."""
2
+ import hashlib
3
+ from datetime import datetime, timedelta
4
+ from typing import List, Optional, Dict, Any
5
+ from pathlib import Path
6
+ from contextlib import contextmanager
7
+
8
+ from sqlalchemy.orm import Session
9
+ from sqlalchemy import and_
10
+
11
+ from ..models.database import Database, CachedCommit, PullRequestCache, IssueCache
12
+
13
+
14
+ class GitAnalysisCache:
15
+ """Cache for Git analysis results."""
16
+
17
+ def __init__(self, cache_dir: Path, ttl_hours: int = 168):
18
+ """Initialize cache with SQLite backend."""
19
+ self.cache_dir = cache_dir
20
+ self.ttl_hours = ttl_hours
21
+ self.db = Database(cache_dir / 'gitflow_cache.db')
22
+
23
+ @contextmanager
24
+ def get_session(self):
25
+ """Get database session context manager."""
26
+ session = self.db.get_session()
27
+ try:
28
+ yield session
29
+ session.commit()
30
+ except Exception:
31
+ session.rollback()
32
+ raise
33
+ finally:
34
+ session.close()
35
+
36
+ def get_cached_commit(self, repo_path: str, commit_hash: str) -> Optional[Dict[str, Any]]:
37
+ """Retrieve cached commit data if not stale."""
38
+ with self.get_session() as session:
39
+ cached = session.query(CachedCommit).filter(
40
+ and_(
41
+ CachedCommit.repo_path == repo_path,
42
+ CachedCommit.commit_hash == commit_hash
43
+ )
44
+ ).first()
45
+
46
+ if cached and not self._is_stale(cached.cached_at):
47
+ return self._commit_to_dict(cached)
48
+
49
+ return None
50
+
51
+ def cache_commit(self, repo_path: str, commit_data: Dict[str, Any]):
52
+ """Cache commit analysis results."""
53
+ with self.get_session() as session:
54
+ # Check if already exists
55
+ existing = session.query(CachedCommit).filter(
56
+ and_(
57
+ CachedCommit.repo_path == repo_path,
58
+ CachedCommit.commit_hash == commit_data['hash']
59
+ )
60
+ ).first()
61
+
62
+ if existing:
63
+ # Update existing
64
+ for key, value in commit_data.items():
65
+ if hasattr(existing, key):
66
+ setattr(existing, key, value)
67
+ existing.cached_at = datetime.utcnow()
68
+ else:
69
+ # Create new
70
+ cached_commit = CachedCommit(
71
+ repo_path=repo_path,
72
+ commit_hash=commit_data['hash'],
73
+ author_name=commit_data.get('author_name'),
74
+ author_email=commit_data.get('author_email'),
75
+ message=commit_data.get('message'),
76
+ timestamp=commit_data.get('timestamp'),
77
+ branch=commit_data.get('branch'),
78
+ is_merge=commit_data.get('is_merge', False),
79
+ files_changed=commit_data.get('files_changed', 0),
80
+ insertions=commit_data.get('insertions', 0),
81
+ deletions=commit_data.get('deletions', 0),
82
+ complexity_delta=commit_data.get('complexity_delta', 0.0),
83
+ story_points=commit_data.get('story_points'),
84
+ ticket_references=commit_data.get('ticket_references', [])
85
+ )
86
+ session.add(cached_commit)
87
+
88
+ def cache_commits_batch(self, repo_path: str, commits: List[Dict[str, Any]]):
89
+ """Cache multiple commits in a single transaction."""
90
+ with self.get_session() as session:
91
+ for commit_data in commits:
92
+ # Check if already exists
93
+ existing = session.query(CachedCommit).filter(
94
+ and_(
95
+ CachedCommit.repo_path == repo_path,
96
+ CachedCommit.commit_hash == commit_data['hash']
97
+ )
98
+ ).first()
99
+
100
+ if existing:
101
+ # Update existing
102
+ for key, value in commit_data.items():
103
+ if key != 'hash' and hasattr(existing, key):
104
+ setattr(existing, key, value)
105
+ existing.cached_at = datetime.utcnow()
106
+ else:
107
+ # Create new
108
+ cached_commit = CachedCommit(
109
+ repo_path=repo_path,
110
+ commit_hash=commit_data['hash'],
111
+ author_name=commit_data.get('author_name'),
112
+ author_email=commit_data.get('author_email'),
113
+ message=commit_data.get('message'),
114
+ timestamp=commit_data.get('timestamp'),
115
+ branch=commit_data.get('branch'),
116
+ is_merge=commit_data.get('is_merge', False),
117
+ files_changed=commit_data.get('files_changed', 0),
118
+ insertions=commit_data.get('insertions', 0),
119
+ deletions=commit_data.get('deletions', 0),
120
+ complexity_delta=commit_data.get('complexity_delta', 0.0),
121
+ story_points=commit_data.get('story_points'),
122
+ ticket_references=commit_data.get('ticket_references', [])
123
+ )
124
+ session.add(cached_commit)
125
+
126
+ def get_cached_pr(self, repo_path: str, pr_number: int) -> Optional[Dict[str, Any]]:
127
+ """Retrieve cached pull request data."""
128
+ with self.get_session() as session:
129
+ cached = session.query(PullRequestCache).filter(
130
+ and_(
131
+ PullRequestCache.repo_path == repo_path,
132
+ PullRequestCache.pr_number == pr_number
133
+ )
134
+ ).first()
135
+
136
+ if cached and not self._is_stale(cached.cached_at):
137
+ return self._pr_to_dict(cached)
138
+
139
+ return None
140
+
141
+ def cache_pr(self, repo_path: str, pr_data: Dict[str, Any]):
142
+ """Cache pull request data."""
143
+ with self.get_session() as session:
144
+ cached_pr = PullRequestCache(
145
+ repo_path=repo_path,
146
+ pr_number=pr_data['number'],
147
+ title=pr_data.get('title'),
148
+ description=pr_data.get('description'),
149
+ author=pr_data.get('author'),
150
+ created_at=pr_data.get('created_at'),
151
+ merged_at=pr_data.get('merged_at'),
152
+ story_points=pr_data.get('story_points'),
153
+ labels=pr_data.get('labels', []),
154
+ commit_hashes=pr_data.get('commit_hashes', [])
155
+ )
156
+ session.merge(cached_pr)
157
+
158
+ def cache_issue(self, platform: str, issue_data: Dict[str, Any]):
159
+ """Cache issue data from various platforms."""
160
+ with self.get_session() as session:
161
+ cached_issue = IssueCache(
162
+ platform=platform,
163
+ issue_id=str(issue_data['id']),
164
+ project_key=issue_data['project_key'],
165
+ title=issue_data.get('title'),
166
+ description=issue_data.get('description'),
167
+ status=issue_data.get('status'),
168
+ assignee=issue_data.get('assignee'),
169
+ created_at=issue_data.get('created_at'),
170
+ updated_at=issue_data.get('updated_at'),
171
+ resolved_at=issue_data.get('resolved_at'),
172
+ story_points=issue_data.get('story_points'),
173
+ labels=issue_data.get('labels', []),
174
+ platform_data=issue_data.get('platform_data', {})
175
+ )
176
+ session.merge(cached_issue)
177
+
178
+ def get_cached_issues(self, platform: str, project_key: str) -> List[Dict[str, Any]]:
179
+ """Get all cached issues for a platform and project."""
180
+ with self.get_session() as session:
181
+ issues = session.query(IssueCache).filter(
182
+ and_(
183
+ IssueCache.platform == platform,
184
+ IssueCache.project_key == project_key
185
+ )
186
+ ).all()
187
+
188
+ return [self._issue_to_dict(issue) for issue in issues
189
+ if not self._is_stale(issue.cached_at)]
190
+
191
+ def clear_stale_cache(self):
192
+ """Remove stale cache entries."""
193
+ if self.ttl_hours == 0:
+ return # ttl_hours == 0 means entries never expire (see _is_stale)
+ cutoff_time = datetime.utcnow() - timedelta(hours=self.ttl_hours)
194
+
195
+ with self.get_session() as session:
196
+ session.query(CachedCommit).filter(
197
+ CachedCommit.cached_at < cutoff_time
198
+ ).delete()
199
+
200
+ session.query(PullRequestCache).filter(
201
+ PullRequestCache.cached_at < cutoff_time
202
+ ).delete()
203
+
204
+ session.query(IssueCache).filter(
205
+ IssueCache.cached_at < cutoff_time
206
+ ).delete()
207
+
208
+ def get_cache_stats(self) -> Dict[str, int]:
209
+ """Get cache statistics."""
210
+ with self.get_session() as session:
211
+ stats = {
212
+ 'cached_commits': session.query(CachedCommit).count(),
213
+ 'cached_prs': session.query(PullRequestCache).count(),
214
+ 'cached_issues': session.query(IssueCache).count(),
215
+ 'stale_commits': session.query(CachedCommit).filter(
216
+ CachedCommit.cached_at < datetime.utcnow() - timedelta(hours=self.ttl_hours)
217
+ ).count()
218
+ }
219
+ return stats
220
+
221
+ def _is_stale(self, cached_at: datetime) -> bool:
222
+ """Check if cache entry is stale."""
223
+ if self.ttl_hours == 0: # No expiration
224
+ return False
225
+ return cached_at < datetime.utcnow() - timedelta(hours=self.ttl_hours)
226
+
227
+ def _commit_to_dict(self, commit: CachedCommit) -> Dict[str, Any]:
228
+ """Convert CachedCommit to dictionary."""
229
+ return {
230
+ 'hash': commit.commit_hash,
231
+ 'author_name': commit.author_name,
232
+ 'author_email': commit.author_email,
233
+ 'message': commit.message,
234
+ 'timestamp': commit.timestamp,
235
+ 'branch': commit.branch,
236
+ 'is_merge': commit.is_merge,
237
+ 'files_changed': commit.files_changed,
238
+ 'insertions': commit.insertions,
239
+ 'deletions': commit.deletions,
240
+ 'complexity_delta': commit.complexity_delta,
241
+ 'story_points': commit.story_points,
242
+ 'ticket_references': commit.ticket_references or []
243
+ }
244
+
245
+ def _pr_to_dict(self, pr: PullRequestCache) -> Dict[str, Any]:
246
+ """Convert PullRequestCache to dictionary."""
247
+ return {
248
+ 'number': pr.pr_number,
249
+ 'title': pr.title,
250
+ 'description': pr.description,
251
+ 'author': pr.author,
252
+ 'created_at': pr.created_at,
253
+ 'merged_at': pr.merged_at,
254
+ 'story_points': pr.story_points,
255
+ 'labels': pr.labels or [],
256
+ 'commit_hashes': pr.commit_hashes or []
257
+ }
258
+
259
+ def _issue_to_dict(self, issue: IssueCache) -> Dict[str, Any]:
260
+ """Convert IssueCache to dictionary."""
261
+ return {
262
+ 'platform': issue.platform,
263
+ 'id': issue.issue_id,
264
+ 'project_key': issue.project_key,
265
+ 'title': issue.title,
266
+ 'description': issue.description,
267
+ 'status': issue.status,
268
+ 'assignee': issue.assignee,
269
+ 'created_at': issue.created_at,
270
+ 'updated_at': issue.updated_at,
271
+ 'resolved_at': issue.resolved_at,
272
+ 'story_points': issue.story_points,
273
+ 'labels': issue.labels or [],
274
+ 'platform_data': issue.platform_data or {}
275
+ }
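Finally, a sketch of driving the cache directly (in normal use GitAnalyzer writes to it); the module path and the sample commit dictionary are illustrative only.

    from pathlib import Path

    from gitflow_analytics.core.cache import GitAnalysisCache  # assumed module path

    cache_dir = Path('.gitflow-cache')
    cache_dir.mkdir(exist_ok=True)
    cache = GitAnalysisCache(cache_dir, ttl_hours=24)  # ttl_hours=0 disables expiration

    # Only 'hash' is required; the remaining fields fall back to the defaults in cache_commit().
    cache.cache_commit('/repos/backend-api', {
        'hash': 'abc123',
        'author_name': 'Jane Doe',
        'author_email': 'jane@example.com',
        'message': 'PROJ-42: fix pagination',
        'is_merge': False,
    })

    cache.get_cached_commit('/repos/backend-api', 'abc123')  # dict while fresh, None once stale
    cache.clear_stale_cache()                                 # purge entries older than ttl_hours
    print(cache.get_cache_stats())                            # counts of cached commits, PRs, and issues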