git-recap 0.1.0.tar.gz → 0.1.2.tar.gz

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: git-recap
- Version: 0.1.0
+ Version: 0.1.2
  Summary: A modular Python tool that aggregates and formats user-authored messages from repositories.
  Author: Bruno V.
  Author-email: bruno.vitorino@tecnico.ulisboa.pt
@@ -1,9 +1,11 @@
  from git_recap.providers.azure_fetcher import AzureFetcher
  from git_recap.providers.github_fetcher import GitHubFetcher
  from git_recap.providers.gitlab_fetcher import GitLabFetcher
+ from git_recap.providers.url_fetcher import URLFetcher

  __all__ = [
      "AzureFetcher",
      "GitHubFetcher",
-     "GitLabFetcher"
- ]
+     "GitLabFetcher",
+     "URLFetcher"
+ ]
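Version 0.1.2 adds URLFetcher to the provider exports, so all four fetchers are importable from the same place and share the BaseFetcher interface. A brief sketch of the resulting surface (illustrative only; the token value is a placeholder):

    from git_recap.providers import AzureFetcher, GitHubFetcher, GitLabFetcher, URLFetcher

    fetcher = GitHubFetcher(pat="ghp_XXXXXXXX")  # placeholder token
    commits = fetcher.fetch_commits()
    print(fetcher.repos_names)  # names of the repositories the fetcher will scan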
@@ -13,10 +13,23 @@ class AzureFetcher(BaseFetcher):
          self.connection = Connection(base_url=self.organization_url, creds=credentials)
          self.core_client = self.connection.clients.get_core_client()
          self.git_client = self.connection.clients.get_git_client()
-         # If no authors provided, default to an empty list.
+         self.repos = self.get_repos()
+         # Azure DevOps doesn't provide an affiliation filter;
+         # we'll iterate over all repos in each project.
          if authors is None:
              self.authors = []
-
+
+     def get_repos(self):
+         projects = self.core_client.get_projects().value
+         # Get all repositories in each project
+         repos = [self.git_client.get_repositories(project.id) for project in projects]
+         return repos
+
+     @property
+     def repos_names(self)->List[str]:
+         "to be implemented later"
+         ...
+
      def _filter_by_date(self, date_obj: datetime) -> bool:
          if self.start_date and date_obj < self.start_date:
              return False
@@ -32,39 +45,35 @@ class AzureFetcher(BaseFetcher):
      def fetch_commits(self) -> List[Dict[str, Any]]:
          entries = []
          processed_commits = set()
-         projects = self.core_client.get_projects().value
-         for project in projects:
-             repos = self.git_client.get_repositories(project.id)
-             for repo in repos:
-                 if self.repo_filter and repo.name not in self.repo_filter:
+         for repo in self.repos:
+             if self.repo_filter and repo.name not in self.repo_filter:
+                 continue
+             for author in self.authors:
+                 try:
+                     commits = self.git_client.get_commits(
+                         project=repo.id,
+                         repository_id=repo.id,
+                         search_criteria={"author": author}
+                     )
+                 except Exception:
                      continue
-                 # Iterate over provided authors
-                 for author in self.authors:
-                     try:
-                         commits = self.git_client.get_commits(
-                             project=project.id,
-                             repository_id=repo.id,
-                             search_criteria={"author": author}
-                         )
-                     except Exception:
-                         continue
-                     for commit in commits:
-                         # Azure DevOps returns a commit with an 'author' property
-                         commit_date = commit.author.date # type: datetime
-                         if self._filter_by_date(commit_date):
-                             sha = commit.commit_id
-                             if sha not in processed_commits:
-                                 entry = {
-                                     "type": "commit",
-                                     "repo": repo.name,
-                                     "message": commit.comment.strip(),
-                                     "timestamp": commit_date,
-                                     "sha": sha,
-                                 }
-                                 entries.append(entry)
-                                 processed_commits.add(sha)
-                         if self._stop_fetching(commit_date):
-                             break
+                 for commit in commits:
+                     # Azure DevOps returns a commit with an 'author' property.
+                     commit_date = commit.author.date # assumed datetime
+                     if self._filter_by_date(commit_date):
+                         sha = commit.commit_id
+                         if sha not in processed_commits:
+                             entry = {
+                                 "type": "commit",
+                                 "repo": repo.name,
+                                 "message": commit.comment.strip(),
+                                 "timestamp": commit_date,
+                                 "sha": sha,
+                             }
+                             entries.append(entry)
+                             processed_commits.add(sha)
+                     if self._stop_fetching(commit_date):
+                         break
          return entries

      def fetch_pull_requests(self) -> List[Dict[str, Any]]:
@@ -130,9 +139,8 @@ class AzureFetcher(BaseFetcher):

      def fetch_issues(self) -> List[Dict[str, Any]]:
          entries = []
-         # Azure DevOps issues are typically tracked as Work Items.
          wit_client = self.connection.clients.get_work_item_tracking_client()
-         # Query work items for each author; this is a simplified WIQL query.
+         # Query work items for each author using a simplified WIQL query.
          for author in self.authors:
              wiql = f"SELECT [System.Id], [System.Title], [System.CreatedDate] FROM WorkItems WHERE [System.AssignedTo] CONTAINS '{author}'"
              try:
@@ -141,7 +149,6 @@ class AzureFetcher(BaseFetcher):
                  continue
              for item_ref in query_result:
                  work_item = wit_client.get_work_item(item_ref.id)
-                 # The created date is a string; convert it to a datetime.
                  created_date = datetime.fromisoformat(work_item.fields["System.CreatedDate"])
                  if self._filter_by_date(created_date):
                      entry = {
@@ -25,6 +25,11 @@ class BaseFetcher(ABC):
          self.repo_filter = repo_filter or []
          self.limit = -1
          self.authors = [] if authors is None else authors
+
+     @property
+     @abstractmethod
+     def repos_names(self)->List[str]:
+         pass

      @abstractmethod
      def fetch_commits(self) -> List[str]:
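With repos_names now declared as an abstract property on BaseFetcher, every provider has to implement it. A minimal sketch of what a conforming subclass could look like, assuming the other fetch_* methods are also abstract as the concrete providers suggest (the class and its return values are invented for illustration):

    from typing import Any, Dict, List
    from git_recap.providers.base_fetcher import BaseFetcher

    class InMemoryFetcher(BaseFetcher):  # hypothetical subclass, not part of the package
        @property
        def repos_names(self) -> List[str]:
            return ["demo-repo"]

        def fetch_commits(self) -> List[Dict[str, Any]]:
            return []

        def fetch_pull_requests(self) -> List[Dict[str, Any]]:
            return []

        def fetch_issues(self) -> List[Dict[str, Any]]:
            return []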
@@ -46,7 +51,10 @@ class BaseFetcher(ABC):
          """
          commit_entries = self.fetch_commits()
          pr_entries = self.fetch_pull_requests()
-         issue_entries = self.fetch_issues()
+         try:
+             issue_entries = self.fetch_issues()
+         except Exception as e:
+             issue_entries = []

          all_entries = pr_entries + commit_entries + issue_entries

@@ -7,9 +7,14 @@ class GitHubFetcher(BaseFetcher):
      def __init__(self, pat: str, start_date=None, end_date=None, repo_filter=None, authors=None):
          super().__init__(pat, start_date, end_date, repo_filter, authors)
          self.github = Github(self.pat)
-         self.user = self.github.get_user()
+         self.user = self.github.get_user()
+         self.repos = self.user.get_repos(affiliation="owner,collaborator,organization_member")
          self.authors.append(self.user.login)

+     @property
+     def repos_names(self)->List[str]:
+         return [repo.name for repo in self.repos]
+
      def _stop_fetching(self, date_obj: datetime) -> bool:
          if self.start_date and date_obj < self.start_date:
              return True
@@ -25,8 +30,7 @@ class GitHubFetcher(BaseFetcher):
      def fetch_commits(self) -> List[Dict[str, Any]]:
          entries = []
          processed_commits = set()
-         repos = self.user.get_repos()
-         for repo in repos:
+         for repo in self.repos:
              if self.repo_filter and repo.name not in self.repo_filter:
                  continue
              for author in self.authors:
@@ -53,13 +57,13 @@ class GitHubFetcher(BaseFetcher):
          entries = []
          # Maintain a local set to skip duplicate commits already captured in a PR.
          processed_pr_commits = set()
-         repos = self.user.get_repos()
-         for repo in repos:
+         # Retrieve repos where you're owner, a collaborator, or an organization member.
+         for repo in self.repos:
              if self.repo_filter and repo.name not in self.repo_filter:
                  continue
              pulls = repo.get_pulls(state='all')
              for i, pr in enumerate(pulls, start=1):
-                 if pr.user.login not in self.authors:
+                 if pr.user.login not in self.authors:
                      continue
                  pr_date = pr.updated_at # alternatively, use pr.created_at
                  if not self._filter_by_date(pr_date):
@@ -97,6 +101,7 @@ class GitHubFetcher(BaseFetcher):
                      break
          return entries

+
      def fetch_issues(self) -> List[Dict[str, Any]]:
          entries = []
          issues = self.user.get_issues()
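GitHubFetcher now lists repositories once in the constructor and reuses that cached collection in fetch_commits and fetch_pull_requests, and the affiliation argument widens the result from owned repositories to ones you collaborate on or can reach through an organization. In isolation, the underlying PyGithub call looks roughly like this (the token is a placeholder):

    from github import Github  # PyGithub

    gh = Github("ghp_XXXXXXXX")  # placeholder personal access token
    user = gh.get_user()
    # Owned repos plus collaborator and organization-member repos.
    repos = user.get_repos(affiliation="owner,collaborator,organization_member")
    print([repo.name for repo in repos])  # what the new repos_names property returns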
@@ -8,12 +8,19 @@ class GitLabFetcher(BaseFetcher):
          super().__init__(pat, start_date, end_date, repo_filter, authors)
          self.gl = gitlab.Gitlab(url, private_token=self.pat)
          self.gl.auth()
+         # Instead of only owned projects, retrieve projects where you're a member.
+         self.projects = self.gl.projects.list(membership=True, all=True)
          # Default to the authenticated user's username if no authors are provided.
          if authors is None:
              self.authors = [self.gl.user.username]
          else:
              self.authors = authors

+     @property
+     def repos_names(self)->List[str]:
+         "to be implemented later"
+         return [project.name for project in self.projects]
+
      def _filter_by_date(self, date_str: str) -> bool:
          date_obj = datetime.fromisoformat(date_str)
          if self.start_date and date_obj < self.start_date:
@@ -31,8 +38,7 @@ class GitLabFetcher(BaseFetcher):
      def fetch_commits(self) -> List[Dict[str, Any]]:
          entries = []
          processed_commits = set()
-         projects = self.gl.projects.list(owned=True, all=True)
-         for project in projects:
+         for project in self.projects:
              if self.repo_filter and project.name not in self.repo_filter:
                  continue
              for author in self.authors:
@@ -59,11 +65,10 @@ class GitLabFetcher(BaseFetcher):
      def fetch_pull_requests(self) -> List[Dict[str, Any]]:
          entries = []
          processed_pr_commits = set()
-         projects = self.gl.projects.list(owned=True, all=True)
-         for project in projects:
+         for project in self.projects:
              if self.repo_filter and project.name not in self.repo_filter:
                  continue
-             # Fetch merge requests (the GitLab equivalent of pull requests)
+             # Fetch merge requests (GitLab's pull requests)
              merge_requests = project.mergerequests.list(state='all', all=True)
              for mr in merge_requests:
                  if mr.author['username'] not in self.authors:
@@ -105,8 +110,7 @@ class GitLabFetcher(BaseFetcher):

      def fetch_issues(self) -> List[Dict[str, Any]]:
          entries = []
-         projects = self.gl.projects.list(owned=True, all=True)
-         for project in projects:
+         for project in self.projects:
              if self.repo_filter and project.name not in self.repo_filter:
                  continue
              issues = project.issues.list(assignee_id=self.gl.user.id)
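GitLabFetcher makes the analogous change: projects are listed once at construction time, and membership=True replaces owned=True so that projects you merely belong to are included. A small python-gitlab sketch of the difference (the token is a placeholder):

    import gitlab

    gl = gitlab.Gitlab("https://gitlab.com", private_token="glpat-XXXXXXXX")  # placeholder token
    gl.auth()

    owned_only = gl.projects.list(owned=True, all=True)      # 0.1.0: only projects you own
    member_of = gl.projects.list(membership=True, all=True)  # 0.1.2: projects you are a member of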
@@ -0,0 +1,232 @@
+ import os
+ import re
+ import shutil
+ import subprocess
+ from pathlib import Path
+ import tempfile
+ from typing import List, Dict, Any, Optional
+ from datetime import datetime
+ from git_recap.providers.base_fetcher import BaseFetcher
+
+
+ class URLFetcher(BaseFetcher):
+     """Fetcher implementation for generic Git repository URLs."""
+
+     GIT_URL_PATTERN = re.compile(
+         r'^(?:http|https|git|ssh)://' # Protocol
+         r'(?:\S+@)?' # Optional username
+         r'([^/]+)' # Domain
+         r'(?:[:/])([^/]+/[^/]+?)(?:\.git)?$' # Repo path
+     )
+
+     def __init__(
+         self,
+         url: str,
+         start_date: Optional[datetime] = None,
+         end_date: Optional[datetime] = None,
+         repo_filter: Optional[List[str]] = None,
+         authors: Optional[List[str]] = None
+     ):
+         super().__init__(
+             pat="", # No PAT needed for URL fetcher
+             start_date=start_date,
+             end_date=end_date,
+             repo_filter=repo_filter,
+             authors=authors
+         )
+         self.url = self._normalize_url(url)
+         self.temp_dir = None
+         self._validate_url()
+         self._clone_repo()
+
+     def _normalize_url(self, url: str) -> str:
+         """Normalize the Git URL to ensure consistent format."""
+         url = url.strip()
+         if not url.endswith('.git'):
+             url += '.git'
+         if not any(url.startswith(proto) for proto in ('http://', 'https://', 'git://', 'ssh://')):
+             url = f'https://{url}'
+         return url
+
+     def _validate_url(self) -> None:
+         """Validate the Git repository URL using git ls-remote."""
+         if not self.GIT_URL_PATTERN.match(self.url):
+             raise ValueError(f"Invalid Git repository URL format: {self.url}")
+
+         try:
+             result = subprocess.run(
+                 ["git", "ls-remote", self.url],
+                 capture_output=True,
+                 text=True,
+                 check=True,
+                 timeout=10 # Add timeout to prevent hanging
+             )
+             if not result.stdout.strip():
+                 raise ValueError(f"URL {self.url} points to an empty repository")
+         except subprocess.TimeoutExpired:
+             raise ValueError(f"Timeout while validating URL {self.url}")
+         except subprocess.CalledProcessError as e:
+             raise ValueError(f"Invalid Git repository URL: {self.url}. Error: {e.stderr}") from e
+
+     def _clone_repo(self) -> None:
+         """Clone the repository to a temporary directory with all branches."""
+         self.temp_dir = tempfile.mkdtemp(prefix="gitrecap_")
+         try:
+             # First clone with --no-checkout to save bandwidth
+             subprocess.run(
+                 ["git", "clone", "--no-checkout", self.url, self.temp_dir],
+                 check=True,
+                 capture_output=True,
+                 text=True,
+                 timeout=300
+             )
+
+             # Fetch all branches
+             subprocess.run(
+                 ["git", "-C", self.temp_dir, "fetch", "--all"],
+                 check=True,
+                 capture_output=True,
+                 text=True,
+                 timeout=300
+             )
+
+             # Verify the cloned repository has at least one commit
+             verify_result = subprocess.run(
+                 ["git", "-C", self.temp_dir, "rev-list", "--count", "--all"],
+                 capture_output=True,
+                 text=True,
+                 check=True
+             )
+             if int(verify_result.stdout.strip()) == 0:
+                 raise ValueError("Cloned repository has no commits")
+
+         except subprocess.TimeoutExpired:
+             raise RuntimeError("Repository cloning timed out")
+         except subprocess.CalledProcessError as e:
+             raise RuntimeError(f"Failed to clone repository: {e.stderr}") from e
+         except Exception as e:
+             self.clear()
+             raise RuntimeError(f"Unexpected error during cloning: {str(e)}") from e
+
+     @property
+     def repos_names(self) -> List[str]:
+         """Return list of repository names (single item for URL fetcher)."""
+         if not self.temp_dir:
+             return []
+
+         match = self.GIT_URL_PATTERN.match(self.url)
+         if not match:
+             return []
+
+         repo_name = match.group(2).split('/')[-1]
+         if repo_name.endswith(".git"):
+             repo_name = repo_name[:-4]
+
+         return [repo_name]
+
+     def _get_all_branches(self) -> List[str]:
+         """Get list of all remote branches in the repository."""
+         if not self.temp_dir:
+             return []
+
+         try:
+             result = subprocess.run(
+                 ["git", "-C", self.temp_dir, "branch", "-r", "--format=%(refname:short)"],
+                 capture_output=True,
+                 text=True,
+                 check=True
+             )
+             branches = [b.strip() for b in result.stdout.splitlines() if b.strip()]
+             # Filter out HEAD reference if present
+             return [b for b in branches if not b.endswith('/HEAD')]
+         except subprocess.CalledProcessError:
+             return []
+
+     def _run_git_log(self, extra_args: List[str] = None) -> List[Dict[str, Any]]:
+         """Run git log command with common arguments and parse output."""
+         if not self.temp_dir:
+             return []
+
+         args = [
+             "git",
+             "-C", self.temp_dir,
+             "log",
+             "--pretty=format:%H|%an|%ad|%s",
+             "--date=iso",
+             "--all" # Include all branches and tags
+         ]
+
+         if self.start_date:
+             args.extend(["--since", self.start_date.isoformat()])
+         if self.end_date:
+             args.extend(["--until", self.end_date.isoformat()])
+         if self.authors:
+             authors_filter = "|".join(self.authors)
+             args.extend(["--author", authors_filter])
+         if extra_args:
+             args.extend(extra_args)
+
+         try:
+             result = subprocess.run(
+                 args,
+                 capture_output=True,
+                 text=True,
+                 check=True,
+                 timeout=120 # Increased timeout for large repositories
+             )
+             return self._parse_git_log(result.stdout)
+         except subprocess.TimeoutExpired:
+             return []
+         except subprocess.CalledProcessError:
+             return []
+
+     def _parse_git_log(self, log_output: str) -> List[Dict[str, Any]]:
+         """Parse git log output into structured data."""
+         entries = []
+         for line in log_output.splitlines():
+             if not line.strip():
+                 continue
+
+             try:
+                 sha, author, date_str, message = line.split("|", 3)
+                 timestamp = datetime.fromisoformat(date_str)
+
+                 if self.start_date and timestamp < self.start_date:
+                     continue
+                 if self.end_date and timestamp > self.end_date:
+                     continue
+
+                 entries.append({
+                     "type": "commit",
+                     "repo": self.repos_names[0],
+                     "message": message,
+                     "sha": sha,
+                     "author": author,
+                     "timestamp": timestamp
+                 })
+             except ValueError:
+                 continue # Skip malformed log entries
+
+         return entries
+
+     def fetch_commits(self) -> List[Dict[str, Any]]:
+         """Fetch commits from all branches in the cloned repository."""
+         return self._run_git_log()
+
+     def fetch_pull_requests(self) -> List[Dict[str, Any]]:
+         """Fetch pull requests (not implemented for generic Git URLs)."""
+         return []
+
+     def fetch_issues(self) -> List[Dict[str, Any]]:
+         """Fetch issues (not implemented for generic Git URLs)."""
+         return []
+
+     def clear(self) -> None:
+         """Clean up temporary directory."""
+         if self.temp_dir and os.path.exists(self.temp_dir):
+             try:
+                 shutil.rmtree(self.temp_dir, ignore_errors=True)
+             except Exception:
+                 pass # Ensure we don't raise during cleanup
+             finally:
+                 self.temp_dir = None
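Unlike the other providers, URLFetcher shells out to the git binary instead of calling a hosting API: the constructor validates the URL with git ls-remote, clones into a temporary directory, and fetch_commits parses git log --all, while pull requests and issues are deliberately empty. A hedged usage sketch (the repository URL is a placeholder; cloning needs network access and git on PATH):

    from git_recap.providers import URLFetcher

    fetcher = URLFetcher(url="https://github.com/example/example-repo")  # placeholder URL
    try:
        commits = fetcher.fetch_commits()  # parsed from `git log --all`
        print(fetcher.repos_names)         # e.g. ['example-repo']
    finally:
        fetcher.clear()                    # delete the temporary clone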
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: git-recap
- Version: 0.1.0
+ Version: 0.1.2
  Summary: A modular Python tool that aggregates and formats user-authored messages from repositories.
  Author: Bruno V.
  Author-email: bruno.vitorino@tecnico.ulisboa.pt
@@ -14,5 +14,6 @@ git_recap/providers/azure_fetcher.py
  git_recap/providers/base_fetcher.py
  git_recap/providers/github_fetcher.py
  git_recap/providers/gitlab_fetcher.py
+ git_recap/providers/url_fetcher.py
  tests/test_dummy_parser.py
  tests/test_parser.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:

  setup(
      name="git-recap",
-     version="0.1.0",
+     version="0.1.2",
      packages=find_packages(),
      install_requires=[
          "PyGithub==2.6.1",
@@ -22,4 +22,4 @@ setup(
          "License :: OSI Approved :: MIT License",
          "Operating System :: OS Independent",
      ],
- )
+ )
@@ -42,6 +42,10 @@ class DummyFetcher(BaseFetcher):
              if isinstance(entry["timestamp"], datetime):
                  entry["timestamp"] = entry["timestamp"].isoformat()
          return entries
+
+     @property
+     def repos_names(self):
+         ...

  def test_get_authored_messages():
      # Create a dummy fetcher with a date range covering March 2025.