git-recap 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: git-recap
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: A modular Python tool that aggregates and formats user-authored messages from repositories.
5
5
  Author: Bruno V.
6
6
  Author-email: bruno.vitorino@tecnico.ulisboa.pt
@@ -1,9 +1,11 @@
1
1
  from git_recap.providers.azure_fetcher import AzureFetcher
2
2
  from git_recap.providers.github_fetcher import GitHubFetcher
3
3
  from git_recap.providers.gitlab_fetcher import GitLabFetcher
4
+ from git_recap.providers.url_fetcher import URLFetcher
4
5
 
5
6
  __all__ = [
6
7
  "AzureFetcher",
7
8
  "GitHubFetcher",
8
- "GitLabFetcher"
9
- ]
9
+ "GitLabFetcher",
10
+ "URLFetcher"
11
+ ]
@@ -0,0 +1,232 @@
1
+ import os
2
+ import re
3
+ import shutil
4
+ import subprocess
5
+ from pathlib import Path
6
+ import tempfile
7
+ from typing import List, Dict, Any, Optional
8
+ from datetime import datetime
9
+ from git_recap.providers.base_fetcher import BaseFetcher
10
+
11
+
12
class URLFetcher(BaseFetcher):
    """Fetcher implementation for generic Git repository URLs.

    Constructing the fetcher validates the URL with ``git ls-remote`` and
    clones the repository (without a checkout) into a temporary directory.
    Commits are then read from that clone with ``git log``.  Call
    :meth:`clear` when finished to delete the temporary clone.
    """

    GIT_URL_PATTERN = re.compile(
        r'^(?:http|https|git|ssh)://'  # Protocol
        r'(?:\S+@)?'  # Optional username
        r'([^/]+)'  # Domain
        r'(?:[:/])([^/]+/[^/]+?)(?:\.git)?$'  # Repo path
    )

    def __init__(
        self,
        url: str,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None,
        repo_filter: Optional[List[str]] = None,
        authors: Optional[List[str]] = None
    ):
        """Validate *url* and clone the repository it points to.

        Args:
            url: Repository location; ``https://`` is assumed when no
                protocol is given and ``.git`` is appended when missing.
            start_date: Optional lower bound for commit timestamps.
            end_date: Optional upper bound for commit timestamps.
            repo_filter: Forwarded unchanged to ``BaseFetcher``.
            authors: Optional author patterns used to filter ``git log``.

        Raises:
            ValueError: If the URL is malformed or ``git ls-remote`` rejects it.
            RuntimeError: If cloning the repository fails.
        """
        super().__init__(
            pat="",  # No PAT needed for URL fetcher
            start_date=start_date,
            end_date=end_date,
            repo_filter=repo_filter,
            authors=authors
        )
        self.url = self._normalize_url(url)
        self.temp_dir = None
        self._validate_url()
        self._clone_repo()

    def _normalize_url(self, url: str) -> str:
        """Return *url* with a protocol prefix and a ``.git`` suffix.

        Surrounding whitespace and trailing slashes are removed first so that
        ``host/owner/repo/`` becomes ``https://host/owner/repo.git`` rather
        than the unusable ``https://host/owner/repo/.git``.
        """
        url = url.strip().rstrip('/')
        if not url.endswith('.git'):
            url += '.git'
        if not url.startswith(('http://', 'https://', 'git://', 'ssh://')):
            url = f'https://{url}'
        return url

    def _validate_url(self) -> None:
        """Validate the Git repository URL using git ls-remote.

        Raises:
            ValueError: If the URL does not match ``GIT_URL_PATTERN``, the
                remote lists no refs, the check times out, or git exits with
                an error.
        """
        if not self.GIT_URL_PATTERN.match(self.url):
            raise ValueError(f"Invalid Git repository URL format: {self.url}")

        try:
            result = subprocess.run(
                ["git", "ls-remote", self.url],
                capture_output=True,
                text=True,
                check=True,
                timeout=10  # Add timeout to prevent hanging
            )
        except subprocess.TimeoutExpired as e:
            raise ValueError(f"Timeout while validating URL {self.url}") from e
        except subprocess.CalledProcessError as e:
            raise ValueError(f"Invalid Git repository URL: {self.url}. Error: {e.stderr}") from e

        # ls-remote succeeds with empty output when the remote has no refs.
        if not result.stdout.strip():
            raise ValueError(f"URL {self.url} points to an empty repository")

    def _clone_repo(self) -> None:
        """Clone the repository to a temporary directory with all branches.

        The temporary directory is removed again on every failure path so a
        failed construction never leaves a stale clone behind.

        Raises:
            RuntimeError: If a git command times out or fails, or if the
                clone turns out to contain no commits.
        """
        self.temp_dir = tempfile.mkdtemp(prefix="gitrecap_")
        try:
            # Only history is needed, so skip populating a working tree.
            subprocess.run(
                ["git", "clone", "--no-checkout", self.url, self.temp_dir],
                check=True,
                capture_output=True,
                text=True,
                timeout=300
            )

            # Fetch all branches
            subprocess.run(
                ["git", "-C", self.temp_dir, "fetch", "--all"],
                check=True,
                capture_output=True,
                text=True,
                timeout=300
            )

            # Verify the cloned repository has at least one commit
            verify_result = subprocess.run(
                ["git", "-C", self.temp_dir, "rev-list", "--count", "--all"],
                capture_output=True,
                text=True,
                check=True
            )
            if int(verify_result.stdout.strip()) == 0:
                raise ValueError("Cloned repository has no commits")

        except subprocess.TimeoutExpired as e:
            self.clear()
            raise RuntimeError("Repository cloning timed out") from e
        except subprocess.CalledProcessError as e:
            self.clear()
            raise RuntimeError(f"Failed to clone repository: {e.stderr}") from e
        except Exception as e:
            self.clear()
            raise RuntimeError(f"Unexpected error during cloning: {str(e)}") from e

    @property
    def repos_names(self) -> List[str]:
        """Return list of repository names (single item for URL fetcher).

        The name is the last path segment of the URL without its ``.git``
        suffix.  An empty list is returned when there is no clone or the URL
        does not match ``GIT_URL_PATTERN``.
        """
        if not self.temp_dir:
            return []

        match = self.GIT_URL_PATTERN.match(self.url)
        if not match:
            return []

        repo_name = match.group(2).split('/')[-1]
        if repo_name.endswith(".git"):
            repo_name = repo_name[:-4]

        return [repo_name]

    def _get_all_branches(self) -> List[str]:
        """Get list of all remote branches in the repository.

        Returns an empty list when there is no clone or git reports an error.
        """
        if not self.temp_dir:
            return []

        try:
            result = subprocess.run(
                ["git", "-C", self.temp_dir, "branch", "-r", "--format=%(refname:short)"],
                capture_output=True,
                text=True,
                check=True
            )
        except subprocess.CalledProcessError:
            return []

        branches = [b.strip() for b in result.stdout.splitlines() if b.strip()]
        # Filter out HEAD reference if present
        return [b for b in branches if not b.endswith('/HEAD')]

    def _run_git_log(self, extra_args: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Run git log command with common arguments and parse output.

        Args:
            extra_args: Additional arguments appended to the ``git log``
                command line.

        Returns:
            Parsed commit entries, or an empty list when there is no clone or
            the git command times out or fails (best effort by design).
        """
        if not self.temp_dir:
            return []

        args = [
            "git",
            "-C", self.temp_dir,
            "log",
            "--pretty=format:%H|%an|%ad|%s",
            # iso-strict yields e.g. 2024-01-01T12:00:00+01:00, which
            # datetime.fromisoformat can parse; plain "iso" puts a space
            # before the offset, which it cannot.
            "--date=iso-strict",
            "--all"  # Include all branches and tags
        ]

        if self.start_date:
            args.extend(["--since", self.start_date.isoformat()])
        if self.end_date:
            args.extend(["--until", self.end_date.isoformat()])
        # git ORs repeated --author options.  Joining the names with "|"
        # does not work because git's default (basic) regex treats "|" as a
        # literal character.
        for author in self.authors or []:
            args.extend(["--author", author])
        if extra_args:
            args.extend(extra_args)

        try:
            result = subprocess.run(
                args,
                capture_output=True,
                text=True,
                check=True,
                timeout=120  # Increased timeout for large repositories
            )
        except (subprocess.TimeoutExpired, subprocess.CalledProcessError):
            return []

        return self._parse_git_log(result.stdout)

    @staticmethod
    def _parse_git_date(date_str: str) -> datetime:
        """Parse a git author date in ``iso-strict`` or ``iso`` format.

        Raises:
            ValueError: If the text matches neither format.
        """
        text = date_str.strip()
        if text.endswith("Z"):
            # Older fromisoformat implementations reject the "Z" suffix.
            text = text[:-1] + "+00:00"
        try:
            return datetime.fromisoformat(text)
        except ValueError:
            # Layout produced by ``git log --date=iso``.
            return datetime.strptime(text, "%Y-%m-%d %H:%M:%S %z")

    @staticmethod
    def _as_naive_utc(moment: datetime) -> datetime:
        """Return *moment* as a naive UTC datetime for safe comparison.

        Naive inputs are returned unchanged, i.e. they are treated as already
        being in UTC.  NOTE(review): confirm that callers supply naive bounds
        in UTC rather than local time.
        """
        offset = moment.utcoffset()
        if offset is None:
            return moment
        return moment.replace(tzinfo=None) - offset

    def _parse_git_log(self, log_output: str) -> List[Dict[str, Any]]:
        """Parse git log output into structured data.

        Each line is expected as ``sha|author|date|subject``.  Lines that do
        not split into four fields or whose date cannot be parsed are
        skipped, as are commits outside ``start_date``/``end_date``.
        """
        # Resolve the repository name once; fall back to None instead of
        # raising IndexError when no clone is available.
        names = self.repos_names
        repo_name = names[0] if names else None

        lower = self._as_naive_utc(self.start_date) if self.start_date else None
        upper = self._as_naive_utc(self.end_date) if self.end_date else None

        entries = []
        for line in log_output.splitlines():
            if not line.strip():
                continue

            try:
                # maxsplit=3 keeps any "|" inside the subject intact.
                sha, author, date_str, message = line.split("|", 3)
                timestamp = self._parse_git_date(date_str)
            except ValueError:
                continue  # Skip malformed log entries

            # Compare in naive UTC so offset-aware commit times and naive
            # bounds never raise TypeError.
            comparable = self._as_naive_utc(timestamp)
            if lower is not None and comparable < lower:
                continue
            if upper is not None and comparable > upper:
                continue

            entries.append({
                "type": "commit",
                "repo": repo_name,
                "message": message,
                "sha": sha,
                "author": author,
                "timestamp": timestamp
            })

        return entries

    def fetch_commits(self) -> List[Dict[str, Any]]:
        """Fetch commits from all branches in the cloned repository."""
        return self._run_git_log()

    def fetch_pull_requests(self) -> List[Dict[str, Any]]:
        """Fetch pull requests (not implemented for generic Git URLs)."""
        return []

    def fetch_issues(self) -> List[Dict[str, Any]]:
        """Fetch issues (not implemented for generic Git URLs)."""
        return []

    def clear(self) -> None:
        """Clean up temporary directory.

        Never raises: removal errors are ignored, and ``temp_dir`` is reset
        to ``None`` so later calls become no-ops.
        """
        if self.temp_dir:
            shutil.rmtree(self.temp_dir, ignore_errors=True)
            self.temp_dir = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: git-recap
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: A modular Python tool that aggregates and formats user-authored messages from repositories.
5
5
  Author: Bruno V.
6
6
  Author-email: bruno.vitorino@tecnico.ulisboa.pt
@@ -14,5 +14,6 @@ git_recap/providers/azure_fetcher.py
14
14
  git_recap/providers/base_fetcher.py
15
15
  git_recap/providers/github_fetcher.py
16
16
  git_recap/providers/gitlab_fetcher.py
17
+ git_recap/providers/url_fetcher.py
17
18
  tests/test_dummy_parser.py
18
19
  tests/test_parser.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
5
5
 
6
6
  setup(
7
7
  name="git-recap",
8
- version="0.1.1",
8
+ version="0.1.2",
9
9
  packages=find_packages(),
10
10
  install_requires=[
11
11
  "PyGithub==2.6.1",
File without changes
File without changes
File without changes
File without changes