git-recap 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {git_recap-0.1.1 → git_recap-0.1.2}/PKG-INFO +1 -1
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap/providers/__init__.py +4 -2
- git_recap-0.1.2/git_recap/providers/url_fetcher.py +232 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap.egg-info/PKG-INFO +1 -1
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap.egg-info/SOURCES.txt +1 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/setup.py +1 -1
- {git_recap-0.1.1 → git_recap-0.1.2}/LICENSE +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/README.md +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap/__init__.py +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap/fetcher.py +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap/providers/azure_fetcher.py +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap/providers/base_fetcher.py +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap/providers/github_fetcher.py +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap/providers/gitlab_fetcher.py +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap/utils.py +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap.egg-info/dependency_links.txt +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap.egg-info/requires.txt +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/git_recap.egg-info/top_level.txt +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/setup.cfg +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/tests/test_dummy_parser.py +0 -0
- {git_recap-0.1.1 → git_recap-0.1.2}/tests/test_parser.py +0 -0
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from git_recap.providers.azure_fetcher import AzureFetcher
|
|
2
2
|
from git_recap.providers.github_fetcher import GitHubFetcher
|
|
3
3
|
from git_recap.providers.gitlab_fetcher import GitLabFetcher
|
|
4
|
+
from git_recap.providers.url_fetcher import URLFetcher
|
|
4
5
|
|
|
5
6
|
__all__ = [
|
|
6
7
|
"AzureFetcher",
|
|
7
8
|
"GitHubFetcher",
|
|
8
|
-
"GitLabFetcher"
|
|
9
|
-
|
|
9
|
+
"GitLabFetcher",
|
|
10
|
+
"URLFetcher"
|
|
11
|
+
]
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import shutil
|
|
4
|
+
import subprocess
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import tempfile
|
|
7
|
+
from typing import List, Dict, Any, Optional
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from git_recap.providers.base_fetcher import BaseFetcher
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class URLFetcher(BaseFetcher):
    """Fetcher implementation for generic Git repository URLs.

    On construction the URL is normalized, checked with ``git ls-remote`` and
    cloned (without a working-tree checkout) into a temporary directory.
    Commits are then read from that clone with ``git log``.  Call
    :meth:`clear` when finished to delete the temporary clone.
    """

    GIT_URL_PATTERN = re.compile(
        r'^(?:http|https|git|ssh)://'  # Protocol
        r'(?:\S+@)?'  # Optional username
        r'([^/]+)'  # Domain
        r'(?:[:/])([^/]+/[^/]+?)(?:\.git)?$'  # Repo path
    )

    def __init__(
        self,
        url: str,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None,
        repo_filter: Optional[List[str]] = None,
        authors: Optional[List[str]] = None
    ):
        """Validate and clone the repository found at ``url``.

        Args:
            url: Repository location; a missing scheme defaults to
                ``https://`` and a missing ``.git`` suffix is appended.
            start_date: Optional lower bound for commit timestamps.
            end_date: Optional upper bound for commit timestamps.
            repo_filter: Forwarded unchanged to ``BaseFetcher``.
            authors: Optional author patterns used to filter ``git log``.

        Raises:
            ValueError: If the URL is malformed, unreachable or empty.
            RuntimeError: If cloning the repository fails or times out.
        """
        super().__init__(
            pat="",  # No PAT needed for URL fetcher
            start_date=start_date,
            end_date=end_date,
            repo_filter=repo_filter,
            authors=authors
        )
        self.url = self._normalize_url(url)
        self.temp_dir: Optional[str] = None
        self._validate_url()
        self._clone_repo()

    def _normalize_url(self, url: str) -> str:
        """Return ``url`` with a scheme and a ``.git`` suffix.

        Surrounding whitespace and trailing slashes are removed first, so a
        browser-style ``.../owner/repo/`` does not become
        ``.../owner/repo/.git`` (which GIT_URL_PATTERN would reject).
        """
        url = url.strip().rstrip('/')
        if not url.endswith('.git'):
            url += '.git'
        if not url.startswith(('http://', 'https://', 'git://', 'ssh://')):
            url = f'https://{url}'
        return url

    def _validate_url(self) -> None:
        """Validate the Git repository URL using ``git ls-remote``.

        Raises:
            ValueError: If the URL does not match GIT_URL_PATTERN, the remote
                cannot be listed within 10 seconds, git reports an error, or
                the remote advertises no refs (empty repository).
        """
        if not self.GIT_URL_PATTERN.match(self.url):
            raise ValueError(f"Invalid Git repository URL format: {self.url}")

        try:
            result = subprocess.run(
                ["git", "ls-remote", self.url],
                capture_output=True,
                text=True,
                check=True,
                timeout=10  # Prevent hanging on unreachable hosts
            )
        except subprocess.TimeoutExpired as e:
            raise ValueError(f"Timeout while validating URL {self.url}") from e
        except subprocess.CalledProcessError as e:
            raise ValueError(f"Invalid Git repository URL: {self.url}. Error: {e.stderr}") from e

        if not result.stdout.strip():
            raise ValueError(f"URL {self.url} points to an empty repository")

    def _clone_repo(self) -> None:
        """Clone the repository to a temporary directory with all branches.

        The temporary directory is removed again on every failure path, so a
        failed construction does not leave a partial clone behind.

        Raises:
            RuntimeError: If a git command times out or fails, or if the
                clone contains no commits.
        """
        self.temp_dir = tempfile.mkdtemp(prefix="gitrecap_")
        try:
            # Clone with --no-checkout: only history is needed, not the files
            subprocess.run(
                ["git", "clone", "--no-checkout", self.url, self.temp_dir],
                check=True,
                capture_output=True,
                text=True,
                timeout=300
            )

            # Fetch all branches
            subprocess.run(
                ["git", "-C", self.temp_dir, "fetch", "--all"],
                check=True,
                capture_output=True,
                text=True,
                timeout=300
            )

            # Verify the cloned repository has at least one commit
            verify_result = subprocess.run(
                ["git", "-C", self.temp_dir, "rev-list", "--count", "--all"],
                capture_output=True,
                text=True,
                check=True,
                timeout=60
            )
            if int(verify_result.stdout.strip()) == 0:
                raise ValueError("Cloned repository has no commits")

        except subprocess.TimeoutExpired as e:
            self.clear()
            raise RuntimeError("Repository cloning timed out") from e
        except subprocess.CalledProcessError as e:
            self.clear()
            raise RuntimeError(f"Failed to clone repository: {e.stderr}") from e
        except Exception as e:
            self.clear()
            raise RuntimeError(f"Unexpected error during cloning: {str(e)}") from e

    def _repo_name_from_url(self) -> Optional[str]:
        """Return the last path segment of ``self.url`` without ``.git``.

        Returns ``None`` when the URL does not match GIT_URL_PATTERN.
        """
        match = self.GIT_URL_PATTERN.match(self.url)
        if not match:
            return None

        repo_name = match.group(2).split('/')[-1]
        if repo_name.endswith(".git"):
            repo_name = repo_name[:-4]
        return repo_name

    @property
    def repos_names(self) -> List[str]:
        """Return list of repository names (single item for URL fetcher).

        The list is empty when there is no active clone or the URL cannot be
        parsed.
        """
        if not self.temp_dir:
            return []

        repo_name = self._repo_name_from_url()
        return [repo_name] if repo_name else []

    def _get_all_branches(self) -> List[str]:
        """Get list of all remote branches in the repository.

        Returns an empty list when there is no clone or git fails.
        """
        if not self.temp_dir:
            return []

        try:
            result = subprocess.run(
                ["git", "-C", self.temp_dir, "branch", "-r", "--format=%(refname:short)"],
                capture_output=True,
                text=True,
                check=True
            )
        except subprocess.CalledProcessError:
            return []

        branches = [b.strip() for b in result.stdout.splitlines() if b.strip()]
        # Filter out the symbolic HEAD reference if present
        return [b for b in branches if not b.endswith('/HEAD')]

    def _run_git_log(self, extra_args: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Run ``git log`` with the common arguments and parse its output.

        Args:
            extra_args: Additional command-line arguments appended verbatim.

        Returns:
            Parsed commit entries; an empty list when there is no clone or
            the git command fails or times out.
        """
        if not self.temp_dir:
            return []

        args = [
            "git",
            "-C", self.temp_dir,
            "log",
            "--pretty=format:%H|%an|%ad|%s",
            # iso-strict yields e.g. 2024-01-15T10:30:00+01:00, which
            # datetime.fromisoformat parses on every supported Python version
            "--date=iso-strict",
            "--all"  # Include all branches and tags
        ]

        if self.start_date:
            args.extend(["--since", self.start_date.isoformat()])
        if self.end_date:
            args.extend(["--until", self.end_date.isoformat()])
        if self.authors:
            # git treats the pattern as a basic regex in which "|" is a
            # literal character; repeating --author is how git ORs patterns.
            args.extend(f"--author={author}" for author in self.authors)
        if extra_args:
            args.extend(extra_args)

        try:
            result = subprocess.run(
                args,
                capture_output=True,
                text=True,
                check=True,
                timeout=120  # Generous timeout for large repositories
            )
        except (subprocess.TimeoutExpired, subprocess.CalledProcessError):
            return []
        return self._parse_git_log(result.stdout)

    @staticmethod
    def _parse_git_date(date_str: str) -> datetime:
        """Parse a date printed by ``git log`` into a ``datetime``.

        Accepts ``--date=iso-strict`` output (including a trailing ``Z``) and
        falls back to the classic ``--date=iso`` layout
        (``YYYY-MM-DD HH:MM:SS +ZZZZ``).

        Raises:
            ValueError: If the text matches neither layout.
        """
        text = date_str.strip()
        if text.endswith("Z"):
            # fromisoformat only understands "Z" from Python 3.11 onwards
            text = text[:-1] + "+00:00"
        try:
            return datetime.fromisoformat(text)
        except ValueError:
            return datetime.strptime(text, "%Y-%m-%d %H:%M:%S %z")

    @staticmethod
    def _align_to(timestamp: datetime, bound: datetime) -> datetime:
        """Return ``timestamp`` made comparable with ``bound``.

        Comparing naive and timezone-aware datetimes raises ``TypeError``.
        A naive value is interpreted as local time, the same way git reads
        the naive ``--since``/``--until`` values passed to it.
        """
        timestamp_aware = timestamp.tzinfo is not None
        bound_aware = bound.tzinfo is not None
        if timestamp_aware == bound_aware:
            return timestamp
        if bound_aware:
            return timestamp.astimezone(bound.tzinfo)
        return timestamp.astimezone().replace(tzinfo=None)

    def _parse_git_log(self, log_output: str) -> List[Dict[str, Any]]:
        """Parse ``sha|author|date|subject`` lines into commit entries.

        Lines that cannot be split into four fields or whose date cannot be
        parsed are skipped.  Entries outside ``start_date``/``end_date`` are
        dropped.

        Returns:
            A list of dicts with the keys ``type``, ``repo``, ``message``,
            ``sha``, ``author`` and ``timestamp``.
        """
        # Resolve the repository name once instead of once per log line
        repo_name = self._repo_name_from_url() or ""

        entries = []
        for line in log_output.splitlines():
            if not line.strip():
                continue

            try:
                # maxsplit=3 keeps any "|" inside the commit subject intact
                sha, author, date_str, message = line.split("|", 3)
                timestamp = self._parse_git_date(date_str)
            except ValueError:
                continue  # Skip malformed log entries

            if self.start_date and self._align_to(timestamp, self.start_date) < self.start_date:
                continue
            if self.end_date and self._align_to(timestamp, self.end_date) > self.end_date:
                continue

            entries.append({
                "type": "commit",
                "repo": repo_name,
                "message": message,
                "sha": sha,
                "author": author,
                "timestamp": timestamp
            })

        return entries

    def fetch_commits(self) -> List[Dict[str, Any]]:
        """Fetch commits from all branches in the cloned repository."""
        return self._run_git_log()

    def fetch_pull_requests(self) -> List[Dict[str, Any]]:
        """Fetch pull requests (not implemented for generic Git URLs)."""
        return []

    def fetch_issues(self) -> List[Dict[str, Any]]:
        """Fetch issues (not implemented for generic Git URLs)."""
        return []

    def clear(self) -> None:
        """Delete the temporary clone, if any; never raises on failure."""
        temp_dir, self.temp_dir = self.temp_dir, None
        if temp_dir and os.path.exists(temp_dir):
            # ignore_errors keeps cleanup best-effort
            shutil.rmtree(temp_dir, ignore_errors=True)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|