osslag 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
osslag/metrics/pvac.py ADDED
@@ -0,0 +1,166 @@
1
+ """Semantic Version Activity Categorizer (PVAC)
2
+
3
+ Author:
4
+ Shane Panter and Luke Hindman
5
+
6
+ Description:
7
+ This module provides a set of functions for categorizing version strings
8
+ based on the official and extended semantic versioning policies. The module
9
+ also provides a function for calculating the version delta between two
10
+ packages based on the weighted sum of the major, minor, and patch version
11
+ numbers.
12
+ """
13
+
14
+ import re
15
+
16
"""Regular expression patterns for matching version strings"""
# Ordered list of {pattern, class_group} mappings. lookup_category() tries
# each pattern in order and stops at the first match, so the strictest
# (official SemVer) grammar is listed first and the loosest last.
version_mapping = [
    # Official Semantic: the semver.org 2.0.0 grammar — major.minor.patch
    # (no leading zeros), optional "-prerelease" and "+buildmetadata".
    {
        "pattern": re.compile(
            r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
        ),
        "class_group": "Semantic",
    },
    # ExtendedSemantic: Match epoch prepended to version string based upon
    # official versioning policy (e.g. "1:2.3.4").
    {
        "pattern": re.compile(
            r"^((?P<epoch>0|[1-9]\d*):)?(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
        ),
        "class_group": "Extended-Semantic",
    },
    # Semi-Semantic: Allow version numbers to start with 0; make the patch
    # field optional and separated by either a ".", a lowercase "p", or "pl".
    {
        "pattern": re.compile(
            r"^((?P<epoch>0|[1-9]\d*):)?(?P<major>[0-9]\d*)\.(?P<minor>[0-9]\d*)((\.|p|pl)(?P<patch>[0-9]\d*))?(?:-(?P<prerelease>(?:[0-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:[0-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
        ),
        "class_group": "Semi-Semantic",
    },
]
40
+
41
+
42
def lookup_category(version_string):
    """Categorize a version string against the known versioning policies.

    The patterns in ``version_mapping`` are tried in order; the first match
    wins, so stricter policies take precedence over looser ones.

    Args:
        version_string (str): The version string to categorize.

    Returns:
        dict: Keys ``category``, ``epoch``, ``major``, ``minor``, ``patch``.
        When no pattern matches, every value is ``None``.  Otherwise the
        numeric fields are ints (0 when the field is absent from the
        pattern or unmatched) and ``category`` holds the matching class
        group name.
    """
    # Clean the string for standardized processing.
    version_string = version_string.strip()

    for mapping in version_mapping:
        m = mapping["pattern"].match(version_string)
        if m is None:
            continue
        groups = m.groupdict()
        # Any field absent from the pattern (or unmatched) defaults to 0.
        version_dict = {
            field: int(groups[field]) if groups.get(field) is not None else 0
            for field in ("epoch", "major", "minor", "patch")
        }
        version_dict["category"] = mapping["class_group"]
        return version_dict

    # No pattern matched: return an all-None placeholder.
    return {
        "category": None,
        "epoch": None,
        "major": None,
        "minor": None,
        "patch": None,
    }
89
+
90
+
91
def version_delta(packages, major_weight, minor_weight, patch_weight):
    """Sum the weighted version deltas across a collection of package pairs.

    Args:
        packages (iterable): Pairs of version tuples, each of the form
            (category, epoch, major, minor, patch).
        major_weight (float): Weight applied to the major version number.
        minor_weight (float): Weight applied to the minor version number.
        patch_weight (float): Weight applied to the patch version number.

    Returns:
        A single value: the sum of the absolute weighted version deltas
        over all comparable pairs.
    """

    def weigh(major, minor, patch):
        # Collapse a version triple into a single weighted scalar.
        return (major * major_weight) + (minor * minor_weight) + (patch * patch_weight)

    total = 0
    for pair_a, pair_b in packages:
        # Destructure the two version tuples.
        category_a, epoch_a, major_a, minor_a, patch_a = pair_a
        category_b, epoch_b, major_b, minor_b, patch_b = pair_b

        # Pairs from different epochs, or with an unknown categorization on
        # either side, are not comparable and contribute nothing.
        if epoch_a != epoch_b or category_a == "Unknown" or category_b == "Unknown":
            continue

        total += abs(
            weigh(major_b, minor_b, patch_b) - weigh(major_a, minor_a, patch_a)
        )

    return total
127
+
128
+
129
def categorize_development_activity(version_string_A, version_string_B):
    """Rate the development activity implied by two version strings.

    Args:
        version_string_A (str): The first version string to compare.
        version_string_B (str): The second version string to compare.

    Returns:
        str: "Very Active" (major changed), "Moderately Active" (minor
        changed), "Lightly Active" (patch changed), "Sedentary" (no
        change), or "Unknown" when either string cannot be categorized or
        the epochs differ.
    """
    info_a = lookup_category(version_string_A)
    info_b = lookup_category(version_string_B)

    # Comparison is meaningless if either side failed categorization.
    for info in (info_a, info_b):
        if info["category"] is None or info["category"] == "Unknown":
            return "Unknown"

    # Versions from different epochs are not directly comparable.
    if info_a["epoch"] != info_b["epoch"]:
        return "Unknown"

    # Check the most significant differing field, from major down to patch.
    if info_a["major"] != info_b["major"]:
        return "Very Active"
    if info_a["minor"] != info_b["minor"]:
        return "Moderately Active"
    if info_a["patch"] != info_b["patch"]:
        return "Lightly Active"
    return "Sedentary"
File without changes
@@ -0,0 +1,240 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ from datetime import datetime, timedelta
6
+ from typing import NamedTuple
7
+
8
+ import pandas as pd
9
+ import requests
10
+ from dotenv import load_dotenv
11
+ from github import Github
12
+ from github.GithubException import GithubException
13
+
14
+ from osslag.utils import vcs
15
+
16
load_dotenv()  # Load environment variables from .env file if present
logger = logging.getLogger(__name__)

# Suppress overly verbose logging from urllib3 and requests
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
logging.getLogger("requests").setLevel(logging.CRITICAL)

# Redirect PyGithub's logging to a file for debugging.
# NOTE(review): the logger level is CRITICAL while the handler level is
# INFO, so only CRITICAL records actually reach github_debug.log — confirm
# this is intended.
github_logger = logging.getLogger("github")
file_handler = logging.FileHandler("github_debug.log")  # was misspelled "filer_handler"
file_handler.setLevel(logging.INFO)
github_logger.addHandler(file_handler)
github_logger.setLevel(logging.CRITICAL)
29
+
30
+
31
class GithubAPIResult(NamedTuple):
    """Outcome of a GitHub API call.

    Fields:
        success: True when the call completed successfully (HTTP 200).
        data: Optional request context (e.g. {"owner": ..., "repo": ...})
            attached to failures, or None when no payload applies.
        error: Human-readable error description, or None on success.
    """

    success: bool
    data: dict | None
    error: str | None
35
+
36
+
37
def gh_get_rate_limit_info(github_token: str | None = None) -> dict | None:
    """Retrieve GitHub API rate limit information.

    Args:
        github_token: Optional GitHub Personal Access Token. If not provided,
            uses unauthenticated access limits.

    Returns:
        A dictionary with keys 'limit', 'remaining', 'reset_datetime' (the
        reset moment formatted as a local-time "HH:MM:SS AM/PM" string),
        and 'authenticated'.  Returns None on any error.

    """
    try:
        gh = Github(github_token) if github_token else Github()
        core = gh.get_rate_limit().resources.core

        # core.reset is normally a datetime; fall back to treating it as a
        # Unix timestamp for client versions that return one.
        reset_dt = (
            core.reset
            if isinstance(core.reset, datetime)
            else datetime.fromtimestamp(core.reset)
        )
        # Render as a naive datetime in the local timezone for display.
        reset_dt = reset_dt.astimezone().replace(tzinfo=None)

        return {
            "limit": core.limit,
            "remaining": core.remaining,
            "reset_datetime": reset_dt.strftime("%I:%M:%S %p"),
            "authenticated": github_token is not None,
        }
    except Exception:
        # Best-effort helper: any failure degrades to None rather than
        # raising.  (The previous separate GithubException handler was
        # redundant — it did exactly the same thing as this broad handler.)
        return None
75
+
76
+
77
def fetch_pull_requests(
    repo_url: str,
    github_token: str | None = None,
    state: str = "all",
    months: int | None = None,
) -> pd.DataFrame:
    """Retrieve pull requests for a GitHub repository via the GitHub API.

    Args:
        repo_url: HTTPS URL to the repository (e.g., https://github.com/owner/repo[.git]).
        github_token: Optional GitHub token for authenticated requests (higher
            rate limits, private repos). Falls back to the GITHUB_TOKEN
            environment variable when not supplied.
        state: Filter by PR state: 'open', 'closed', or 'all' (default 'all').
        months: Optional limit to PRs created within the last N months
            (approximated as 30 days per month).

    Returns:
        A pandas DataFrame with one row per pull request and columns
        'number', 'title', 'state', 'user', 'created_at', 'updated_at',
        'closed_at', 'merged_at', 'html_url'. Timestamp columns hold
        ISO-8601 strings or None.

    Raises:
        ValueError: If `months` is not a positive integer or None, if the
            repository URL cannot be parsed into owner/repo, or if the
            GitHub API call fails (original exception chained as __cause__).

    """
    if months is not None and (not isinstance(months, int) or months < 1):
        raise ValueError("months parameter must be a positive integer or None")

    owner, repo = vcs.extract_owner_name_repo(repo_url)
    github_token = github_token or os.getenv("GITHUB_TOKEN")
    if owner is None or repo is None:
        raise ValueError(f"Invalid GitHub repository URL: {repo_url}")

    try:
        gh = Github(github_token) if github_token else Github()
        repo_obj = gh.get_repo(f"{owner}/{repo}")

        # PyGithub supports state in {'open','closed','all'}
        prs = repo_obj.get_pulls(state=state, sort="created", direction="desc")

        cutoff = None
        if months is not None:
            # NOTE(review): cutoff is a naive local datetime, while recent
            # PyGithub versions return timezone-aware created_at values —
            # confirm the comparison below does not raise TypeError with the
            # pinned PyGithub version.
            cutoff = datetime.now() - timedelta(days=months * 30)

        results: list[dict] = []
        for pr in prs:
            # Filter by months if requested
            if cutoff is not None and pr.created_at < cutoff:
                # Because we sorted desc by created time, we can stop early
                break

            results.append(
                {
                    "number": pr.number,
                    "title": pr.title,
                    "state": pr.state,
                    "user": None if pr.user is None else pr.user.login,
                    "created_at": pr.created_at.isoformat() if pr.created_at else None,
                    "updated_at": pr.updated_at.isoformat() if pr.updated_at else None,
                    "closed_at": pr.closed_at.isoformat() if pr.closed_at else None,
                    "merged_at": pr.merged_at.isoformat() if pr.merged_at else None,
                    "html_url": pr.html_url,
                }
            )

        return pd.DataFrame(results)
    except GithubException as e:
        raise ValueError(f"GitHub API error: {e.data.get('message', str(e))}") from e
    except Exception as e:
        raise ValueError(f"Failed to fetch pull requests: {str(e)}") from e
141
+
142
+
143
def _gh_error_message(response, default: str) -> str:
    """Best-effort extraction of the 'message' field from a GitHub error body."""
    try:
        payload = response.json()
        if "message" in payload:
            return payload["message"]
    except Exception:
        # Body was not JSON (or had no message); keep the default text.
        pass
    return default


def gh_check_repo_exists(owner: str, repo: str) -> GithubAPIResult:
    """Check if a GitHub repository exists via the API.

    Uses the GITHUB_TOKEN environment variable for authentication when set;
    otherwise falls back to unauthenticated access (60 requests/hour).

    Args:
        owner: Repository owner or organization name.
        repo: Repository name.

    Returns:
        GithubAPIResult: success=True (data/error None) on HTTP 200;
        otherwise success=False with `error` describing the failure and
        `data` echoing the owner/repo that was queried.
    """
    github_token = os.getenv("GITHUB_TOKEN")

    if github_token:
        logger.debug("Using authenticated GitHub access")
    else:
        logger.warning("Using unauthenticated GitHub access (60 req/hr limit)")

    url = f"https://api.github.com/repos/{owner}/{repo}"
    headers = {"Accept": "application/vnd.github.v3+json"}
    if github_token:
        headers["Authorization"] = f"Bearer {github_token}"

    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as e:
        # Network-level failure (DNS, timeout, connection reset, ...).
        return GithubAPIResult(
            data={"owner": owner, "repo": repo}, error=str(e), success=False
        )

    if response.status_code == 200:
        return GithubAPIResult(data=None, error=None, success=True)

    # Handle rate limiting explicitly.
    # NOTE(review): all 403s are reported as rate limiting, but GitHub also
    # uses 403 for plain permission errors — consider checking
    # X-RateLimit-Remaining == "0" before claiming a rate limit.
    if response.status_code == 403:
        remaining = response.headers.get("X-RateLimit-Remaining", "?")
        reset_time_str = response.headers.get("X-RateLimit-Reset", "")
        error_msg = (
            f"Rate limited (remaining: {remaining}, resets: {reset_time_str})"
        )
        return GithubAPIResult(
            data={"owner": owner, "repo": repo},
            error=error_msg,
            success=False,
        )

    # Handle actual not found.
    if response.status_code == 404:
        error_msg = _gh_error_message(response, f"404 Not Found: {url}")
        return GithubAPIResult(
            data={"owner": owner, "repo": repo}, error=error_msg, success=False
        )

    # Other errors.
    error_msg = _gh_error_message(response, f"HTTP {response.status_code}")
    logger.warning(f"GitHub API error for {owner}/{repo}: {error_msg}")
    return GithubAPIResult(
        data={"owner": owner, "repo": repo}, error=error_msg, success=False
    )
205
+
206
+
207
def fetch_github_repo_metadata(
    repo_url: str, github_token: str | None = None
) -> pd.DataFrame:
    """Fetch GitHub repository metadata as a single-row pandas DataFrame.

    Args:
        repo_url: HTTPS URL to the repository.
        github_token: Optional GitHub token; falls back to the GITHUB_TOKEN
            environment variable when not supplied.

    Returns:
        A one-row DataFrame with the repo URL, full name, description,
        star/fork/issue/watcher counts, created/updated/pushed timestamps,
        archived flag, license SPDX id (or None), and comma-joined topics.

    Raises:
        ValueError: If the URL cannot be parsed into owner/repo.

    NOTE(review): an earlier docstring claimed this function catches
    RateLimitExceededException and waits for the reset before retrying —
    the code does no such thing; GitHub API errors propagate to the caller.
    """
    owner, repo = vcs.extract_owner_name_repo(repo_url)
    github_token = github_token or os.getenv("GITHUB_TOKEN")
    if owner is None or repo is None:
        raise ValueError(f"Invalid repository URL: {repo_url}")

    # Configure GitHub client with explicit timeout (30 seconds)
    github_client = (
        Github(github_token, timeout=30) if github_token else Github(timeout=30)
    )
    repo_obj = github_client.get_repo(f"{owner}/{repo}")
    data = {
        "repo_url": repo_url,
        "full_name": repo_obj.full_name,
        "description": repo_obj.description,
        "stargazers_count": repo_obj.stargazers_count,
        "forks_count": repo_obj.forks_count,
        "open_issues_count": repo_obj.open_issues_count,
        "watchers_count": repo_obj.watchers_count,
        "created_at": repo_obj.created_at,
        "updated_at": repo_obj.updated_at,
        "pushed_at": repo_obj.pushed_at,
        "archived": repo_obj.archived,
        "license": str(repo_obj.license.spdx_id) if repo_obj.license else None,
        "topics": ",".join(repo_obj.get_topics()),
    }
    return pd.DataFrame([data])