osslag 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- osslag/__init__.py +0 -0
- osslag/cli.py +1380 -0
- osslag/distro/__init__.py +0 -0
- osslag/distro/debian.py +382 -0
- osslag/distro/fedora.py +38 -0
- osslag/metrics/__init__.py +0 -0
- osslag/metrics/malta.py +585 -0
- osslag/metrics/pvac.py +166 -0
- osslag/utils/__init__.py +0 -0
- osslag/utils/github_helper.py +240 -0
- osslag/utils/vcs.py +543 -0
- osslag-1.0.0.dist-info/METADATA +46 -0
- osslag-1.0.0.dist-info/RECORD +15 -0
- osslag-1.0.0.dist-info/WHEEL +4 -0
- osslag-1.0.0.dist-info/entry_points.txt +3 -0
osslag/metrics/pvac.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""Semantic Version Activity Categorizer (PVAC)
|
|
2
|
+
|
|
3
|
+
Author:
|
|
4
|
+
Shane Panter and Luke Hindman
|
|
5
|
+
|
|
6
|
+
Description:
|
|
7
|
+
This module provides a set of functions for categorizeing version strings
|
|
8
|
+
based on the official and extended semantic versioning policies. The module
|
|
9
|
+
also provides a function for calculating the version delta between two
|
|
10
|
+
packages based on the weighted sum of the major, minor, and patch version
|
|
11
|
+
numbers.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
|
|
16
|
+
"""Regular expression patterns for matching version strings"""
|
|
17
|
+
version_mapping = [
|
|
18
|
+
# Official Semantic
|
|
19
|
+
{
|
|
20
|
+
"pattern": re.compile(
|
|
21
|
+
r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
|
|
22
|
+
),
|
|
23
|
+
"class_group": "Semantic",
|
|
24
|
+
},
|
|
25
|
+
# ExtendedSemantic: Match epoch prepended to version string based upon official versioning policy
|
|
26
|
+
{
|
|
27
|
+
"pattern": re.compile(
|
|
28
|
+
r"^((?P<epoch>0|[1-9]\d*):)?(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
|
|
29
|
+
),
|
|
30
|
+
"class_group": "Extended-Semantic",
|
|
31
|
+
},
|
|
32
|
+
# Semi-Semantic: Allow version numbers to start with 0; Make the patch field optional and separated by either a . or a lower case p
|
|
33
|
+
{
|
|
34
|
+
"pattern": re.compile(
|
|
35
|
+
r"^((?P<epoch>0|[1-9]\d*):)?(?P<major>[0-9]\d*)\.(?P<minor>[0-9]\d*)((\.|p|pl)(?P<patch>[0-9]\d*))?(?:-(?P<prerelease>(?:[0-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:[0-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
|
|
36
|
+
),
|
|
37
|
+
"class_group": "Semi-Semantic",
|
|
38
|
+
},
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def lookup_category(version_string):
|
|
43
|
+
"""Given a version string, return a dictionary containing the category
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
version_string (string): The version string to categorize
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
dict: A dictionary containing the category information
|
|
50
|
+
|
|
51
|
+
"""
|
|
52
|
+
# Clean the string for standardized processing
|
|
53
|
+
version_string = version_string.strip()
|
|
54
|
+
|
|
55
|
+
version_dict = {
|
|
56
|
+
"category": None,
|
|
57
|
+
"epoch": None,
|
|
58
|
+
"major": None,
|
|
59
|
+
"minor": None,
|
|
60
|
+
"patch": None,
|
|
61
|
+
}
|
|
62
|
+
for map in version_mapping:
|
|
63
|
+
m = map["pattern"].match(version_string.strip())
|
|
64
|
+
if m is not None:
|
|
65
|
+
version_dict = {}
|
|
66
|
+
if "epoch" not in m.groupdict().keys() or m.groupdict()["epoch"] is None:
|
|
67
|
+
version_dict["epoch"] = 0
|
|
68
|
+
else:
|
|
69
|
+
version_dict["epoch"] = int(m.groupdict()["epoch"])
|
|
70
|
+
|
|
71
|
+
if "major" not in m.groupdict().keys() or m.groupdict()["major"] is None:
|
|
72
|
+
version_dict["major"] = 0
|
|
73
|
+
else:
|
|
74
|
+
version_dict["major"] = int(m.groupdict()["major"])
|
|
75
|
+
|
|
76
|
+
if "minor" not in m.groupdict().keys() or m.groupdict()["minor"] is None:
|
|
77
|
+
version_dict["minor"] = 0
|
|
78
|
+
else:
|
|
79
|
+
version_dict["minor"] = int(m.groupdict()["minor"])
|
|
80
|
+
|
|
81
|
+
if "patch" not in m.groupdict().keys() or m.groupdict()["patch"] is None:
|
|
82
|
+
version_dict["patch"] = 0
|
|
83
|
+
else:
|
|
84
|
+
version_dict["patch"] = int(m.groupdict()["patch"])
|
|
85
|
+
|
|
86
|
+
version_dict["category"] = map["class_group"]
|
|
87
|
+
break
|
|
88
|
+
return version_dict
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def version_delta(packages, major_weight, minor_weight, patch_weight):
    """Calculate the version delta between two packages based on the weighted
    sum of the major, minor, and patch version numbers.

    Args:
        packages (tuple): A list of tuples containing version information
        major_weight (float): The weight to apply to the major version number
        minor_weight (float): The weight to apply to the minor version number
        patch_weight (float): The weight to apply to the patch version number

    Returns:
        type: A single value representing the sum of the weighted version deltas

    """

    def _weighted(major, minor, patch):
        # Collapse one (major, minor, patch) triple into a weighted scalar.
        return (major * major_weight) + (minor * minor_weight) + (patch * patch_weight)

    total = 0

    for tuple_a, tuple_b in packages:
        # Destructure the version tuples
        semantic_a, epoch_a, major_a, minor_a, patch_a = tuple_a
        semantic_b, epoch_b, major_b, minor_b, patch_b = tuple_b

        # Versions in different epochs are not directly comparable, and
        # uncategorized ("Unknown") schemes carry no usable numbers.
        if epoch_a != epoch_b:
            continue
        if "Unknown" in (semantic_a, semantic_b):
            continue

        total += abs(
            _weighted(major_b, minor_b, patch_b) - _weighted(major_a, minor_a, patch_a)
        )

    return total
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def categorize_development_activity(version_string_A, version_string_B):
    """Calculate the development activity level between two version strings

    Args:
        version_string_A (string): The first version string to compare
        version_string_B (string): The second version string to compare

    Returns:
        string: A string representing the development activity level between
            the two version strings

    """
    info_a = lookup_category(version_string_A)
    info_b = lookup_category(version_string_B)

    # Guard clauses: uncategorizable versions, or versions from different
    # epochs, cannot be compared meaningfully.
    categories = (info_a["category"], info_b["category"])
    if "Unknown" in categories or None in categories:
        return "Unknown"
    if info_a["epoch"] != info_b["epoch"]:
        return "Unknown"

    # Rank activity by the most significant component that changed.
    if info_a["major"] != info_b["major"]:
        return "Very Active"
    if info_a["minor"] != info_b["minor"]:
        return "Moderately Active"
    if info_a["patch"] != info_b["patch"]:
        return "Lightly Active"
    return "Sedentary"
|
osslag/utils/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
from __future__ import annotations

import logging
import os
from datetime import datetime, timedelta, timezone
from typing import NamedTuple

import pandas as pd
import requests
from dotenv import load_dotenv
from github import Github
from github.GithubException import GithubException

from osslag.utils import vcs
|
|
15
|
+
|
|
16
|
+
load_dotenv()  # Load environment variables from .env file if present
logger = logging.getLogger(__name__)

# Suppress overly verbose logging from urllib3 and requests
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
logging.getLogger("requests").setLevel(logging.CRITICAL)

# Redirect PyGithub's logging to a file for debugging. The handler accepts
# INFO and above, but the logger is held at CRITICAL, so the file only
# receives records when the logger level is lowered during a debug session.
# NOTE(review): the FileHandler creates github_debug.log as a side effect of
# importing this module — confirm that is intended.
github_logger = logging.getLogger("github")
file_handler = logging.FileHandler("github_debug.log")
file_handler.setLevel(logging.INFO)
github_logger.addHandler(file_handler)
github_logger.setLevel(logging.CRITICAL)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class GithubAPIResult(NamedTuple):
    """Outcome of a GitHub API call: success flag, payload, and error text."""

    # True when the call succeeded; the other fields give details either way.
    success: bool
    # Response payload, or request context (e.g. {'owner', 'repo'}) on
    # failure; may be None.
    data: dict | None
    # Human-readable error description when success is False; None otherwise.
    error: str | None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def gh_get_rate_limit_info(github_token: str | None = None) -> dict | None:
    """Retrieve GitHub API rate limit information.

    Args:
        github_token: Optional GitHub Personal Access Token. If not provided,
            uses unauthenticated access limits.

    Returns:
        A dictionary with keys: 'limit', 'remaining', 'reset_datetime', 'authenticated'.
        Returns None on error.

    """
    try:
        gh = Github(github_token) if github_token else Github()
        rate_limit = gh.get_rate_limit()
        core = rate_limit.resources.core

        # core.reset is normally a datetime object, but tolerate a raw
        # timestamp just in case.
        reset_dt = (
            core.reset
            if isinstance(core.reset, datetime)
            else datetime.fromtimestamp(core.reset)
        )
        # Convert to a naive datetime in the local timezone for display.
        reset_dt = reset_dt.astimezone().replace(tzinfo=None)

        return {
            "limit": core.limit,
            "remaining": core.remaining,
            "reset_datetime": reset_dt.strftime("%I:%M:%S %p"),
            "authenticated": github_token is not None,
        }
    except Exception:
        # The previous code had two identical handlers (GithubException and
        # Exception) that silently returned None; collapse them and log at
        # debug level so failures remain diagnosable while preserving the
        # documented "None on error" contract.
        logger.debug("Failed to retrieve GitHub rate limit info", exc_info=True)
        return None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def fetch_pull_requests(
    repo_url: str,
    github_token: str | None = None,
    state: str = "all",
    months: int | None = None,
) -> pd.DataFrame:
    """Retrieve pull requests for a GitHub repository via the GitHub API.

    Args:
        repo_url: HTTPS URL to the repository (e.g., https://github.com/owner/repo[.git]).
        github_token: Optional GitHub token for authenticated requests (higher rate limits, private repos).
        state: Filter by PR state: 'open', 'closed', or 'all' (default 'all').
        months: Optional limit to PRs created within the last N months (approx 30 days per month).

    Returns:
        A pandas DataFrame with one row per pull request and columns:
        'number', 'title', 'state', 'user', 'created_at', 'updated_at',
        'closed_at', 'merged_at', 'html_url'.

    Raises:
        ValueError: If `months` is invalid, the repository URL cannot be
            parsed, or the GitHub API request fails.

    """
    if months is not None and (not isinstance(months, int) or months < 1):
        raise ValueError("months parameter must be a positive integer or None")

    owner, repo = vcs.extract_owner_name_repo(repo_url)
    github_token = github_token or os.getenv("GITHUB_TOKEN")
    if owner is None or repo is None:
        raise ValueError(f"Invalid GitHub repository URL: {repo_url}")

    try:
        gh = Github(github_token) if github_token else Github()
        repo_obj = gh.get_repo(f"{owner}/{repo}")

        # PyGithub supports state in {'open','closed','all'}
        prs = repo_obj.get_pulls(state=state, sort="created", direction="desc")

        cutoff = None
        if months is not None:
            # Use an aware UTC cutoff: recent PyGithub versions return
            # timezone-aware datetimes, and comparing a naive datetime
            # against an aware one raises TypeError.
            cutoff = datetime.now(timezone.utc) - timedelta(days=months * 30)

        results: list[dict] = []
        for pr in prs:
            # Filter by months if requested
            if cutoff is not None and pr.created_at is not None:
                created = pr.created_at
                if created.tzinfo is None:
                    # Older PyGithub returns naive UTC datetimes; normalize
                    # so the comparison below is always aware-vs-aware.
                    created = created.replace(tzinfo=timezone.utc)
                if created < cutoff:
                    # Because we sorted desc by created time, we can stop early
                    break

            results.append(
                {
                    "number": pr.number,
                    "title": pr.title,
                    "state": pr.state,
                    "user": None if pr.user is None else pr.user.login,
                    "created_at": pr.created_at.isoformat() if pr.created_at else None,
                    "updated_at": pr.updated_at.isoformat() if pr.updated_at else None,
                    "closed_at": pr.closed_at.isoformat() if pr.closed_at else None,
                    "merged_at": pr.merged_at.isoformat() if pr.merged_at else None,
                    "html_url": pr.html_url,
                }
            )

        return pd.DataFrame(results)
    except GithubException as e:
        raise ValueError(f"GitHub API error: {e.data.get('message', str(e))}") from e
    except Exception as e:
        raise ValueError(f"Failed to fetch pull requests: {str(e)}") from e
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def gh_check_repo_exists(owner: str, repo: str) -> GithubAPIResult:
    """Check if a GitHub repository exists via the API.

    Args:
        owner: Repository owner (user or organization) login.
        repo: Repository name.

    Returns:
        GithubAPIResult: success=True when the repository exists; otherwise
        success=False with `error` describing the failure and `data` echoing
        the requested owner/repo.

    """

    def _error_message(resp, default: str) -> str:
        # Prefer the API's own 'message' field when the body is valid JSON;
        # fall back to the supplied default otherwise.
        try:
            payload = resp.json()
            if "message" in payload:
                return payload["message"]
        except Exception:
            pass
        return default

    github_token = os.getenv("GITHUB_TOKEN")

    if github_token:
        logger.debug("Using authenticated GitHub access")
    else:
        logger.warning("Using unauthenticated GitHub access (60 req/hr limit)")

    url = f"https://api.github.com/repos/{owner}/{repo}"
    headers = {"Accept": "application/vnd.github.v3+json"}
    if github_token:
        headers["Authorization"] = f"Bearer {github_token}"

    try:
        response = requests.get(url, headers=headers, timeout=30)

        if response.status_code == 200:
            return GithubAPIResult(data=None, error=None, success=True)

        # Handle rate limiting explicitly (GitHub reports it with 403).
        # NOTE(review): 403 can also mean plain "forbidden"; confirm callers
        # are happy treating both the same way.
        if response.status_code == 403:
            remaining = response.headers.get("X-RateLimit-Remaining", "?")
            reset_time_str = response.headers.get("X-RateLimit-Reset", "")
            error_msg = (
                f"Rate limited (remaining: {remaining}, resets: {reset_time_str})"
            )
            return GithubAPIResult(
                data={"owner": owner, "repo": repo},
                error=error_msg,
                success=False,
            )

        # Handle actual not found
        if response.status_code == 404:
            error_msg = _error_message(response, f"404 Not Found: {url}")
            return GithubAPIResult(
                data={"owner": owner, "repo": repo}, error=error_msg, success=False
            )

        # Other errors
        error_msg = _error_message(response, f"HTTP {response.status_code}")
        # Lazy %-style args avoid building the message when the level is off.
        logger.warning("GitHub API error for %s/%s: %s", owner, repo, error_msg)
        return GithubAPIResult(
            data={"owner": owner, "repo": repo}, error=error_msg, success=False
        )
    except requests.RequestException as e:
        return GithubAPIResult(
            data={"owner": owner, "repo": repo}, error=str(e), success=False
        )
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def fetch_github_repo_metadata(
    repo_url: str, github_token: str | None = None
) -> pd.DataFrame:
    """Fetch GitHub repo metadata given repo URL and token.

    Args:
        repo_url: HTTPS URL to the repository (e.g., https://github.com/owner/repo[.git]).
        github_token: Optional GitHub token; falls back to the GITHUB_TOKEN
            environment variable when not provided.

    Returns:
        A single-row pandas DataFrame with repository metadata columns
        (full_name, description, star/fork/issue/watcher counts, timestamps,
        archived flag, license SPDX id, comma-joined topics).

    Raises:
        ValueError: If the repository URL cannot be parsed.

    NOTE(review): despite what an earlier docstring claimed, this function
    performs NO rate-limit handling or retry — API errors (including
    RateLimitExceededException) propagate to the caller unhandled.
    """
    owner, repo = vcs.extract_owner_name_repo(repo_url)
    github_token = github_token or os.getenv("GITHUB_TOKEN")
    if owner is None or repo is None:
        raise ValueError(f"Invalid repository URL: {repo_url}")

    # Configure GitHub client with explicit timeout (30 seconds)
    github_client = (
        Github(github_token, timeout=30) if github_token else Github(timeout=30)
    )
    repo_obj = github_client.get_repo(f"{owner}/{repo}")
    data = {
        "repo_url": repo_url,
        "full_name": repo_obj.full_name,
        "description": repo_obj.description,
        "stargazers_count": repo_obj.stargazers_count,
        "forks_count": repo_obj.forks_count,
        "open_issues_count": repo_obj.open_issues_count,
        "watchers_count": repo_obj.watchers_count,
        "created_at": repo_obj.created_at,
        "updated_at": repo_obj.updated_at,
        "pushed_at": repo_obj.pushed_at,
        "archived": repo_obj.archived,
        # license may be absent; spdx_id is stringified for CSV-friendliness
        "license": str(repo_obj.license.spdx_id) if repo_obj.license else None,
        # get_topics() is an extra API call; joined so the cell stays scalar
        "topics": ",".join(repo_obj.get_topics()),
    }
    return pd.DataFrame([data])
|