ossa-scanner 0.1.3__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/PKG-INFO +4 -3
- ossa_scanner-0.1.6/ossa_scanner/__init__.py +1 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner/scanner.py +54 -61
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner/uploader.py +0 -16
- ossa_scanner-0.1.6/ossa_scanner/utils/downloader.py +119 -0
- ossa_scanner-0.1.6/ossa_scanner/utils/hash_calculator.py +34 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner/utils/os_detection.py +6 -1
- ossa_scanner-0.1.6/ossa_scanner/utils/package_manager.py +180 -0
- ossa_scanner-0.1.6/ossa_scanner/utils/swhid_calculator.py +35 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/PKG-INFO +4 -3
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/requires.txt +1 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/setup.py +3 -2
- ossa_scanner-0.1.3/ossa_scanner/__init__.py +0 -1
- ossa_scanner-0.1.3/ossa_scanner/utils/downloader.py +0 -47
- ossa_scanner-0.1.3/ossa_scanner/utils/hash_calculator.py +0 -8
- ossa_scanner-0.1.3/ossa_scanner/utils/package_manager.py +0 -128
- ossa_scanner-0.1.3/ossa_scanner/utils/swhid_calculator.py +0 -3
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/LICENSE +0 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/README.md +0 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner/cli.py +0 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner/utils/__init__.py +0 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/SOURCES.txt +0 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/dependency_links.txt +0 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/entry_points.txt +0 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/top_level.txt +0 -0
- {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/setup.cfg +0 -0
@@ -1,12 +1,12 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ossa_scanner
|
3
|
-
Version: 0.1.
|
4
|
-
Summary:
|
3
|
+
Version: 0.1.6
|
4
|
+
Summary: Open Source Software Advisory generator for Core and Base Linux Packages.
|
5
5
|
Home-page: https://github.com/oscarvalenzuelab/ossa_scanner
|
6
6
|
Author: Oscar Valenzuela
|
7
7
|
Author-email: oscar.valenzuela.b@gmail.com
|
8
8
|
License: MIT
|
9
|
-
Keywords: linux packages SWHID open-source compliance
|
9
|
+
Keywords: linux packages SWHID open-source compliance ossa advisory
|
10
10
|
Classifier: Development Status :: 3 - Alpha
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
12
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -23,6 +23,7 @@ License-File: LICENSE
|
|
23
23
|
Requires-Dist: click
|
24
24
|
Requires-Dist: swh.model
|
25
25
|
Requires-Dist: distro
|
26
|
+
Requires-Dist: ssdeep
|
26
27
|
|
27
28
|
# ossa_scanner
|
28
29
|
Open Source Advisory Scanner (Generator)
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.1.6"
|
@@ -1,9 +1,13 @@
|
|
1
1
|
import os
|
2
|
+
import re
|
2
3
|
import json
|
4
|
+
import glob
|
5
|
+
import shutil
|
6
|
+
import subprocess
|
3
7
|
import hashlib
|
4
8
|
from datetime import datetime
|
5
9
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
6
|
-
from .utils.os_detection import detect_os
|
10
|
+
from .utils.os_detection import detect_os, detect_pm
|
7
11
|
from .utils.package_manager import list_packages, get_package_info
|
8
12
|
from .utils.downloader import download_source
|
9
13
|
from .utils.hash_calculator import calculate_file_hash
|
@@ -14,43 +18,16 @@ class Scanner:
|
|
14
18
|
self.output_dir = output_dir
|
15
19
|
self.temp_dir = temp_dir
|
16
20
|
self.os_type = detect_os()
|
21
|
+
self.pm_type = detect_pm()
|
17
22
|
self.threads = threads
|
18
23
|
os.makedirs(self.temp_dir, exist_ok=True)
|
19
24
|
|
20
25
|
def process_package(self, package):
|
21
|
-
"""
|
22
|
-
Processes a single package: downloads source, extracts, calculates hash and SWHID.
|
23
|
-
|
24
|
-
Args:
|
25
|
-
package (str): Package name to process.
|
26
|
-
|
27
|
-
Returns:
|
28
|
-
dict: Result of the processed package including hash and SWHID.
|
29
|
-
"""
|
30
26
|
try:
|
31
27
|
print(f"Processing package: {package}")
|
32
|
-
package_info = get_package_info(self.
|
33
|
-
|
34
|
-
|
35
|
-
# Download the source code to temp_dir
|
36
|
-
source_file = download_source(self.os_type, package, self.temp_dir)
|
37
|
-
print(f"Downloaded source file: {source_file}")
|
38
|
-
|
39
|
-
# Calculate hash of the source file
|
40
|
-
file_hash = calculate_file_hash(source_file)
|
41
|
-
print(f"Hash (SHA256) for {package}: {file_hash}")
|
42
|
-
|
43
|
-
# Extract source code directory in temp_dir
|
44
|
-
source_dir = os.path.join(self.temp_dir, package)
|
45
|
-
os.makedirs(source_dir, exist_ok=True)
|
46
|
-
|
47
|
-
# Calculate SWHID
|
48
|
-
swhid = calculate_swhid(source_dir)
|
49
|
-
print(f"SWHID for {package}: {swhid}")
|
50
|
-
|
51
|
-
# Save report
|
52
|
-
self.save_package_report(package, package_info, file_hash, swhid, source_file)
|
53
|
-
|
28
|
+
package_info = get_package_info(self.pm_type, package)
|
29
|
+
source_files = download_source(self.pm_type, package, self.temp_dir)
|
30
|
+
self.save_package_report(package, package_info, source_files)
|
54
31
|
except Exception as e:
|
55
32
|
print(f"Error processing package {package}: {e}")
|
56
33
|
|
@@ -58,9 +35,9 @@ class Scanner:
|
|
58
35
|
"""
|
59
36
|
Scans all packages in the repository and processes them in parallel.
|
60
37
|
"""
|
61
|
-
print(f"Detected
|
38
|
+
print(f"Detected Package Manager: {self.pm_type}")
|
62
39
|
print("Listing available packages...")
|
63
|
-
packages = list_packages(self.
|
40
|
+
packages = list_packages(self.pm_type)
|
64
41
|
with ThreadPoolExecutor(max_workers=self.threads) as executor:
|
65
42
|
# Submit tasks for parallel processing
|
66
43
|
future_to_package = {
|
@@ -75,43 +52,59 @@ class Scanner:
|
|
75
52
|
except Exception as e:
|
76
53
|
print(f"Exception occurred for package {package}: {e}")
|
77
54
|
|
78
|
-
def save_package_report(self, package, package_info,
|
79
|
-
"""
|
80
|
-
Save the report for a single package.
|
81
|
-
|
82
|
-
Args:
|
83
|
-
package (str): Package name.
|
84
|
-
package_info (dict): Information about the package.
|
85
|
-
file_hash (str): SHA256 hash of the downloaded source.
|
86
|
-
swhid (str): Software Heritage ID of the package.
|
87
|
-
"""
|
55
|
+
def save_package_report(self, package, package_info, source_files):
|
88
56
|
# Generate report filename
|
89
|
-
|
57
|
+
purl_name = package_info.get("name")
|
58
|
+
purl_version = package_info.get("version")
|
59
|
+
pkg_type = "deb" if self.pm_type == "apt" else "rpm" if self.pm_type == "yum" else self.pm_type
|
60
|
+
os_type = self.os_type
|
90
61
|
date_str = datetime.now().strftime("%Y%m%d")
|
91
|
-
report_filename = f"ossa-{date_str}-{
|
62
|
+
report_filename = f"ossa-{date_str}-{hash(package) % 10000}-{purl_name}.json"
|
92
63
|
report_path = os.path.join(self.output_dir, report_filename)
|
93
64
|
|
65
|
+
if package_info.get("version") != "*":
|
66
|
+
affected_versions = ["*.*", package_info.get("version")]
|
67
|
+
else:
|
68
|
+
affected_versions = ["*.*"]
|
69
|
+
|
70
|
+
artifacts = []
|
71
|
+
for source_file in source_files:
|
72
|
+
artifact = {}
|
73
|
+
|
74
|
+
# Clean up the artifact name
|
75
|
+
artifact_name = os.path.basename(source_file)
|
76
|
+
if "--" in artifact_name:
|
77
|
+
artifact_name = artifact_name.split("--")[-1]
|
78
|
+
artifact['url'] = "file://" + artifact_name
|
79
|
+
|
80
|
+
file_hash = calculate_file_hash(source_file)
|
81
|
+
artifact['hashes'] = file_hash
|
82
|
+
|
83
|
+
# Extract source code directory in temp_dir
|
84
|
+
# Only required if calculating SWHID
|
85
|
+
source_dir = os.path.join(self.temp_dir, package)
|
86
|
+
os.makedirs(source_dir, exist_ok=True)
|
87
|
+
swhid = calculate_swhid(source_dir, source_file)
|
88
|
+
artifact['swhid'] = swhid
|
89
|
+
|
90
|
+
artifacts.append(artifact)
|
91
|
+
|
94
92
|
# Create the report content
|
95
93
|
report = {
|
96
|
-
"id": f"OSSA-{date_str}-{
|
94
|
+
"id": f"OSSA-{date_str}-{hash(purl_name) % 10000}",
|
97
95
|
"version": "1.0.0",
|
98
|
-
"severity": "
|
99
|
-
"
|
100
|
-
"
|
96
|
+
"severity": package_info.get("severity", []),
|
97
|
+
"description": package_info.get("rason", []),
|
98
|
+
"title": f"Advisory for {purl_name}",
|
99
|
+
"package_name": purl_name,
|
101
100
|
"publisher": "Generated by OSSA Collector",
|
102
101
|
"last_updated": datetime.now().isoformat(),
|
103
102
|
"approvals": [{"consumption": True, "externalization": True}],
|
104
|
-
"description":
|
105
|
-
"purls": [f"pkg:{
|
106
|
-
"regex": [f"^pkg:{
|
107
|
-
"affected_versions":
|
108
|
-
"artifacts":
|
109
|
-
{
|
110
|
-
"url": f"file://{source_file}",
|
111
|
-
"hashes": {"sha256": file_hash},
|
112
|
-
"swhid": swhid
|
113
|
-
}
|
114
|
-
],
|
103
|
+
"description": package_info.get("summary", []),
|
104
|
+
"purls": [f"pkg:{pkg_type}/{os_type}/{purl_name}@{purl_version}"],
|
105
|
+
"regex": [f"^pkg:{pkg_type}/{os_type}/{purl_name}.*"],
|
106
|
+
"affected_versions": affected_versions,
|
107
|
+
"artifacts": artifacts,
|
115
108
|
"licenses": package_info.get("licenses", []),
|
116
109
|
"aliases": package_info.get("aliases", []),
|
117
110
|
"references": package_info.get("references", [])
|
@@ -12,15 +12,6 @@ class GitHubUploader:
|
|
12
12
|
self.base_url = "api.github.com"
|
13
13
|
|
14
14
|
def upload_file(self, file_path, repo_path, commit_message="Add scanner results"):
|
15
|
-
"""
|
16
|
-
Uploads a file to a GitHub repository.
|
17
|
-
|
18
|
-
Args:
|
19
|
-
file_path (str): Local file path to upload.
|
20
|
-
repo_path (str): Path in the GitHub repository.
|
21
|
-
commit_message (str): Commit message for the upload.
|
22
|
-
"""
|
23
|
-
# Read the file and encode it in base64
|
24
15
|
with open(file_path, "rb") as f:
|
25
16
|
content = f.read()
|
26
17
|
encoded_content = base64.b64encode(content).decode("utf-8")
|
@@ -54,13 +45,6 @@ class GitHubUploader:
|
|
54
45
|
raise Exception(f"GitHub API Error: {response.status}")
|
55
46
|
|
56
47
|
def upload_results(self, results_dir, repo_dir):
|
57
|
-
"""
|
58
|
-
Uploads all files in a directory to a specified path in the GitHub repo.
|
59
|
-
|
60
|
-
Args:
|
61
|
-
results_dir (str): Local directory containing results to upload.
|
62
|
-
repo_dir (str): Target directory in the GitHub repository.
|
63
|
-
"""
|
64
48
|
for root, _, files in os.walk(results_dir):
|
65
49
|
for file_name in files:
|
66
50
|
local_path = os.path.join(root, file_name)
|
@@ -0,0 +1,119 @@
|
|
1
|
+
import subprocess
|
2
|
+
import os
|
3
|
+
import shutil
|
4
|
+
import glob
|
5
|
+
|
6
|
+
def cleanup_extracted_files(folder_path):
    """Delete everything inside ``folder_path`` while keeping the folder itself.

    Entries are enumerated with ``os.scandir`` rather than ``glob("*")`` so
    hidden (dot-prefixed) files are removed as well. Symbolic links are
    unlinked instead of being handed to ``shutil.rmtree``, which refuses
    to operate on them. Cleanup is best-effort: a failure on one entry is
    printed and the remaining entries are still processed.

    Args:
        folder_path: Directory whose contents should be removed. A path
            that is not an existing directory is silently ignored.
    """
    if not os.path.isdir(folder_path):
        # Nothing to clean up.
        return
    try:
        with os.scandir(folder_path) as scanner:
            entries = list(scanner)
    except OSError as e:
        print(f"Failed to clean up {folder_path}: {e}")
        return
    for entry in entries:
        try:
            if entry.is_dir(follow_symlinks=False):
                shutil.rmtree(entry.path)  # Recursively delete directories
                print(f"Deleted directory: {entry.path}")
            else:
                os.remove(entry.path)  # Delete files and symlinks
                print(f"Deleted file: {entry.path}")
        except OSError as e:
            print(f"Failed to clean up {folder_path}: {e}")
|
18
|
+
|
19
|
+
def download_source(package_manager, package_name, output_dir):
    """Download the source archives of a package.

    Args:
        package_manager: One of ``'apt'``, ``'yum'``, ``'dnf'`` or ``'brew'``.
        package_name: Name of the package to fetch.
        output_dir: Base directory that receives the downloaded files.

    Returns:
        A list of paths to the downloaded source archives, or ``None`` when
        the download fails or the package manager is not supported (the
        failure is printed rather than raised).
    """
    try:
        if package_manager == 'apt':
            # ``apt-get source`` writes into the current directory and ``-d``
            # only means --download-only, so the target directory has to be
            # supplied as the working directory, not as an argument.
            work_dir = _package_work_dir(output_dir, package_name)
            cmd = ['apt-get', 'source', '--download-only', package_name]
            subprocess.run(cmd, check=True, cwd=work_dir)
            return _list_source_archives(work_dir)
        elif package_manager in ['yum', 'dnf']:
            work_dir = _package_work_dir(output_dir, package_name)
            source_path = get_rpm_source_package(package_name, work_dir)
            if not source_path:
                print(f"Source package for {package_name} not found in {work_dir}.")
                return None
            # Called for its side effect: it unpacks the SRPM into work_dir,
            # which is what makes the tarballs visible to the listing below.
            extract_rpm_spec_file(source_path, work_dir)
            return extract_rpm_tarballs(source_path, work_dir)
        elif package_manager == 'brew':
            # Fetch the source tarball
            cmd = ['brew', 'fetch', '--build-from-source', package_name]
            subprocess.run(cmd, check=True, capture_output=True, text=True)
            cache_dir = subprocess.run(
                ['brew', '--cache', package_name],
                capture_output=True,
                text=True,
                check=True
            ).stdout.strip()
            prefixes_to_remove = ['aarch64-elf-', 'arm-none-eabi-', 'other-prefix-']
            stripped_package_name = package_name
            for prefix in prefixes_to_remove:
                if package_name.startswith(prefix):
                    stripped_package_name = package_name[len(prefix):]
                    break
            cache_folder = os.path.dirname(cache_dir)
            tarball_pattern = os.path.join(cache_folder, f"*{stripped_package_name}*")
            matching_files = glob.glob(tarball_pattern)
            if not matching_files:
                raise FileNotFoundError(f"Tarball not found for {package_name} in {cache_folder}")
            tarball_path = matching_files[0]
            os.makedirs(output_dir, exist_ok=True)
            target_path = os.path.join(output_dir, os.path.basename(tarball_path))
            shutil.move(tarball_path, target_path)
            return [target_path]
        else:
            raise ValueError("Unsupported package manager")
    except subprocess.CalledProcessError as e:
        print(f"Command failed: {e}")
        return None
    except Exception as e:
        print(f"Error: {e}")
        return None


def _package_work_dir(output_dir, package_name):
    """Create and return a download directory dedicated to one package."""
    import hashlib  # local import: only this helper needs it

    # A digest of the name is stable across runs and, unlike
    # ``hash(name) % 10000``, does not put unrelated packages in one bucket.
    digest = hashlib.sha1(package_name.encode("utf-8")).hexdigest()[:16]
    work_dir = os.path.join(output_dir, digest)
    os.makedirs(work_dir, exist_ok=True)
    return work_dir


def _list_source_archives(directory):
    """Return the sorted paths of the tar archives found in ``directory``."""
    suffixes = (".tar.gz", ".tar.bz2", ".tar.xz", ".tgz")
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.endswith(suffixes)
    ]
|
72
|
+
|
73
|
+
def get_rpm_source_package(package_name, dest_dir="./source_packages"):
    """Download the source RPM of ``package_name`` with ``yumdownloader``.

    Args:
        package_name: Package whose source RPM should be fetched.
        dest_dir: Directory passed to ``--destdir``; created if missing.

    Returns:
        Path to a ``.src.rpm`` file inside ``dest_dir``, or ``None`` when the
        command cannot be run, exits non-zero, or leaves no source RPM.
        Files that appeared during this call are preferred over files that
        were already present; ties are resolved alphabetically.
    """
    os.makedirs(dest_dir, exist_ok=True)
    already_present = set(os.listdir(dest_dir))
    command = ["yumdownloader", "--source", "--destdir", dest_dir, package_name]
    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    except OSError as e:
        # Typically: yumdownloader is not installed.
        print(f"Failed to run yumdownloader for {package_name}: {e}")
        return None
    if result.returncode != 0:
        return None
    source_rpms = sorted(f for f in os.listdir(dest_dir) if f.endswith(".src.rpm"))
    fresh = [f for f in source_rpms if f not in already_present]
    candidates = fresh or source_rpms
    if candidates:
        return os.path.join(dest_dir, candidates[0])
    return None
|
82
|
+
|
83
|
+
def extract_rpm_spec_file(srpm_path, dest_dir="./extracted_specs"):
    """Unpack a source RPM into ``dest_dir`` and return its spec file.

    Runs the equivalent of ``rpm2cpio SRPM | cpio -idmv -D DEST`` without a
    shell, so paths containing spaces or shell metacharacters are passed
    through verbatim, and a failure of either command is detected (a shell
    pipeline only reports the status of its last command).

    Args:
        srpm_path: Path to the ``.src.rpm`` file.
        dest_dir: Directory that receives the unpacked files; created if
            missing.

    Returns:
        Path of the alphabetically first ``*.spec`` file in ``dest_dir``, or
        ``None`` when unpacking fails or no spec file is present.
    """
    os.makedirs(dest_dir, exist_ok=True)
    try:
        # cpio's standard output is written to the same log file as before.
        with open("/tmp/ossa_gen.log", "w") as log_file:
            producer = subprocess.Popen(["rpm2cpio", srpm_path], stdout=subprocess.PIPE)
            try:
                consumer = subprocess.run(
                    ["cpio", "-idmv", "-D", dest_dir],
                    stdin=producer.stdout,
                    stdout=log_file,
                )
            finally:
                # Close our copy of the pipe and reap rpm2cpio in every case.
                producer.stdout.close()
                producer_status = producer.wait()
        if producer_status != 0:
            raise subprocess.CalledProcessError(producer_status, ["rpm2cpio", srpm_path])
        consumer.check_returncode()
        spec_files = sorted(
            os.path.join(dest_dir, f) for f in os.listdir(dest_dir) if f.endswith(".spec")
        )
        if spec_files:
            return spec_files[0]
    except subprocess.CalledProcessError as e:
        print(f"Failed to extract spec file from {srpm_path}: {e}")
    except OSError as e:
        # rpm2cpio/cpio not installed, or the log file cannot be written.
        print(f"Failed to extract spec file from {srpm_path}: {e}")
    return None
|
94
|
+
|
95
|
+
def extract_rpm_tarballs(srpm_path, dest_dir="./extracted_sources"):
    """List the source tarballs present in ``dest_dir``.

    Despite its name this function does not unpack ``srpm_path``; it only
    reports archives that are already in ``dest_dir``.

    Args:
        srpm_path: Path of the source RPM; used only in the error message.
        dest_dir: Directory to inspect; created if missing.

    Returns:
        Alphabetically sorted paths of files ending in ``.tar.gz``,
        ``.tar.bz2``, ``.tar.xz`` or ``.tgz``. Empty when there are none or
        the directory cannot be read.
    """
    os.makedirs(dest_dir, exist_ok=True)
    suffixes = (".tar.gz", ".tar.bz2", ".tar.xz", ".tgz")
    try:
        names = sorted(os.listdir(dest_dir))
    except OSError as e:
        # No subprocess runs here, so OSError is the failure that can occur.
        print(f"Failed to extract tarballs from {srpm_path}: {e}")
        return []
    return [os.path.join(dest_dir, name) for name in names if name.endswith(suffixes)]
|
103
|
+
|
104
|
+
def extract_rpm_info_from_spec(spec_file_path):
    """Read the URL, primary source and license tags from an RPM spec file.

    Tag names are compared case-insensitively and ``Source:`` is accepted
    as well as ``Source0:``. The file is decoded as UTF-8 with undecodable
    bytes replaced, so a legacy-encoded changelog cannot abort the parse.
    When a tag occurs more than once, the last occurrence wins.

    Args:
        spec_file_path: Path to the ``.spec`` file.

    Returns:
        A ``(project_url, source_url, license)`` tuple; each element is
        ``None`` when the tag is absent or the file does not exist.
    """
    project_url = None
    source_url = None
    license_text = None
    try:
        with open(spec_file_path, "r", encoding="utf-8", errors="replace") as spec_file:
            for line in spec_file:
                tag, separator, value = line.partition(":")
                if not separator:
                    continue
                # Only trailing whitespace is dropped: the tag must still
                # start in the first column.
                tag = tag.rstrip().lower()
                value = value.strip()
                if tag == "url":
                    project_url = value
                elif tag in ("source0", "source"):
                    source_url = value
                elif tag == "license":
                    license_text = value
    except FileNotFoundError:
        print(f"Spec file not found: {spec_file_path}")
    return project_url, source_url, license_text
|
@@ -0,0 +1,34 @@
|
|
1
|
+
import os
|
2
|
+
import json
|
3
|
+
import hashlib
|
4
|
+
import ssdeep
|
5
|
+
|
6
|
+
def calculate_file_hash(file_path):
    """Collect every digest computed for a file into one mapping.

    Args:
        file_path: Path of the file to fingerprint.

    Returns:
        A dict with the keys ``'sha1'``, ``'sha256'``, ``'ssdeep'`` and
        ``'swhid'``, each holding the value returned by the matching
        ``compute_*`` helper.
    """
    return {
        'sha1': compute_sha1(file_path),
        'sha256': compute_sha256(file_path),
        'ssdeep': compute_fuzzy_hash(file_path),
        'swhid': compute_swhid(file_path),
    }
|
13
|
+
|
14
|
+
def compute_sha1(file_path):
    """Return the hexadecimal SHA-1 digest of a file.

    The file is read in 4096-byte pieces so it is never loaded into memory
    as a whole.

    Args:
        file_path: Path of the file to hash.
    """
    digest = hashlib.sha1()
    with open(file_path, "rb") as stream:
        while True:
            piece = stream.read(4096)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest()
|
20
|
+
|
21
|
+
def compute_sha256(file_path):
    """Return the hexadecimal SHA-256 digest of a file.

    The file is read in 4096-byte pieces so it is never loaded into memory
    as a whole.

    Args:
        file_path: Path of the file to hash.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            piece = stream.read(4096)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest()
|
27
|
+
|
28
|
+
def compute_fuzzy_hash(file_path):
    """Return the ssdeep fuzzy hash of the file at ``file_path``.

    Thin wrapper that delegates to ``ssdeep.hash_from_file``.
    """
    return ssdeep.hash_from_file(file_path)
|
30
|
+
|
31
|
+
def compute_swhid(file_path):
    """Return the SWHID content identifier (``swh:1:cnt:...``) of a file.

    A content SWHID carries the Git blob hash of the file: the SHA-1 of the
    header ``b"blob <size>\\0"`` followed by the raw bytes. The plain SHA-1
    of the bytes is a different value and does not form a valid identifier.

    Args:
        file_path: Path of the file to identify.
    """
    hasher = hashlib.sha1()
    with open(file_path, "rb") as f:
        # The size goes into the header, so it is read before the content.
        size = os.fstat(f.fileno()).st_size
        hasher.update(b"blob %d\0" % size)
        for chunk in iter(lambda: f.read(65536), b""):
            hasher.update(chunk)
    return f"swh:1:cnt:{hasher.hexdigest()}"
|
@@ -1,6 +1,12 @@
|
|
1
|
+
import os
|
1
2
|
import distro
|
3
|
+
import subprocess
|
2
4
|
|
3
5
|
def detect_os():
    """Return the distribution identifier reported by ``distro.id()``."""
    return distro.id()
|
8
|
+
|
9
|
+
def detect_pm():
|
4
10
|
dist = distro.id()
|
5
11
|
if 'ubuntu' in dist or 'debian' in dist:
|
6
12
|
return 'apt'
|
@@ -10,4 +16,3 @@ def detect_os():
|
|
10
16
|
return 'brew'
|
11
17
|
else:
|
12
18
|
raise ValueError("Unsupported OS")
|
13
|
-
|
@@ -0,0 +1,180 @@
|
|
1
|
+
import subprocess
|
2
|
+
import re
|
3
|
+
|
4
|
+
|
5
|
+
def list_packages(package_manager, max_packages=500000):
    """List the package names known to a package manager.

    Args:
        package_manager: One of ``'apt'``, ``'yum'``, ``'dnf'`` or ``'brew'``.
        max_packages: Upper bound on the number of names returned. The
            bound is exact; the earlier implementation returned one name
            more than the limit.

    Returns:
        The first whitespace-separated token of every non-blank output line
        that does not start with ``==>``, in output order.

    Raises:
        ValueError: If ``package_manager`` is not supported.
    """
    if package_manager == 'apt':
        cmd = ['apt-cache', 'search', '.']
    elif package_manager in ['yum', 'dnf']:
        cmd = ['repoquery', '--all']
    elif package_manager == 'brew':
        cmd = ['brew', 'search', '.']
    else:
        raise ValueError("ER1: Unsupported package manager for search")

    result = subprocess.run(cmd, capture_output=True, text=True)
    return _extract_package_names(result.stdout, max_packages)


def _extract_package_names(listing, max_packages):
    """Pull at most ``max_packages`` package names out of raw command output."""
    names = []
    for line in listing.splitlines():
        if len(names) >= max_packages:
            break
        # Blank lines and "==>" section headers carry no package name.
        if not line.strip() or line.startswith("==>"):
            continue
        names.append(line.split()[0])
    return names
|
40
|
+
|
41
|
+
|
42
|
+
def get_package_info(package_manager, package_name):
    """Query a package manager for one package and parse the answer.

    Args:
        package_manager: One of ``'apt'``, ``'yum'``, ``'dnf'`` or ``'brew'``.
        package_name: Package to look up.

    Returns:
        The dict produced by the parser for that package manager, or
        ``None`` when the query command exits with a non-zero status.

    Raises:
        ValueError: If ``package_manager`` is not supported.
    """
    command_table = (
        (('apt',), ['apt-cache', 'show']),
        (('yum', 'dnf'), ['repoquery', '--info']),
        (('brew',), ['brew', 'info']),
    )
    base_command = next(
        (argv for managers, argv in command_table if package_manager in managers),
        None,
    )
    if base_command is None:
        raise ValueError("ER: Unsupported package manager for info")

    try:
        completed = subprocess.run(
            base_command + [package_name],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Command failed: {e}")
        return None

    text = completed.stdout
    if package_manager == 'brew':
        return parse_brew_info(text)
    if package_manager == 'apt':
        return parse_apt_info(text)
    return parse_yum_info(text)
|
64
|
+
|
65
|
+
|
66
|
+
def parse_brew_info(output):
    """Parse ``brew info`` output into a package-info dict.

    Args:
        output: Text printed by ``brew info <formula>``.

    Returns:
        A dict with the keys ``name``, ``version``, ``licenses``,
        ``severity``, ``references`` and ``summary``, each defaulting to
        ``"NOASSERTION"``. When a ``License:`` line is present, the key
        ``rason`` (sic) holds the classification reason.
    """
    info = {
        "name": "NOASSERTION",
        "version": "NOASSERTION",
        "licenses": "NOASSERTION",
        "severity": "NOASSERTION",
        "references": "NOASSERTION",
        "summary": "NOASSERTION",
    }

    for i, line in enumerate(output.splitlines()):
        if line.startswith("==>") and ":" in line:
            # Drop exactly the "==>" marker; str.lstrip would strip any run
            # of '=' and '>' characters instead of the prefix.
            header = line[3:].strip()
            with_version = re.match(r"([^:]+):.*?([\d\.a-zA-Z]+)\s*\(", header)
            name_only = re.match(r"([^:]+):", header)
            if with_version:
                info["name"] = with_version.group(1).strip()
                info["version"] = with_version.group(2).strip()
            elif name_only:
                info["name"] = name_only.group(1).strip()
                info["version"] = "*"
            # A header with nothing before the colon matches neither
            # pattern and is ignored; it used to raise or reuse stale values.
        elif i == 1:
            info["summary"] = line.strip()
        elif line.startswith("https://"):  # The website URL
            info["references"] = line.strip()
        elif line.startswith("License:"):  # The license information
            info["licenses"] = extract_spdx_ids(line.split(":", 1)[1].strip())
            # "rason" is the spelling the report writer reads; keep it.
            info["severity"], info["rason"] = license_classificaton(info["licenses"])
    return info
|
99
|
+
|
100
|
+
def parse_yum_info(output):
    """Parse ``repoquery --info`` output into a package-info dict.

    Lines are split at the first colon and the key is compared exactly, so
    a line that merely starts with a known key but has no colon is skipped
    instead of raising ``IndexError``. When a key occurs more than once,
    the last occurrence wins.

    Args:
        output: Text printed by ``repoquery --info <package>``.

    Returns:
        A dict with the keys ``name``, ``version``, ``licenses``,
        ``severity``, ``references`` and ``summary``, each defaulting to
        ``"NOASSERTION"``. When a ``License`` line is present, the key
        ``rason`` (sic) holds the classification reason.
    """
    info = {
        "name": "NOASSERTION",
        "version": "NOASSERTION",
        "licenses": "NOASSERTION",
        "severity": "NOASSERTION",
        "references": "NOASSERTION",
        "summary": "NOASSERTION",
    }
    plain_fields = {
        "URL": "references",
        "Name": "name",
        "Version": "version",
        "Summary": "summary",
    }
    for line in output.splitlines():
        key, separator, value = line.partition(":")
        if not separator:
            continue
        # Keys are padded before the colon; the key must still start in
        # the first column.
        key = key.rstrip()
        value = value.strip()
        if key == "License":
            info["licenses"] = extract_spdx_ids(value)
            # "rason" is the spelling the report writer reads; keep it.
            info["severity"], info["rason"] = license_classificaton(info["licenses"])
        elif key in plain_fields:
            info[plain_fields[key]] = value
    return info
|
123
|
+
|
124
|
+
def parse_apt_info(output):
    """Parse ``apt-cache show`` output into a package-info dict.

    The result has the same shape as the brew and yum parsers so the report
    writer can treat all three alike. Output without any License line no
    longer raises ``KeyError``, and ``severity`` is a plain string rather
    than the ``(severity, reason)`` tuple. When a field occurs more than
    once, the last occurrence wins.

    Args:
        output: Text printed by ``apt-cache show <package>``.

    Returns:
        A dict with the keys ``name``, ``version``, ``summary``,
        ``licenses``, ``copyright``, ``references`` and ``severity``, each
        defaulting to ``"NOASSERTION"``. ``references`` carries the
        ``Homepage`` value. When license text is found, the key ``rason``
        (sic) holds the classification reason.
    """
    info = {
        "name": "NOASSERTION",
        "version": "NOASSERTION",
        "summary": "NOASSERTION",
        "licenses": "NOASSERTION",
        "copyright": "NOASSERTION",
        "references": "NOASSERTION",
        "severity": "NOASSERTION",
    }
    exact_fields = {
        "Package": "name",
        "Version": "version",
        "Homepage": "references",
        "Description": "summary",
        "Description-en": "summary",
    }
    license_text = None

    for line in output.splitlines():
        field, separator, value = line.partition(":")
        value = value.strip()
        if separator and field in exact_fields:
            info[exact_fields[field]] = value
        elif separator and "License" in line:
            # Loose match kept from the original, but a colon is required.
            license_text = value
        elif "Copyright" in line:
            info["copyright"] = line.strip()

    if license_text is not None:
        info["licenses"] = extract_spdx_ids(license_text)
        # "rason" is the spelling the report writer reads; keep it.
        info["severity"], info["rason"] = license_classificaton(info["licenses"])
    return info
|
146
|
+
|
147
|
+
def extract_spdx_ids(license_string):
    """Reduce a license expression to a sorted, de-duplicated identifier list.

    The expression is split on ``AND``/``OR`` (any letter case, surrounded
    by whitespace) and on parentheses; empty fragments are dropped.

    Args:
        license_string: Raw license expression.

    Returns:
        The unique identifiers joined with ``", "`` in sorted order, or
        ``"No valid SPDX licenses found"`` when nothing remains.
    """
    fallback = "No valid SPDX licenses found"
    if license_string.strip() == "":
        return fallback
    fragments = re.split(r'(?i)\sAND\s|\sOR\s|\(|\)', license_string)
    identifiers = {fragment.strip() for fragment in fragments}
    identifiers.discard("")
    if not identifiers:
        return fallback
    return ", ".join(sorted(identifiers))
|
154
|
+
|
155
|
+
def license_classificaton(licenses):
    """Classify a comma-separated license list by its most restrictive entry.

    Each license is matched case-insensitively by prefix against three
    tiers: copyleft, weak copyleft and permissive. The first tier, in that
    order, that any license falls into decides the result.

    Args:
        licenses: Comma-separated license identifiers.

    Returns:
        A ``(severity, reason)`` tuple. Without any match the result is
        ``("Informational", "PURL identification for OSSBOMER")``.
    """
    # Ordered from most to least restrictive.
    tiers = (
        (
            ("GPL", "AGPL"),
            "High",
            "This package contains copyleft licenses, which impose strong obligations.",
        ),
        (
            ("LGPL", "MPL", "EPL", "CDDL"),
            "Medium",
            "This package contains weak copyleft licenses, which impose moderate obligations.",
        ),
        (
            ("MIT", "BSD", "Apache"),
            "Informational",
            "This package contains permissive licenses, which impose minimal obligations.",
        ),
    )
    normalized = [entry.strip().upper() for entry in licenses.split(",")]
    for prefixes, severity, reason in tiers:
        upper_prefixes = tuple(prefix.upper() for prefix in prefixes)
        if any(entry.startswith(upper_prefixes) for entry in normalized):
            return severity, reason
    return "Informational", "PURL identification for OSSBOMER"
|
@@ -0,0 +1,35 @@
|
|
1
|
+
import os
|
2
|
+
import glob
|
3
|
+
import shutil
|
4
|
+
import subprocess
|
5
|
+
|
6
|
+
def calculate_swhid(directory_path, file_path):
    """Unpack a tarball and return the directory SWHID of its contents.

    ``tar`` is invoked with an argument list instead of a shell string, so
    paths containing spaces or shell metacharacters are handled verbatim.
    The extraction directory is emptied exactly once, on every exit path.

    Args:
        directory_path: Scratch directory to extract into; created if
            missing and emptied again before returning.
        file_path: Tarball to unpack.

    Returns:
        The ``swh:1:dir:...`` identifier printed by ``swh.identify``, or
        ``None`` when extraction or identification fails.
    """
    os.makedirs(directory_path, exist_ok=True)
    try:
        try:
            subprocess.run(["tar", "-xf", file_path, "-C", directory_path], check=True)
        except OSError as e:
            # Without a shell a missing tar binary raises OSError instead of
            # producing a non-zero exit status; report it the same way.
            print(f"Failed to process tarball {file_path}: {e}")
            return None
        result = subprocess.run(
            ["swh.identify", directory_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        if result.returncode != 0:
            print(f"Failed to compute folder SWHID: {result.stderr}")
            return None
        for line in result.stdout.strip().split("\n"):
            if line.startswith("swh:1:dir:"):
                # The identifier is the first tab-separated field.
                return line.split("\t")[0]
    except subprocess.CalledProcessError as e:
        print(f"Failed to process tarball {file_path}: {e}")
    finally:
        cleanup_extracted_files(directory_path)
    return None
|
26
|
+
|
27
|
+
def cleanup_extracted_files(directory_path):
    """Delete everything inside ``directory_path`` while keeping the directory.

    Entries are enumerated with ``os.scandir`` rather than ``glob("*")`` so
    hidden (dot-prefixed) files are removed too; leftovers from one archive
    would otherwise be hashed together with the next one. Symbolic links
    are unlinked instead of being handed to ``shutil.rmtree``. Cleanup is
    best-effort: a failure on one entry is printed and the remaining
    entries are still processed.

    Args:
        directory_path: Directory whose contents should be removed. A path
            that is not an existing directory is silently ignored.
    """
    if not os.path.isdir(directory_path):
        return
    try:
        with os.scandir(directory_path) as scanner:
            entries = list(scanner)
    except OSError as e:
        print(f"Failed to clean up {directory_path}: {e}")
        return
    for entry in entries:
        try:
            if entry.is_dir(follow_symlinks=False):
                shutil.rmtree(entry.path)
            else:
                os.remove(entry.path)
        except OSError as e:
            print(f"Failed to clean up {directory_path}: {e}")
|
@@ -1,12 +1,12 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ossa_scanner
|
3
|
-
Version: 0.1.
|
4
|
-
Summary:
|
3
|
+
Version: 0.1.6
|
4
|
+
Summary: Open Source Software Advisory generator for Core and Base Linux Packages.
|
5
5
|
Home-page: https://github.com/oscarvalenzuelab/ossa_scanner
|
6
6
|
Author: Oscar Valenzuela
|
7
7
|
Author-email: oscar.valenzuela.b@gmail.com
|
8
8
|
License: MIT
|
9
|
-
Keywords: linux packages SWHID open-source compliance
|
9
|
+
Keywords: linux packages SWHID open-source compliance ossa advisory
|
10
10
|
Classifier: Development Status :: 3 - Alpha
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
12
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -23,6 +23,7 @@ License-File: LICENSE
|
|
23
23
|
Requires-Dist: click
|
24
24
|
Requires-Dist: swh.model
|
25
25
|
Requires-Dist: distro
|
26
|
+
Requires-Dist: ssdeep
|
26
27
|
|
27
28
|
# ossa_scanner
|
28
29
|
Open Source Advisory Scanner (Generator)
|
@@ -20,7 +20,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
20
20
|
setup(
|
21
21
|
name="ossa_scanner",
|
22
22
|
version=get_version(),
|
23
|
-
description="
|
23
|
+
description="Open Source Software Advisory generator for Core and Base Linux Packages.",
|
24
24
|
long_description=long_description,
|
25
25
|
long_description_content_type='text/markdown',
|
26
26
|
author="Oscar Valenzuela",
|
@@ -32,6 +32,7 @@ setup(
|
|
32
32
|
"click",
|
33
33
|
"swh.model",
|
34
34
|
"distro",
|
35
|
+
"ssdeep",
|
35
36
|
],
|
36
37
|
entry_points={
|
37
38
|
"console_scripts": [
|
@@ -51,5 +52,5 @@ setup(
|
|
51
52
|
"Programming Language :: Python :: 3.10",
|
52
53
|
"Operating System :: POSIX :: Linux",
|
53
54
|
],
|
54
|
-
keywords="linux packages SWHID open-source compliance",
|
55
|
+
keywords="linux packages SWHID open-source compliance ossa advisory",
|
55
56
|
)
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = "0.1.3"
|
@@ -1,47 +0,0 @@
|
|
1
|
-
import subprocess
|
2
|
-
import os
|
3
|
-
import shutil
|
4
|
-
import glob
|
5
|
-
|
6
|
-
def download_source(package_manager, package_name, output_dir):
|
7
|
-
try:
|
8
|
-
if package_manager == 'apt':
|
9
|
-
cmd = ['apt-get', 'source', package_name, '-d', output_dir]
|
10
|
-
subprocess.run(cmd, check=True)
|
11
|
-
elif package_manager in ['yum', 'dnf']:
|
12
|
-
cmd = ['dnf', 'download', '--source', package_name, '--downloaddir', output_dir]
|
13
|
-
subprocess.run(cmd, check=True)
|
14
|
-
elif package_manager == 'brew':
|
15
|
-
# Fetch the source tarball
|
16
|
-
cmd = ['brew', 'fetch', '--build-from-source', package_name]
|
17
|
-
subprocess.run(cmd, check=True, capture_output=True, text=True)
|
18
|
-
cache_dir = subprocess.run(
|
19
|
-
['brew', '--cache', package_name],
|
20
|
-
capture_output=True,
|
21
|
-
text=True,
|
22
|
-
check=True
|
23
|
-
).stdout.strip()
|
24
|
-
prefixes_to_remove = ['aarch64-elf-', 'arm-none-eabi-', 'other-prefix-']
|
25
|
-
stripped_package_name = package_name
|
26
|
-
for prefix in prefixes_to_remove:
|
27
|
-
if package_name.startswith(prefix):
|
28
|
-
stripped_package_name = package_name[len(prefix):]
|
29
|
-
break
|
30
|
-
cache_folder = os.path.dirname(cache_dir)
|
31
|
-
tarball_pattern = os.path.join(cache_folder, f"*{stripped_package_name}*")
|
32
|
-
matching_files = glob.glob(tarball_pattern)
|
33
|
-
if not matching_files:
|
34
|
-
raise FileNotFoundError(f"Tarball not found for {package_name} in {cache_folder}")
|
35
|
-
tarball_path = matching_files[0]
|
36
|
-
os.makedirs(output_dir, exist_ok=True)
|
37
|
-
target_path = os.path.join(output_dir, os.path.basename(tarball_path))
|
38
|
-
shutil.move(tarball_path, target_path)
|
39
|
-
return target_path
|
40
|
-
else:
|
41
|
-
raise ValueError("Unsupported package manager")
|
42
|
-
except subprocess.CalledProcessError as e:
|
43
|
-
print(f"Command failed: {e}")
|
44
|
-
return None
|
45
|
-
except Exception as e:
|
46
|
-
print(f"Error: {e}")
|
47
|
-
return None
|
@@ -1,128 +0,0 @@
|
|
1
|
-
import subprocess
|
2
|
-
|
3
|
-
|
4
|
-
def list_packages(package_manager):
|
5
|
-
if package_manager == 'apt':
|
6
|
-
result = subprocess.run(
|
7
|
-
['apt-cache', 'search', '.'],
|
8
|
-
capture_output=True,
|
9
|
-
text=True
|
10
|
-
)
|
11
|
-
elif package_manager in ['yum', 'dnf']:
|
12
|
-
result = subprocess.run(
|
13
|
-
['repoquery', '--all'],
|
14
|
-
capture_output=True,
|
15
|
-
text=True
|
16
|
-
)
|
17
|
-
elif package_manager == 'brew':
|
18
|
-
result = subprocess.run(
|
19
|
-
['brew', 'search', '.'],
|
20
|
-
capture_output=True,
|
21
|
-
text=True
|
22
|
-
)
|
23
|
-
else:
|
24
|
-
raise ValueError("ER1: Unsupported package manager for search")
|
25
|
-
|
26
|
-
packages = result.stdout.splitlines()
|
27
|
-
extracted_packages = []
|
28
|
-
max_packages = 5
|
29
|
-
k_packages = 0
|
30
|
-
for line in packages:
|
31
|
-
if not line.strip() or line.startswith("==>"):
|
32
|
-
continue
|
33
|
-
extracted_packages.append(line.split()[0])
|
34
|
-
if k_packages >= max_packages:
|
35
|
-
break
|
36
|
-
k_packages += 1
|
37
|
-
|
38
|
-
return extracted_packages
|
39
|
-
|
40
|
-
|
41
|
-
def get_package_info(package_manager, package_name):
|
42
|
-
if package_manager == 'apt':
|
43
|
-
cmd = ['apt-cache', 'show', package_name]
|
44
|
-
elif package_manager in ['yum', 'dnf']:
|
45
|
-
cmd = ['repoquery', '--info', package_name]
|
46
|
-
elif package_manager == 'brew':
|
47
|
-
cmd = ['brew', 'info', package_name]
|
48
|
-
else:
|
49
|
-
raise ValueError("ER: Unsupported package manager for info")
|
50
|
-
|
51
|
-
try:
|
52
|
-
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
53
|
-
output = result.stdout
|
54
|
-
|
55
|
-
# Parse the output based on the package manager
|
56
|
-
if package_manager == 'brew':
|
57
|
-
return parse_brew_info(output)
|
58
|
-
elif package_manager in ['yum', 'dnf']:
|
59
|
-
return parse_yum_info(output)
|
60
|
-
elif package_manager == 'apt':
|
61
|
-
return parse_apt_info(output)
|
62
|
-
except subprocess.CalledProcessError as e:
|
63
|
-
print(f"Command failed: {e}")
|
64
|
-
return None
|
65
|
-
|
66
|
-
|
67
|
-
def parse_brew_info(output):
|
68
|
-
"""Parses brew info output to extract license, website, and description."""
|
69
|
-
info = {}
|
70
|
-
lines = output.splitlines()
|
71
|
-
info["license"] = "Unknown"
|
72
|
-
info["website"] = "Unknown"
|
73
|
-
info["description"] = "Unknown"
|
74
|
-
|
75
|
-
for i, line in enumerate(lines):
|
76
|
-
if i == 1: # The description is usually on the second line
|
77
|
-
info["description"] = line.strip()
|
78
|
-
elif line.startswith("https://"): # The website URL
|
79
|
-
info["website"] = line.strip()
|
80
|
-
elif line.startswith("License:"): # The license information
|
81
|
-
info["license"] = line.split(":", 1)[1].strip()
|
82
|
-
|
83
|
-
# Ensure all keys are present even if some fields are missing
|
84
|
-
return info
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
def parse_yum_info(output):
|
89
|
-
"""Parses yum repoquery --info output."""
|
90
|
-
info = {}
|
91
|
-
lines = output.splitlines()
|
92
|
-
|
93
|
-
for line in lines:
|
94
|
-
if line.startswith("License"):
|
95
|
-
info["license"] = line.split(":", 1)[1].strip()
|
96
|
-
elif line.startswith("URL"):
|
97
|
-
info["website"] = line.split(":", 1)[1].strip()
|
98
|
-
elif "Copyright" in line:
|
99
|
-
info["copyright"] = line.strip()
|
100
|
-
|
101
|
-
# Ensure all keys are present even if data is missing
|
102
|
-
return {
|
103
|
-
"license": info.get("license", "Unknown"),
|
104
|
-
"copyright": info.get("copyright", "Unknown"),
|
105
|
-
"website": info.get("website", "Unknown"),
|
106
|
-
}
|
107
|
-
|
108
|
-
|
109
|
-
def parse_apt_info(output):
|
110
|
-
"""Parses apt-cache show output."""
|
111
|
-
info = {}
|
112
|
-
lines = output.splitlines()
|
113
|
-
|
114
|
-
for line in lines:
|
115
|
-
if line.startswith("License:") or "License" in line:
|
116
|
-
info["license"] = line.split(":", 1)[1].strip()
|
117
|
-
elif line.startswith("Homepage:"):
|
118
|
-
info["website"] = line.split(":", 1)[1].strip()
|
119
|
-
elif "Copyright" in line:
|
120
|
-
info["copyright"] = line.strip()
|
121
|
-
|
122
|
-
# Ensure all keys are present even if data is missing
|
123
|
-
return {
|
124
|
-
"license": info.get("license", "Unknown"),
|
125
|
-
"copyright": info.get("copyright", "Unknown"),
|
126
|
-
"website": info.get("website", "Unknown"),
|
127
|
-
}
|
128
|
-
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|