ossa-scanner 0.1.3__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/PKG-INFO +4 -3
  2. ossa_scanner-0.1.6/ossa_scanner/__init__.py +1 -0
  3. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner/scanner.py +54 -61
  4. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner/uploader.py +0 -16
  5. ossa_scanner-0.1.6/ossa_scanner/utils/downloader.py +119 -0
  6. ossa_scanner-0.1.6/ossa_scanner/utils/hash_calculator.py +34 -0
  7. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner/utils/os_detection.py +6 -1
  8. ossa_scanner-0.1.6/ossa_scanner/utils/package_manager.py +180 -0
  9. ossa_scanner-0.1.6/ossa_scanner/utils/swhid_calculator.py +35 -0
  10. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/PKG-INFO +4 -3
  11. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/requires.txt +1 -0
  12. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/setup.py +3 -2
  13. ossa_scanner-0.1.3/ossa_scanner/__init__.py +0 -1
  14. ossa_scanner-0.1.3/ossa_scanner/utils/downloader.py +0 -47
  15. ossa_scanner-0.1.3/ossa_scanner/utils/hash_calculator.py +0 -8
  16. ossa_scanner-0.1.3/ossa_scanner/utils/package_manager.py +0 -128
  17. ossa_scanner-0.1.3/ossa_scanner/utils/swhid_calculator.py +0 -3
  18. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/LICENSE +0 -0
  19. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/README.md +0 -0
  20. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner/cli.py +0 -0
  21. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner/utils/__init__.py +0 -0
  22. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/SOURCES.txt +0 -0
  23. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/dependency_links.txt +0 -0
  24. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/entry_points.txt +0 -0
  25. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/ossa_scanner.egg-info/top_level.txt +0 -0
  26. {ossa_scanner-0.1.3 → ossa_scanner-0.1.6}/setup.cfg +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ossa_scanner
3
- Version: 0.1.3
4
- Summary: A Python library for scanning Linux packages, managing metadata, and generating SWHIDs.
3
+ Version: 0.1.6
4
+ Summary: Open Source Software Advisory generator for Core and Base Linux Packages.
5
5
  Home-page: https://github.com/oscarvalenzuelab/ossa_scanner
6
6
  Author: Oscar Valenzuela
7
7
  Author-email: oscar.valenzuela.b@gmail.com
8
8
  License: MIT
9
- Keywords: linux packages SWHID open-source compliance
9
+ Keywords: linux packages SWHID open-source compliance ossa advisory
10
10
  Classifier: Development Status :: 3 - Alpha
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: License :: OSI Approved :: MIT License
@@ -23,6 +23,7 @@ License-File: LICENSE
23
23
  Requires-Dist: click
24
24
  Requires-Dist: swh.model
25
25
  Requires-Dist: distro
26
+ Requires-Dist: ssdeep
26
27
 
27
28
  # ossa_scanner
28
29
  Open Source Advisory Scanner (Generator)
@@ -0,0 +1 @@
1
+ __version__ = "0.1.6"
@@ -1,9 +1,13 @@
1
1
  import os
2
+ import re
2
3
  import json
4
+ import glob
5
+ import shutil
6
+ import subprocess
3
7
  import hashlib
4
8
  from datetime import datetime
5
9
  from concurrent.futures import ThreadPoolExecutor, as_completed
6
- from .utils.os_detection import detect_os
10
+ from .utils.os_detection import detect_os, detect_pm
7
11
  from .utils.package_manager import list_packages, get_package_info
8
12
  from .utils.downloader import download_source
9
13
  from .utils.hash_calculator import calculate_file_hash
@@ -14,43 +18,16 @@ class Scanner:
14
18
  self.output_dir = output_dir
15
19
  self.temp_dir = temp_dir
16
20
  self.os_type = detect_os()
21
+ self.pm_type = detect_pm()
17
22
  self.threads = threads
18
23
  os.makedirs(self.temp_dir, exist_ok=True)
19
24
 
20
25
  def process_package(self, package):
21
- """
22
- Processes a single package: downloads source, extracts, calculates hash and SWHID.
23
-
24
- Args:
25
- package (str): Package name to process.
26
-
27
- Returns:
28
- dict: Result of the processed package including hash and SWHID.
29
- """
30
26
  try:
31
27
  print(f"Processing package: {package}")
32
- package_info = get_package_info(self.os_type, package)
33
- print(f"Fetched metadata for {package}")
34
-
35
- # Download the source code to temp_dir
36
- source_file = download_source(self.os_type, package, self.temp_dir)
37
- print(f"Downloaded source file: {source_file}")
38
-
39
- # Calculate hash of the source file
40
- file_hash = calculate_file_hash(source_file)
41
- print(f"Hash (SHA256) for {package}: {file_hash}")
42
-
43
- # Extract source code directory in temp_dir
44
- source_dir = os.path.join(self.temp_dir, package)
45
- os.makedirs(source_dir, exist_ok=True)
46
-
47
- # Calculate SWHID
48
- swhid = calculate_swhid(source_dir)
49
- print(f"SWHID for {package}: {swhid}")
50
-
51
- # Save report
52
- self.save_package_report(package, package_info, file_hash, swhid, source_file)
53
-
28
+ package_info = get_package_info(self.pm_type, package)
29
+ source_files = download_source(self.pm_type, package, self.temp_dir)
30
+ self.save_package_report(package, package_info, source_files)
54
31
  except Exception as e:
55
32
  print(f"Error processing package {package}: {e}")
56
33
 
@@ -58,9 +35,9 @@ class Scanner:
58
35
  """
59
36
  Scans all packages in the repository and processes them in parallel.
60
37
  """
61
- print(f"Detected OS: {self.os_type}")
38
+ print(f"Detected Package Manager: {self.pm_type}")
62
39
  print("Listing available packages...")
63
- packages = list_packages(self.os_type)
40
+ packages = list_packages(self.pm_type)
64
41
  with ThreadPoolExecutor(max_workers=self.threads) as executor:
65
42
  # Submit tasks for parallel processing
66
43
  future_to_package = {
@@ -75,43 +52,59 @@ class Scanner:
75
52
  except Exception as e:
76
53
  print(f"Exception occurred for package {package}: {e}")
77
54
 
78
- def save_package_report(self, package, package_info, file_hash, swhid, source_file):
79
- """
80
- Save the report for a single package.
81
-
82
- Args:
83
- package (str): Package name.
84
- package_info (dict): Information about the package.
85
- file_hash (str): SHA256 hash of the downloaded source.
86
- swhid (str): Software Heritage ID of the package.
87
- """
55
+ def save_package_report(self, package, package_info, source_files):
88
56
  # Generate report filename
89
- sha1_name = hashlib.sha1(package.encode()).hexdigest()
57
+ purl_name = package_info.get("name")
58
+ purl_version = package_info.get("version")
59
+ pkg_type = "deb" if self.pm_type == "apt" else "rpm" if self.pm_type == "yum" else self.pm_type
60
+ os_type = self.os_type
90
61
  date_str = datetime.now().strftime("%Y%m%d")
91
- report_filename = f"ossa-{date_str}-{sha1_name}-{package}.json"
62
+ report_filename = f"ossa-{date_str}-{hash(package) % 10000}-{purl_name}.json"
92
63
  report_path = os.path.join(self.output_dir, report_filename)
93
64
 
65
+ if package_info.get("version") != "*":
66
+ affected_versions = ["*.*", package_info.get("version")]
67
+ else:
68
+ affected_versions = ["*.*"]
69
+
70
+ artifacts = []
71
+ for source_file in source_files:
72
+ artifact = {}
73
+
74
+ # Clean up the artifact name
75
+ artifact_name = os.path.basename(source_file)
76
+ if "--" in artifact_name:
77
+ artifact_name = artifact_name.split("--")[-1]
78
+ artifact['url'] = "file://" + artifact_name
79
+
80
+ file_hash = calculate_file_hash(source_file)
81
+ artifact['hashes'] = file_hash
82
+
83
+ # Extract source code directory in temp_dir
84
+ # Only required if calculating SWHID
85
+ source_dir = os.path.join(self.temp_dir, package)
86
+ os.makedirs(source_dir, exist_ok=True)
87
+ swhid = calculate_swhid(source_dir, source_file)
88
+ artifact['swhid'] = swhid
89
+
90
+ artifacts.append(artifact)
91
+
94
92
  # Create the report content
95
93
  report = {
96
- "id": f"OSSA-{date_str}-{sha1_name.upper()}",
94
+ "id": f"OSSA-{date_str}-{hash(purl_name) % 10000}",
97
95
  "version": "1.0.0",
98
- "severity": "Informational",
99
- "title": f"Advisory for {package}",
100
- "package_name": package,
96
+ "severity": package_info.get("severity", []),
97
+ "description": package_info.get("rason", []),
98
+ "title": f"Advisory for {purl_name}",
99
+ "package_name": purl_name,
101
100
  "publisher": "Generated by OSSA Collector",
102
101
  "last_updated": datetime.now().isoformat(),
103
102
  "approvals": [{"consumption": True, "externalization": True}],
104
- "description": f"Automatically generated OSSA for the package {package}.",
105
- "purls": [f"pkg:{self.os_type}/{package}"],
106
- "regex": [f"^pkg:{self.os_type}/{package}.*"],
107
- "affected_versions": ["*.*"],
108
- "artifacts": [
109
- {
110
- "url": f"file://{source_file}",
111
- "hashes": {"sha256": file_hash},
112
- "swhid": swhid
113
- }
114
- ],
103
+ "description": package_info.get("summary", []),
104
+ "purls": [f"pkg:{pkg_type}/{os_type}/{purl_name}@{purl_version}"],
105
+ "regex": [f"^pkg:{pkg_type}/{os_type}/{purl_name}.*"],
106
+ "affected_versions": affected_versions,
107
+ "artifacts": artifacts,
115
108
  "licenses": package_info.get("licenses", []),
116
109
  "aliases": package_info.get("aliases", []),
117
110
  "references": package_info.get("references", [])
@@ -12,15 +12,6 @@ class GitHubUploader:
12
12
  self.base_url = "api.github.com"
13
13
 
14
14
  def upload_file(self, file_path, repo_path, commit_message="Add scanner results"):
15
- """
16
- Uploads a file to a GitHub repository.
17
-
18
- Args:
19
- file_path (str): Local file path to upload.
20
- repo_path (str): Path in the GitHub repository.
21
- commit_message (str): Commit message for the upload.
22
- """
23
- # Read the file and encode it in base64
24
15
  with open(file_path, "rb") as f:
25
16
  content = f.read()
26
17
  encoded_content = base64.b64encode(content).decode("utf-8")
@@ -54,13 +45,6 @@ class GitHubUploader:
54
45
  raise Exception(f"GitHub API Error: {response.status}")
55
46
 
56
47
  def upload_results(self, results_dir, repo_dir):
57
- """
58
- Uploads all files in a directory to a specified path in the GitHub repo.
59
-
60
- Args:
61
- results_dir (str): Local directory containing results to upload.
62
- repo_dir (str): Target directory in the GitHub repository.
63
- """
64
48
  for root, _, files in os.walk(results_dir):
65
49
  for file_name in files:
66
50
  local_path = os.path.join(root, file_name)
@@ -0,0 +1,119 @@
1
+ import subprocess
2
+ import os
3
+ import shutil
4
+ import glob
5
+
6
def cleanup_extracted_files(folder_path):
    """Delete everything inside ``folder_path`` while keeping the folder itself.

    Every direct child is removed: real directories recursively, anything
    else (regular files, symlinks, hidden dotfiles) with ``os.remove``.
    A failure on one entry is reported and the remaining entries are still
    processed.

    Args:
        folder_path: Directory whose contents should be removed. A path that
            is not an existing directory is silently ignored.
    """
    if not os.path.isdir(folder_path):
        return

    try:
        # Snapshot the entries first so deletion never races the iterator.
        with os.scandir(folder_path) as scanner:
            children = list(scanner)
    except OSError as e:
        print(f"Failed to clean up {folder_path}: {e}")
        return

    for entry in children:
        try:
            # follow_symlinks=False: a symlink pointing at a directory must be
            # unlinked, not handed to rmtree (which refuses symlinks).
            if entry.is_dir(follow_symlinks=False):
                shutil.rmtree(entry.path)
                print(f"Deleted directory: {entry.path}")
            else:
                os.remove(entry.path)
                print(f"Deleted file: {entry.path}")
        except OSError as e:
            print(f"Failed to clean up {entry.path}: {e}")
18
+
19
def download_source(package_manager, package_name, output_dir):
    """Download the source archives of a package and return their paths.

    * ``apt``: runs ``apt-get source --download-only`` inside a per-package
      work directory and returns the tarballs found there.
    * ``yum`` / ``dnf``: downloads the source RPM into a per-package work
      directory, unpacks it and returns the tarballs found there.
    * ``brew``: fetches the source with ``brew fetch --build-from-source``,
      then moves the first cache file matching the package name into
      ``output_dir``.

    Args:
        package_manager: One of ``'apt'``, ``'yum'``, ``'dnf'``, ``'brew'``.
        package_name: Name of the package to fetch.
        output_dir: Directory under which downloads are stored.

    Returns:
        A list of file paths on success, or ``None`` when a command fails,
        the source package cannot be found, or the package manager is
        unsupported (the error is printed, not raised).
    """
    import hashlib  # function-scope import keeps this block self-contained

    tarball_suffixes = (".tar.gz", ".tar.bz2", ".tar.xz", ".tgz")
    try:
        if package_manager in ('apt', 'yum', 'dnf'):
            # A digest of the name gives a stable per-package directory;
            # hash() % 10000 made unrelated packages share one directory.
            bucket = hashlib.sha1(package_name.encode("utf-8")).hexdigest()[:16]
            work_dir = os.path.join(output_dir, bucket)
            os.makedirs(work_dir, exist_ok=True)

        if package_manager == 'apt':
            # apt-get writes into the current directory; passing a directory
            # as an extra argument would be read as another package name.
            cmd = ['apt-get', 'source', '--download-only', package_name]
            subprocess.run(cmd, check=True, cwd=work_dir)
            return sorted(
                os.path.join(work_dir, name)
                for name in os.listdir(work_dir)
                if name.endswith(tarball_suffixes)
            )
        elif package_manager in ['yum', 'dnf']:
            source_path = get_rpm_source_package(package_name, work_dir)
            if not source_path:
                print(f"Source package for {package_name} not found in {work_dir}.")
                return None
            # Unpacks the SRPM payload into work_dir; the tarball listing
            # below relies on that side effect.
            extract_rpm_spec_file(source_path, work_dir)
            return extract_rpm_tarballs(source_path, work_dir)
        elif package_manager == 'brew':
            # Fetch the source tarball into the Homebrew cache.
            cmd = ['brew', 'fetch', '--build-from-source', package_name]
            subprocess.run(cmd, check=True, capture_output=True, text=True)
            cache_path = subprocess.run(
                ['brew', '--cache', package_name],
                capture_output=True,
                text=True,
                check=True
            ).stdout.strip()
            prefixes_to_remove = ['aarch64-elf-', 'arm-none-eabi-', 'other-prefix-']
            stripped_package_name = package_name
            for prefix in prefixes_to_remove:
                if package_name.startswith(prefix):
                    stripped_package_name = package_name[len(prefix):]
                    break
            cache_folder = os.path.dirname(cache_path)
            tarball_pattern = os.path.join(cache_folder, f"*{stripped_package_name}*")
            matching_files = glob.glob(tarball_pattern)
            if not matching_files:
                raise FileNotFoundError(f"Tarball not found for {package_name} in {cache_folder}")
            tarball_path = matching_files[0]
            os.makedirs(output_dir, exist_ok=True)
            target_path = os.path.join(output_dir, os.path.basename(tarball_path))
            shutil.move(tarball_path, target_path)
            return [target_path]
        else:
            raise ValueError("Unsupported package manager")
    except subprocess.CalledProcessError as e:
        print(f"Command failed: {e}")
        return None
    except Exception as e:
        print(f"Error: {e}")
        return None
72
+
73
def get_rpm_source_package(package_name, dest_dir="./source_packages"):
    """Download the source RPM of ``package_name`` with ``yumdownloader``.

    Args:
        package_name: Binary or source package name passed to yumdownloader.
        dest_dir: Directory the ``.src.rpm`` is written to (created if
            missing).

    Returns:
        Path of the downloaded ``.src.rpm``, or ``None`` when the tool is
        missing, exits non-zero, or leaves no source RPM in ``dest_dir``.
    """
    os.makedirs(dest_dir, exist_ok=True)

    def _source_rpms():
        return {name for name in os.listdir(dest_dir) if name.endswith(".src.rpm")}

    # Remember what was already there so a stale file from an earlier
    # download is not mistaken for this package's source RPM.
    already_present = _source_rpms()
    command = ["yumdownloader", "--source", "--destdir", dest_dir, package_name]
    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    except OSError as e:
        print(f"Failed to run yumdownloader for {package_name}: {e}")
        return None
    if result.returncode != 0:
        print(f"yumdownloader failed for {package_name}: {result.stderr.strip()}")
        return None

    now_present = _source_rpms()
    # Prefer files created by this run; fall back to existing ones because
    # the file may already have been downloaded. Sorting keeps the choice
    # deterministic.
    candidates = sorted(now_present - already_present) or sorted(now_present)
    if candidates:
        return os.path.join(dest_dir, candidates[0])
    return None
82
+
83
def extract_rpm_spec_file(srpm_path, dest_dir="./extracted_specs"):
    """Unpack a source RPM into ``dest_dir`` and return the path of its spec file.

    The payload is streamed from ``rpm2cpio`` into ``cpio`` through a pipe;
    no shell is involved, so paths with spaces or shell metacharacters are
    passed through literally.

    Args:
        srpm_path: Path to the ``.src.rpm`` file.
        dest_dir: Directory the payload is extracted into (created if
            missing).

    Returns:
        Path of the first ``.spec`` file (sorted by name) found in
        ``dest_dir``, or ``None`` when extraction fails or no spec file
        exists.
    """
    os.makedirs(dest_dir, exist_ok=True)
    try:
        producer = subprocess.Popen(
            ["rpm2cpio", srpm_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
        try:
            subprocess.run(
                ["cpio", "-idm", "-D", dest_dir],
                stdin=producer.stdout,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                check=True,
            )
        finally:
            # Closing our end lets rpm2cpio terminate if cpio stopped early.
            producer.stdout.close()
            producer_status = producer.wait()
        if producer_status != 0:
            raise subprocess.CalledProcessError(producer_status, "rpm2cpio")
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Failed to extract spec file from {srpm_path}: {e}")
        return None

    spec_files = sorted(
        os.path.join(dest_dir, name)
        for name in os.listdir(dest_dir)
        if name.endswith(".spec")
    )
    return spec_files[0] if spec_files else None
94
+
95
def extract_rpm_tarballs(srpm_path, dest_dir="./extracted_sources"):
    """Return the source tarballs present in ``dest_dir``.

    This function only lists files; it does not unpack ``srpm_path`` itself.
    The source RPM must already have been extracted into ``dest_dir``.

    Args:
        srpm_path: Path of the source RPM the tarballs belong to. Accepted
            for interface compatibility; not read here.
        dest_dir: Directory to scan (created if missing).

    Returns:
        Sorted list of paths ending in ``.tar.gz``, ``.tar.bz2``,
        ``.tar.xz`` or ``.tgz``; empty when none are found.
    """
    os.makedirs(dest_dir, exist_ok=True)
    tarball_suffixes = (".tar.gz", ".tar.bz2", ".tar.xz", ".tgz")
    return sorted(
        os.path.join(dest_dir, name)
        for name in os.listdir(dest_dir)
        if name.endswith(tarball_suffixes)
    )
103
+
104
def extract_rpm_info_from_spec(spec_file_path):
    """Read the project URL, primary source URL and license from an RPM spec file.

    Tag names are matched case-insensitively and ``Source:`` is accepted as
    an alias of ``Source0:``. The first occurrence of each tag wins, so a
    later tag of the same name (for example in a sub-package section) does
    not replace the earlier value.

    Args:
        spec_file_path: Path to the ``.spec`` file.

    Returns:
        Tuple ``(project_url, source_url, license)``; each element is
        ``None`` when the tag is absent or the file does not exist.
    """
    found = {"url": None, "source0": None, "license": None}
    try:
        # errors="replace": one stray non-UTF-8 byte must not abort the
        # whole parse.
        with open(spec_file_path, "r", encoding="utf-8", errors="replace") as spec_file:
            for line in spec_file:
                tag, separator, value = line.partition(":")
                if not separator:
                    continue
                key = tag.strip().lower()
                if key == "source":
                    key = "source0"
                if key in found and found[key] is None:
                    found[key] = value.strip()
    except FileNotFoundError:
        print(f"Spec file not found: {spec_file_path}")
    return found["url"], found["source0"], found["license"]
@@ -0,0 +1,34 @@
1
+ import os
2
+ import json
3
+ import hashlib
4
+ import ssdeep
5
+
6
def calculate_file_hash(file_path):
    """Collect every supported digest of ``file_path`` in one dictionary.

    Args:
        file_path: Path of the file to fingerprint.

    Returns:
        Dict with the keys ``sha1``, ``sha256``, ``ssdeep`` and ``swhid``,
        each produced by the matching ``compute_*`` helper of this module.
    """
    digesters = (
        ('sha1', compute_sha1),
        ('sha256', compute_sha256),
        ('ssdeep', compute_fuzzy_hash),
        ('swhid', compute_swhid),
    )
    return {label: digest(file_path) for label, digest in digesters}
13
+
14
def compute_sha1(file_path, chunk_size=65536):
    """Return the hexadecimal SHA-1 digest of a file, read in chunks.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration; must be positive.

    Returns:
        The digest as a lowercase hex string.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size < 1:
        # read(0) yields b"" immediately and would hash nothing.
        raise ValueError("chunk_size must be a positive integer")
    sha1 = hashlib.sha1()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            sha1.update(chunk)
    return sha1.hexdigest()
20
+
21
def compute_sha256(file_path, chunk_size=65536):
    """Return the hexadecimal SHA-256 digest of a file, read in chunks.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration; must be positive.

    Returns:
        The digest as a lowercase hex string.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size < 1:
        # read(0) yields b"" immediately and would hash nothing.
        raise ValueError("chunk_size must be a positive integer")
    sha256 = hashlib.sha256()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            sha256.update(chunk)
    return sha256.hexdigest()
27
+
28
def compute_fuzzy_hash(file_path):
    """Return the ssdeep fuzzy hash that ``ssdeep.hash_from_file`` computes for ``file_path``."""
    fuzzy_digest = ssdeep.hash_from_file(file_path)
    return fuzzy_digest
30
+
31
def compute_swhid(file_path):
    """Return the Software Heritage content identifier (``swh:1:cnt:…``) of a file.

    The identifier carries the git blob hash: SHA-1 over the header
    ``b"blob <size>\\0"`` followed by the file bytes. A plain SHA-1 of the
    bytes alone would not resolve in the Software Heritage archive.

    Args:
        file_path: Path of the file to identify.

    Returns:
        The SWHID string, e.g. ``swh:1:cnt:e69de29b…`` for an empty file.
    """
    git_blob = hashlib.sha1()
    with open(file_path, "rb") as f:
        # The header needs the byte length up front; take it from the open
        # handle so size and content refer to the same file.
        size = os.fstat(f.fileno()).st_size
        git_blob.update(f"blob {size}\0".encode("ascii"))
        for chunk in iter(lambda: f.read(65536), b""):
            git_blob.update(chunk)
    return f"swh:1:cnt:{git_blob.hexdigest()}"
@@ -1,6 +1,12 @@
1
+ import os
1
2
  import distro
3
+ import subprocess
2
4
 
3
5
  def detect_os():
6
+ dist = distro.id()
7
+ return dist
8
+
9
+ def detect_pm():
4
10
  dist = distro.id()
5
11
  if 'ubuntu' in dist or 'debian' in dist:
6
12
  return 'apt'
@@ -10,4 +16,3 @@ def detect_os():
10
16
  return 'brew'
11
17
  else:
12
18
  raise ValueError("Unsupported OS")
13
-
@@ -0,0 +1,180 @@
1
+ import subprocess
2
+ import re
3
+
4
+
5
def list_packages(package_manager, max_packages=500000):
    """List the package names known to the given package manager.

    Runs the manager's search/query command and takes the first
    whitespace-separated token of every non-empty output line. Lines
    starting with ``==>`` are skipped.

    Args:
        package_manager: One of ``'apt'``, ``'yum'``, ``'dnf'``, ``'brew'``.
        max_packages: Upper bound on the number of names returned.

    Returns:
        List of at most ``max_packages`` package names, in output order.

    Raises:
        ValueError: If ``package_manager`` is not supported.
    """
    listing_commands = {
        'apt': ['apt-cache', 'search', '.'],
        'yum': ['repoquery', '--all'],
        'dnf': ['repoquery', '--all'],
        'brew': ['brew', 'search', '.'],
    }
    command = listing_commands.get(package_manager)
    if command is None:
        raise ValueError("ER1: Unsupported package manager for search")

    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        # Still parse whatever was printed, but do not hide the failure.
        print(f"Package listing exited with status {result.returncode}: {result.stderr.strip()}")

    extracted_packages = []
    for line in result.stdout.splitlines():
        # Check the limit before appending so the cap is exact.
        if len(extracted_packages) >= max_packages:
            break
        if not line.strip() or line.startswith("==>"):
            continue
        extracted_packages.append(line.split()[0])

    return extracted_packages
40
+
41
+
42
def get_package_info(package_manager, package_name):
    """Query the package manager for metadata about one package.

    Args:
        package_manager: One of ``'apt'``, ``'yum'``, ``'dnf'``, ``'brew'``.
        package_name: Name of the package to describe.

    Returns:
        The dict produced by the matching ``parse_*_info`` function, or
        ``None`` when the query command fails or cannot be started.

    Raises:
        ValueError: If ``package_manager`` is not supported.
    """
    info_commands = {
        'apt': ['apt-cache', 'show'],
        'yum': ['repoquery', '--info'],
        'dnf': ['repoquery', '--info'],
        'brew': ['brew', 'info'],
    }
    if package_manager not in info_commands:
        raise ValueError("ER: Unsupported package manager for info")
    cmd = info_commands[package_manager] + [package_name]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Command failed: {e}")
        return None
    except OSError as e:
        # The tool is missing or not executable: treat it like a failed
        # command rather than raising into the caller.
        print(f"Command failed: {e}")
        return None

    output = result.stdout
    if package_manager == 'brew':
        return parse_brew_info(output)
    if package_manager == 'apt':
        return parse_apt_info(output)
    return parse_yum_info(output)
64
+
65
+
66
def parse_brew_info(output):
    """Parse ``brew info`` output into the metadata dict used for reports.

    Args:
        output: Text printed by ``brew info <package>``.

    Returns:
        Dict with ``name``, ``version``, ``licenses``, ``severity``,
        ``references`` and ``summary``; every value defaults to
        ``"NOASSERTION"``. When a ``License:`` line is present the dict also
        carries ``rason`` (key spelling kept as-is because the report writer
        looks it up under that name).
    """
    info = {
        "name": "NOASSERTION",
        "version": "NOASSERTION",
        "licenses": "NOASSERTION",
        "severity": "NOASSERTION",
        "references": "NOASSERTION",
        "summary": "NOASSERTION",
    }
    marker = "==>"
    header_parsed = False

    for i, line in enumerate(output.splitlines()):
        if line.startswith(marker) and ":" in line:
            if header_parsed:
                # Only the first "==> name: ..." line describes the package.
                continue
            # Slice the marker off; str.lstrip would treat it as a set of
            # characters rather than a prefix.
            header = line[len(marker):].strip()
            with_version = re.match(r"([^:]+):.*?([\d\.a-zA-Z]+)\s*\(", header)
            name_only = re.match(r"([^:]+):", header)
            if with_version:
                info["name"] = with_version.group(1).strip()
                info["version"] = with_version.group(2).strip()
            elif name_only:
                info["name"] = name_only.group(1).strip()
                info["version"] = "*"
            else:
                # Nothing usable before the colon; keep the defaults.
                continue
            header_parsed = True
        elif i == 1:
            # The second output line is used as the summary.
            info["summary"] = line.strip()
        elif line.startswith("https://"):  # The website URL
            info["references"] = line.strip()
        elif line.startswith("License:"):  # The license information
            info["licenses"] = extract_spdx_ids(line.split(":", 1)[1].strip())
            info["severity"], info["rason"] = license_classificaton(info["licenses"])
    return info
99
+
100
def parse_yum_info(output):
    """Parse ``repoquery --info`` output into the metadata dict used for reports.

    Each line is split at its first colon and the text before it is compared
    against the exact labels ``Name``, ``Version``, ``Summary``, ``URL`` and
    ``License``. Lines without a colon are ignored. When a label occurs more
    than once, the last occurrence wins.

    Args:
        output: Text printed by ``repoquery --info <package>``.

    Returns:
        Dict with ``name``, ``version``, ``licenses``, ``severity``,
        ``references`` and ``summary``; every value defaults to
        ``"NOASSERTION"``. When a ``License`` line is present the dict also
        carries ``rason`` (key spelling kept as-is because the report writer
        looks it up under that name).
    """
    info = {
        "name": "NOASSERTION",
        "version": "NOASSERTION",
        "licenses": "NOASSERTION",
        "severity": "NOASSERTION",
        "references": "NOASSERTION",
        "summary": "NOASSERTION",
    }
    plain_fields = {
        "URL": "references",
        "Name": "name",
        "Version": "version",
        "Summary": "summary",
    }
    for line in output.splitlines():
        label, separator, value = line.partition(":")
        if not separator:
            # No "label : value" structure; indexing the split result here
            # used to raise IndexError.
            continue
        label = label.strip()
        value = value.strip()
        if label == "License":
            info["licenses"] = extract_spdx_ids(value)
            info["severity"], info["rason"] = license_classificaton(info["licenses"])
        elif label in plain_fields:
            info[plain_fields[label]] = value
    return info
123
+
124
def parse_apt_info(output):
    """Parse ``apt-cache show`` output into the metadata dict used for reports.

    Only the first record (up to the first blank line) is read. Indented
    lines are skipped and unknown fields are ignored.

    Args:
        output: Text printed by ``apt-cache show <package>``.

    Returns:
        Dict with ``name``, ``version``, ``licenses``, ``severity``,
        ``references``, ``summary`` and ``copyright``; every value defaults
        to ``"NOASSERTION"``. When a ``License`` field is present the dict
        also carries ``rason`` (key spelling kept as-is because the report
        writer looks it up under that name).
    """
    info = {
        "name": "NOASSERTION",
        "version": "NOASSERTION",
        "licenses": "NOASSERTION",
        "severity": "NOASSERTION",
        "references": "NOASSERTION",
        "summary": "NOASSERTION",
        "copyright": "NOASSERTION",
    }
    plain_fields = {
        "Package": "name",
        "Version": "version",
        "Homepage": "references",
        "Copyright": "copyright",
    }
    record_started = False

    for line in output.splitlines():
        if not line.strip():
            if record_started:
                # A blank line ends the first record; later records would
                # otherwise overwrite its values.
                break
            continue
        if line[0] in " \t":
            # Indented lines carry no "Field: value" pair of their own.
            continue
        field, separator, value = line.partition(":")
        if not separator:
            continue
        record_started = True
        field = field.strip()
        value = value.strip()
        if field == "License":
            info["licenses"] = extract_spdx_ids(value)
            # license_classificaton returns (severity, reason); storing the
            # tuple under "severity" would corrupt the report field.
            info["severity"], info["rason"] = license_classificaton(info["licenses"])
        elif field in plain_fields:
            info[plain_fields[field]] = value
        elif field.startswith("Description") and field != "Description-md5":
            if info["summary"] == "NOASSERTION":
                info["summary"] = value
    return info
146
+
147
def extract_spdx_ids(license_string):
    """Flatten a license expression into a sorted, comma-separated list of identifiers.

    The expression is split on the operators ``AND`` / ``OR`` (any case,
    surrounded by whitespace) and on parentheses. Duplicates are removed.

    Args:
        license_string: License expression such as ``"MIT AND (BSD OR GPL)"``;
            ``None`` is treated like an empty string.

    Returns:
        The identifiers joined with ``", "``, or the literal text
        ``"No valid SPDX licenses found"`` when nothing remains.
    """
    nothing_found = "No valid SPDX licenses found"
    if license_string is None or not license_string.strip():
        return nothing_found
    raw_ids = re.split(r'(?i)\sAND\s|\sOR\s|\(|\)', license_string)
    unique_spdx_ids = sorted({spdx.strip() for spdx in raw_ids if spdx.strip()})
    return ", ".join(unique_spdx_ids) if unique_spdx_ids else nothing_found
154
+
155
def license_classificaton(licenses):
    """Map a comma-separated license list to a severity and an explanation.

    Each license is compared case-insensitively by prefix against three
    categories. The strictest matching category decides the result:
    copyleft, then weak copyleft, then permissive.

    Args:
        licenses: Comma-separated license identifiers; ``None`` or an empty
            string yields the default result.

    Returns:
        Tuple ``(severity, reason)``. When no license matches any category
        the result is ``("Informational", "PURL identification for OSSBOMER")``.
    """
    default_verdict = ("Informational", "PURL identification for OSSBOMER")
    # Ordered strictest first, so the first category with a match wins.
    categories = (
        (
            ("GPL", "AGPL"),
            ("High", "This package contains copyleft licenses, which impose strong obligations."),
        ),
        (
            ("LGPL", "MPL", "EPL", "CDDL"),
            ("Medium", "This package contains weak copyleft licenses, which impose moderate obligations."),
        ),
        (
            ("MIT", "BSD", "APACHE"),
            ("Informational", "This package contains permissive licenses, which impose minimal obligations."),
        ),
    )
    if not licenses:
        return default_verdict

    # Split multiple licenses and normalize them once.
    license_names = [name.strip().upper() for name in licenses.split(",")]
    for prefixes, verdict in categories:
        if any(name.startswith(prefixes) for name in license_names):
            return verdict
    return default_verdict
@@ -0,0 +1,35 @@
1
+ import os
2
+ import glob
3
+ import shutil
4
+ import subprocess
5
+
6
def calculate_swhid(directory_path, file_path):
    """Unpack a tarball and return the directory SWHID reported for its contents.

    The archive is extracted into ``directory_path`` with ``tar``, the
    ``swh.identify`` command is run on that directory, and the extracted
    files are removed again before returning.

    Args:
        directory_path: Scratch directory for the extraction (created if
            missing; its contents are deleted afterwards).
        file_path: Path of the tarball to unpack.

    Returns:
        The first ``swh:1:dir:…`` identifier printed by the tool, or
        ``None`` when extraction or identification fails.
    """
    os.makedirs(directory_path, exist_ok=True)
    try:
        # Argument list instead of a shell string: archive names may contain
        # spaces or shell metacharacters.
        subprocess.run(["tar", "-xf", file_path, "-C", directory_path], check=True)
        # NOTE(review): confirm that the installed swh.model package provides
        # an executable with exactly this name.
        result = subprocess.run(
            ["swh.identify", directory_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        if result.returncode != 0:
            print(f"Failed to compute folder SWHID: {result.stderr}")
            return None
        for line in result.stdout.strip().split("\n"):
            if line.startswith("swh:1:dir:"):
                return line.split("\t")[0]
    except subprocess.CalledProcessError as e:
        print(f"Failed to process tarball {file_path}: {e}")
    except OSError as e:
        # tar or the identify tool is missing / not executable.
        print(f"Failed to process tarball {file_path}: {e}")
    finally:
        # Single cleanup point; runs on every exit path, including returns.
        cleanup_extracted_files(directory_path)
    return None
26
+
27
def cleanup_extracted_files(directory_path):
    """Remove every entry inside ``directory_path`` but keep the directory.

    Real directories are removed recursively; files, symlinks and hidden
    dotfiles are unlinked. A failure on one entry is reported and the
    remaining entries are still processed.

    Args:
        directory_path: Directory to empty. A path that is not an existing
            directory is silently ignored.
    """
    if not os.path.isdir(directory_path):
        return

    try:
        # Snapshot the entries first so deletion never races the iterator.
        with os.scandir(directory_path) as scanner:
            children = list(scanner)
    except OSError as e:
        print(f"Failed to clean up {directory_path}: {e}")
        return

    for entry in children:
        try:
            # follow_symlinks=False: a symlink pointing at a directory must be
            # unlinked, not handed to rmtree (which refuses symlinks).
            if entry.is_dir(follow_symlinks=False):
                shutil.rmtree(entry.path)
            else:
                os.remove(entry.path)
        except OSError as e:
            print(f"Failed to clean up {directory_path}: {e}")
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ossa_scanner
3
- Version: 0.1.3
4
- Summary: A Python library for scanning Linux packages, managing metadata, and generating SWHIDs.
3
+ Version: 0.1.6
4
+ Summary: Open Source Software Advisory generator for Core and Base Linux Packages.
5
5
  Home-page: https://github.com/oscarvalenzuelab/ossa_scanner
6
6
  Author: Oscar Valenzuela
7
7
  Author-email: oscar.valenzuela.b@gmail.com
8
8
  License: MIT
9
- Keywords: linux packages SWHID open-source compliance
9
+ Keywords: linux packages SWHID open-source compliance ossa advisory
10
10
  Classifier: Development Status :: 3 - Alpha
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: License :: OSI Approved :: MIT License
@@ -23,6 +23,7 @@ License-File: LICENSE
23
23
  Requires-Dist: click
24
24
  Requires-Dist: swh.model
25
25
  Requires-Dist: distro
26
+ Requires-Dist: ssdeep
26
27
 
27
28
  # ossa_scanner
28
29
  Open Source Advisory Scanner (Generator)
@@ -1,3 +1,4 @@
1
1
  click
2
2
  swh.model
3
3
  distro
4
+ ssdeep
@@ -20,7 +20,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
20
20
  setup(
21
21
  name="ossa_scanner",
22
22
  version=get_version(),
23
- description="A Python library for scanning Linux packages, managing metadata, and generating SWHIDs.",
23
+ description="Open Source Software Advisory generator for Core and Base Linux Packages.",
24
24
  long_description=long_description,
25
25
  long_description_content_type='text/markdown',
26
26
  author="Oscar Valenzuela",
@@ -32,6 +32,7 @@ setup(
32
32
  "click",
33
33
  "swh.model",
34
34
  "distro",
35
+ "ssdeep",
35
36
  ],
36
37
  entry_points={
37
38
  "console_scripts": [
@@ -51,5 +52,5 @@ setup(
51
52
  "Programming Language :: Python :: 3.10",
52
53
  "Operating System :: POSIX :: Linux",
53
54
  ],
54
- keywords="linux packages SWHID open-source compliance",
55
+ keywords="linux packages SWHID open-source compliance ossa advisory",
55
56
  )
@@ -1 +0,0 @@
1
- __version__ = "0.1.3"
@@ -1,47 +0,0 @@
1
- import subprocess
2
- import os
3
- import shutil
4
- import glob
5
-
6
- def download_source(package_manager, package_name, output_dir):
7
- try:
8
- if package_manager == 'apt':
9
- cmd = ['apt-get', 'source', package_name, '-d', output_dir]
10
- subprocess.run(cmd, check=True)
11
- elif package_manager in ['yum', 'dnf']:
12
- cmd = ['dnf', 'download', '--source', package_name, '--downloaddir', output_dir]
13
- subprocess.run(cmd, check=True)
14
- elif package_manager == 'brew':
15
- # Fetch the source tarball
16
- cmd = ['brew', 'fetch', '--build-from-source', package_name]
17
- subprocess.run(cmd, check=True, capture_output=True, text=True)
18
- cache_dir = subprocess.run(
19
- ['brew', '--cache', package_name],
20
- capture_output=True,
21
- text=True,
22
- check=True
23
- ).stdout.strip()
24
- prefixes_to_remove = ['aarch64-elf-', 'arm-none-eabi-', 'other-prefix-']
25
- stripped_package_name = package_name
26
- for prefix in prefixes_to_remove:
27
- if package_name.startswith(prefix):
28
- stripped_package_name = package_name[len(prefix):]
29
- break
30
- cache_folder = os.path.dirname(cache_dir)
31
- tarball_pattern = os.path.join(cache_folder, f"*{stripped_package_name}*")
32
- matching_files = glob.glob(tarball_pattern)
33
- if not matching_files:
34
- raise FileNotFoundError(f"Tarball not found for {package_name} in {cache_folder}")
35
- tarball_path = matching_files[0]
36
- os.makedirs(output_dir, exist_ok=True)
37
- target_path = os.path.join(output_dir, os.path.basename(tarball_path))
38
- shutil.move(tarball_path, target_path)
39
- return target_path
40
- else:
41
- raise ValueError("Unsupported package manager")
42
- except subprocess.CalledProcessError as e:
43
- print(f"Command failed: {e}")
44
- return None
45
- except Exception as e:
46
- print(f"Error: {e}")
47
- return None
@@ -1,8 +0,0 @@
1
- import hashlib
2
-
3
- def calculate_file_hash(file_path, algorithm='sha256'):
4
- hash_func = hashlib.new(algorithm)
5
- with open(file_path, 'rb') as f:
6
- while chunk := f.read(8192):
7
- hash_func.update(chunk)
8
- return hash_func.hexdigest()
@@ -1,128 +0,0 @@
1
- import subprocess
2
-
3
-
4
- def list_packages(package_manager):
5
- if package_manager == 'apt':
6
- result = subprocess.run(
7
- ['apt-cache', 'search', '.'],
8
- capture_output=True,
9
- text=True
10
- )
11
- elif package_manager in ['yum', 'dnf']:
12
- result = subprocess.run(
13
- ['repoquery', '--all'],
14
- capture_output=True,
15
- text=True
16
- )
17
- elif package_manager == 'brew':
18
- result = subprocess.run(
19
- ['brew', 'search', '.'],
20
- capture_output=True,
21
- text=True
22
- )
23
- else:
24
- raise ValueError("ER1: Unsupported package manager for search")
25
-
26
- packages = result.stdout.splitlines()
27
- extracted_packages = []
28
- max_packages = 5
29
- k_packages = 0
30
- for line in packages:
31
- if not line.strip() or line.startswith("==>"):
32
- continue
33
- extracted_packages.append(line.split()[0])
34
- if k_packages >= max_packages:
35
- break
36
- k_packages += 1
37
-
38
- return extracted_packages
39
-
40
-
41
- def get_package_info(package_manager, package_name):
42
- if package_manager == 'apt':
43
- cmd = ['apt-cache', 'show', package_name]
44
- elif package_manager in ['yum', 'dnf']:
45
- cmd = ['repoquery', '--info', package_name]
46
- elif package_manager == 'brew':
47
- cmd = ['brew', 'info', package_name]
48
- else:
49
- raise ValueError("ER: Unsupported package manager for info")
50
-
51
- try:
52
- result = subprocess.run(cmd, capture_output=True, text=True, check=True)
53
- output = result.stdout
54
-
55
- # Parse the output based on the package manager
56
- if package_manager == 'brew':
57
- return parse_brew_info(output)
58
- elif package_manager in ['yum', 'dnf']:
59
- return parse_yum_info(output)
60
- elif package_manager == 'apt':
61
- return parse_apt_info(output)
62
- except subprocess.CalledProcessError as e:
63
- print(f"Command failed: {e}")
64
- return None
65
-
66
-
67
- def parse_brew_info(output):
68
- """Parses brew info output to extract license, website, and description."""
69
- info = {}
70
- lines = output.splitlines()
71
- info["license"] = "Unknown"
72
- info["website"] = "Unknown"
73
- info["description"] = "Unknown"
74
-
75
- for i, line in enumerate(lines):
76
- if i == 1: # The description is usually on the second line
77
- info["description"] = line.strip()
78
- elif line.startswith("https://"): # The website URL
79
- info["website"] = line.strip()
80
- elif line.startswith("License:"): # The license information
81
- info["license"] = line.split(":", 1)[1].strip()
82
-
83
- # Ensure all keys are present even if some fields are missing
84
- return info
85
-
86
-
87
-
88
- def parse_yum_info(output):
89
- """Parses yum repoquery --info output."""
90
- info = {}
91
- lines = output.splitlines()
92
-
93
- for line in lines:
94
- if line.startswith("License"):
95
- info["license"] = line.split(":", 1)[1].strip()
96
- elif line.startswith("URL"):
97
- info["website"] = line.split(":", 1)[1].strip()
98
- elif "Copyright" in line:
99
- info["copyright"] = line.strip()
100
-
101
- # Ensure all keys are present even if data is missing
102
- return {
103
- "license": info.get("license", "Unknown"),
104
- "copyright": info.get("copyright", "Unknown"),
105
- "website": info.get("website", "Unknown"),
106
- }
107
-
108
-
109
- def parse_apt_info(output):
110
- """Parses apt-cache show output."""
111
- info = {}
112
- lines = output.splitlines()
113
-
114
- for line in lines:
115
- if line.startswith("License:") or "License" in line:
116
- info["license"] = line.split(":", 1)[1].strip()
117
- elif line.startswith("Homepage:"):
118
- info["website"] = line.split(":", 1)[1].strip()
119
- elif "Copyright" in line:
120
- info["copyright"] = line.strip()
121
-
122
- # Ensure all keys are present even if data is missing
123
- return {
124
- "license": info.get("license", "Unknown"),
125
- "copyright": info.get("copyright", "Unknown"),
126
- "website": info.get("website", "Unknown"),
127
- }
128
-
@@ -1,3 +0,0 @@
1
-
2
- def calculate_swhid(directory_path):
3
- return directory_path
File without changes
File without changes
File without changes