ossa-scanner 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ossa_scanner/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.1"
1
+ __version__ = "0.1.3"
ossa_scanner/cli.py CHANGED
@@ -1,35 +1,50 @@
1
1
  import argparse
2
+ import os
3
+ import shutil
2
4
  from .scanner import Scanner
3
5
  from .uploader import GitHubUploader
4
6
 
5
7
  def main():
6
8
  parser = argparse.ArgumentParser(description="OSSA Scanner CLI Tool")
7
- parser.add_argument('--output-dir', type=str, required=True, help="Directory to save downloaded source")
8
- parser.add_argument('--results-file', type=str, required=True, help="Path to save the JSON results")
9
9
  parser.add_argument('--threads', type=int, default=4, help="Number of threads for parallel processing")
10
10
  parser.add_argument('--upload', action='store_true', help="Upload results to GitHub")
11
- parser.add_argument('--repo-owner', type=str, help="GitHub repository owner")
12
- parser.add_argument('--repo-name', type=str, help="GitHub repository name")
13
- parser.add_argument('--token', type=str, help="GitHub token")
14
- parser.add_argument('--repo-dir', type=str, help="Target directory in GitHub repo for results")
11
+ parser.add_argument('--repo-owner', type=str, help="GitHub repository owner (required for upload)")
12
+ parser.add_argument('--repo-name', type=str, help="GitHub repository name (required for upload)")
13
+ parser.add_argument('--token', type=str, help="GitHub token (required for upload)")
14
+ parser.add_argument('--repo-dir', type=str, help="Target directory in GitHub repo for results (required for upload)")
15
+ parser.add_argument('--retain-temp', action='store_true', help="Retain the temporary directory for downloaded and extracted packages")
15
16
  args = parser.parse_args()
16
17
 
17
- # Initialize the scanner
18
- scanner = Scanner(output_dir=args.output_dir, threads=args.threads)
19
-
20
- # Perform scanning
21
- results = scanner.scan_packages()
18
+ # Define directories
19
+ reports_dir = os.path.join(os.getcwd(), "ossa_reports")
20
+ temp_dir = "/tmp/ossa_temp"
22
21
 
23
- # Save results locally
24
- scanner.save_results(results, args.results_file)
22
+ os.makedirs(reports_dir, exist_ok=True)
23
+ os.makedirs(temp_dir, exist_ok=True)
25
24
 
26
- # Upload results to GitHub if specified
27
- if args.upload:
28
- if not (args.repo_owner and args.repo_name and args.token and args.repo_dir):
29
- raise ValueError("GitHub upload requires --repo-owner, --repo-name, --token, and --repo-dir")
25
+ try:
26
+ # Initialize the scanner
27
+ scanner = Scanner(threads=args.threads, output_dir=reports_dir, temp_dir=temp_dir)
30
28
 
31
- uploader = GitHubUploader(args.token, args.repo_owner, args.repo_name)
32
- scanner.upload_results(args.results_file, uploader, args.repo_dir)
29
+ # Perform scanning
30
+ results = scanner.scan_packages()
31
+
32
+ # Handle GitHub upload if specified
33
+ if args.upload:
34
+ if not (args.repo_owner and args.repo_name and args.token and args.repo_dir):
35
+ raise ValueError("GitHub upload requires --repo-owner, --repo-name, --token, and --repo-dir")
36
+
37
+ uploader = GitHubUploader(args.token, args.repo_owner, args.repo_name)
38
+ for report_file in os.listdir(reports_dir):
39
+ report_path = os.path.join(reports_dir, report_file)
40
+ if os.path.isfile(report_path):
41
+ uploader.upload_file(report_path, os.path.join(args.repo_dir, report_file), "Add OSSA report")
42
+
43
+ finally:
44
+ # Clean up the temporary directory unless the user opts to retain it
45
+ if not args.retain_temp:
46
+ print(f"Cleaning up temporary directory: {temp_dir}")
47
+ shutil.rmtree(temp_dir, ignore_errors=True)
33
48
 
34
49
  if __name__ == "__main__":
35
50
  main()
ossa_scanner/scanner.py CHANGED
@@ -1,18 +1,21 @@
1
1
  import os
2
2
  import json
3
+ import hashlib
4
+ from datetime import datetime
3
5
  from concurrent.futures import ThreadPoolExecutor, as_completed
4
6
  from .utils.os_detection import detect_os
5
7
  from .utils.package_manager import list_packages, get_package_info
6
8
  from .utils.downloader import download_source
7
9
  from .utils.hash_calculator import calculate_file_hash
8
10
  from .utils.swhid_calculator import calculate_swhid
9
- from .uploader import GitHubUploader
10
11
 
11
12
  class Scanner:
12
- def __init__(self, output_dir, threads=4):
13
+ def __init__(self, threads=4, output_dir="ossa_reports", temp_dir="/tmp/ossa_temp"):
13
14
  self.output_dir = output_dir
15
+ self.temp_dir = temp_dir
14
16
  self.os_type = detect_os()
15
17
  self.threads = threads
18
+ os.makedirs(self.temp_dir, exist_ok=True)
16
19
 
17
20
  def process_package(self, package):
18
21
  """
@@ -29,48 +32,35 @@ class Scanner:
29
32
  package_info = get_package_info(self.os_type, package)
30
33
  print(f"Fetched metadata for {package}")
31
34
 
32
- # Download the source code
33
- source_file = download_source(self.os_type, package, self.output_dir)
35
+ # Download the source code to temp_dir
36
+ source_file = download_source(self.os_type, package, self.temp_dir)
34
37
  print(f"Downloaded source file: {source_file}")
35
38
 
36
39
  # Calculate hash of the source file
37
40
  file_hash = calculate_file_hash(source_file)
38
41
  print(f"Hash (SHA256) for {package}: {file_hash}")
39
42
 
40
- # Extract source code directory
41
- source_dir = os.path.join(self.output_dir, package)
43
+ # Extract source code directory in temp_dir
44
+ source_dir = os.path.join(self.temp_dir, package)
42
45
  os.makedirs(source_dir, exist_ok=True)
43
46
 
44
47
  # Calculate SWHID
45
48
  swhid = calculate_swhid(source_dir)
46
49
  print(f"SWHID for {package}: {swhid}")
47
50
 
48
- return {
49
- "package": package,
50
- "info": package_info,
51
- "hash": file_hash,
52
- "swhid": swhid,
53
- }
51
+ # Save report
52
+ self.save_package_report(package, package_info, file_hash, swhid, source_file)
54
53
 
55
54
  except Exception as e:
56
55
  print(f"Error processing package {package}: {e}")
57
- return {
58
- "package": package,
59
- "error": str(e)
60
- }
61
56
 
62
57
  def scan_packages(self):
63
58
  """
64
59
  Scans all packages in the repository and processes them in parallel.
65
-
66
- Returns:
67
- list: List of results for each package.
68
60
  """
69
61
  print(f"Detected OS: {self.os_type}")
70
62
  print("Listing available packages...")
71
63
  packages = list_packages(self.os_type)
72
-
73
- results = []
74
64
  with ThreadPoolExecutor(max_workers=self.threads) as executor:
75
65
  # Submit tasks for parallel processing
76
66
  future_to_package = {
@@ -81,34 +71,53 @@ class Scanner:
81
71
  for future in as_completed(future_to_package):
82
72
  package = future_to_package[future]
83
73
  try:
84
- result = future.result()
85
- results.append(result)
74
+ future.result()
86
75
  except Exception as e:
87
76
  print(f"Exception occurred for package {package}: {e}")
88
- return results
89
77
 
90
- def save_results(self, results, output_file):
78
+ def save_package_report(self, package, package_info, file_hash, swhid, source_file):
91
79
  """
92
- Save the scan results to a JSON file.
80
+ Save the report for a single package.
93
81
 
94
82
  Args:
95
- results (list): List of results for each package.
96
- output_file (str): Path to save the JSON file.
83
+ package (str): Package name.
84
+ package_info (dict): Information about the package.
85
+ file_hash (str): SHA256 hash of the downloaded source.
86
+ swhid (str): Software Heritage ID of the package.
97
87
  """
98
- with open(output_file, "w") as f:
99
- json.dump(results, f, indent=4)
100
- print(f"Results saved to {output_file}")
101
-
102
- def upload_results(self, results_file, github_uploader, repo_dir):
103
- """
104
- Uploads the results file to GitHub.
105
-
106
- Args:
107
- results_file (str): Local results file path to upload.
108
- github_uploader (GitHubUploader): Instance of the GitHubUploader class.
109
- repo_dir (str): Path in the GitHub repository where the results will be uploaded.
110
- """
111
- print(f"Uploading results to GitHub: {repo_dir}")
112
- repo_path = os.path.join(repo_dir, os.path.basename(results_file))
113
- github_uploader.upload_file(results_file, repo_path, "Add scanning results")
114
-
88
+ # Generate report filename
89
+ sha1_name = hashlib.sha1(package.encode()).hexdigest()
90
+ date_str = datetime.now().strftime("%Y%m%d")
91
+ report_filename = f"ossa-{date_str}-{sha1_name}-{package}.json"
92
+ report_path = os.path.join(self.output_dir, report_filename)
93
+
94
+ # Create the report content
95
+ report = {
96
+ "id": f"OSSA-{date_str}-{sha1_name.upper()}",
97
+ "version": "1.0.0",
98
+ "severity": "Informational",
99
+ "title": f"Advisory for {package}",
100
+ "package_name": package,
101
+ "publisher": "Generated by OSSA Collector",
102
+ "last_updated": datetime.now().isoformat(),
103
+ "approvals": [{"consumption": True, "externalization": True}],
104
+ "description": f"Automatically generated OSSA for the package {package}.",
105
+ "purls": [f"pkg:{self.os_type}/{package}"],
106
+ "regex": [f"^pkg:{self.os_type}/{package}.*"],
107
+ "affected_versions": ["*.*"],
108
+ "artifacts": [
109
+ {
110
+ "url": f"file://{source_file}",
111
+ "hashes": {"sha256": file_hash},
112
+ "swhid": swhid
113
+ }
114
+ ],
115
+ "licenses": package_info.get("licenses", []),
116
+ "aliases": package_info.get("aliases", []),
117
+ "references": package_info.get("references", [])
118
+ }
119
+
120
+ # Save the report to the output directory
121
+ with open(report_path, "w") as f:
122
+ json.dump(report, f, indent=4)
123
+ print(f"Report saved: {report_path}")
@@ -1,11 +1,47 @@
1
1
  import subprocess
2
+ import os
3
+ import shutil
4
+ import glob
2
5
 
3
6
  def download_source(package_manager, package_name, output_dir):
4
- if package_manager == 'apt':
5
- cmd = ['apt-get', 'source', package_name, '-d', output_dir]
6
- elif package_manager in ['yum', 'dnf']:
7
- cmd = ['dnf', 'download', '--source', package_name, '--downloaddir', output_dir]
8
- else:
9
- raise ValueError("Unsupported package manager")
10
-
11
- subprocess.run(cmd)
7
+ try:
8
+ if package_manager == 'apt':
9
+ cmd = ['apt-get', 'source', package_name, '-d', output_dir]
10
+ subprocess.run(cmd, check=True)
11
+ elif package_manager in ['yum', 'dnf']:
12
+ cmd = ['dnf', 'download', '--source', package_name, '--downloaddir', output_dir]
13
+ subprocess.run(cmd, check=True)
14
+ elif package_manager == 'brew':
15
+ # Fetch the source tarball
16
+ cmd = ['brew', 'fetch', '--build-from-source', package_name]
17
+ subprocess.run(cmd, check=True, capture_output=True, text=True)
18
+ cache_dir = subprocess.run(
19
+ ['brew', '--cache', package_name],
20
+ capture_output=True,
21
+ text=True,
22
+ check=True
23
+ ).stdout.strip()
24
+ prefixes_to_remove = ['aarch64-elf-', 'arm-none-eabi-', 'other-prefix-']
25
+ stripped_package_name = package_name
26
+ for prefix in prefixes_to_remove:
27
+ if package_name.startswith(prefix):
28
+ stripped_package_name = package_name[len(prefix):]
29
+ break
30
+ cache_folder = os.path.dirname(cache_dir)
31
+ tarball_pattern = os.path.join(cache_folder, f"*{stripped_package_name}*")
32
+ matching_files = glob.glob(tarball_pattern)
33
+ if not matching_files:
34
+ raise FileNotFoundError(f"Tarball not found for {package_name} in {cache_folder}")
35
+ tarball_path = matching_files[0]
36
+ os.makedirs(output_dir, exist_ok=True)
37
+ target_path = os.path.join(output_dir, os.path.basename(tarball_path))
38
+ shutil.move(tarball_path, target_path)
39
+ return target_path
40
+ else:
41
+ raise ValueError("Unsupported package manager")
42
+ except subprocess.CalledProcessError as e:
43
+ print(f"Command failed: {e}")
44
+ return None
45
+ except Exception as e:
46
+ print(f"Error: {e}")
47
+ return None
@@ -1,5 +1,6 @@
1
1
  import subprocess
2
2
 
3
+
3
4
  def list_packages(package_manager):
4
5
  if package_manager == 'apt':
5
6
  result = subprocess.run(
@@ -20,18 +21,108 @@ def list_packages(package_manager):
20
21
  text=True
21
22
  )
22
23
  else:
23
- raise ValueError("Unsupported package manager")
24
+ raise ValueError("ER1: Unsupported package manager for search")
24
25
 
25
26
  packages = result.stdout.splitlines()
26
- return [pkg.split()[0] for pkg in packages]
27
+ extracted_packages = []
28
+ max_packages = 5
29
+ k_packages = 0
30
+ for line in packages:
31
+ if not line.strip() or line.startswith("==>"):
32
+ continue
33
+ extracted_packages.append(line.split()[0])
34
+ if k_packages >= max_packages:
35
+ break
36
+ k_packages += 1
37
+
38
+ return extracted_packages
39
+
27
40
 
28
41
  def get_package_info(package_manager, package_name):
29
42
  if package_manager == 'apt':
30
43
  cmd = ['apt-cache', 'show', package_name]
31
44
  elif package_manager in ['yum', 'dnf']:
32
45
  cmd = ['repoquery', '--info', package_name]
46
+ elif package_manager == 'brew':
47
+ cmd = ['brew', 'info', package_name]
33
48
  else:
34
- raise ValueError("Unsupported package manager")
49
+ raise ValueError("ER: Unsupported package manager for info")
50
+
51
+ try:
52
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
53
+ output = result.stdout
54
+
55
+ # Parse the output based on the package manager
56
+ if package_manager == 'brew':
57
+ return parse_brew_info(output)
58
+ elif package_manager in ['yum', 'dnf']:
59
+ return parse_yum_info(output)
60
+ elif package_manager == 'apt':
61
+ return parse_apt_info(output)
62
+ except subprocess.CalledProcessError as e:
63
+ print(f"Command failed: {e}")
64
+ return None
65
+
66
+
67
+ def parse_brew_info(output):
68
+ """Parses brew info output to extract license, website, and description."""
69
+ info = {}
70
+ lines = output.splitlines()
71
+ info["license"] = "Unknown"
72
+ info["website"] = "Unknown"
73
+ info["description"] = "Unknown"
74
+
75
+ for i, line in enumerate(lines):
76
+ if i == 1: # The description is usually on the second line
77
+ info["description"] = line.strip()
78
+ elif line.startswith("https://"): # The website URL
79
+ info["website"] = line.strip()
80
+ elif line.startswith("License:"): # The license information
81
+ info["license"] = line.split(":", 1)[1].strip()
82
+
83
+ # Ensure all keys are present even if some fields are missing
84
+ return info
85
+
86
+
87
+
88
+ def parse_yum_info(output):
89
+ """Parses yum repoquery --info output."""
90
+ info = {}
91
+ lines = output.splitlines()
92
+
93
+ for line in lines:
94
+ if line.startswith("License"):
95
+ info["license"] = line.split(":", 1)[1].strip()
96
+ elif line.startswith("URL"):
97
+ info["website"] = line.split(":", 1)[1].strip()
98
+ elif "Copyright" in line:
99
+ info["copyright"] = line.strip()
100
+
101
+ # Ensure all keys are present even if data is missing
102
+ return {
103
+ "license": info.get("license", "Unknown"),
104
+ "copyright": info.get("copyright", "Unknown"),
105
+ "website": info.get("website", "Unknown"),
106
+ }
107
+
108
+
109
+ def parse_apt_info(output):
110
+ """Parses apt-cache show output."""
111
+ info = {}
112
+ lines = output.splitlines()
113
+
114
+ for line in lines:
115
+ if line.startswith("License:") or "License" in line:
116
+ info["license"] = line.split(":", 1)[1].strip()
117
+ elif line.startswith("Homepage:"):
118
+ info["website"] = line.split(":", 1)[1].strip()
119
+ elif "Copyright" in line:
120
+ info["copyright"] = line.strip()
121
+
122
+ # Ensure all keys are present even if data is missing
123
+ return {
124
+ "license": info.get("license", "Unknown"),
125
+ "copyright": info.get("copyright", "Unknown"),
126
+ "website": info.get("website", "Unknown"),
127
+ }
35
128
 
36
- result = subprocess.run(cmd, capture_output=True, text=True)
37
- return result.stdout
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ossa_scanner
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: A Python library for scanning Linux packages, managing metadata, and generating SWHIDs.
5
5
  Home-page: https://github.com/oscarvalenzuelab/ossa_scanner
6
6
  Author: Oscar Valenzuela
@@ -0,0 +1,16 @@
1
+ ossa_scanner/__init__.py,sha256=XEqb2aiIn8fzGE68Mph4ck1FtQqsR_am0wRWvrYPffQ,22
2
+ ossa_scanner/cli.py,sha256=sgr8NFpf_Ut84KYFQjOKRxv8CfAMaTPhMo7DbR53lT4,2311
3
+ ossa_scanner/scanner.py,sha256=YOYB4-7EwQyZE6KU6_dyRD09tq6ntgmYvyxX02KgB5c,4885
4
+ ossa_scanner/uploader.py,sha256=X8bo7GqfpBjz2NlnvSwDR_rVqNoZDRPF2pnQMaVENbc,2436
5
+ ossa_scanner/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ ossa_scanner/utils/downloader.py,sha256=3ccwcde9yJ_SEP0mG9TDr2O0MMdA1p-K6hpzqme-KQ4,2081
7
+ ossa_scanner/utils/hash_calculator.py,sha256=i47KS_HoZNiSbGyd0iP9_TcDwxWS2SrmkIcNF2MWLcA,254
8
+ ossa_scanner/utils/os_detection.py,sha256=QdRKQ4li4SOHgBofe1qWf8OOcw8XvhM-XWUNu0Cy0a4,315
9
+ ossa_scanner/utils/package_manager.py,sha256=tWuQwgkFQjTzeisem0Gz8uFvWw5Cxd-Tft5HM8tIQmk,4028
10
+ ossa_scanner/utils/swhid_calculator.py,sha256=4Z0H2GmECMAJlvH6JBbUmaLXSLRNntyYEdxsS6CTEMQ,63
11
+ ossa_scanner-0.1.3.dist-info/LICENSE,sha256=9slQ_XNiEkio28l90NwihP7a90fCL2GQ6YhcVXTBls4,1064
12
+ ossa_scanner-0.1.3.dist-info/METADATA,sha256=22Fo5X2J06UlI-94hUZLBSGJvdzpHaK-GqKFDIDkF_Q,1043
13
+ ossa_scanner-0.1.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
14
+ ossa_scanner-0.1.3.dist-info/entry_points.txt,sha256=UVoAo-wTPxT82g3cfqTs2CmQnazd57TAwhd9VwEKD1c,55
15
+ ossa_scanner-0.1.3.dist-info/top_level.txt,sha256=uUp5CvhZfJLapXn9DyUXvgH7QK3uzF2ibH943lWN5Bs,13
16
+ ossa_scanner-0.1.3.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- ossa_scanner/__init__.py,sha256=rnObPjuBcEStqSO0S6gsdS_ot8ITOQjVj_-P1LUUYpg,22
2
- ossa_scanner/cli.py,sha256=hyRUOgp9kcwFtQrIeyth5vTxeK7eOlxfn5R9E7HX5sA,1640
3
- ossa_scanner/scanner.py,sha256=O1gKFfa1yknTNcQWGJOR3sKnFgcEZ0qZzhf2VqORLNM,4083
4
- ossa_scanner/uploader.py,sha256=X8bo7GqfpBjz2NlnvSwDR_rVqNoZDRPF2pnQMaVENbc,2436
5
- ossa_scanner/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- ossa_scanner/utils/downloader.py,sha256=5fV531x-oiFTyh6B17Afi4W72zFC2ejopvMpn1qNpw4,408
7
- ossa_scanner/utils/hash_calculator.py,sha256=i47KS_HoZNiSbGyd0iP9_TcDwxWS2SrmkIcNF2MWLcA,254
8
- ossa_scanner/utils/os_detection.py,sha256=QdRKQ4li4SOHgBofe1qWf8OOcw8XvhM-XWUNu0Cy0a4,315
9
- ossa_scanner/utils/package_manager.py,sha256=SC6NMHsH4EX689OL_D4lqMzBkXHYeGlt2wum_uCV4tA,1124
10
- ossa_scanner/utils/swhid_calculator.py,sha256=4Z0H2GmECMAJlvH6JBbUmaLXSLRNntyYEdxsS6CTEMQ,63
11
- ossa_scanner-0.1.1.dist-info/LICENSE,sha256=9slQ_XNiEkio28l90NwihP7a90fCL2GQ6YhcVXTBls4,1064
12
- ossa_scanner-0.1.1.dist-info/METADATA,sha256=sgvxMtorRONY1Mfp22MW6aYltiILEyKXVb5K1x9p-rQ,1043
13
- ossa_scanner-0.1.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
14
- ossa_scanner-0.1.1.dist-info/entry_points.txt,sha256=UVoAo-wTPxT82g3cfqTs2CmQnazd57TAwhd9VwEKD1c,55
15
- ossa_scanner-0.1.1.dist-info/top_level.txt,sha256=uUp5CvhZfJLapXn9DyUXvgH7QK3uzF2ibH943lWN5Bs,13
16
- ossa_scanner-0.1.1.dist-info/RECORD,,