ossa-scanner 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/PKG-INFO +2 -1
  2. ossa_scanner-0.1.4/ossa_scanner/__init__.py +1 -0
  3. ossa_scanner-0.1.4/ossa_scanner/cli.py +50 -0
  4. ossa_scanner-0.1.4/ossa_scanner/scanner.py +120 -0
  5. ossa_scanner-0.1.4/ossa_scanner/utils/hash_calculator.py +35 -0
  6. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner/utils/package_manager.py +29 -22
  7. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner.egg-info/PKG-INFO +2 -1
  8. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner.egg-info/requires.txt +1 -0
  9. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/setup.py +1 -0
  10. ossa_scanner-0.1.2/ossa_scanner/__init__.py +0 -1
  11. ossa_scanner-0.1.2/ossa_scanner/cli.py +0 -35
  12. ossa_scanner-0.1.2/ossa_scanner/scanner.py +0 -113
  13. ossa_scanner-0.1.2/ossa_scanner/utils/hash_calculator.py +0 -8
  14. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/LICENSE +0 -0
  15. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/README.md +0 -0
  16. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner/uploader.py +0 -0
  17. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner/utils/__init__.py +0 -0
  18. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner/utils/downloader.py +0 -0
  19. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner/utils/os_detection.py +0 -0
  20. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner/utils/swhid_calculator.py +0 -0
  21. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner.egg-info/SOURCES.txt +0 -0
  22. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner.egg-info/dependency_links.txt +0 -0
  23. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner.egg-info/entry_points.txt +0 -0
  24. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/ossa_scanner.egg-info/top_level.txt +0 -0
  25. {ossa_scanner-0.1.2 → ossa_scanner-0.1.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ossa_scanner
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: A Python library for scanning Linux packages, managing metadata, and generating SWHIDs.
5
5
  Home-page: https://github.com/oscarvalenzuelab/ossa_scanner
6
6
  Author: Oscar Valenzuela
@@ -23,6 +23,7 @@ License-File: LICENSE
23
23
  Requires-Dist: click
24
24
  Requires-Dist: swh.model
25
25
  Requires-Dist: distro
26
+ Requires-Dist: ssdeep
26
27
 
27
28
  # ossa_scanner
28
29
  Open Source Advisory Scanner (Generator)
@@ -0,0 +1 @@
1
+ __version__ = "0.1.4"
@@ -0,0 +1,50 @@
1
+ import argparse
2
+ import os
3
+ import shutil
4
+ from .scanner import Scanner
5
+ from .uploader import GitHubUploader
6
+
7
+ def main():
8
+ parser = argparse.ArgumentParser(description="OSSA Scanner CLI Tool")
9
+ parser.add_argument('--threads', type=int, default=4, help="Number of threads for parallel processing")
10
+ parser.add_argument('--upload', action='store_true', help="Upload results to GitHub")
11
+ parser.add_argument('--repo-owner', type=str, help="GitHub repository owner (required for upload)")
12
+ parser.add_argument('--repo-name', type=str, help="GitHub repository name (required for upload)")
13
+ parser.add_argument('--token', type=str, help="GitHub token (required for upload)")
14
+ parser.add_argument('--repo-dir', type=str, help="Target directory in GitHub repo for results (required for upload)")
15
+ parser.add_argument('--retain-temp', action='store_true', help="Retain the temporary directory for downloaded and extracted packages")
16
+ args = parser.parse_args()
17
+
18
+ # Define directories
19
+ reports_dir = os.path.join(os.getcwd(), "ossa_reports")
20
+ temp_dir = "/tmp/ossa_temp"
21
+
22
+ os.makedirs(reports_dir, exist_ok=True)
23
+ os.makedirs(temp_dir, exist_ok=True)
24
+
25
+ try:
26
+ # Initialize the scanner
27
+ scanner = Scanner(threads=args.threads, output_dir=reports_dir, temp_dir=temp_dir)
28
+
29
+ # Perform scanning
30
+ results = scanner.scan_packages()
31
+
32
+ # Handle GitHub upload if specified
33
+ if args.upload:
34
+ if not (args.repo_owner and args.repo_name and args.token and args.repo_dir):
35
+ raise ValueError("GitHub upload requires --repo-owner, --repo-name, --token, and --repo-dir")
36
+
37
+ uploader = GitHubUploader(args.token, args.repo_owner, args.repo_name)
38
+ for report_file in os.listdir(reports_dir):
39
+ report_path = os.path.join(reports_dir, report_file)
40
+ if os.path.isfile(report_path):
41
+ uploader.upload_file(report_path, os.path.join(args.repo_dir, report_file), "Add OSSA report")
42
+
43
+ finally:
44
+ # Clean up the temporary directory unless the user opts to retain it
45
+ if not args.retain_temp:
46
+ print(f"Cleaning up temporary directory: {temp_dir}")
47
+ shutil.rmtree(temp_dir, ignore_errors=True)
48
+
49
+ if __name__ == "__main__":
50
+ main()
@@ -0,0 +1,120 @@
1
+ import os
2
+ import json
3
+ import hashlib
4
+ from datetime import datetime
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ from .utils.os_detection import detect_os
7
+ from .utils.package_manager import list_packages, get_package_info
8
+ from .utils.downloader import download_source
9
+ from .utils.hash_calculator import calculate_file_hash
10
+ from .utils.swhid_calculator import calculate_swhid
11
+
12
+ class Scanner:
13
+ def __init__(self, threads=4, output_dir="ossa_reports", temp_dir="/tmp/ossa_temp"):
14
+ self.output_dir = output_dir
15
+ self.temp_dir = temp_dir
16
+ self.os_type = detect_os()
17
+ self.threads = threads
18
+ os.makedirs(self.temp_dir, exist_ok=True)
19
+
20
+ def process_package(self, package):
21
+ try:
22
+ print(f"Processing package: {package}")
23
+ package_info = get_package_info(self.os_type, package)
24
+ print(f"Fetched metadata for {package}")
25
+
26
+ source_file = download_source(self.os_type, package, self.temp_dir)
27
+ print(f"Downloaded source file: {source_file}")
28
+
29
+ file_hash = calculate_file_hash(source_file)
30
+ print(f"Hash (SHA256) for {package}: {file_hash}")
31
+
32
+ # Extract source code directory in temp_dir
33
+ source_dir = os.path.join(self.temp_dir, package)
34
+ os.makedirs(source_dir, exist_ok=True)
35
+
36
+ # Calculate SWHID
37
+ swhid = calculate_swhid(source_dir)
38
+ print(f"SWHID for {package}: {swhid}")
39
+
40
+ # Save report
41
+ self.save_package_report(package, package_info, file_hash, swhid, source_file)
42
+
43
+ except Exception as e:
44
+ print(f"Error processing package {package}: {e}")
45
+
46
+ def scan_packages(self):
47
+ """
48
+ Scans all packages in the repository and processes them in parallel.
49
+ """
50
+ print(f"Detected OS: {self.os_type}")
51
+ print("Listing available packages...")
52
+ packages = list_packages(self.os_type)
53
+ with ThreadPoolExecutor(max_workers=self.threads) as executor:
54
+ # Submit tasks for parallel processing
55
+ future_to_package = {
56
+ executor.submit(self.process_package, package): package
57
+ for package in packages
58
+ }
59
+
60
+ for future in as_completed(future_to_package):
61
+ package = future_to_package[future]
62
+ try:
63
+ future.result()
64
+ except Exception as e:
65
+ print(f"Exception occurred for package {package}: {e}")
66
+
67
+ def save_package_report(self, package, package_info, file_hash, swhid, source_file):
68
+ """
69
+ Save the report for a single package.
70
+
71
+ Args:
72
+ package (str): Package name.
73
+ package_info (dict): Information about the package.
74
+ file_hash (str): SHA256 hash of the downloaded source.
75
+ swhid (str): Software Heritage ID of the package.
76
+ """
77
+ # Generate report filename
78
+ date_str = datetime.now().strftime("%Y%m%d")
79
+ report_filename = f"ossa-{date_str}-{hash(package) % 10000}-{package}.json"
80
+ report_path = os.path.join(self.output_dir, report_filename)
81
+
82
+ # This need to be moved to a different class
83
+ artifact_name = source_file
84
+ if "tmp/" in source_file:
85
+ artifact_name = os.path.basename(source_file)
86
+ if "--" in artifact_name:
87
+ artifact_name = artifact_name.split("--")[-1]
88
+
89
+ # Create the report content
90
+ report = {
91
+ "id": f"OSSA-{date_str}-{hash(package) % 10000}",
92
+ "version": "1.0.0",
93
+ "severity": package_info.get("severity", []),
94
+ "title": f"Advisory for {package}",
95
+ "package_name": package,
96
+ "publisher": "Generated by OSSA Collector",
97
+ "last_updated": datetime.now().isoformat(),
98
+ "approvals": [{"consumption": True, "externalization": True}],
99
+ "description": f"Automatically generated OSSA for the package {package}.",
100
+ "purls": [f"pkg:{self.os_type}/{package}"],
101
+ "regex": [f"^pkg:{self.os_type}/{package}.*"],
102
+ "affected_versions": ["*.*"],
103
+ "artifacts": [
104
+ {
105
+ "url": f"file://{artifact_name}",
106
+ "hashes": {
107
+ "sha1": file_hash['sha1'], "sha256": file_hash['sha256'],
108
+ "ssdeep": file_hash['ssdeep'], "swhid": file_hash['swhid']},
109
+ "swhid": swhid
110
+ }
111
+ ],
112
+ "licenses": package_info.get("licenses", []),
113
+ "aliases": package_info.get("aliases", []),
114
+ "references": package_info.get("references", [])
115
+ }
116
+
117
+ # Save the report to the output directory
118
+ with open(report_path, "w") as f:
119
+ json.dump(report, f, indent=4)
120
+ print(f"Report saved: {report_path}")
@@ -0,0 +1,35 @@
1
+ import os
2
+ import json
3
+ import hashlib
4
+ import ssdeep
5
+
6
+ def calculate_file_hash(file_path):
7
+ file_hash = {}
8
+ file_hash['sha1'] = compute_sha1(file_path)
9
+ file_hash['sha256'] = compute_sha256(file_path)
10
+ file_hash['ssdeep'] = compute_fuzzy_hash(file_path)
11
+ file_hash['swhid'] = compute_swhid(file_path)
12
+ print(file_hash)
13
+ return file_hash
14
+
15
+ def compute_sha1(file_path):
16
+ sha1 = hashlib.sha1()
17
+ with open(file_path, "rb") as f:
18
+ for chunk in iter(lambda: f.read(4096), b""):
19
+ sha1.update(chunk)
20
+ return sha1.hexdigest()
21
+
22
+ def compute_sha256(file_path):
23
+ sha256 = hashlib.sha256()
24
+ with open(file_path, "rb") as f:
25
+ for chunk in iter(lambda: f.read(4096), b""):
26
+ sha256.update(chunk)
27
+ return sha256.hexdigest()
28
+
29
+ def compute_fuzzy_hash(file_path):
30
+ return ssdeep.hash_from_file(file_path)
31
+
32
+ def compute_swhid(file_path):
33
+ sha1_hash = compute_sha1(file_path)
34
+ swhid = f"swh:1:cnt:{sha1_hash}"
35
+ return swhid
@@ -25,7 +25,7 @@ def list_packages(package_manager):
25
25
 
26
26
  packages = result.stdout.splitlines()
27
27
  extracted_packages = []
28
- max_packages = 5
28
+ max_packages = 2
29
29
  k_packages = 0
30
30
  for line in packages:
31
31
  if not line.strip() or line.startswith("==>"):
@@ -51,8 +51,6 @@ def get_package_info(package_manager, package_name):
51
51
  try:
52
52
  result = subprocess.run(cmd, capture_output=True, text=True, check=True)
53
53
  output = result.stdout
54
-
55
- # Parse the output based on the package manager
56
54
  if package_manager == 'brew':
57
55
  return parse_brew_info(output)
58
56
  elif package_manager in ['yum', 'dnf']:
@@ -68,23 +66,21 @@ def parse_brew_info(output):
68
66
  """Parses brew info output to extract license, website, and description."""
69
67
  info = {}
70
68
  lines = output.splitlines()
71
- info["license"] = "Unknown"
72
- info["website"] = "Unknown"
73
- info["description"] = "Unknown"
69
+ info["licenses"] = "NOASSERTION"
70
+ info["references"] = "NOASSERTION"
71
+ info["description"] = "NOASSERTION"
74
72
 
75
73
  for i, line in enumerate(lines):
76
74
  if i == 1: # The description is usually on the second line
77
75
  info["description"] = line.strip()
78
76
  elif line.startswith("https://"): # The website URL
79
- info["website"] = line.strip()
77
+ info["references"] = line.strip()
80
78
  elif line.startswith("License:"): # The license information
81
- info["license"] = line.split(":", 1)[1].strip()
79
+ info["licenses"] = line.split(":", 1)[1].strip()
80
+ info["severity"] = license_classificaton(info["licenses"])
82
81
 
83
- # Ensure all keys are present even if some fields are missing
84
82
  return info
85
83
 
86
-
87
-
88
84
  def parse_yum_info(output):
89
85
  """Parses yum repoquery --info output."""
90
86
  info = {}
@@ -92,17 +88,19 @@ def parse_yum_info(output):
92
88
 
93
89
  for line in lines:
94
90
  if line.startswith("License"):
95
- info["license"] = line.split(":", 1)[1].strip()
91
+ info["licenses"] = line.split(":", 1)[1].strip()
96
92
  elif line.startswith("URL"):
97
- info["website"] = line.split(":", 1)[1].strip()
93
+ info["references"] = line.split(":", 1)[1].strip()
98
94
  elif "Copyright" in line:
99
- info["copyright"] = line.strip()
95
+ info["references"] = line.strip()
96
+ severity = license_classificaton(info["licenses"])
100
97
 
101
98
  # Ensure all keys are present even if data is missing
102
99
  return {
103
- "license": info.get("license", "Unknown"),
104
- "copyright": info.get("copyright", "Unknown"),
105
- "website": info.get("website", "Unknown"),
100
+ "licenses": info.get("licenses", "NOASSERTION"),
101
+ "copyright": info.get("copyright", "NOASSERTION"),
102
+ "references": info.get("references", "NOASSERTION"),
103
+ "severity": severity,
106
104
  }
107
105
 
108
106
 
@@ -113,16 +111,25 @@ def parse_apt_info(output):
113
111
 
114
112
  for line in lines:
115
113
  if line.startswith("License:") or "License" in line:
116
- info["license"] = line.split(":", 1)[1].strip()
114
+ info["licenses"] = line.split(":", 1)[1].strip()
117
115
  elif line.startswith("Homepage:"):
118
116
  info["website"] = line.split(":", 1)[1].strip()
119
117
  elif "Copyright" in line:
120
- info["copyright"] = line.strip()
118
+ info["references"] = line.strip()
119
+ severity = license_classificaton(info["licenses"])
121
120
 
122
121
  # Ensure all keys are present even if data is missing
123
122
  return {
124
- "license": info.get("license", "Unknown"),
125
- "copyright": info.get("copyright", "Unknown"),
126
- "website": info.get("website", "Unknown"),
123
+ "licenses": info.get("licenses", "NOASSERTION"),
124
+ "copyright": info.get("copyright", "NOASSERTION"),
125
+ "references": info.get("references", "NOASSERTION"),
126
+ "severity": severity,
127
127
  }
128
128
 
129
+ def license_classificaton(licenses):
130
+ copyleft_licenses = ['GPL', 'CDDL', 'MPL']
131
+ severity = "Informational"
132
+ for cl_license in copyleft_licenses:
133
+ if cl_license.lower() in licenses:
134
+ severity = "Medium"
135
+ return severity
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ossa_scanner
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: A Python library for scanning Linux packages, managing metadata, and generating SWHIDs.
5
5
  Home-page: https://github.com/oscarvalenzuelab/ossa_scanner
6
6
  Author: Oscar Valenzuela
@@ -23,6 +23,7 @@ License-File: LICENSE
23
23
  Requires-Dist: click
24
24
  Requires-Dist: swh.model
25
25
  Requires-Dist: distro
26
+ Requires-Dist: ssdeep
26
27
 
27
28
  # ossa_scanner
28
29
  Open Source Advisory Scanner (Generator)
@@ -1,3 +1,4 @@
1
1
  click
2
2
  swh.model
3
3
  distro
4
+ ssdeep
@@ -32,6 +32,7 @@ setup(
32
32
  "click",
33
33
  "swh.model",
34
34
  "distro",
35
+ "ssdeep",
35
36
  ],
36
37
  entry_points={
37
38
  "console_scripts": [
@@ -1 +0,0 @@
1
- __version__ = "0.1.2"
@@ -1,35 +0,0 @@
1
- import argparse
2
- from .scanner import Scanner
3
- from .uploader import GitHubUploader
4
-
5
- def main():
6
- parser = argparse.ArgumentParser(description="OSSA Scanner CLI Tool")
7
- parser.add_argument('--output-dir', type=str, required=True, help="Directory to save downloaded source")
8
- parser.add_argument('--results-file', type=str, required=True, help="Path to save the JSON results")
9
- parser.add_argument('--threads', type=int, default=4, help="Number of threads for parallel processing")
10
- parser.add_argument('--upload', action='store_true', help="Upload results to GitHub")
11
- parser.add_argument('--repo-owner', type=str, help="GitHub repository owner")
12
- parser.add_argument('--repo-name', type=str, help="GitHub repository name")
13
- parser.add_argument('--token', type=str, help="GitHub token")
14
- parser.add_argument('--repo-dir', type=str, help="Target directory in GitHub repo for results")
15
- args = parser.parse_args()
16
-
17
- # Initialize the scanner
18
- scanner = Scanner(output_dir=args.output_dir, threads=args.threads)
19
-
20
- # Perform scanning
21
- results = scanner.scan_packages()
22
-
23
- # Save results locally
24
- scanner.save_results(results, args.results_file)
25
-
26
- # Upload results to GitHub if specified
27
- if args.upload:
28
- if not (args.repo_owner and args.repo_name and args.token and args.repo_dir):
29
- raise ValueError("GitHub upload requires --repo-owner, --repo-name, --token, and --repo-dir")
30
-
31
- uploader = GitHubUploader(args.token, args.repo_owner, args.repo_name)
32
- scanner.upload_results(args.results_file, uploader, args.repo_dir)
33
-
34
- if __name__ == "__main__":
35
- main()
@@ -1,113 +0,0 @@
1
- import os
2
- import json
3
- from concurrent.futures import ThreadPoolExecutor, as_completed
4
- from .utils.os_detection import detect_os
5
- from .utils.package_manager import list_packages, get_package_info
6
- from .utils.downloader import download_source
7
- from .utils.hash_calculator import calculate_file_hash
8
- from .utils.swhid_calculator import calculate_swhid
9
- from .uploader import GitHubUploader
10
-
11
- class Scanner:
12
- def __init__(self, output_dir, threads=4):
13
- self.output_dir = output_dir
14
- self.os_type = detect_os()
15
- self.threads = threads
16
-
17
- def process_package(self, package):
18
- """
19
- Processes a single package: downloads source, extracts, calculates hash and SWHID.
20
-
21
- Args:
22
- package (str): Package name to process.
23
-
24
- Returns:
25
- dict: Result of the processed package including hash and SWHID.
26
- """
27
- try:
28
- print(f"Processing package: {package}")
29
- package_info = get_package_info(self.os_type, package)
30
- print(f"Fetched metadata for {package}")
31
-
32
- # Download the source code
33
- source_file = download_source(self.os_type, package, self.output_dir)
34
- print(f"Downloaded source file: {source_file}")
35
-
36
- # Calculate hash of the source file
37
- file_hash = calculate_file_hash(source_file)
38
- print(f"Hash (SHA256) for {package}: {file_hash}")
39
-
40
- # Extract source code directory
41
- source_dir = os.path.join(self.output_dir, package)
42
- os.makedirs(source_dir, exist_ok=True)
43
-
44
- # Calculate SWHID
45
- swhid = calculate_swhid(source_dir)
46
- print(f"SWHID for {package}: {swhid}")
47
-
48
- return {
49
- "package": package,
50
- "info": package_info,
51
- "hash": file_hash,
52
- "swhid": swhid,
53
- }
54
-
55
- except Exception as e:
56
- print(f"Error processing package {package}: {e}")
57
- return {
58
- "package": package,
59
- "error": str(e)
60
- }
61
-
62
- def scan_packages(self):
63
- """
64
- Scans all packages in the repository and processes them in parallel.
65
-
66
- Returns:
67
- list: List of results for each package.
68
- """
69
- print(f"Detected OS: {self.os_type}")
70
- print("Listing available packages...")
71
- packages = list_packages(self.os_type)
72
- results = []
73
- with ThreadPoolExecutor(max_workers=self.threads) as executor:
74
- # Submit tasks for parallel processing
75
- future_to_package = {
76
- executor.submit(self.process_package, package): package
77
- for package in packages
78
- }
79
-
80
- for future in as_completed(future_to_package):
81
- package = future_to_package[future]
82
- try:
83
- result = future.result()
84
- results.append(result)
85
- except Exception as e:
86
- print(f"Exception occurred for package {package}: {e}")
87
- return results
88
-
89
- def save_results(self, results, output_file):
90
- """
91
- Save the scan results to a JSON file.
92
-
93
- Args:
94
- results (list): List of results for each package.
95
- output_file (str): Path to save the JSON file.
96
- """
97
- with open(output_file, "w") as f:
98
- json.dump(results, f, indent=4)
99
- print(f"Results saved to {output_file}")
100
-
101
- def upload_results(self, results_file, github_uploader, repo_dir):
102
- """
103
- Uploads the results file to GitHub.
104
-
105
- Args:
106
- results_file (str): Local results file path to upload.
107
- github_uploader (GitHubUploader): Instance of the GitHubUploader class.
108
- repo_dir (str): Path in the GitHub repository where the results will be uploaded.
109
- """
110
- print(f"Uploading results to GitHub: {repo_dir}")
111
- repo_path = os.path.join(repo_dir, os.path.basename(results_file))
112
- github_uploader.upload_file(results_file, repo_path, "Add scanning results")
113
-
@@ -1,8 +0,0 @@
1
- import hashlib
2
-
3
- def calculate_file_hash(file_path, algorithm='sha256'):
4
- hash_func = hashlib.new(algorithm)
5
- with open(file_path, 'rb') as f:
6
- while chunk := f.read(8192):
7
- hash_func.update(chunk)
8
- return hash_func.hexdigest()
File without changes
File without changes
File without changes