PyPI - ossa-scanner - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

ossa-scanner 0.1.2 py3-none-any.whl → 0.1.4 py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

ossa_scanner/__init__.py +1 -1
ossa_scanner/cli.py +34 -19
ossa_scanner/scanner.py +61 -54
ossa_scanner/utils/hash_calculator.py +33 -6
ossa_scanner/utils/package_manager.py +29 -22
{ossa_scanner-0.1.2.dist-info → ossa_scanner-0.1.4.dist-info}/METADATA +2 -1
ossa_scanner-0.1.4.dist-info/RECORD +16 -0
ossa_scanner-0.1.2.dist-info/RECORD +0 -16
{ossa_scanner-0.1.2.dist-info → ossa_scanner-0.1.4.dist-info}/LICENSE +0 -0
{ossa_scanner-0.1.2.dist-info → ossa_scanner-0.1.4.dist-info}/WHEEL +0 -0
{ossa_scanner-0.1.2.dist-info → ossa_scanner-0.1.4.dist-info}/entry_points.txt +0 -0
{ossa_scanner-0.1.2.dist-info → ossa_scanner-0.1.4.dist-info}/top_level.txt +0 -0

ossa_scanner/__init__.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "0.1.2"
+__version__ = "0.1.4"

ossa_scanner/cli.py CHANGED Viewed

@@ -1,35 +1,50 @@
 import argparse
+import os
+import shutil
 from .scanner import Scanner
 from .uploader import GitHubUploader
 def main():
     parser = argparse.ArgumentParser(description="OSSA Scanner CLI Tool")
-    parser.add_argument('--output-dir', type=str, required=True, help="Directory to save downloaded source")
-    parser.add_argument('--results-file', type=str, required=True, help="Path to save the JSON results")
     parser.add_argument('--threads', type=int, default=4, help="Number of threads for parallel processing")
     parser.add_argument('--upload', action='store_true', help="Upload results to GitHub")
-    parser.add_argument('--repo-owner', type=str, help="GitHub repository owner")
-    parser.add_argument('--repo-name', type=str, help="GitHub repository name")
-    parser.add_argument('--token', type=str, help="GitHub token")
-    parser.add_argument('--repo-dir', type=str, help="Target directory in GitHub repo for results")
+    parser.add_argument('--repo-owner', type=str, help="GitHub repository owner (required for upload)")
+    parser.add_argument('--repo-name', type=str, help="GitHub repository name (required for upload)")
+    parser.add_argument('--token', type=str, help="GitHub token (required for upload)")
+    parser.add_argument('--repo-dir', type=str, help="Target directory in GitHub repo for results (required for upload)")
+    parser.add_argument('--retain-temp', action='store_true', help="Retain the temporary directory for downloaded and extracted packages")
     args = parser.parse_args()
-    # Initialize the scanner
-    scanner = Scanner(output_dir=args.output_dir, threads=args.threads)
-    # Perform scanning
-    results = scanner.scan_packages()
+    # Define directories
+    reports_dir = os.path.join(os.getcwd(), "ossa_reports")
+    temp_dir = "/tmp/ossa_temp"
-    # Save results locally
-    scanner.save_results(results, args.results_file)
+    os.makedirs(reports_dir, exist_ok=True)
+    os.makedirs(temp_dir, exist_ok=True)
-    # Upload results to GitHub if specified
-    if args.upload:
-        if not (args.repo_owner and args.repo_name and args.token and args.repo_dir):
-            raise ValueError("GitHub upload requires --repo-owner, --repo-name, --token, and --repo-dir")
+    try:
+        # Initialize the scanner
+        scanner = Scanner(threads=args.threads, output_dir=reports_dir, temp_dir=temp_dir)
-        uploader = GitHubUploader(args.token, args.repo_owner, args.repo_name)
-        scanner.upload_results(args.results_file, uploader, args.repo_dir)
+        # Perform scanning
+        results = scanner.scan_packages()
+        # Handle GitHub upload if specified
+        if args.upload:
+            if not (args.repo_owner and args.repo_name and args.token and args.repo_dir):
+                raise ValueError("GitHub upload requires --repo-owner, --repo-name, --token, and --repo-dir")
+            uploader = GitHubUploader(args.token, args.repo_owner, args.repo_name)
+            for report_file in os.listdir(reports_dir):
+                report_path = os.path.join(reports_dir, report_file)
+                if os.path.isfile(report_path):
+                    uploader.upload_file(report_path, os.path.join(args.repo_dir, report_file), "Add OSSA report")
+    finally:
+        # Clean up the temporary directory unless the user opts to retain it
+        if not args.retain_temp:
+            print(f"Cleaning up temporary directory: {temp_dir}")
+            shutil.rmtree(temp_dir, ignore_errors=True)
 if __name__ == "__main__":
     main()

ossa_scanner/scanner.py CHANGED Viewed

@@ -1,75 +1,55 @@
 import os
 import json
+import hashlib
+from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from .utils.os_detection import detect_os
 from .utils.package_manager import list_packages, get_package_info
 from .utils.downloader import download_source
 from .utils.hash_calculator import calculate_file_hash
 from .utils.swhid_calculator import calculate_swhid
-from .uploader import GitHubUploader
 class Scanner:
-    def __init__(self, output_dir, threads=4):
+    def __init__(self, threads=4, output_dir="ossa_reports", temp_dir="/tmp/ossa_temp"):
         self.output_dir = output_dir
+        self.temp_dir = temp_dir
         self.os_type = detect_os()
         self.threads = threads
+        os.makedirs(self.temp_dir, exist_ok=True)
     def process_package(self, package):
-        """
-        Processes a single package: downloads source, extracts, calculates hash and SWHID.
-        Args:
-            package (str): Package name to process.
-        Returns:
-            dict: Result of the processed package including hash and SWHID.
-        """
         try:
             print(f"Processing package: {package}")
             package_info = get_package_info(self.os_type, package)
             print(f"Fetched metadata for {package}")
-            # Download the source code
-            source_file = download_source(self.os_type, package, self.output_dir)
+            source_file = download_source(self.os_type, package, self.temp_dir)
             print(f"Downloaded source file: {source_file}")
-            # Calculate hash of the source file
             file_hash = calculate_file_hash(source_file)
             print(f"Hash (SHA256) for {package}: {file_hash}")
-            # Extract source code directory
-            source_dir = os.path.join(self.output_dir, package)
+            # Extract source code directory in temp_dir
+            source_dir = os.path.join(self.temp_dir, package)
             os.makedirs(source_dir, exist_ok=True)
             # Calculate SWHID
             swhid = calculate_swhid(source_dir)
             print(f"SWHID for {package}: {swhid}")
-            return {
-                "package": package,
-                "info": package_info,
-                "hash": file_hash,
-                "swhid": swhid,
-            }
+            # Save report
+            self.save_package_report(package, package_info, file_hash, swhid, source_file)
         except Exception as e:
             print(f"Error processing package {package}: {e}")
-            return {
-                "package": package,
-                "error": str(e)
-            }
     def scan_packages(self):
         """
         Scans all packages in the repository and processes them in parallel.
-        Returns:
-            list: List of results for each package.
         """
         print(f"Detected OS: {self.os_type}")
         print("Listing available packages...")
         packages = list_packages(self.os_type)
-        results = []
         with ThreadPoolExecutor(max_workers=self.threads) as executor:
             # Submit tasks for parallel processing
             future_to_package = {
@@ -80,34 +60,61 @@ class Scanner:
             for future in as_completed(future_to_package):
                 package = future_to_package[future]
                 try:
-                    result = future.result()
-                    results.append(result)
+                    future.result()
                 except Exception as e:
                     print(f"Exception occurred for package {package}: {e}")
-        return results
-    def save_results(self, results, output_file):
+    def save_package_report(self, package, package_info, file_hash, swhid, source_file):
         """
-        Save the scan results to a JSON file.
+        Save the report for a single package.
         Args:
-            results (list): List of results for each package.
-            output_file (str): Path to save the JSON file.
-        """
-        with open(output_file, "w") as f:
-            json.dump(results, f, indent=4)
-        print(f"Results saved to {output_file}")
-    def upload_results(self, results_file, github_uploader, repo_dir):
+            package (str): Package name.
+            package_info (dict): Information about the package.
+            file_hash (str): SHA256 hash of the downloaded source.
+            swhid (str): Software Heritage ID of the package.
         """
-        Uploads the results file to GitHub.
-        Args:
-            results_file (str): Local results file path to upload.
-            github_uploader (GitHubUploader): Instance of the GitHubUploader class.
-            repo_dir (str): Path in the GitHub repository where the results will be uploaded.
-        """
-        print(f"Uploading results to GitHub: {repo_dir}")
-        repo_path = os.path.join(repo_dir, os.path.basename(results_file))
-        github_uploader.upload_file(results_file, repo_path, "Add scanning results")
+        # Generate report filename
+        date_str = datetime.now().strftime("%Y%m%d")
+        report_filename = f"ossa-{date_str}-{hash(package) % 10000}-{package}.json"
+        report_path = os.path.join(self.output_dir, report_filename)
+        # This need to be moved to a different class
+        artifact_name = source_file
+        if "tmp/" in source_file:
+            artifact_name = os.path.basename(source_file)
+        if "--" in artifact_name:
+            artifact_name = artifact_name.split("--")[-1]
+        # Create the report content
+        report = {
+            "id": f"OSSA-{date_str}-{hash(package) % 10000}",
+            "version": "1.0.0",
+            "severity": package_info.get("severity", []),
+            "title": f"Advisory for {package}",
+            "package_name": package,
+            "publisher": "Generated by OSSA Collector",
+            "last_updated": datetime.now().isoformat(),
+            "approvals": [{"consumption": True, "externalization": True}],
+            "description": f"Automatically generated OSSA for the package {package}.",
+            "purls": [f"pkg:{self.os_type}/{package}"],
+            "regex": [f"^pkg:{self.os_type}/{package}.*"],
+            "affected_versions": ["*.*"],
+            "artifacts": [
+                {
+                    "url": f"file://{artifact_name}",
+                    "hashes": {
+                        "sha1": file_hash['sha1'], "sha256": file_hash['sha256'],
+                        "ssdeep": file_hash['ssdeep'], "swhid": file_hash['swhid']},
+                    "swhid": swhid
+                }
+            ],
+            "licenses": package_info.get("licenses", []),
+            "aliases": package_info.get("aliases", []),
+            "references": package_info.get("references", [])
+        }
+        # Save the report to the output directory
+        with open(report_path, "w") as f:
+            json.dump(report, f, indent=4)
+        print(f"Report saved: {report_path}")

ossa_scanner/utils/hash_calculator.py CHANGED Viewed

@@ -1,8 +1,35 @@
+import os
+import json
 import hashlib
+import ssdeep
-def calculate_file_hash(file_path, algorithm='sha256'):
-    hash_func = hashlib.new(algorithm)
-    with open(file_path, 'rb') as f:
-        while chunk := f.read(8192):
-            hash_func.update(chunk)
-    return hash_func.hexdigest()
+def calculate_file_hash(file_path):
+    file_hash = {}
+    file_hash['sha1'] = compute_sha1(file_path)
+    file_hash['sha256'] = compute_sha256(file_path)
+    file_hash['ssdeep'] = compute_fuzzy_hash(file_path)
+    file_hash['swhid'] = compute_swhid(file_path)
+    print(file_hash)
+    return file_hash
+def compute_sha1(file_path):
+    sha1 = hashlib.sha1()
+    with open(file_path, "rb") as f:
+        for chunk in iter(lambda: f.read(4096), b""):
+            sha1.update(chunk)
+    return sha1.hexdigest()
+def compute_sha256(file_path):
+    sha256 = hashlib.sha256()
+    with open(file_path, "rb") as f:
+        for chunk in iter(lambda: f.read(4096), b""):
+            sha256.update(chunk)
+    return sha256.hexdigest()
+def compute_fuzzy_hash(file_path):
+    return ssdeep.hash_from_file(file_path)
+def compute_swhid(file_path):
+    sha1_hash = compute_sha1(file_path)
+    swhid = f"swh:1:cnt:{sha1_hash}"
+    return swhid

ossa_scanner/utils/package_manager.py CHANGED Viewed

@@ -25,7 +25,7 @@ def list_packages(package_manager):
     packages = result.stdout.splitlines()
     extracted_packages = []
-    max_packages = 5
+    max_packages = 2
     k_packages = 0
     for line in packages:
         if not line.strip() or line.startswith("==>"):
@@ -51,8 +51,6 @@ def get_package_info(package_manager, package_name):
     try:
         result = subprocess.run(cmd, capture_output=True, text=True, check=True)
         output = result.stdout
-        # Parse the output based on the package manager
         if package_manager == 'brew':
             return parse_brew_info(output)
         elif package_manager in ['yum', 'dnf']:
@@ -68,23 +66,21 @@ def parse_brew_info(output):
     """Parses brew info output to extract license, website, and description."""
     info = {}
     lines = output.splitlines()
-    info["license"] = "Unknown"
-    info["website"] = "Unknown"
-    info["description"] = "Unknown"
+    info["licenses"] = "NOASSERTION"
+    info["references"] = "NOASSERTION"
+    info["description"] = "NOASSERTION"
     for i, line in enumerate(lines):
         if i == 1:  # The description is usually on the second line
             info["description"] = line.strip()
         elif line.startswith("https://"):  # The website URL
-            info["website"] = line.strip()
+            info["references"] = line.strip()
         elif line.startswith("License:"):  # The license information
-            info["license"] = line.split(":", 1)[1].strip()
+            info["licenses"] = line.split(":", 1)[1].strip()
+    info["severity"] = license_classificaton(info["licenses"])
-    # Ensure all keys are present even if some fields are missing
     return info
 def parse_yum_info(output):
     """Parses yum repoquery --info output."""
     info = {}
@@ -92,17 +88,19 @@ def parse_yum_info(output):
     for line in lines:
         if line.startswith("License"):
-            info["license"] = line.split(":", 1)[1].strip()
+            info["licenses"] = line.split(":", 1)[1].strip()
         elif line.startswith("URL"):
-            info["website"] = line.split(":", 1)[1].strip()
+            info["references"] = line.split(":", 1)[1].strip()
         elif "Copyright" in line:
-            info["copyright"] = line.strip()
+            info["references"] = line.strip()
+        severity = license_classificaton(info["licenses"])
     # Ensure all keys are present even if data is missing
     return {
-        "license": info.get("license", "Unknown"),
-        "copyright": info.get("copyright", "Unknown"),
-        "website": info.get("website", "Unknown"),
+        "licenses": info.get("licenses", "NOASSERTION"),
+        "copyright": info.get("copyright", "NOASSERTION"),
+        "references": info.get("references", "NOASSERTION"),
+        "severity": severity,
     }
@@ -113,16 +111,25 @@ def parse_apt_info(output):
     for line in lines:
         if line.startswith("License:") or "License" in line:
-            info["license"] = line.split(":", 1)[1].strip()
+            info["licenses"] = line.split(":", 1)[1].strip()
         elif line.startswith("Homepage:"):
             info["website"] = line.split(":", 1)[1].strip()
         elif "Copyright" in line:
-            info["copyright"] = line.strip()
+            info["references"] = line.strip()
+        severity = license_classificaton(info["licenses"])
     # Ensure all keys are present even if data is missing
     return {
-        "license": info.get("license", "Unknown"),
-        "copyright": info.get("copyright", "Unknown"),
-        "website": info.get("website", "Unknown"),
+        "licenses": info.get("licenses", "NOASSERTION"),
+        "copyright": info.get("copyright", "NOASSERTION"),
+        "references": info.get("references", "NOASSERTION"),
+        "severity": severity,
     }
+def license_classificaton(licenses):
+    copyleft_licenses = ['GPL', 'CDDL', 'MPL']
+    severity = "Informational"
+    for cl_license in copyleft_licenses:
+        if cl_license.lower() in licenses:
+            severity = "Medium"
+    return severity

{ossa_scanner-0.1.2.dist-info → ossa_scanner-0.1.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ossa_scanner
-Version: 0.1.2
+Version: 0.1.4
 Summary: A Python library for scanning Linux packages, managing metadata, and generating SWHIDs.
 Home-page: https://github.com/oscarvalenzuelab/ossa_scanner
 Author: Oscar Valenzuela
@@ -23,6 +23,7 @@ License-File: LICENSE
 Requires-Dist: click
 Requires-Dist: swh.model
 Requires-Dist: distro
+Requires-Dist: ssdeep
 # ossa_scanner
 Open Source Advisory Scanner (Generator)

ossa_scanner-0.1.4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+ossa_scanner/__init__.py,sha256=Wzf5T3NBDfhQoTnhnRNHSlAsE0XMqbclXG-M81Vas70,22
+ossa_scanner/cli.py,sha256=sgr8NFpf_Ut84KYFQjOKRxv8CfAMaTPhMo7DbR53lT4,2311
+ossa_scanner/scanner.py,sha256=Z4Pb20RS8VaZw4aUPPaVhxRjoOMWdN7ePFOOeijlVT8,4903
+ossa_scanner/uploader.py,sha256=X8bo7GqfpBjz2NlnvSwDR_rVqNoZDRPF2pnQMaVENbc,2436
+ossa_scanner/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ossa_scanner/utils/downloader.py,sha256=3ccwcde9yJ_SEP0mG9TDr2O0MMdA1p-K6hpzqme-KQ4,2081
+ossa_scanner/utils/hash_calculator.py,sha256=or1HmK_vW6M5vgBWQud-GJjeDElmr64HnkR7FHwIx1Y,981
+ossa_scanner/utils/os_detection.py,sha256=QdRKQ4li4SOHgBofe1qWf8OOcw8XvhM-XWUNu0Cy0a4,315
+ossa_scanner/utils/package_manager.py,sha256=xi1bVU5CRxVz0CzdnYfrKQP6__a-qdRp9YOJ-94A6A0,4462
+ossa_scanner/utils/swhid_calculator.py,sha256=4Z0H2GmECMAJlvH6JBbUmaLXSLRNntyYEdxsS6CTEMQ,63
+ossa_scanner-0.1.4.dist-info/LICENSE,sha256=9slQ_XNiEkio28l90NwihP7a90fCL2GQ6YhcVXTBls4,1064
+ossa_scanner-0.1.4.dist-info/METADATA,sha256=cHvocgib0KYIlF0GUasImIc9fJwPxunIgPwZNwAFs3k,1065
+ossa_scanner-0.1.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ossa_scanner-0.1.4.dist-info/entry_points.txt,sha256=UVoAo-wTPxT82g3cfqTs2CmQnazd57TAwhd9VwEKD1c,55
+ossa_scanner-0.1.4.dist-info/top_level.txt,sha256=uUp5CvhZfJLapXn9DyUXvgH7QK3uzF2ibH943lWN5Bs,13
+ossa_scanner-0.1.4.dist-info/RECORD,,

ossa_scanner-0.1.2.dist-info/RECORD DELETED Viewed

@@ -1,16 +0,0 @@
-ossa_scanner/__init__.py,sha256=YvuYzWnKtqBb-IqG8HAu-nhIYAsgj9Vmc_b9o7vO-js,22
-ossa_scanner/cli.py,sha256=hyRUOgp9kcwFtQrIeyth5vTxeK7eOlxfn5R9E7HX5sA,1640
-ossa_scanner/scanner.py,sha256=SAkiBLjAuO3dklbHPgXs0p047buO6Pp51RROq6G7Yq8,4082
-ossa_scanner/uploader.py,sha256=X8bo7GqfpBjz2NlnvSwDR_rVqNoZDRPF2pnQMaVENbc,2436
-ossa_scanner/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ossa_scanner/utils/downloader.py,sha256=3ccwcde9yJ_SEP0mG9TDr2O0MMdA1p-K6hpzqme-KQ4,2081
-ossa_scanner/utils/hash_calculator.py,sha256=i47KS_HoZNiSbGyd0iP9_TcDwxWS2SrmkIcNF2MWLcA,254
-ossa_scanner/utils/os_detection.py,sha256=QdRKQ4li4SOHgBofe1qWf8OOcw8XvhM-XWUNu0Cy0a4,315
-ossa_scanner/utils/package_manager.py,sha256=tWuQwgkFQjTzeisem0Gz8uFvWw5Cxd-Tft5HM8tIQmk,4028
-ossa_scanner/utils/swhid_calculator.py,sha256=4Z0H2GmECMAJlvH6JBbUmaLXSLRNntyYEdxsS6CTEMQ,63
-ossa_scanner-0.1.2.dist-info/LICENSE,sha256=9slQ_XNiEkio28l90NwihP7a90fCL2GQ6YhcVXTBls4,1064
-ossa_scanner-0.1.2.dist-info/METADATA,sha256=dWWsJKRvqN1vdal81dAseom9Cb1OwLjuOZAllrfOoMs,1043
-ossa_scanner-0.1.2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-ossa_scanner-0.1.2.dist-info/entry_points.txt,sha256=UVoAo-wTPxT82g3cfqTs2CmQnazd57TAwhd9VwEKD1c,55
-ossa_scanner-0.1.2.dist-info/top_level.txt,sha256=uUp5CvhZfJLapXn9DyUXvgH7QK3uzF2ibH943lWN5Bs,13
-ossa_scanner-0.1.2.dist-info/RECORD,,

{ossa_scanner-0.1.2.dist-info → ossa_scanner-0.1.4.dist-info}/LICENSE RENAMED Viewed

File without changes

{ossa_scanner-0.1.2.dist-info → ossa_scanner-0.1.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{ossa_scanner-0.1.2.dist-info → ossa_scanner-0.1.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{ossa_scanner-0.1.2.dist-info → ossa_scanner-0.1.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

ossa-scanner 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

ossa-scanner 0.1.2 py3-none-any.whl → 0.1.4 py3-none-any.whl