ossa-scanner 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ossa_scanner/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.3"
1
+ __version__ = "0.1.4"
ossa_scanner/scanner.py CHANGED
@@ -18,25 +18,14 @@ class Scanner:
18
18
  os.makedirs(self.temp_dir, exist_ok=True)
19
19
 
20
20
  def process_package(self, package):
21
- """
22
- Processes a single package: downloads source, extracts, calculates hash and SWHID.
23
-
24
- Args:
25
- package (str): Package name to process.
26
-
27
- Returns:
28
- dict: Result of the processed package including hash and SWHID.
29
- """
30
21
  try:
31
22
  print(f"Processing package: {package}")
32
23
  package_info = get_package_info(self.os_type, package)
33
24
  print(f"Fetched metadata for {package}")
34
25
 
35
- # Download the source code to temp_dir
36
26
  source_file = download_source(self.os_type, package, self.temp_dir)
37
27
  print(f"Downloaded source file: {source_file}")
38
28
 
39
- # Calculate hash of the source file
40
29
  file_hash = calculate_file_hash(source_file)
41
30
  print(f"Hash (SHA256) for {package}: {file_hash}")
42
31
 
@@ -86,16 +75,22 @@ class Scanner:
86
75
  swhid (str): Software Heritage ID of the package.
87
76
  """
88
77
  # Generate report filename
89
- sha1_name = hashlib.sha1(package.encode()).hexdigest()
90
78
  date_str = datetime.now().strftime("%Y%m%d")
91
- report_filename = f"ossa-{date_str}-{sha1_name}-{package}.json"
79
+ report_filename = f"ossa-{date_str}-{hash(package) % 10000}-{package}.json"
92
80
  report_path = os.path.join(self.output_dir, report_filename)
93
81
 
82
+ # This need to be moved to a different class
83
+ artifact_name = source_file
84
+ if "tmp/" in source_file:
85
+ artifact_name = os.path.basename(source_file)
86
+ if "--" in artifact_name:
87
+ artifact_name = artifact_name.split("--")[-1]
88
+
94
89
  # Create the report content
95
90
  report = {
96
- "id": f"OSSA-{date_str}-{sha1_name.upper()}",
91
+ "id": f"OSSA-{date_str}-{hash(package) % 10000}",
97
92
  "version": "1.0.0",
98
- "severity": "Informational",
93
+ "severity": package_info.get("severity", []),
99
94
  "title": f"Advisory for {package}",
100
95
  "package_name": package,
101
96
  "publisher": "Generated by OSSA Collector",
@@ -107,8 +102,10 @@ class Scanner:
107
102
  "affected_versions": ["*.*"],
108
103
  "artifacts": [
109
104
  {
110
- "url": f"file://{source_file}",
111
- "hashes": {"sha256": file_hash},
105
+ "url": f"file://{artifact_name}",
106
+ "hashes": {
107
+ "sha1": file_hash['sha1'], "sha256": file_hash['sha256'],
108
+ "ssdeep": file_hash['ssdeep'], "swhid": file_hash['swhid']},
112
109
  "swhid": swhid
113
110
  }
114
111
  ],
@@ -1,8 +1,35 @@
1
+ import os
2
+ import json
1
3
  import hashlib
4
+ import ssdeep
2
5
 
3
- def calculate_file_hash(file_path, algorithm='sha256'):
4
- hash_func = hashlib.new(algorithm)
5
- with open(file_path, 'rb') as f:
6
- while chunk := f.read(8192):
7
- hash_func.update(chunk)
8
- return hash_func.hexdigest()
6
+ def calculate_file_hash(file_path):
7
+ file_hash = {}
8
+ file_hash['sha1'] = compute_sha1(file_path)
9
+ file_hash['sha256'] = compute_sha256(file_path)
10
+ file_hash['ssdeep'] = compute_fuzzy_hash(file_path)
11
+ file_hash['swhid'] = compute_swhid(file_path)
12
+ print(file_hash)
13
+ return file_hash
14
+
15
+ def compute_sha1(file_path):
16
+ sha1 = hashlib.sha1()
17
+ with open(file_path, "rb") as f:
18
+ for chunk in iter(lambda: f.read(4096), b""):
19
+ sha1.update(chunk)
20
+ return sha1.hexdigest()
21
+
22
+ def compute_sha256(file_path):
23
+ sha256 = hashlib.sha256()
24
+ with open(file_path, "rb") as f:
25
+ for chunk in iter(lambda: f.read(4096), b""):
26
+ sha256.update(chunk)
27
+ return sha256.hexdigest()
28
+
29
+ def compute_fuzzy_hash(file_path):
30
+ return ssdeep.hash_from_file(file_path)
31
+
32
+ def compute_swhid(file_path):
33
+ sha1_hash = compute_sha1(file_path)
34
+ swhid = f"swh:1:cnt:{sha1_hash}"
35
+ return swhid
@@ -25,7 +25,7 @@ def list_packages(package_manager):
25
25
 
26
26
  packages = result.stdout.splitlines()
27
27
  extracted_packages = []
28
- max_packages = 5
28
+ max_packages = 2
29
29
  k_packages = 0
30
30
  for line in packages:
31
31
  if not line.strip() or line.startswith("==>"):
@@ -51,8 +51,6 @@ def get_package_info(package_manager, package_name):
51
51
  try:
52
52
  result = subprocess.run(cmd, capture_output=True, text=True, check=True)
53
53
  output = result.stdout
54
-
55
- # Parse the output based on the package manager
56
54
  if package_manager == 'brew':
57
55
  return parse_brew_info(output)
58
56
  elif package_manager in ['yum', 'dnf']:
@@ -68,23 +66,21 @@ def parse_brew_info(output):
68
66
  """Parses brew info output to extract license, website, and description."""
69
67
  info = {}
70
68
  lines = output.splitlines()
71
- info["license"] = "Unknown"
72
- info["website"] = "Unknown"
73
- info["description"] = "Unknown"
69
+ info["licenses"] = "NOASSERTION"
70
+ info["references"] = "NOASSERTION"
71
+ info["description"] = "NOASSERTION"
74
72
 
75
73
  for i, line in enumerate(lines):
76
74
  if i == 1: # The description is usually on the second line
77
75
  info["description"] = line.strip()
78
76
  elif line.startswith("https://"): # The website URL
79
- info["website"] = line.strip()
77
+ info["references"] = line.strip()
80
78
  elif line.startswith("License:"): # The license information
81
- info["license"] = line.split(":", 1)[1].strip()
79
+ info["licenses"] = line.split(":", 1)[1].strip()
80
+ info["severity"] = license_classificaton(info["licenses"])
82
81
 
83
- # Ensure all keys are present even if some fields are missing
84
82
  return info
85
83
 
86
-
87
-
88
84
  def parse_yum_info(output):
89
85
  """Parses yum repoquery --info output."""
90
86
  info = {}
@@ -92,17 +88,19 @@ def parse_yum_info(output):
92
88
 
93
89
  for line in lines:
94
90
  if line.startswith("License"):
95
- info["license"] = line.split(":", 1)[1].strip()
91
+ info["licenses"] = line.split(":", 1)[1].strip()
96
92
  elif line.startswith("URL"):
97
- info["website"] = line.split(":", 1)[1].strip()
93
+ info["references"] = line.split(":", 1)[1].strip()
98
94
  elif "Copyright" in line:
99
- info["copyright"] = line.strip()
95
+ info["references"] = line.strip()
96
+ severity = license_classificaton(info["licenses"])
100
97
 
101
98
  # Ensure all keys are present even if data is missing
102
99
  return {
103
- "license": info.get("license", "Unknown"),
104
- "copyright": info.get("copyright", "Unknown"),
105
- "website": info.get("website", "Unknown"),
100
+ "licenses": info.get("licenses", "NOASSERTION"),
101
+ "copyright": info.get("copyright", "NOASSERTION"),
102
+ "references": info.get("references", "NOASSERTION"),
103
+ "severity": severity,
106
104
  }
107
105
 
108
106
 
@@ -113,16 +111,25 @@ def parse_apt_info(output):
113
111
 
114
112
  for line in lines:
115
113
  if line.startswith("License:") or "License" in line:
116
- info["license"] = line.split(":", 1)[1].strip()
114
+ info["licenses"] = line.split(":", 1)[1].strip()
117
115
  elif line.startswith("Homepage:"):
118
116
  info["website"] = line.split(":", 1)[1].strip()
119
117
  elif "Copyright" in line:
120
- info["copyright"] = line.strip()
118
+ info["references"] = line.strip()
119
+ severity = license_classificaton(info["licenses"])
121
120
 
122
121
  # Ensure all keys are present even if data is missing
123
122
  return {
124
- "license": info.get("license", "Unknown"),
125
- "copyright": info.get("copyright", "Unknown"),
126
- "website": info.get("website", "Unknown"),
123
+ "licenses": info.get("licenses", "NOASSERTION"),
124
+ "copyright": info.get("copyright", "NOASSERTION"),
125
+ "references": info.get("references", "NOASSERTION"),
126
+ "severity": severity,
127
127
  }
128
128
 
129
+ def license_classificaton(licenses):
130
+ copyleft_licenses = ['GPL', 'CDDL', 'MPL']
131
+ severity = "Informational"
132
+ for cl_license in copyleft_licenses:
133
+ if cl_license.lower() in licenses:
134
+ severity = "Medium"
135
+ return severity
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ossa_scanner
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: A Python library for scanning Linux packages, managing metadata, and generating SWHIDs.
5
5
  Home-page: https://github.com/oscarvalenzuelab/ossa_scanner
6
6
  Author: Oscar Valenzuela
@@ -23,6 +23,7 @@ License-File: LICENSE
23
23
  Requires-Dist: click
24
24
  Requires-Dist: swh.model
25
25
  Requires-Dist: distro
26
+ Requires-Dist: ssdeep
26
27
 
27
28
  # ossa_scanner
28
29
  Open Source Advisory Scanner (Generator)
@@ -0,0 +1,16 @@
1
+ ossa_scanner/__init__.py,sha256=Wzf5T3NBDfhQoTnhnRNHSlAsE0XMqbclXG-M81Vas70,22
2
+ ossa_scanner/cli.py,sha256=sgr8NFpf_Ut84KYFQjOKRxv8CfAMaTPhMo7DbR53lT4,2311
3
+ ossa_scanner/scanner.py,sha256=Z4Pb20RS8VaZw4aUPPaVhxRjoOMWdN7ePFOOeijlVT8,4903
4
+ ossa_scanner/uploader.py,sha256=X8bo7GqfpBjz2NlnvSwDR_rVqNoZDRPF2pnQMaVENbc,2436
5
+ ossa_scanner/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ ossa_scanner/utils/downloader.py,sha256=3ccwcde9yJ_SEP0mG9TDr2O0MMdA1p-K6hpzqme-KQ4,2081
7
+ ossa_scanner/utils/hash_calculator.py,sha256=or1HmK_vW6M5vgBWQud-GJjeDElmr64HnkR7FHwIx1Y,981
8
+ ossa_scanner/utils/os_detection.py,sha256=QdRKQ4li4SOHgBofe1qWf8OOcw8XvhM-XWUNu0Cy0a4,315
9
+ ossa_scanner/utils/package_manager.py,sha256=xi1bVU5CRxVz0CzdnYfrKQP6__a-qdRp9YOJ-94A6A0,4462
10
+ ossa_scanner/utils/swhid_calculator.py,sha256=4Z0H2GmECMAJlvH6JBbUmaLXSLRNntyYEdxsS6CTEMQ,63
11
+ ossa_scanner-0.1.4.dist-info/LICENSE,sha256=9slQ_XNiEkio28l90NwihP7a90fCL2GQ6YhcVXTBls4,1064
12
+ ossa_scanner-0.1.4.dist-info/METADATA,sha256=cHvocgib0KYIlF0GUasImIc9fJwPxunIgPwZNwAFs3k,1065
13
+ ossa_scanner-0.1.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
14
+ ossa_scanner-0.1.4.dist-info/entry_points.txt,sha256=UVoAo-wTPxT82g3cfqTs2CmQnazd57TAwhd9VwEKD1c,55
15
+ ossa_scanner-0.1.4.dist-info/top_level.txt,sha256=uUp5CvhZfJLapXn9DyUXvgH7QK3uzF2ibH943lWN5Bs,13
16
+ ossa_scanner-0.1.4.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- ossa_scanner/__init__.py,sha256=XEqb2aiIn8fzGE68Mph4ck1FtQqsR_am0wRWvrYPffQ,22
2
- ossa_scanner/cli.py,sha256=sgr8NFpf_Ut84KYFQjOKRxv8CfAMaTPhMo7DbR53lT4,2311
3
- ossa_scanner/scanner.py,sha256=YOYB4-7EwQyZE6KU6_dyRD09tq6ntgmYvyxX02KgB5c,4885
4
- ossa_scanner/uploader.py,sha256=X8bo7GqfpBjz2NlnvSwDR_rVqNoZDRPF2pnQMaVENbc,2436
5
- ossa_scanner/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- ossa_scanner/utils/downloader.py,sha256=3ccwcde9yJ_SEP0mG9TDr2O0MMdA1p-K6hpzqme-KQ4,2081
7
- ossa_scanner/utils/hash_calculator.py,sha256=i47KS_HoZNiSbGyd0iP9_TcDwxWS2SrmkIcNF2MWLcA,254
8
- ossa_scanner/utils/os_detection.py,sha256=QdRKQ4li4SOHgBofe1qWf8OOcw8XvhM-XWUNu0Cy0a4,315
9
- ossa_scanner/utils/package_manager.py,sha256=tWuQwgkFQjTzeisem0Gz8uFvWw5Cxd-Tft5HM8tIQmk,4028
10
- ossa_scanner/utils/swhid_calculator.py,sha256=4Z0H2GmECMAJlvH6JBbUmaLXSLRNntyYEdxsS6CTEMQ,63
11
- ossa_scanner-0.1.3.dist-info/LICENSE,sha256=9slQ_XNiEkio28l90NwihP7a90fCL2GQ6YhcVXTBls4,1064
12
- ossa_scanner-0.1.3.dist-info/METADATA,sha256=22Fo5X2J06UlI-94hUZLBSGJvdzpHaK-GqKFDIDkF_Q,1043
13
- ossa_scanner-0.1.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
14
- ossa_scanner-0.1.3.dist-info/entry_points.txt,sha256=UVoAo-wTPxT82g3cfqTs2CmQnazd57TAwhd9VwEKD1c,55
15
- ossa_scanner-0.1.3.dist-info/top_level.txt,sha256=uUp5CvhZfJLapXn9DyUXvgH7QK3uzF2ibH943lWN5Bs,13
16
- ossa_scanner-0.1.3.dist-info/RECORD,,