scanoss 1.27.1__py3-none-any.whl → 1.43.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. protoc_gen_swagger/options/annotations_pb2.py +18 -12
  2. protoc_gen_swagger/options/annotations_pb2.pyi +48 -0
  3. protoc_gen_swagger/options/annotations_pb2_grpc.py +20 -0
  4. protoc_gen_swagger/options/openapiv2_pb2.py +110 -99
  5. protoc_gen_swagger/options/openapiv2_pb2.pyi +1317 -0
  6. protoc_gen_swagger/options/openapiv2_pb2_grpc.py +20 -0
  7. scanoss/__init__.py +1 -1
  8. scanoss/api/common/v2/scanoss_common_pb2.py +49 -22
  9. scanoss/api/common/v2/scanoss_common_pb2_grpc.py +25 -0
  10. scanoss/api/components/v2/scanoss_components_pb2.py +68 -43
  11. scanoss/api/components/v2/scanoss_components_pb2_grpc.py +83 -22
  12. scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +136 -47
  13. scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +650 -33
  14. scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +56 -37
  15. scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +64 -12
  16. scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +74 -31
  17. scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +252 -13
  18. scanoss/api/licenses/__init__.py +23 -0
  19. scanoss/api/licenses/v2/__init__.py +23 -0
  20. scanoss/api/licenses/v2/scanoss_licenses_pb2.py +84 -0
  21. scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +302 -0
  22. scanoss/api/scanning/v2/scanoss_scanning_pb2.py +32 -21
  23. scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +49 -8
  24. scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +50 -23
  25. scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +151 -16
  26. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +78 -31
  27. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +282 -18
  28. scanoss/cli.py +1000 -186
  29. scanoss/components.py +80 -50
  30. scanoss/constants.py +7 -1
  31. scanoss/cryptography.py +89 -55
  32. scanoss/csvoutput.py +13 -7
  33. scanoss/cyclonedx.py +141 -9
  34. scanoss/data/build_date.txt +1 -1
  35. scanoss/data/osadl-copyleft.json +133 -0
  36. scanoss/delta.py +197 -0
  37. scanoss/export/__init__.py +23 -0
  38. scanoss/export/dependency_track.py +227 -0
  39. scanoss/file_filters.py +2 -163
  40. scanoss/filecount.py +37 -38
  41. scanoss/gitlabqualityreport.py +214 -0
  42. scanoss/header_filter.py +563 -0
  43. scanoss/inspection/policy_check/__init__.py +0 -0
  44. scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
  45. scanoss/inspection/policy_check/dependency_track/project_violation.py +479 -0
  46. scanoss/inspection/{policy_check.py → policy_check/policy_check.py} +65 -72
  47. scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
  48. scanoss/inspection/{copyleft.py → policy_check/scanoss/copyleft.py} +89 -73
  49. scanoss/inspection/{undeclared_component.py → policy_check/scanoss/undeclared_component.py} +52 -46
  50. scanoss/inspection/summary/__init__.py +0 -0
  51. scanoss/inspection/summary/component_summary.py +170 -0
  52. scanoss/inspection/{license_summary.py → summary/license_summary.py} +62 -12
  53. scanoss/inspection/summary/match_summary.py +341 -0
  54. scanoss/inspection/utils/file_utils.py +44 -0
  55. scanoss/inspection/utils/license_utils.py +57 -71
  56. scanoss/inspection/utils/markdown_utils.py +63 -0
  57. scanoss/inspection/{inspect_base.py → utils/scan_result_processor.py} +53 -67
  58. scanoss/osadl.py +125 -0
  59. scanoss/scanner.py +135 -253
  60. scanoss/scanners/folder_hasher.py +47 -32
  61. scanoss/scanners/scanner_hfh.py +50 -18
  62. scanoss/scanoss_settings.py +33 -3
  63. scanoss/scanossapi.py +23 -25
  64. scanoss/scanossbase.py +1 -1
  65. scanoss/scanossgrpc.py +543 -289
  66. scanoss/services/dependency_track_service.py +132 -0
  67. scanoss/spdxlite.py +11 -4
  68. scanoss/threadeddependencies.py +19 -18
  69. scanoss/threadedscanning.py +10 -0
  70. scanoss/utils/scanoss_scan_results_utils.py +41 -0
  71. scanoss/winnowing.py +71 -19
  72. {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/METADATA +8 -5
  73. scanoss-1.43.1.dist-info/RECORD +110 -0
  74. scanoss/inspection/component_summary.py +0 -94
  75. scanoss-1.27.1.dist-info/RECORD +0 -87
  76. {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/WHEEL +0 -0
  77. {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/entry_points.txt +0 -0
  78. {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/licenses/LICENSE +0 -0
  79. {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,170 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+ import json
25
+ from typing import Any
26
+
27
+ from ...scanossbase import ScanossBase
28
+ from ..policy_check.policy_check import T
29
+ from ..utils.scan_result_processor import ScanResultProcessor
30
+
31
+
32
class ComponentSummary(ScanossBase):
    """
    SCANOSS ComponentSummary class
    Inspects scan results and reports how many components and files are declared or undeclared.
    """

    def __init__(  # noqa: PLR0913
        self,
        debug: bool = False,
        trace: bool = False,
        quiet: bool = False,
        filepath: str = None,
        format_type: str = 'json',
        output: str = None,
    ):
        """
        Initialize the ComponentSummary class.

        :param debug: Enable debug mode
        :param trace: Enable trace mode
        :param quiet: Enable quiet mode
        :param filepath: Path to the file containing component data
        :param format_type: Requested output format ('json' or 'md'). It is recorded on the instance,
                            but only JSON output is produced at the moment (see `_format`)
        :param output: Path to write the summary to (passed to `print_to_file_or_stdout`)
        """
        super().__init__(debug, trace, quiet)
        self.filepath = filepath
        # Previously this argument was dropped; keep it so a future formatter can honour it
        self.format_type = format_type
        self.output = output
        self.results_processor = ScanResultProcessor(debug, trace, quiet, filepath)

    def _json(self, data: dict[str, Any]) -> dict[str, Any]:
        """
        Format component summary data as JSON.

        The summary is already a JSON-serializable dictionary, so it is returned untouched.

        :param data: Dictionary containing component summary information including:
            - components: List of component-license pairs with status and metadata
            - totalComponents: Total number of unique components
            - undeclaredComponents: Number of components with 'pending' status
            - declaredComponents: Number of components that are not 'pending'
            - totalFilesDetected: Total count of files where components were detected
            - totalFilesUndeclared: Count of files with undeclared components
            - totalFilesDeclared: Count of files with declared components
        :return: The same data dictionary, ready for JSON serialization
        """
        return data

    def _markdown(self, data: list[T]) -> dict[str, Any]:
        """
        Format component summary data as Markdown (not yet implemented).

        Placeholder for a human-readable Markdown rendering of the summary.

        :param data: List of component summary items to format
        :return: Currently always None; intended to return the formatted Markdown output
        """
        return None

    def _jira_markdown(self, data: list[T]) -> dict[str, Any]:
        """
        Format component summary data as Jira-flavored Markdown (not yet implemented).

        Placeholder for a Jira-compatible Markdown rendering of the summary.

        :param data: List of component summary items to format
        :return: Currently always None; intended to return the Jira-formatted Markdown output
        """
        return None

    def _get_component_summary_from_components(self, scan_components: list) -> dict:
        """
        Get a component summary from detected components.

        :param scan_components: List of all components; each entry must provide 'count' and 'undeclared'
        :return: Dict with component and file totals split into declared and undeclared
        """
        # A component is considered unique by its combination of PURL (Package URL) and license
        component_licenses = self.results_processor.group_components_by_license(scan_components)
        total_components = len(component_licenses)
        # Undeclared components are the ones still in 'pending' status
        undeclared_components = sum(1 for c in component_licenses if c['status'] == 'pending')

        # File totals come from the raw component list, not from the per-license grouping
        total_files_detected = sum(component['count'] for component in scan_components)
        total_undeclared_files = sum(component['undeclared'] for component in scan_components)
        return {
            'components': component_licenses,
            'totalComponents': total_components,
            'undeclaredComponents': undeclared_components,
            'declaredComponents': total_components - undeclared_components,
            'totalFilesDetected': total_files_detected,
            'totalFilesUndeclared': total_undeclared_files,
            'totalFilesDeclared': total_files_detected - total_undeclared_files,
        }

    def _get_components(self):
        """
        Extract and process components from the loaded results.

        This method performs the following steps:
        1. Validates that the results processor has loaded results.
        2. Collects component data into a dictionary.
        3. Converts the dictionary to a list.

        NOTE(review): dependency results are not collected here (only `get_components_data` is called);
        confirm that this is intentional.

        :return: A list of processed components
        :raises ValueError: if no results could be loaded from `self.filepath`
        """
        if self.results_processor.get_results() is None:
            raise ValueError(f'Error: No results found in {self.filepath}')

        components: dict = {}
        # The helper fills `components` in place
        self.results_processor.get_components_data(components)
        return self.results_processor.convert_components_to_list(components)

    def _format(self, component_summary) -> str:
        """
        Serialize the component summary for output.

        :param component_summary: Summary dictionary produced by `_get_component_summary_from_components`
        :return: The summary as an indented JSON string
        """
        # TODO: Implement formatter to support dynamic outputs
        json_data = self._json(component_summary)
        return json.dumps(json_data, indent=2)

    def run(self):
        """
        Build the component summary, write it to the configured output (or stdout) and return it.

        :return: The component summary dictionary
        :raises ValueError: if no results could be loaded from `self.filepath`
        """
        components = self._get_components()
        component_summary = self._get_component_summary_from_components(components)
        output = self._format(component_summary)
        self.print_to_file_or_stdout(output, self.output)
        return component_summary
#
# End of ComponentSummary Class
#
@@ -23,11 +23,14 @@ SPDX-License-Identifier: MIT
23
23
  """
24
24
 
25
25
  import json
26
+ from typing import Any
26
27
 
27
- from .inspect_base import InspectBase
28
+ from ...scanossbase import ScanossBase
29
+ from ..policy_check.policy_check import T
30
+ from ..utils.scan_result_processor import ScanResultProcessor
28
31
 
29
32
 
30
- class LicenseSummary(InspectBase):
33
+ class LicenseSummary(ScanossBase):
31
34
  """
32
35
  SCANOSS LicenseSummary class
33
36
  Inspects results and generates comprehensive license summaries from detected components.
@@ -55,7 +58,7 @@ class LicenseSummary(InspectBase):
55
58
  Initialize the LicenseSummary class.
56
59
 
57
60
  :param debug: Enable debug mode
58
- :param trace: Enable trace mode (default True)
61
+ :param trace: Enable trace mode
59
62
  :param quiet: Enable quiet mode
60
63
  :param filepath: Path to the file containing component data
61
64
  :param output: Path to save detailed output
@@ -63,8 +66,8 @@ class LicenseSummary(InspectBase):
63
66
  :param exclude: Licenses to exclude from the analysis
64
67
  :param explicit: Explicitly defined licenses
65
68
  """
66
- super().__init__(debug, trace, quiet, filepath, output)
67
- self.license_util.init(include, exclude, explicit)
69
+ super().__init__(debug=debug, trace=trace, quiet=quiet)
70
+ self.results_processor = ScanResultProcessor(debug, trace, quiet, filepath, include, exclude, explicit)
68
71
  self.filepath = filepath
69
72
  self.output = output
70
73
  self.status = status
@@ -72,6 +75,47 @@ class LicenseSummary(InspectBase):
72
75
  self.exclude = exclude
73
76
  self.explicit = explicit
74
77
 
78
+ def _json(self, data: dict[str,Any]) -> dict[str, Any]:
79
+ """
80
+ Format license summary data as JSON.
81
+
82
+ This method is intended to return the license summary data in JSON structure
83
+ for serialization. The data should include license information with copyleft
84
+ analysis and license statistics.
85
+
86
+ :param data: List of license summary items to format
87
+ :return: Dictionary containing license summary information including:
88
+ - licenses: List of detected licenses with SPDX IDs, URLs, and copyleft status
89
+ - detectedLicenses: Total number of unique licenses
90
+ - detectedLicensesWithCopyleft: Count of licenses marked as copyleft
91
+ """
92
+ return data
93
+
94
+ def _markdown(self, data: list[T]) -> dict[str, Any]:
95
+ """
96
+ Format license summary data as Markdown (not yet implemented).
97
+
98
+ This method is intended to convert license summary data into a human-readable
99
+ Markdown format with tables and formatted sections.
100
+
101
+ :param data: List of license summary items to format
102
+ :return: Dictionary containing formatted Markdown output
103
+ """
104
+ pass
105
+
106
+ def _jira_markdown(self, data: list[T]) -> dict[str, Any]:
107
+ """
108
+ Format license summary data as Jira-flavored Markdown (not yet implemented).
109
+
110
+ This method is intended to convert license summary data into Jira-compatible
111
+ Markdown format, which may include Jira-specific syntax for tables and formatting.
112
+
113
+ :param data: List of license summary items to format
114
+ :return: Dictionary containing Jira-formatted Markdown output
115
+ """
116
+ pass
117
+
118
+
75
119
  def _get_licenses_summary_from_components(self, components: list)-> dict:
76
120
  """
77
121
  Get a license summary from detected components.
@@ -80,7 +124,7 @@ class LicenseSummary(InspectBase):
80
124
  :return: Dict with license summary information
81
125
  """
82
126
  # A component is considered unique by its combination of PURL (Package URL) and license
83
- component_licenses = self._group_components_by_license(components)
127
+ component_licenses = self.results_processor.group_components_by_license(components)
84
128
  license_component_count = {}
85
129
  # Count license per component
86
130
  for lic in component_licenses:
@@ -122,19 +166,25 @@ class LicenseSummary(InspectBase):
122
166
 
123
167
  :return: A list of processed components with license data, or `None` if `self.results` is not set.
124
168
  """
125
- if self.results is None:
126
- return None
169
+ if self.results_processor.get_results() is None:
170
+ raise ValueError(f'Error: No results found in {self.filepath}')
127
171
 
128
172
  components: dict = {}
129
173
  # Extract component and license data from file and dependency results. Both helpers mutate `components`
130
- self._get_components_data(self.results, components)
131
- self._get_dependencies_data(self.results, components)
132
- return self._convert_components_to_list(components)
174
+ self.results_processor.get_components_data(components)
175
+ self.results_processor.get_dependencies_data(components)
176
+ return self.results_processor.convert_components_to_list(components)
177
+
178
+ def _format(self, license_summary) -> str:
179
+ # TODO: Implement formatter to support dynamic outputs
180
+ json_data = self._json(license_summary)
181
+ return json.dumps(json_data, indent=2)
133
182
 
134
183
  def run(self):
135
184
  components = self._get_components()
136
185
  license_summary = self._get_licenses_summary_from_components(components)
137
- self.print_to_file_or_stdout(json.dumps(license_summary, indent=2), self.output)
186
+ output = self._format(license_summary)
187
+ self.print_to_file_or_stdout(output, self.output)
138
188
  return license_summary
139
189
  #
140
190
  # End of LicenseSummary Class
@@ -0,0 +1,341 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ from dataclasses import dataclass
26
+
27
+ from ...scanossbase import ScanossBase
28
+ from ...utils import scanoss_scan_results_utils
29
+ from ..utils.file_utils import load_json_file
30
+ from ..utils.markdown_utils import generate_table
31
+
32
+
33
@dataclass
class MatchSummaryItem:
    """
    One match row of a SCANOSS match summary.

    Holds the values needed to render a single table row: where the match was
    found, which component and license it points to, and how much of the file
    matched.
    """
    # Path of the scanned file
    file: str
    # Link to the scanned file (including the line anchor for snippet matches)
    file_url: str
    # Name of the first license reported for the match
    license: str
    # Value of the result's 'matched' field
    similarity: str
    # First PURL reported for the match
    purl: str
    # Value of the result's 'url' field, used as the PURL link target
    purl_url: str
    # Matched component version
    version: str
    # Matched line range ("start-end"), or "all" for file matches
    lines: str
50
+
51
+
52
@dataclass
class ComponentMatchSummary:
    """
    Match summary items grouped by match type.

    Full-file matches and snippet matches are kept in separate lists so that
    each group can be rendered as its own table.
    """
    # Items created from full-file matches
    files: list[MatchSummaryItem]
    # Items created from snippet matches
    snippet: list[MatchSummaryItem]
62
+
63
class MatchSummary(ScanossBase):
    """
    Generates Markdown summaries from SCANOSS scan results.

    This class processes SCANOSS scan results and creates human-readable Markdown
    reports with collapsible sections for file and snippet matches. File names are
    rendered as clickable links only when a line range prefix is provided;
    otherwise they are rendered as plain text.
    """

    # Fields every match must provide (with a non-empty value) to be included in the summary
    _REQUIRED_FIELDS = (
        ('id', 'No id found'),
        ('lines', 'No lines found'),
        ('purl', 'No purl found'),
        ('licenses', 'No licenses found'),
        ('version', 'No version found'),
        ('matched', 'No matched found'),
        ('url', 'No url found'),
    )

    def __init__(  # noqa: PLR0913
        self,
        debug: bool = False,
        trace: bool = False,
        quiet: bool = False,
        line_range_prefix: str = None,
        scanoss_results_path: str = None,
        output: str = None,
    ):
        """
        Initialize the Matches Summary generator.

        :param debug: Enable debug output for troubleshooting
        :param trace: Enable trace-level logging for detailed execution tracking
        :param quiet: Suppress informational messages
        :param line_range_prefix: Base URL prefix for GitLab file links with line ranges
                                  (e.g., 'https://gitlab.com/org/project/-/blob/main')
        :param scanoss_results_path: Path to SCANOSS scan results file in JSON format
        :param output: Output file path for the generated Markdown report (default: stdout)
        """
        super().__init__(debug=debug, trace=trace, quiet=quiet)
        self.scanoss_results_path = scanoss_results_path
        self.line_range_prefix = line_range_prefix
        self.output = output
        self.print_debug("Initializing MatchSummary class")

    def _build_file_url(self, file_name: str, anchor: str = '') -> str:
        """
        Build the link for a scanned file.

        :param file_name: Name of the scanned file (relative path in the repository)
        :param anchor: Optional URL fragment (e.g. '#L10-L20') appended to the link
        :return: The file URL, or an empty string when no line range prefix is configured
        """
        if not self.line_range_prefix:
            # Without a prefix there is nothing meaningful to link to (avoids 'None/<file>' URLs)
            return ''
        # Drop trailing slashes so the joined URL never contains '//'
        return f"{self.line_range_prefix.rstrip('/')}/{file_name}{anchor}"

    @staticmethod
    def _link(text: str, url: str) -> str:
        """
        Render a Markdown link, falling back to plain text when there is no URL.

        :param text: Link text
        :param url: Link target (may be empty)
        :return: '[text](url)' or just the text
        """
        return f"[{text}]({url})" if url else text

    def _get_match_summary_item(self, file_name: str, result: dict) -> MatchSummaryItem:
        """
        Create a MatchSummaryItem from a single scan result.

        Handles both snippet matches (linked to a specific line range) and file
        matches (linked to the entire file). The result is expected to have passed
        `_validate_result`.

        :param file_name: Name of the scanned file (relative path in the repository)
        :param result: SCANOSS scan result dictionary containing match details
        :return: Populated match summary item with all relevant information
        """
        match_id = result.get('id')
        self.print_trace(f"Creating match summary item for file: {file_name}, id: {match_id}")
        purl = result.get('purl')[0]

        if match_id == "snippet":
            # Snippet match: create URL with line range anchor
            # NOTE(review): assumes get_lines returns a non-empty list of line numbers - confirm
            lines = scanoss_scan_results_utils.get_lines(result.get('lines'))
            start_line, end_line = lines[0], lines[-1]
            file_url = self._build_file_url(file_name, f"#L{start_line}-L{end_line}")
            line_range = f"{start_line}-{end_line}"
            self.print_trace(f"Snippet match: lines {start_line}-{end_line}, purl: {purl}")
        else:
            # File match: create URL without line range
            file_url = self._build_file_url(file_name)
            line_range = "all"
            self.print_trace(f"File match: {file_name}, purl: {purl}, version: {result.get('version')}")

        return MatchSummaryItem(
            file=file_name,
            file_url=file_url,
            license=result.get('licenses')[0].get('name'),
            similarity=result.get('matched'),
            purl=purl,
            purl_url=result.get('url'),
            version=result.get('version'),
            lines=line_range,
        )

    def _validate_result(self, file_name: str, result: dict) -> bool:
        """
        Validate that a scan result has all required fields.

        A field counts as missing when it is absent or empty, so empty 'purl' and
        'licenses' lists are rejected here as well.

        :param file_name: Name of the file being validated
        :param result: The scan result to validate
        :return: True if valid, False otherwise
        """
        for field, error_msg in self._REQUIRED_FIELDS:
            if not result.get(field):
                self.print_debug(f'ERROR: {error_msg} for file {file_name}')
                return False
        return True

    def _get_matches_summary(self) -> ComponentMatchSummary:
        """
        Parse SCANOSS scan results and create categorized match summaries.

        Loads the SCANOSS scan results file and processes each match, validating
        required fields and categorizing matches into file matches and snippet matches.
        Skips non-matches and incomplete results with debug messages.

        :return: Container with the file matches and snippet matches that were found
        """
        self.print_debug(f"Loading scan results from: {self.scanoss_results_path}")

        # Load scan results from JSON file
        scan_results = load_json_file(self.scanoss_results_path)
        gitlab_matches_summary = ComponentMatchSummary(files=[], snippet=[])

        self.print_debug(f"Processing {len(scan_results)} files from scan results")
        self.print_trace(f"Line range prefix set to: {self.line_range_prefix}")

        # Process each file and its results
        for file_name, results in scan_results.items():
            self.print_trace(f"Processing file: {file_name} with {len(results)} results")

            for result in results:
                # Skip non-matches
                if result.get('id') == "none":
                    self.print_debug(f'Skipping non-match for file {file_name}')
                    continue

                # Validate required fields
                if not self._validate_result(file_name, result):
                    continue

                # Create summary item and categorize by match type
                summary_item = self._get_match_summary_item(file_name, result)
                if result.get('id') == "snippet":
                    gitlab_matches_summary.snippet.append(summary_item)
                    self.print_trace(f"Added snippet match for {file_name}")
                else:
                    gitlab_matches_summary.files.append(summary_item)
                    self.print_trace(f"Added file match for {file_name}")

        self.print_debug(
            f"Match summary complete: {len(gitlab_matches_summary.files)} file matches, "
            f"{len(gitlab_matches_summary.snippet)} snippet matches"
        )

        return gitlab_matches_summary

    def _markdown(self, gitlab_matches_summary: ComponentMatchSummary) -> str:
        """
        Generate Markdown from match summaries.

        Creates a formatted Markdown document with collapsible sections for file
        and snippet matches.

        :param gitlab_matches_summary: Container with categorized file and snippet matches to format
        :return: Complete Markdown document with formatted match tables, or an empty string
                 when there are no matches at all
        """
        self.print_debug("Generating Markdown from match summaries")

        if not gitlab_matches_summary.files and not gitlab_matches_summary.snippet:
            self.print_debug("No matches to format - returning empty string")
            return ""

        self.print_trace(
            f"Formatting {len(gitlab_matches_summary.files)} file matches and "
            f"{len(gitlab_matches_summary.snippet)} snippet matches"
        )

        # Define table headers
        file_match_headers = ['File', 'License', 'Similarity', 'PURL', 'Version']
        snippet_match_headers = ['File', 'License', 'Similarity', 'PURL', 'Version', 'Lines']

        # Build file matches table
        self.print_trace("Building file matches table")
        file_match_rows = [
            [
                self._link(file_match.file, file_match.file_url),
                file_match.license,
                file_match.similarity,
                self._link(file_match.purl, file_match.purl_url),
                file_match.version,
            ]
            for file_match in gitlab_matches_summary.files
        ]
        file_match_table = generate_table(file_match_headers, file_match_rows)

        # Build snippet matches table
        self.print_trace("Building snippet matches table")
        snippet_match_rows = [
            [
                self._link(snippet_match.file, snippet_match.file_url),
                snippet_match.license,
                snippet_match.similarity,
                self._link(snippet_match.purl, snippet_match.purl_url),
                snippet_match.version,
                snippet_match.lines,
            ]
            for snippet_match in gitlab_matches_summary.snippet
        ]
        snippet_match_table = generate_table(snippet_match_headers, snippet_match_rows)

        # Assemble complete Markdown document: a title followed by two collapsible sections
        markdown = "".join([
            "### SCANOSS Match Summary\n\n",
            "<details>\n",
            "<summary>File Match Summary</summary>\n\n",
            file_match_table,
            "\n</details>\n",
            "<details>\n",
            "<summary>Snippet Match Summary</summary>\n\n",
            snippet_match_table,
            "\n</details>\n",
        ])

        self.print_trace(f"Markdown generation complete (length: {len(markdown)} characters)")
        self.print_debug("Match summary Markdown generation complete")
        return markdown

    def run(self):
        """
        Execute the matches summary generation process.

        This is the main entry point for generating the matches summary report.
        It orchestrates the entire workflow:
        1. Loads and parses SCANOSS scan results
        2. Validates and categorizes matches
        3. Generates Markdown report
        4. Outputs to file or stdout

        When no matches are found, "No matches found." is printed and nothing is written
        to the output file.
        """
        self.print_debug("Starting match summary generation process")
        self.print_trace(
            f"Configuration - Results path: {self.scanoss_results_path}, Output: {self.output}, "
            f"Line range prefix: {self.line_range_prefix}"
        )

        # Load and process scan results into categorized matches
        self.print_trace("Loading and processing scan results")
        matches = self._get_matches_summary()

        # Format matches as GitLab-compatible Markdown
        self.print_trace("Generating Markdown output")
        matches_md = self._markdown(matches)
        if matches_md == "":
            self.print_debug("No matches found - exiting")
            self.print_stdout("No matches found.")
            return

        # Output to file or stdout
        self.print_trace("Writing output")
        if self.output:
            self.print_debug(f"Writing match summary to file: {self.output}")
        else:
            self.print_debug("Writing match summary to 'stdout'")

        self.print_to_file_or_stdout(matches_md, self.output)
        self.print_debug("Match summary generation complete")
339
+
340
+
341
+
@@ -0,0 +1,44 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import json
26
+ import os
27
+
28
+
29
def load_json_file(file_path: str) -> dict:
    """
    Load and parse a JSON file.

    :param file_path: file path to the JSON file
    :return: The parsed JSON data
    :raises ValueError: if the path is empty or does not exist, or if the content cannot be read/parsed as JSON
    """
    # An empty/None path is reported the same way as a missing file instead of failing inside os.path
    if not file_path or not os.path.exists(file_path):
        raise ValueError(f'The file "{file_path}" does not exist.')
    # JSON text is UTF-8 (RFC 8259); do not depend on the platform's default encoding
    with open(file_path, 'r', encoding='utf-8') as jsonfile:
        try:
            return json.load(jsonfile)
        except Exception as e:
            # Callers rely on ValueError for any read/parse failure; chain the cause so it is not lost
            raise ValueError(f'ERROR: Problem parsing input JSON: {e}') from e