scanoss 1.27.1__py3-none-any.whl → 1.43.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. protoc_gen_swagger/options/annotations_pb2.py +18 -12
  2. protoc_gen_swagger/options/annotations_pb2.pyi +48 -0
  3. protoc_gen_swagger/options/annotations_pb2_grpc.py +20 -0
  4. protoc_gen_swagger/options/openapiv2_pb2.py +110 -99
  5. protoc_gen_swagger/options/openapiv2_pb2.pyi +1317 -0
  6. protoc_gen_swagger/options/openapiv2_pb2_grpc.py +20 -0
  7. scanoss/__init__.py +1 -1
  8. scanoss/api/common/v2/scanoss_common_pb2.py +49 -22
  9. scanoss/api/common/v2/scanoss_common_pb2_grpc.py +25 -0
  10. scanoss/api/components/v2/scanoss_components_pb2.py +68 -43
  11. scanoss/api/components/v2/scanoss_components_pb2_grpc.py +83 -22
  12. scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +136 -47
  13. scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +650 -33
  14. scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +56 -37
  15. scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +64 -12
  16. scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +74 -31
  17. scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +252 -13
  18. scanoss/api/licenses/__init__.py +23 -0
  19. scanoss/api/licenses/v2/__init__.py +23 -0
  20. scanoss/api/licenses/v2/scanoss_licenses_pb2.py +84 -0
  21. scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +302 -0
  22. scanoss/api/scanning/v2/scanoss_scanning_pb2.py +32 -21
  23. scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +49 -8
  24. scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +50 -23
  25. scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +151 -16
  26. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +78 -31
  27. scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +282 -18
  28. scanoss/cli.py +1000 -186
  29. scanoss/components.py +80 -50
  30. scanoss/constants.py +7 -1
  31. scanoss/cryptography.py +89 -55
  32. scanoss/csvoutput.py +13 -7
  33. scanoss/cyclonedx.py +141 -9
  34. scanoss/data/build_date.txt +1 -1
  35. scanoss/data/osadl-copyleft.json +133 -0
  36. scanoss/delta.py +197 -0
  37. scanoss/export/__init__.py +23 -0
  38. scanoss/export/dependency_track.py +227 -0
  39. scanoss/file_filters.py +2 -163
  40. scanoss/filecount.py +37 -38
  41. scanoss/gitlabqualityreport.py +214 -0
  42. scanoss/header_filter.py +563 -0
  43. scanoss/inspection/policy_check/__init__.py +0 -0
  44. scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
  45. scanoss/inspection/policy_check/dependency_track/project_violation.py +479 -0
  46. scanoss/inspection/{policy_check.py → policy_check/policy_check.py} +65 -72
  47. scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
  48. scanoss/inspection/{copyleft.py → policy_check/scanoss/copyleft.py} +89 -73
  49. scanoss/inspection/{undeclared_component.py → policy_check/scanoss/undeclared_component.py} +52 -46
  50. scanoss/inspection/summary/__init__.py +0 -0
  51. scanoss/inspection/summary/component_summary.py +170 -0
  52. scanoss/inspection/{license_summary.py → summary/license_summary.py} +62 -12
  53. scanoss/inspection/summary/match_summary.py +341 -0
  54. scanoss/inspection/utils/file_utils.py +44 -0
  55. scanoss/inspection/utils/license_utils.py +57 -71
  56. scanoss/inspection/utils/markdown_utils.py +63 -0
  57. scanoss/inspection/{inspect_base.py → utils/scan_result_processor.py} +53 -67
  58. scanoss/osadl.py +125 -0
  59. scanoss/scanner.py +135 -253
  60. scanoss/scanners/folder_hasher.py +47 -32
  61. scanoss/scanners/scanner_hfh.py +50 -18
  62. scanoss/scanoss_settings.py +33 -3
  63. scanoss/scanossapi.py +23 -25
  64. scanoss/scanossbase.py +1 -1
  65. scanoss/scanossgrpc.py +543 -289
  66. scanoss/services/dependency_track_service.py +132 -0
  67. scanoss/spdxlite.py +11 -4
  68. scanoss/threadeddependencies.py +19 -18
  69. scanoss/threadedscanning.py +10 -0
  70. scanoss/utils/scanoss_scan_results_utils.py +41 -0
  71. scanoss/winnowing.py +71 -19
  72. {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/METADATA +8 -5
  73. scanoss-1.43.1.dist-info/RECORD +110 -0
  74. scanoss/inspection/component_summary.py +0 -94
  75. scanoss-1.27.1.dist-info/RECORD +0 -87
  76. {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/WHEEL +0 -0
  77. {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/entry_points.txt +0 -0
  78. {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/licenses/LICENSE +0 -0
  79. {scanoss-1.27.1.dist-info → scanoss-1.43.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,132 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import requests
26
+
27
+ from ..scanossbase import ScanossBase
28
+
29
+ HTTP_OK = 200
30
+
31
class DependencyTrackService(ScanossBase):
    """
    Minimal read-only client for the Dependency Track REST API.

    Every lookup is an HTTP GET against ``<url>/api/v1/...`` authenticated with
    the ``X-Api-Key`` header. Each lookup returns the decoded JSON body, or None
    when a required argument is missing or the request fails.
    """

    # Seconds to wait on Dependency Track before abandoning a request.
    DEFAULT_TIMEOUT = 60

    def __init__(  # noqa: PLR0913
        self,
        api_key: str,
        url: str,
        debug: bool = False,
        trace: bool = False,
        quiet: bool = False,
        timeout: int = DEFAULT_TIMEOUT,
    ):
        """
        Create a Dependency Track client.

        Args:
            api_key: API key sent in the ``X-Api-Key`` header
            url: Base URL of the Dependency Track server (surrounding whitespace
                and trailing slashes are removed)
            debug: Enable debug output
            trace: Enable trace output
            quiet: Enable quiet mode
            timeout: Seconds to wait for each HTTP request before giving up

        Raises:
            ValueError: If the URL (after normalisation) or the API key is empty
        """
        super().__init__(debug=debug, trace=trace, quiet=quiet)
        # Normalise before validating so that values such as '  ' or '/' are
        # rejected instead of silently producing an empty base URL.
        base_url = url.strip().rstrip('/') if url else ''
        if not base_url:
            raise ValueError("Error: Dependency Track URL is required")
        self.url = base_url
        if not api_key:
            raise ValueError("Error: Dependency Track API key is required")
        self.api_key = api_key
        self.timeout = timeout

    def get_project_by_name_version(self, name, version):
        """
        Get project information by name and version from Dependency Track

        Args:
            name: Project name to search for
            version: Project version to search for

        Returns:
            Decoded JSON response of the project lookup endpoint, or None if
            an argument is missing or the request fails
        """
        if not name or not version:
            self.print_stderr('Error: Missing name or version.')
            return None
        # Use the project lookup endpoint
        params = {
            'name': name,
            'version': version
        }
        self.print_debug(f'Searching for project by: {params}')
        return self.get_dep_track_data(f'{self.url}/api/v1/project/lookup', params)

    def get_project_status(self, upload_token):
        """
        Get Dependency Track project processing status.

        Queries the event token endpoint for the given upload token.

        Args:
            upload_token: Token identifying the upload to query

        Returns:
            Decoded JSON response of the event token endpoint, or None if the
            token is missing or the request fails
        """
        if not upload_token:
            self.print_stderr('Error: Missing upload token. Cannot search for project status.')
            return None
        self.print_trace(f'URL: {self.url} Upload token: {upload_token}')
        return self.get_dep_track_data(f'{self.url}/api/v1/event/token/{upload_token}')

    def get_project_violations(self, project_id: str):
        """
        Get project violations from Dependency Track.

        Performs a single query for the policy violations of the specified
        project. It does not wait for project processing to finish; callers
        needing that should check get_project_status() first.

        Args:
            project_id: Identifier of the project to query

        Returns:
            Decoded JSON response of the violation endpoint, or None if the
            project id is missing or the request fails
        """
        if not project_id:
            self.print_stderr('Error: Missing project id. Cannot search for project violations.')
            return None
        # Return the result as-is - None indicates API failure, empty list means no violations
        return self.get_dep_track_data(f'{self.url}/api/v1/violation/project/{project_id}')

    def get_project_by_id(self, project_id: str):
        """
        Get a Dependency Track project by id.

        Args:
            project_id: Identifier of the project to fetch

        Returns:
            Decoded JSON response of the project endpoint, or None if the
            project id is missing or the request fails
        """
        if not project_id:
            self.print_stderr('Error: Missing project id. Cannot search for project.')
            return None
        self.print_trace(f'URL: {self.url}, UUID: {project_id}')
        return self.get_dep_track_data(f'{self.url}/api/v1/project/{project_id}')

    def get_dep_track_data(self, uri, params=None):
        """
        Issue an authenticated GET request and decode the JSON response.

        Args:
            uri: Full URL to request
            params: Optional query string parameters

        Returns:
            Decoded JSON body, or None if the URI is missing, the request fails
            or times out, the server answers with an error status, or the body
            is not valid JSON
        """
        if not uri:
            self.print_stderr('Error: Missing URI. Cannot search for project.')
            return None
        req_headers = {'X-Api-Key': self.api_key, 'Content-Type': 'application/json'}
        try:
            # An explicit timeout stops a stalled server from blocking the caller forever.
            response = requests.get(uri, headers=req_headers, params=params or None, timeout=self.timeout)
            response.raise_for_status()  # Raises an HTTPError for bad responses
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on Requests versions where
            # the decode error is not a RequestException subclass.
            self.print_stderr(f"Error: Problem getting project data: {e}")
            return None
scanoss/spdxlite.py CHANGED
@@ -71,9 +71,12 @@ class SpdxLite:
71
71
  :param data: json - JSON object
72
72
  :return: summary dictionary
73
73
  """
74
- if not data:
74
+ if data is None:
75
75
  self.print_stderr('ERROR: No JSON data provided to parse.')
76
76
  return None
77
+ if len(data) == 0:
78
+ self.print_debug('Warning: Empty scan results provided. Returning empty summary.')
79
+ return {}
77
80
 
78
81
  self.print_debug('Processing raw results into summary format...')
79
82
  return self._process_files(data)
@@ -223,7 +226,9 @@ class SpdxLite:
223
226
  Process license information and remove duplicates.
224
227
 
225
228
  This method filters license information to include only licenses from trusted sources
226
- ('component_declared' or 'license_file') and removes any duplicate license names.
229
+ ('component_declared', 'license_file', 'file_header'). Licenses with an unspecified
230
+ source (None or '') are allowed. Non-empty, non-allowed sources are excluded. It also
231
+ removes any duplicate license names.
227
232
  The result is a simplified list of license dictionaries containing only the 'id' field.
228
233
 
229
234
  Args:
@@ -244,7 +249,7 @@ class SpdxLite:
244
249
  for license_info in licenses:
245
250
  name = license_info.get('name')
246
251
  source = license_info.get('source')
247
- if source not in ("component_declared", "license_file", "file_header"):
252
+ if source not in (None, '') and source not in ("component_declared", "license_file", "file_header"):
248
253
  continue
249
254
  if name and name not in seen_names:
250
255
  processed_licenses.append({'id': name})
@@ -277,9 +282,11 @@ class SpdxLite:
277
282
  :return: True if successful, False otherwise
278
283
  """
279
284
  raw_data = self.parse(data)
280
- if not raw_data:
285
+ if raw_data is None:
281
286
  self.print_stderr('ERROR: No SPDX data returned for the JSON string provided.')
282
287
  return False
288
+ if len(raw_data) == 0:
289
+ self.print_debug('Warning: Empty scan results - generating minimal SPDX Lite document with no packages.')
283
290
 
284
291
  self.load_license_data()
285
292
  spdx_document = self._create_base_document(raw_data)
@@ -22,12 +22,12 @@ SPDX-License-Identifier: MIT
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
- import threading
26
- import queue
27
25
  import json
28
- from enum import Enum
29
- from typing import Dict, Optional, Set
26
+ import queue
27
+ import threading
30
28
  from dataclasses import dataclass
29
+ from enum import Enum
30
+ from typing import Dict
31
31
 
32
32
  from .scancodedeps import ScancodeDeps
33
33
  from .scanossbase import ScanossBase
@@ -63,7 +63,7 @@ class ThreadedDependencies(ScanossBase):
63
63
  inputs: queue.Queue = queue.Queue()
64
64
  output: queue.Queue = queue.Queue()
65
65
 
66
- def __init__(
66
+ def __init__( # noqa: PLR0913
67
67
  self,
68
68
  sc_deps: ScancodeDeps,
69
69
  grpc_api: ScanossGrpc,
@@ -180,13 +180,15 @@ class ThreadedDependencies(ScanossBase):
180
180
  return self.filter_dependencies(
181
181
  deps, lambda purl: (exclude and purl not in exclude) or (not exclude and purl in include)
182
182
  )
183
+ return None
183
184
 
184
- def scan_dependencies(
185
+ def scan_dependencies( # noqa: PLR0912
185
186
  self, dep_scope: SCOPE = None, dep_scope_include: str = None, dep_scope_exclude: str = None
186
187
  ) -> None:
187
188
  """
188
189
  Scan for dependencies from the given file/dir or from an input file (from the input queue).
189
190
  """
191
+ # TODO refactor to simplify branches based on PLR0912
190
192
  current_thread = threading.get_ident()
191
193
  self.print_trace(f'Starting dependency worker {current_thread}...')
192
194
  try:
@@ -194,18 +196,17 @@ class ThreadedDependencies(ScanossBase):
194
196
  deps = None
195
197
  if what_to_scan.startswith(DEP_FILE_PREFIX): # We have a pre-parsed dependency file, load it
196
198
  deps = self.sc_deps.load_from_file(what_to_scan.strip(DEP_FILE_PREFIX))
197
- else: # Search the file/folder for dependency files to parse
198
- if not self.sc_deps.run_scan(what_to_scan=what_to_scan):
199
- self._errors = True
200
- else:
201
- deps = self.sc_deps.produce_from_file()
202
- if dep_scope is not None:
203
- self.print_debug(f'Filtering {dep_scope.name} dependencies')
204
- if dep_scope_include is not None:
205
- self.print_debug(f"Including dependencies with '{dep_scope_include.split(',')}' scopes")
206
- if dep_scope_exclude is not None:
207
- self.print_debug(f"Excluding dependencies with '{dep_scope_exclude.split(',')}' scopes")
208
- deps = self.filter_dependencies_by_scopes(deps, dep_scope, dep_scope_include, dep_scope_exclude)
199
+ elif not self.sc_deps.run_scan(what_to_scan=what_to_scan):
200
+ self._errors = True
201
+ else:
202
+ deps = self.sc_deps.produce_from_file()
203
+ if dep_scope is not None:
204
+ self.print_debug(f'Filtering {dep_scope.name} dependencies')
205
+ if dep_scope_include is not None:
206
+ self.print_debug(f"Including dependencies with '{dep_scope_include.split(',')}' scopes")
207
+ if dep_scope_exclude is not None:
208
+ self.print_debug(f"Excluding dependencies with '{dep_scope_exclude.split(',')}' scopes")
209
+ deps = self.filter_dependencies_by_scopes(deps, dep_scope, dep_scope_include, dep_scope_exclude)
209
210
 
210
211
  if not self._errors:
211
212
  if deps is None:
@@ -22,6 +22,7 @@ SPDX-License-Identifier: MIT
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
+ import atexit
25
26
  import os
26
27
  import queue
27
28
  import sys
@@ -77,6 +78,8 @@ class ThreadedScanning(ScanossBase):
77
78
  if nb_threads > MAX_ALLOWED_THREADS:
78
79
  self.print_msg(f'Warning: Requested threads too large: {nb_threads}. Reducing to {MAX_ALLOWED_THREADS}')
79
80
  self.nb_threads = MAX_ALLOWED_THREADS
81
+ # Register cleanup to ensure progress bar is finished on exit
82
+ atexit.register(self.complete_bar)
80
83
 
81
84
  @staticmethod
82
85
  def __count_files_in_wfp(wfp: str):
@@ -101,6 +104,13 @@ class ThreadedScanning(ScanossBase):
101
104
  if self.bar:
102
105
  self.bar.finish()
103
106
 
107
def __del__(self):
    """
    Ensure progress bar is cleaned up when object is destroyed

    Best-effort only: any exception raised by complete_bar() is swallowed so
    that finalising the object never raises.
    """
    try:
        self.complete_bar()
    except Exception:
        pass  # Ignore errors during cleanup
113
+
104
114
  def set_bar(self, bar: Bar) -> None:
105
115
  """
106
116
  Set the Progress Bar to display progress while scanning
@@ -0,0 +1,41 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2025, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
def get_lines(lines: str) -> list:
    """
    Extract the line numbers mentioned in a SCANOSS line-range string.

    The input is split on ',' into ranges and each range on '-' into its
    bounds. Every bound is converted to int and returned in order of
    appearance; ranges are not expanded, so '10-20,25-30' yields
    [10, 20, 25, 30].

    :param lines: Comma-separated line ranges in SCANOSS format (e.g., '10-20,25-30')
    :return: Flat list of the integer bounds found in the ranges
    """
    return [int(bound) for line_range in lines.split(',') for bound in line_range.split('-')]
scanoss/winnowing.py CHANGED
@@ -37,6 +37,7 @@ from typing import Tuple
37
37
  from binaryornot.check import is_binary
38
38
  from crc32c import crc32c
39
39
 
40
+ from .header_filter import HeaderFilter
40
41
  from .scanossbase import ScanossBase
41
42
 
42
43
  # Winnowing configuration. DO NOT CHANGE.
@@ -172,6 +173,8 @@ class Winnowing(ScanossBase):
172
173
  strip_hpsm_ids=None,
173
174
  strip_snippet_ids=None,
174
175
  skip_md5_ids=None,
176
+ skip_headers: bool = False,
177
+ skip_headers_limit: int = 0,
175
178
  ):
176
179
  """
177
180
  Instantiate Winnowing class
@@ -198,7 +201,9 @@ class Winnowing(ScanossBase):
198
201
  self.strip_hpsm_ids = strip_hpsm_ids
199
202
  self.strip_snippet_ids = strip_snippet_ids
200
203
  self.hpsm = hpsm
204
+ self.skip_headers = skip_headers
201
205
  self.is_windows = platform.system() == 'Windows'
206
+ self.header_filter = HeaderFilter(debug=debug, trace=trace, quiet=quiet, skip_limit=skip_headers_limit)
202
207
  if hpsm:
203
208
  self.crc8_maxim_dow_table = []
204
209
  self.crc8_generate_table()
@@ -353,6 +358,48 @@ class Winnowing(ScanossBase):
353
358
  self.print_debug(f'Stripped snippet ids from {file}')
354
359
  return wfp
355
360
 
361
def __strip_lines_until_offset(self, file: str, wfp: str, line_offset: int) -> str:
    """
    Drop snippet entries at or before a given line number from a WFP

    An entry whose key (the text before the first '=') is numeric is treated as
    a snippet line. Snippet lines numbered above line_offset are kept, and a
    single 'start_line=<line_offset>' marker is inserted ahead of the first one
    kept. All other entries are passed through unchanged.

    :param file: name of fingerprinted file (used in messages only)
    :param wfp: WFP to clean
    :param line_offset: last line number to discard; values <= 0 leave the WFP untouched
    :return: updated WFP
    """
    if line_offset <= 0:
        # Nothing to strip
        return wfp
    kept = []
    marker_emitted = False
    for entry in wfp.split('\n'):
        key = entry.partition('=')[0]
        if not key.isdigit():
            # Not snippet data (file=, hpsm=, etc.): always preserved
            kept.append(entry)
            continue
        try:
            entry_line = int(key)
        except (ValueError, IndexError) as e:
            # isdigit() accepts some characters that int() rejects; keep such entries as-is
            self.print_stderr(f'Error decoding line number from line {entry} in {file}: {e}')
            kept.append(entry)
            continue
        if entry_line <= line_offset:
            # Still inside the region being stripped
            continue
        if not marker_emitted:
            # Announce the offset once, right before the first surviving snippet line
            kept.append(f'start_line={line_offset}')
            marker_emitted = True
        kept.append(entry)
    stripped_wfp = '\n'.join(kept)
    if marker_emitted:
        self.print_debug(f'Stripped lines up to offset {line_offset} from {file}')
    return stripped_wfp
402
+
356
403
  def __detect_line_endings(self, contents: bytes) -> Tuple[bool, bool, bool]:
357
404
  """Detect the types of line endings present in file contents.
358
405
 
@@ -362,13 +409,14 @@ class Winnowing(ScanossBase):
362
409
  Returns:
363
410
  Tuple of (has_crlf, has_lf_only, has_cr_only, has_mixed) indicating which line ending types are present.
364
411
  """
412
+ if not contents:
413
+ self.print_debug('Warning: No file contents provided')
365
414
  has_crlf = b'\r\n' in contents
366
415
  # For LF detection, we need to find LF that's not part of CRLF
367
416
  content_without_crlf = contents.replace(b'\r\n', b'')
368
417
  has_standalone_lf = b'\n' in content_without_crlf
369
418
  # For CR detection, we need to find CR that's not part of CRLF
370
419
  has_standalone_cr = b'\r' in content_without_crlf
371
-
372
420
  return has_crlf, has_standalone_lf, has_standalone_cr
373
421
 
374
422
  def __calculate_opposite_line_ending_hash(self, contents: bytes):
@@ -384,13 +432,11 @@ class Winnowing(ScanossBase):
384
432
  Hash with opposite line endings as hex string, or None if no line endings detected.
385
433
  """
386
434
  has_crlf, has_standalone_lf, has_standalone_cr = self.__detect_line_endings(contents)
387
-
388
435
  if not has_crlf and not has_standalone_lf and not has_standalone_cr:
436
+ self.print_debug('No line endings detected in file contents')
389
437
  return None
390
-
391
- # Normalize all line endings to LF first
438
+ # Normalise all line endings to LF first
392
439
  normalized = contents.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
393
-
394
440
  # Determine the dominant line ending type
395
441
  if has_crlf and not has_standalone_lf and not has_standalone_cr:
396
442
  # File is Windows (CRLF) - produce Unix (LF) hash
@@ -398,7 +444,7 @@ class Winnowing(ScanossBase):
398
444
  else:
399
445
  # File is Unix (LF/CR) or mixed - produce Windows (CRLF) hash
400
446
  opposite_contents = normalized.replace(b'\n', b'\r\n')
401
-
447
+ # Return the MD5 hash of the opposite contents
402
448
  return hashlib.md5(opposite_contents).hexdigest()
403
449
 
404
450
  def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str: # noqa: PLR0912, PLR0915
@@ -420,27 +466,26 @@ class Winnowing(ScanossBase):
420
466
  # Print file line
421
467
  content_length = len(contents)
422
468
  original_filename = file
423
-
424
469
  if self.is_windows:
425
470
  original_filename = file.replace('\\', '/')
426
471
  wfp_filename = repr(original_filename).strip("'") # return a utf-8 compatible version of the filename
427
- if self.obfuscate: # hide the real size of the file and its name, but keep the suffix
472
+ # hide the real size of the file and its name but keep the suffix
473
+ if self.obfuscate:
428
474
  wfp_filename = f'{self.ob_count}{pathlib.Path(original_filename).suffix}'
429
475
  self.ob_count = self.ob_count + 1
430
476
  self.file_map[wfp_filename] = original_filename # Save the file name map for later (reverse lookup)
431
-
477
+ # Construct the WFP header
432
478
  wfp = 'file={0},{1},{2}\n'.format(file_md5, content_length, wfp_filename)
433
-
434
- # Add opposite line ending hash based on line ending analysis
479
+ # Add the opposite line ending hash based on line ending analysis
435
480
  if not bin_file:
436
481
  opposite_hash = self.__calculate_opposite_line_ending_hash(contents)
437
482
  if opposite_hash is not None:
438
483
  wfp += f'fh2={opposite_hash}\n'
439
-
440
484
  # We don't process snippets for binaries, or other uninteresting files, or if we're requested to skip
441
- if bin_file or self.skip_snippets or self.__skip_snippets(file, contents.decode('utf-8', 'ignore')):
485
+ decoded_contents = contents.decode('utf-8', 'ignore')
486
+ if bin_file or self.skip_snippets or self.__skip_snippets(file, decoded_contents):
442
487
  return wfp
443
- # Add HPSM
488
+ # Add HPSM (calculated from original contents, not filtered)
444
489
  if self.hpsm:
445
490
  hpsm = self.__strip_hpsm(file, self.calc_hpsm(contents))
446
491
  if len(hpsm) > 0:
@@ -448,7 +493,7 @@ class Winnowing(ScanossBase):
448
493
  # Initialize variables
449
494
  gram = ''
450
495
  window = []
451
- line = 1
496
+ line = 1 # Line counter for WFP generation
452
497
  last_hash = MAX_CRC32
453
498
  last_line = 0
454
499
  output = ''
@@ -503,12 +548,19 @@ class Winnowing(ScanossBase):
503
548
  wfp += output + '\n'
504
549
  else:
505
550
  self.print_debug(f'Warning: skipping output in WFP for {file} - "{output}"')
506
-
551
+ # Warn if we don't have any WFP content
507
552
  if wfp is None or wfp == '':
508
553
  self.print_stderr(f'Warning: No WFP content data for {file}')
509
- elif self.strip_snippet_ids:
510
- wfp = self.__strip_snippets(file, wfp)
511
-
554
+ else:
555
+ # Apply line filter to remove headers, comments, and imports from the beginning (if enabled)
556
+ if self.skip_headers:
557
+ line_offset = self.header_filter.filter(file, decoded_contents)
558
+ if line_offset > 0:
559
+ wfp = self.__strip_lines_until_offset(file, wfp, line_offset)
560
+ # Strip snippet IDs from the WFP (if enabled)
561
+ if self.strip_snippet_ids:
562
+ wfp = self.__strip_snippets(file, wfp)
563
+ # Return the WFP contents
512
564
  return wfp
513
565
 
514
566
  def calc_hpsm(self, content):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scanoss
3
- Version: 1.27.1
3
+ Version: 1.43.1
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS
@@ -13,15 +13,16 @@ Classifier: License :: OSI Approved :: MIT License
13
13
  Classifier: Operating System :: OS Independent
14
14
  Classifier: Development Status :: 5 - Production/Stable
15
15
  Classifier: Programming Language :: Python :: 3
16
- Requires-Python: >=3.7
16
+ Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: requests
20
20
  Requires-Dist: crc32c>=2.2
21
21
  Requires-Dist: binaryornot
22
22
  Requires-Dist: progress
23
- Requires-Dist: grpcio>1.42.0
24
- Requires-Dist: protobuf>3.19.1
23
+ Requires-Dist: grpcio>=1.73.1
24
+ Requires-Dist: protobuf>=6.3.1
25
+ Requires-Dist: protoc-gen-openapiv2
25
26
  Requires-Dist: pypac
26
27
  Requires-Dist: pyOpenSSL
27
28
  Requires-Dist: google-api-core
@@ -30,6 +31,8 @@ Requires-Dist: packageurl-python
30
31
  Requires-Dist: pathspec
31
32
  Requires-Dist: jsonschema
32
33
  Requires-Dist: crc
34
+ Requires-Dist: protoc-gen-openapiv2
35
+ Requires-Dist: cyclonedx-python-lib[validation]
33
36
  Provides-Extra: fast-winnowing
34
37
  Requires-Dist: scanoss_winnowing>=0.5.0; extra == "fast-winnowing"
35
38
  Dynamic: license-file
@@ -174,7 +177,7 @@ if __name__ == "__main__":
174
177
  ```
175
178
 
176
179
  ## Requirements
177
- Python 3.7 or higher.
180
+ Python 3.9 or higher.
178
181
 
179
182
  ## Source code
180
183
  The source for this package can be found [here](https://github.com/scanoss/scanoss.py).