scanoss 1.20.3__tar.gz → 1.20.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {scanoss-1.20.3/src/scanoss.egg-info → scanoss-1.20.5}/PKG-INFO +1 -1
  2. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/__init__.py +1 -1
  3. scanoss-1.20.5/src/scanoss/data/build_date.txt +1 -0
  4. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/scanossgrpc.py +91 -53
  5. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/spdxlite.py +281 -30
  6. {scanoss-1.20.3 → scanoss-1.20.5/src/scanoss.egg-info}/PKG-INFO +1 -1
  7. scanoss-1.20.3/src/scanoss/data/build_date.txt +0 -1
  8. {scanoss-1.20.3 → scanoss-1.20.5}/LICENSE +0 -0
  9. {scanoss-1.20.3 → scanoss-1.20.5}/PACKAGE.md +0 -0
  10. {scanoss-1.20.3 → scanoss-1.20.5}/README.md +0 -0
  11. {scanoss-1.20.3 → scanoss-1.20.5}/pyproject.toml +0 -0
  12. {scanoss-1.20.3 → scanoss-1.20.5}/setup.cfg +0 -0
  13. {scanoss-1.20.3 → scanoss-1.20.5}/src/protoc_gen_swagger/__init__.py +0 -0
  14. {scanoss-1.20.3 → scanoss-1.20.5}/src/protoc_gen_swagger/options/__init__.py +0 -0
  15. {scanoss-1.20.3 → scanoss-1.20.5}/src/protoc_gen_swagger/options/annotations_pb2.py +0 -0
  16. {scanoss-1.20.3 → scanoss-1.20.5}/src/protoc_gen_swagger/options/annotations_pb2_grpc.py +0 -0
  17. {scanoss-1.20.3 → scanoss-1.20.5}/src/protoc_gen_swagger/options/openapiv2_pb2.py +0 -0
  18. {scanoss-1.20.3 → scanoss-1.20.5}/src/protoc_gen_swagger/options/openapiv2_pb2_grpc.py +0 -0
  19. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/__init__.py +0 -0
  20. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/common/__init__.py +0 -0
  21. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/common/v2/__init__.py +0 -0
  22. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/common/v2/scanoss_common_pb2.py +0 -0
  23. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/common/v2/scanoss_common_pb2_grpc.py +0 -0
  24. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/components/__init__.py +0 -0
  25. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/components/v2/__init__.py +0 -0
  26. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/components/v2/scanoss_components_pb2.py +0 -0
  27. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/components/v2/scanoss_components_pb2_grpc.py +0 -0
  28. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +0 -0
  29. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +0 -0
  30. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/dependencies/__init__.py +0 -0
  31. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/dependencies/v2/__init__.py +0 -0
  32. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +0 -0
  33. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +0 -0
  34. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/provenance/__init__.py +0 -0
  35. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/provenance/v2/__init__.py +0 -0
  36. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/provenance/v2/scanoss_provenance_pb2.py +0 -0
  37. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/provenance/v2/scanoss_provenance_pb2_grpc.py +0 -0
  38. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/scanning/__init__.py +0 -0
  39. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/scanning/v2/__init__.py +0 -0
  40. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2.py +0 -0
  41. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +0 -0
  42. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/semgrep/__init__.py +0 -0
  43. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/semgrep/v2/__init__.py +0 -0
  44. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +0 -0
  45. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +0 -0
  46. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/vulnerabilities/__init__.py +0 -0
  47. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/vulnerabilities/v2/__init__.py +0 -0
  48. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +0 -0
  49. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +0 -0
  50. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/cli.py +0 -0
  51. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/components.py +0 -0
  52. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/csvoutput.py +0 -0
  53. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/cyclonedx.py +0 -0
  54. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/data/scanoss-settings-schema.json +0 -0
  55. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/data/spdx-exceptions.json +0 -0
  56. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/data/spdx-licenses.json +0 -0
  57. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/file_filters.py +0 -0
  58. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/filecount.py +0 -0
  59. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/inspection/__init__.py +0 -0
  60. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/inspection/copyleft.py +0 -0
  61. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/inspection/policy_check.py +0 -0
  62. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/inspection/undeclared_component.py +0 -0
  63. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/inspection/utils/license_utils.py +0 -0
  64. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/results.py +0 -0
  65. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/scancodedeps.py +0 -0
  66. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/scanner.py +0 -0
  67. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/scanoss_settings.py +0 -0
  68. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/scanossapi.py +0 -0
  69. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/scanossbase.py +0 -0
  70. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/scanpostprocessor.py +0 -0
  71. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/scantype.py +0 -0
  72. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/threadeddependencies.py +0 -0
  73. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/threadedscanning.py +0 -0
  74. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/utils/__init__.py +0 -0
  75. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/utils/file.py +0 -0
  76. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss/winnowing.py +0 -0
  77. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss.egg-info/SOURCES.txt +0 -0
  78. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss.egg-info/dependency_links.txt +0 -0
  79. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss.egg-info/entry_points.txt +0 -0
  80. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss.egg-info/requires.txt +0 -0
  81. {scanoss-1.20.3 → scanoss-1.20.5}/src/scanoss.egg-info/top_level.txt +0 -0
  82. {scanoss-1.20.3 → scanoss-1.20.5}/tests/test_csv_output.py +0 -0
  83. {scanoss-1.20.3 → scanoss-1.20.5}/tests/test_file_filters.py +0 -0
  84. {scanoss-1.20.3 → scanoss-1.20.5}/tests/test_policy_inspect.py +0 -0
  85. {scanoss-1.20.3 → scanoss-1.20.5}/tests/test_scan_post_processor.py +0 -0
  86. {scanoss-1.20.3 → scanoss-1.20.5}/tests/test_spdxlite.py +0 -0
  87. {scanoss-1.20.3 → scanoss-1.20.5}/tests/test_winnowing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: scanoss
3
- Version: 1.20.3
3
+ Version: 1.20.5
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS
@@ -22,4 +22,4 @@ SPDX-License-Identifier: MIT
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
- __version__ = '1.20.3'
25
+ __version__ = '1.20.5'
@@ -0,0 +1 @@
1
+ date: 20250314141411, utime: 1741961651
@@ -22,50 +22,58 @@ SPDX-License-Identifier: MIT
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
+ import concurrent.futures
26
+ import json
25
27
  import os
26
28
  import uuid
29
+ from urllib.parse import urlparse
27
30
 
28
31
  import grpc
29
- import json
30
-
31
32
  from google.protobuf.json_format import MessageToDict, ParseDict
32
33
  from pypac.parser import PACFile
33
34
  from pypac.resolver import ProxyResolver
34
- from urllib.parse import urlparse
35
35
 
36
- from .api.components.v2.scanoss_components_pb2_grpc import ComponentsStub
37
- from .api.cryptography.v2.scanoss_cryptography_pb2_grpc import CryptographyStub
38
- from .api.dependencies.v2.scanoss_dependencies_pb2_grpc import DependenciesStub
39
- from .api.vulnerabilities.v2.scanoss_vulnerabilities_pb2_grpc import VulnerabilitiesStub
40
- from .api.provenance.v2.scanoss_provenance_pb2_grpc import ProvenanceStub
41
- from .api.semgrep.v2.scanoss_semgrep_pb2_grpc import SemgrepStub
42
- from .api.cryptography.v2.scanoss_cryptography_pb2 import AlgorithmResponse
43
- from .api.dependencies.v2.scanoss_dependencies_pb2 import DependencyRequest, DependencyResponse
44
- from .api.common.v2.scanoss_common_pb2 import EchoRequest, EchoResponse, StatusResponse, StatusCode, PurlRequest
45
- from .api.vulnerabilities.v2.scanoss_vulnerabilities_pb2 import VulnerabilityResponse
46
- from .api.semgrep.v2.scanoss_semgrep_pb2 import SemgrepResponse
36
+ from . import __version__
37
+ from .api.common.v2.scanoss_common_pb2 import (
38
+ EchoRequest,
39
+ EchoResponse,
40
+ PurlRequest,
41
+ StatusCode,
42
+ StatusResponse,
43
+ )
47
44
  from .api.components.v2.scanoss_components_pb2 import (
48
45
  CompSearchRequest,
49
46
  CompSearchResponse,
50
47
  CompVersionRequest,
51
48
  CompVersionResponse,
52
49
  )
50
+ from .api.components.v2.scanoss_components_pb2_grpc import ComponentsStub
51
+ from .api.cryptography.v2.scanoss_cryptography_pb2 import AlgorithmResponse
52
+ from .api.cryptography.v2.scanoss_cryptography_pb2_grpc import CryptographyStub
53
+ from .api.dependencies.v2.scanoss_dependencies_pb2 import DependencyRequest
54
+ from .api.dependencies.v2.scanoss_dependencies_pb2_grpc import DependenciesStub
53
55
  from .api.provenance.v2.scanoss_provenance_pb2 import ProvenanceResponse
56
+ from .api.provenance.v2.scanoss_provenance_pb2_grpc import ProvenanceStub
57
+ from .api.semgrep.v2.scanoss_semgrep_pb2 import SemgrepResponse
58
+ from .api.semgrep.v2.scanoss_semgrep_pb2_grpc import SemgrepStub
59
+ from .api.vulnerabilities.v2.scanoss_vulnerabilities_pb2 import VulnerabilityResponse
60
+ from .api.vulnerabilities.v2.scanoss_vulnerabilities_pb2_grpc import VulnerabilitiesStub
54
61
  from .scanossbase import ScanossBase
55
- from . import __version__
56
62
 
57
63
  DEFAULT_URL = 'https://api.osskb.org' # default free service URL
58
64
  DEFAULT_URL2 = 'https://api.scanoss.com' # default premium service URL
59
65
  SCANOSS_GRPC_URL = os.environ.get('SCANOSS_GRPC_URL') if os.environ.get('SCANOSS_GRPC_URL') else DEFAULT_URL
60
66
  SCANOSS_API_KEY = os.environ.get('SCANOSS_API_KEY') if os.environ.get('SCANOSS_API_KEY') else ''
61
67
 
68
+ MAX_CONCURRENT_REQUESTS = 5
69
+
62
70
 
63
71
  class ScanossGrpc(ScanossBase):
64
72
  """
65
73
  Client for gRPC functionality
66
74
  """
67
75
 
68
- def __init__(
76
+ def __init__( # noqa: PLR0913
69
77
  self,
70
78
  url: str = None,
71
79
  debug: bool = False,
@@ -222,31 +230,54 @@ class ScanossGrpc(ScanossBase):
222
230
  :return: Server response or None
223
231
  """
224
232
  if not dependencies:
225
- self.print_stderr(f'ERROR: No message supplied to send to gRPC service.')
233
+ self.print_stderr('ERROR: No message supplied to send to gRPC service.')
226
234
  return None
227
- request_id = str(uuid.uuid4())
228
- resp: DependencyResponse
229
- try:
230
- files_json = dependencies.get('files')
231
- if files_json is None or len(files_json) == 0:
232
- self.print_stderr(f'ERROR: No dependency data supplied to send to gRPC service.')
235
+
236
+ files_json = dependencies.get('files')
237
+
238
+ if files_json is None or len(files_json) == 0:
239
+ self.print_stderr('ERROR: No dependency data supplied to send to gRPC service.')
240
+ return None
241
+
242
+ def process_file(file):
243
+ request_id = str(uuid.uuid4())
244
+ try:
245
+ file_request = {'files': [file]}
246
+
247
+ request = ParseDict(file_request, DependencyRequest())
248
+ request.depth = depth
249
+ metadata = self.metadata[:]
250
+ metadata.append(('x-request-id', request_id))
251
+ self.print_debug(f'Sending dependency data for decoration (rqId: {request_id})...')
252
+ resp = self.dependencies_stub.GetDependencies(request, metadata=metadata, timeout=self.timeout)
253
+
254
+ return MessageToDict(resp, preserving_proto_field_name=True)
255
+ except Exception as e:
256
+ self.print_stderr(
257
+ f'ERROR: {e.__class__.__name__} Problem encountered sending gRPC message (rqId: {request_id}): {e}'
258
+ )
233
259
  return None
234
- request = ParseDict(dependencies, DependencyRequest()) # Parse the JSON/Dict into the dependency object
235
- request.depth = depth
236
- metadata = self.metadata[:]
237
- metadata.append(('x-request-id', request_id)) # Set a Request ID
238
- self.print_debug(f'Sending dependency data for decoration (rqId: {request_id})...')
239
- resp = self.dependencies_stub.GetDependencies(request, metadata=metadata, timeout=self.timeout)
240
- except Exception as e:
241
- self.print_stderr(
242
- f'ERROR: {e.__class__.__name__} Problem encountered sending gRPC message (rqId: {request_id}): {e}'
243
- )
244
- else:
245
- if resp:
246
- if not self._check_status_response(resp.status, request_id):
247
- return None
248
- return MessageToDict(resp, preserving_proto_field_name=True) # Convert gRPC response to a dictionary
249
- return None
260
+
261
+ all_responses = []
262
+ with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS) as executor:
263
+ future_to_file = {executor.submit(process_file, file): file for file in files_json}
264
+
265
+ for future in concurrent.futures.as_completed(future_to_file):
266
+ response = future.result()
267
+ if response:
268
+ all_responses.append(response)
269
+
270
+ SUCCESS_STATUS = 'SUCCESS'
271
+
272
+ merged_response = {'files': [], 'status': {'status': SUCCESS_STATUS, 'message': 'Success'}}
273
+ for response in all_responses:
274
+ if response:
275
+ if 'files' in response and len(response['files']) > 0:
276
+ merged_response['files'].append(response['files'][0])
277
+ # Overwrite the status if any of the responses was not successful
278
+ if 'status' in response and response['status']['status'] != SUCCESS_STATUS:
279
+ merged_response['status'] = response['status']
280
+ return merged_response
250
281
 
251
282
  def get_crypto_json(self, purls: dict) -> dict:
252
283
  """
@@ -255,7 +286,7 @@ class ScanossGrpc(ScanossBase):
255
286
  :return: Server response or None
256
287
  """
257
288
  if not purls:
258
- self.print_stderr(f'ERROR: No message supplied to send to gRPC service.')
289
+ self.print_stderr('ERROR: No message supplied to send to gRPC service.')
259
290
  return None
260
291
  request_id = str(uuid.uuid4())
261
292
  resp: AlgorithmResponse
@@ -285,7 +316,7 @@ class ScanossGrpc(ScanossBase):
285
316
  :return: Server response or None
286
317
  """
287
318
  if not purls:
288
- self.print_stderr(f'ERROR: No message supplied to send to gRPC service.')
319
+ self.print_stderr('ERROR: No message supplied to send to gRPC service.')
289
320
  return None
290
321
  request_id = str(uuid.uuid4())
291
322
  resp: VulnerabilityResponse
@@ -315,7 +346,7 @@ class ScanossGrpc(ScanossBase):
315
346
  :return: Server response or None
316
347
  """
317
348
  if not purls:
318
- self.print_stderr(f'ERROR: No message supplied to send to gRPC service.')
349
+ self.print_stderr('ERROR: No message supplied to send to gRPC service.')
319
350
  return None
320
351
  request_id = str(uuid.uuid4())
321
352
  resp: SemgrepResponse
@@ -345,7 +376,7 @@ class ScanossGrpc(ScanossBase):
345
376
  :return: Server response or None
346
377
  """
347
378
  if not search:
348
- self.print_stderr(f'ERROR: No message supplied to send to gRPC service.')
379
+ self.print_stderr('ERROR: No message supplied to send to gRPC service.')
349
380
  return None
350
381
  request_id = str(uuid.uuid4())
351
382
  resp: CompSearchResponse
@@ -375,7 +406,7 @@ class ScanossGrpc(ScanossBase):
375
406
  :return: Server response or None
376
407
  """
377
408
  if not search:
378
- self.print_stderr(f'ERROR: No message supplied to send to gRPC service.')
409
+ self.print_stderr('ERROR: No message supplied to send to gRPC service.')
379
410
  return None
380
411
  request_id = str(uuid.uuid4())
381
412
  resp: CompVersionResponse
@@ -404,6 +435,10 @@ class ScanossGrpc(ScanossBase):
404
435
  :param status_response: Status Response
405
436
  :return: True if successful, False otherwise
406
437
  """
438
+
439
+ SUCCEDED_WITH_WARNINGS_STATUS_CODE = 2
440
+ FAILED_STATUS_CODE = 3
441
+
407
442
  if not status_response:
408
443
  self.print_stderr(f'Warning: No status response supplied (rqId: {request_id}). Assuming it was ok.')
409
444
  return True
@@ -411,11 +446,11 @@ class ScanossGrpc(ScanossBase):
411
446
  status_code: StatusCode = status_response.status
412
447
  if status_code > 1:
413
448
  ret_val = False # default to failed
414
- msg = "Unsuccessful"
415
- if status_code == 2:
416
- msg = "Succeeded with warnings"
449
+ msg = 'Unsuccessful'
450
+ if status_code == SUCCEDED_WITH_WARNINGS_STATUS_CODE:
451
+ msg = 'Succeeded with warnings'
417
452
  ret_val = True # No need to fail as it succeeded with warnings
418
- elif status_code == 3:
453
+ elif status_code == FAILED_STATUS_CODE:
419
454
  msg = 'Failed with warnings'
420
455
  self.print_stderr(f'{msg} (rqId: {request_id} - status: {status_code}): {status_response.message}')
421
456
  return ret_val
@@ -428,10 +463,10 @@ class ScanossGrpc(ScanossBase):
428
463
  :param self:
429
464
  """
430
465
  if self.grpc_proxy:
431
- self.print_debug(f'Setting GRPC (grpc_proxy) proxy...')
466
+ self.print_debug('Setting GRPC (grpc_proxy) proxy...')
432
467
  os.environ['grpc_proxy'] = self.grpc_proxy
433
468
  elif self.proxy:
434
- self.print_debug(f'Setting GRPC (http_proxy/https_proxy) proxies...')
469
+ self.print_debug('Setting GRPC (http_proxy/https_proxy) proxies...')
435
470
  os.environ['http_proxy'] = self.proxy
436
471
  os.environ['https_proxy'] = self.proxy
437
472
  elif self.pac:
@@ -450,7 +485,7 @@ class ScanossGrpc(ScanossBase):
450
485
  :return: Server response or None
451
486
  """
452
487
  if not purls:
453
- self.print_stderr(f'ERROR: No message supplied to send to gRPC service.')
488
+ self.print_stderr('ERROR: No message supplied to send to gRPC service.')
454
489
  return None
455
490
  request_id = str(uuid.uuid4())
456
491
  resp: ProvenanceResponse
@@ -461,8 +496,9 @@ class ScanossGrpc(ScanossBase):
461
496
  self.print_debug(f'Sending data for provenance decoration (rqId: {request_id})...')
462
497
  resp = self.provenance_stub.GetComponentProvenance(request, metadata=metadata, timeout=self.timeout)
463
498
  except Exception as e:
464
- self.print_stderr(f'ERROR: {e.__class__.__name__} Problem encountered sending gRPC message '
465
- f'(rqId: {request_id}): {e}')
499
+ self.print_stderr(
500
+ f'ERROR: {e.__class__.__name__} Problem encountered sending gRPC message (rqId: {request_id}): {e}'
501
+ )
466
502
  else:
467
503
  if resp:
468
504
  if not self._check_status_response(resp.status, request_id):
@@ -470,6 +506,8 @@ class ScanossGrpc(ScanossBase):
470
506
  resp_dict = MessageToDict(resp, preserving_proto_field_name=True) # Convert gRPC response to a dict
471
507
  return resp_dict
472
508
  return None
509
+
510
+
473
511
  #
474
512
  # End of ScanossGrpc Class
475
513
  #
@@ -31,6 +31,7 @@ import re
31
31
  import sys
32
32
 
33
33
  import importlib_resources
34
+ from packageurl import PackageURL
34
35
 
35
36
  from . import __version__
36
37
 
@@ -78,15 +79,31 @@ class SpdxLite:
78
79
  return self._process_files(data)
79
80
 
80
81
  def _process_files(self, data: json) -> dict:
81
- """Process each file in the data and build summary."""
82
+ """
83
+ Process raw results and build a component summary.
84
+
85
+ Args:
86
+ data: JSON data containing raw results
87
+
88
+ Returns:
89
+ dict: The built summary dictionary
90
+ """
82
91
  summary = {}
83
92
  for file_path in data:
84
93
  file_details = data.get(file_path)
85
- self._process_file_entries(file_path, file_details, summary)
94
+ # summary is passed by reference and modified inside the function
95
+ self._process_entries(file_path, file_details, summary)
86
96
  return summary
87
97
 
88
- def _process_file_entries(self, file_path: str, file_details: list, summary: dict):
89
- """Process entries for a single file."""
98
+ def _process_entries(self, file_path: str, file_details: list, summary: dict):
99
+ """
100
+ Process entries for a single file.
101
+
102
+ Args:
103
+ file_path: Path to the file being processed
104
+ file_details: Results of the file
105
+ summary: Reference to summary dictionary that will be modified in place
106
+ """
90
107
  for entry in file_details:
91
108
  id_details = entry.get('id')
92
109
  if not id_details or id_details == 'none':
@@ -95,10 +112,17 @@ class SpdxLite:
95
112
  if id_details == 'dependency':
96
113
  self._process_dependency_entry(file_path, entry, summary)
97
114
  else:
98
- self._process_normal_entry(file_path, entry, summary)
115
+ self._process_file_entry(file_path, entry, summary)
99
116
 
100
117
  def _process_dependency_entry(self, file_path: str, entry: dict, summary: dict):
101
- """Process a dependency type entry."""
118
+ """
119
+ Process a dependency type entry.
120
+
121
+ Args:
122
+ file_path: Path to the file being processed
123
+ entry: The dependency entry to process
124
+ summary: Reference to summary dictionary that will be modified in place
125
+ """
102
126
  dependencies = entry.get('dependencies')
103
127
  if not dependencies:
104
128
  self.print_stderr(f'Warning: No Dependencies found for {file_path}')
@@ -108,11 +132,18 @@ class SpdxLite:
108
132
  purl = dep.get('purl')
109
133
  if not self._is_valid_purl(file_path, dep, purl, summary):
110
134
  continue
111
-
135
+ # Modifying the summary dictionary directly as it's passed by reference
112
136
  summary[purl] = self._create_dependency_summary(dep)
113
137
 
114
- def _process_normal_entry(self, file_path: str, entry: dict, summary: dict):
115
- """Process a normal file type entry."""
138
+ def _process_file_entry(self, file_path: str, entry: dict, summary: dict):
139
+ """
140
+ Process file entry.
141
+
142
+ Args:
143
+ file_path: Path to the file being processed
144
+ entry: The file match entry to process
145
+ summary: Reference to summary dictionary that will be modified in place
146
+ """
116
147
  purls = entry.get('purl')
117
148
  if not purls:
118
149
  self.print_stderr(f'Purl block missing for {file_path}')
@@ -122,10 +153,21 @@ class SpdxLite:
122
153
  if not self._is_valid_purl(file_path, entry, purl, summary):
123
154
  return
124
155
 
125
- summary[purl] = self._create_normal_summary(entry)
156
+ summary[purl] = self._create_file_summary(entry)
126
157
 
127
158
  def _is_valid_purl(self, file_path: str, entry: dict, purl: str, summary: dict) -> bool:
128
- """Check if PURL is valid and not already processed."""
159
+ """
160
+ Check if purl is valid and not already processed.
161
+
162
+ Args:
163
+ file_path: Path to the file being processed
164
+ entry: The entry containing the PURL
165
+ purl: The PURL to validate
166
+ summary: Reference to summary dictionary to check for existing entries
167
+
168
+ Returns:
169
+ bool: True if purl is valid and not already processed
170
+ """
129
171
  if not purl:
130
172
  self.print_stderr(f'Warning: No PURL found for {file_path}: {entry}')
131
173
  return False
@@ -137,15 +179,37 @@ class SpdxLite:
137
179
  return True
138
180
 
139
181
  def _create_dependency_summary(self, dep: dict) -> dict:
140
- """Create summary for dependency entry."""
182
+ """
183
+ Create summary for dependency entry.
184
+
185
+ This method extracts relevant fields from a dependency entry and creates a
186
+ standardized summary dictionary. It handles fields like component, version,
187
+ and URL, with special processing for licenses.
188
+
189
+ Args:
190
+ dep (dict): The dependency entry containing component information
191
+
192
+ Returns:
193
+ dict: A new summary dictionary containing the extracted and processed fields
194
+ """
141
195
  summary = {}
142
196
  for field in ['component', 'version', 'url']:
143
197
  summary[field] = dep.get(field, '')
144
198
  summary['licenses'] = self._process_licenses(dep.get('licenses'))
145
199
  return summary
146
200
 
147
- def _create_normal_summary(self, entry: dict) -> dict:
148
- """Create summary for normal file entry."""
201
+ def _create_file_summary(self, entry: dict) -> dict:
202
+ """
203
+ Create summary for file entry.
204
+
205
+ This method extracts a set of fields from a file entry and creates a standardized summary dictionary.
206
+
207
+ Args:
208
+ entry (dict): The file entry containing the metadata to summarize
209
+
210
+ Returns:
211
+ dict: A new summary dictionary containing all extracted and processed fields
212
+ """
149
213
  summary = {}
150
214
  fields = ['id', 'vendor', 'component', 'version', 'latest',
151
215
  'url', 'url_hash', 'download_url']
@@ -155,7 +219,22 @@ class SpdxLite:
155
219
  return summary
156
220
 
157
221
  def _process_licenses(self, licenses: list) -> list:
158
- """Process license information and remove duplicates."""
222
+ """
223
+ Process license information and remove duplicates.
224
+
225
+ This method filters license information to include only licenses from trusted sources
226
+ ('component_declared', 'license_file' or 'file_header') and removes any duplicate license names.
227
+ The result is a simplified list of license dictionaries containing only the 'id' field.
228
+
229
+ Args:
230
+ licenses (list): A list of license dictionaries, each containing at least 'name'
231
+ and 'source' fields. Can be None or empty.
232
+
233
+ Returns:
234
+ list: A filtered and deduplicated list of license dictionaries, where each
235
+ dictionary contains only an 'id' field matching the original license name.
236
+ Returns an empty list if input is None or empty.
237
+ """
159
238
  if not licenses:
160
239
  return []
161
240
 
@@ -164,6 +243,9 @@ class SpdxLite:
164
243
 
165
244
  for license_info in licenses:
166
245
  name = license_info.get('name')
246
+ source = license_info.get('source')
247
+ if source not in ("component_declared", "license_file", "file_header"):
248
+ continue
167
249
  if name and name not in seen_names:
168
250
  processed_licenses.append({'id': name})
169
251
  seen_names.add(name)
@@ -205,7 +287,30 @@ class SpdxLite:
205
287
  return self._write_output(spdx_document, output_file)
206
288
 
207
289
  def _create_base_document(self, raw_data: dict) -> dict:
208
- """Create the base SPDX document structure."""
290
+ """
291
+ Create the base SPDX document structure.
292
+
293
+ This method initializes a new SPDX document with standard fields required by
294
+ the SPDX 2.2 specification. It generates a unique document namespace using
295
+ a hash of the raw data and current timestamp.
296
+
297
+ Args:
298
+ raw_data (dict): The raw component data used to create a unique identifier
299
+ for the document namespace
300
+
301
+ Returns:
302
+ dict: A dictionary containing the base SPDX document structure with the
303
+ following fields:
304
+ - spdxVersion: The SPDX specification version
305
+ - dataLicense: The license for the SPDX document itself
306
+ - SPDXID: The document's unique identifier
307
+ - name: The name of the SBOM
308
+ - creationInfo: Information about when and how the document was created
309
+ - documentNamespace: A unique URI for this document
310
+ - documentDescribes: List of packages described (initially empty)
311
+ - hasExtractedLicensingInfos: List of licenses (initially empty)
312
+ - packages: List of package information (initially empty)
313
+ """
209
314
  now = datetime.datetime.utcnow()
210
315
  md5hex = hashlib.md5(f'{raw_data}-{now}'.encode('utf-8')).hexdigest()
211
316
 
@@ -222,7 +327,23 @@ class SpdxLite:
222
327
  }
223
328
 
224
329
  def _create_creation_info(self, timestamp: datetime.datetime) -> dict:
225
- """Create the creation info section."""
330
+ """
331
+ Create the creation info section of an SPDX document.
332
+
333
+ This method generates the creation information required by the SPDX specification,
334
+ including timestamps, creator information, and document type.
335
+
336
+ Args:
337
+ timestamp (datetime.datetime): The UTC timestamp representing when the
338
+ document was created
339
+
340
+ Returns:
341
+ dict: A dictionary containing creation information with the following fields:
342
+ - created: ISO 8601 formatted timestamp
343
+ - creators: List of entities involved in creating the document
344
+ (tool, person, and organization)
345
+ - comment: Additional information about the SBOM type
346
+ """
226
347
  return {
227
348
  'created': timestamp.strftime('%Y-%m-%dT%H:%M:%SZ'),
228
349
  'creators': [
@@ -234,7 +355,25 @@ class SpdxLite:
234
355
  }
235
356
 
236
357
  def _process_packages(self, raw_data: dict, spdx_document: dict):
237
- """Process packages and add them to the SPDX document."""
358
+ """
359
+ Process packages and add them to the SPDX document.
360
+
361
+ This method iterates through the raw component data, creates package information
362
+ for each component, and adds them to the SPDX document. It also collects
363
+ license references to be processed separately.
364
+
365
+ Args:
366
+ raw_data (dict): Dictionary of package data indexed by PURL
367
+ (Package URL identifiers)
368
+ spdx_document (dict): Reference to the SPDX document being built,
369
+ which will be modified in place
370
+
371
+ Note:
372
+ This method modifies the spdx_document dictionary in place by:
373
+ 1. Adding package information to the 'packages' list
374
+ 2. Adding package SPDXIDs to the 'documentDescribes' list
375
+ 3. Indirectly populating 'hasExtractedLicensingInfos' via _process_license_refs()
376
+ """
238
377
  lic_refs = set()
239
378
 
240
379
  for purl, comp in raw_data.items():
@@ -245,7 +384,36 @@ class SpdxLite:
245
384
  self._process_license_refs(lic_refs, spdx_document)
246
385
 
247
386
  def _create_package_info(self, purl: str, comp: dict, lic_refs: set) -> dict:
248
- """Create package information for SPDX document."""
387
+ """
388
+ Create package information for SPDX document.
389
+
390
+ This method generates a complete package information entry following the SPDX
391
+ specification format. It creates a unique identifier for the package based on
392
+ its PURL and version, processes license information, and formats all required
393
+ fields for the SPDX document.
394
+
395
+ Args:
396
+ purl (str): Package URL identifier for the component
397
+ comp (dict): Component information dictionary containing metadata like
398
+ component name, version, URLs, and license information
399
+ lic_refs (set): Reference to a set that will be populated with license
400
+ references found in this package. This set is modified in place.
401
+
402
+ Returns:
403
+ dict: A dictionary containing all required SPDX package fields including:
404
+ - name: Component name
405
+ - SPDXID: Unique identifier for this package within the document
406
+ - versionInfo: Component version
407
+ - downloadLocation: URL where the package can be downloaded
408
+ - homepage: Component homepage URL
409
+ - licenseDeclared: Formatted license expression
410
+ - licenseConcluded: NOASSERTION as automated conclusion isn't possible
411
+ - filesAnalyzed: False as files are not individually analyzed
412
+ - copyrightText: NOASSERTION as copyright text isn't available
413
+ - supplier: Organization name from vendor information
414
+ - externalRefs: Package URL reference for package manager integration
415
+ - checksums: MD5 hash of the package if available
416
+ """
249
417
  lic_text = self._process_package_licenses(comp.get('licenses', []), lic_refs)
250
418
  comp_ver = comp.get('version')
251
419
  purl_ver = f'{purl}@{comp_ver}'
@@ -265,7 +433,7 @@ class SpdxLite:
265
433
  'externalRefs': [
266
434
  {
267
435
  'referenceCategory': 'PACKAGE-MANAGER',
268
- 'referenceLocator': purl_ver,
436
+ 'referenceLocator': PackageURL.from_string(purl_ver).to_string(),
269
437
  'referenceType': 'purl'
270
438
  }
271
439
  ],
@@ -278,20 +446,47 @@ class SpdxLite:
278
446
  }
279
447
 
280
448
  def _process_package_licenses(self, licenses: list, lic_refs: set) -> str:
281
- """Process licenses and return license text."""
449
+ """
450
+ Process licenses and return license text formatted for SPDX.
451
+
452
+ This method processes a list of license objects, extracts valid license IDs,
453
+ converts them to SPDX format, and combines them into a properly formatted
454
+ license expression.
455
+
456
+ Args:
457
+ licenses (list): List of license dictionaries, each containing at least
458
+ an 'id' field
459
+ lic_refs (set): Reference to a set that will collect license references.
460
+ This set is modified in place.
461
+
462
+ Returns:
463
+ str: A formatted license expression string following SPDX syntax.
464
+ Returns 'NOASSERTION' if no valid licenses are found.
465
+ """
282
466
  if not licenses:
283
467
  return 'NOASSERTION'
284
468
 
285
469
  lic_set = set()
286
470
  for lic in licenses:
287
471
  lc_id = lic.get('id')
288
- if lc_id:
289
- self._process_license_id(lc_id, lic_refs, lic_set)
472
+ self._process_license_id(lc_id, lic_refs, lic_set)
290
473
 
291
474
  return self._format_license_text(lic_set)
292
475
 
293
476
  def _process_license_id(self, lc_id: str, lic_refs: set, lic_set: set):
294
- """Process individual license ID."""
477
+ """
478
+ Process individual license ID and add to appropriate sets.
479
+
480
+ This method attempts to convert a license ID to its SPDX equivalent.
481
+ If not found in the SPDX license list, it's formatted as a LicenseRef
482
+ and added to the license references set.
483
+
484
+ Args:
485
+ lc_id (str): The license ID to process
486
+ lic_refs (set): Reference to a set that collects license references
487
+ for later processing. Modified in place.
488
+ lic_set (set): Reference to a set collecting all license IDs for the package's license expression. Modified in place.
489
+ """
295
490
  spdx_id = self.get_spdx_license_id(lc_id)
296
491
  if not spdx_id:
297
492
  if not lc_id.startswith('LicenseRef'):
@@ -300,7 +495,20 @@ class SpdxLite:
300
495
  lic_set.add(spdx_id if spdx_id else lc_id)
301
496
 
302
497
  def _format_license_text(self, lic_set: set) -> str:
303
- """Format the license text with proper syntax."""
498
+ """
499
+ Format the license text with proper SPDX syntax.
500
+
501
+ This method combines multiple license IDs with the 'AND' operator
502
+ according to SPDX specification rules. If multiple licenses are present,
503
+ the expression is enclosed in parentheses.
504
+
505
+ Args:
506
+ lic_set (set): Set of license IDs to format
507
+
508
+ Returns:
509
+ str: A properly formatted SPDX license expression.
510
+ Returns 'NOASSERTION' if the set is empty.
511
+ """
304
512
  if not lic_set:
305
513
  return 'NOASSERTION'
306
514
 
@@ -310,13 +518,44 @@ class SpdxLite:
310
518
  return lic_text
311
519
 
312
520
  def _process_license_refs(self, lic_refs: set, spdx_document: dict):
313
- """Process and add license references to the document."""
521
+ """
522
+ Process and add license references to the SPDX document.
523
+
524
+ This method processes each license reference in the provided set
525
+ and adds corresponding license information to the SPDX document's
526
+ extracted licensing information section.
527
+
528
+ Args:
529
+ lic_refs (set): Set of license references to process
530
+ spdx_document (dict): Reference to the SPDX document being built,
531
+ which will be modified in place
532
+
533
+ Note:
534
+ This method modifies the spdx_document dictionary in place by adding
535
+ entries to the 'hasExtractedLicensingInfos' list.
536
+ """
314
537
  for lic_ref in lic_refs:
315
538
  license_info = self._parse_license_ref(lic_ref)
316
539
  spdx_document['hasExtractedLicensingInfos'].append(license_info)
317
540
 
318
541
  def _parse_license_ref(self, lic_ref: str) -> dict:
319
- """Parse license reference and create info dictionary."""
542
+ """
543
+ Parse license reference and create info dictionary for SPDX document.
544
+
545
+ This method extracts information from a license reference identifier
546
+ and formats it into the structure required by the SPDX specification
547
+ for extracted licensing information.
548
+
549
+ Args:
550
+ lic_ref (str): License reference identifier to parse
551
+
552
+ Returns:
553
+ dict: Dictionary containing required SPDX fields for extracted license info:
554
+ - licenseId: The unique identifier for this license
555
+ - name: A readable name for the license
556
+ - extractedText: A placeholder for the actual license text
557
+ - comment: Information about how the license was detected
558
+ """
320
559
  source, name = self._extract_license_info(lic_ref)
321
560
  source_text = f' by {source}.' if source else '.'
322
561
 
@@ -328,7 +567,21 @@ class SpdxLite:
328
567
  }
329
568
 
330
569
  def _extract_license_info(self, lic_ref: str):
331
- """Extract source and name from license reference."""
570
+ """
571
+ Extract source and name from license reference.
572
+
573
+ This method parses a license reference string to extract the source
574
+ (e.g., scancode, scanoss) and the actual license name using regular
575
+ expressions.
576
+
577
+ Args:
578
+ lic_ref (str): License reference identifier to parse
579
+
580
+ Returns:
581
+ tuple: A tuple containing (source, name) where:
582
+ - source (str): The tool or system that identified the license
583
+ - name (str): The actual license name
584
+ """
332
585
  match = re.search(r'^LicenseRef-(scancode-|scanoss-|)(\S+)$', lic_ref, re.IGNORECASE)
333
586
  if match:
334
587
  source = match.group(1).replace('-', '')
@@ -416,8 +669,6 @@ class SpdxLite:
416
669
  self._spdx_licenses[lic_id_short] = lic_id
417
670
  if lic_name:
418
671
  self._spdx_lic_names[lic_name] = lic_id
419
- # self.print_stderr(f'Licenses: {self._spdx_licenses}')
420
- # self.print_stderr(f'Lookup: {self._spdx_lic_lookup}')
421
672
  return True
422
673
 
423
674
  def get_spdx_license_id(self, lic_name: str) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: scanoss
3
- Version: 1.20.3
3
+ Version: 1.20.5
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS
@@ -1 +0,0 @@
1
- date: 20250303125404, utime: 1741006444
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes