fosslight-source 2.2.0__tar.gz → 2.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {fosslight_source-2.2.0/src/fosslight_source.egg-info → fosslight_source-2.2.1}/PKG-INFO +2 -2
  2. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/requirements.txt +1 -1
  3. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/setup.py +1 -1
  4. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/_parsing_scancode_file_item.py +1 -29
  5. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/_parsing_scanoss_file.py +4 -18
  6. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/_scan_item.py +0 -80
  7. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/cli.py +16 -34
  8. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/run_scancode.py +33 -31
  9. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/run_scanoss.py +9 -22
  10. fosslight_source-2.2.1/src/fosslight_source/run_spdx_extractor.py +37 -0
  11. {fosslight_source-2.2.0 → fosslight_source-2.2.1/src/fosslight_source.egg-info}/PKG-INFO +2 -2
  12. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source.egg-info/requires.txt +1 -1
  13. fosslight_source-2.2.0/src/fosslight_source/run_spdx_extractor.py +0 -50
  14. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/LICENSE +0 -0
  15. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/MANIFEST.in +0 -0
  16. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/README.md +0 -0
  17. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/setup.cfg +0 -0
  18. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/__init__.py +0 -0
  19. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/_help.py +0 -0
  20. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source/_license_matched.py +0 -0
  21. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source.egg-info/SOURCES.txt +0 -0
  22. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source.egg-info/dependency_links.txt +0 -0
  23. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source.egg-info/entry_points.txt +0 -0
  24. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/src/fosslight_source.egg-info/top_level.txt +0 -0
  25. {fosslight_source-2.2.0 → fosslight_source-2.2.1}/tests/test_tox.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.0
3
+ Version: 2.2.1
4
4
  Summary: FOSSLight Source Scanner
5
5
  Home-page: https://github.com/fosslight/fosslight_source_scanner
6
6
  Download-URL: https://github.com/fosslight/fosslight_source_scanner
@@ -17,7 +17,7 @@ License-File: LICENSE
17
17
  Requires-Dist: pyparsing
18
18
  Requires-Dist: scanoss>=1.18.0
19
19
  Requires-Dist: XlsxWriter
20
- Requires-Dist: fosslight_util>=2.1.31
20
+ Requires-Dist: fosslight_util>=2.1.34
21
21
  Requires-Dist: PyYAML
22
22
  Requires-Dist: wheel>=0.38.1
23
23
  Requires-Dist: intbitset
@@ -1,7 +1,7 @@
1
1
  pyparsing
2
2
  scanoss>=1.18.0
3
3
  XlsxWriter
4
- fosslight_util>=2.1.31
4
+ fosslight_util>=2.1.34
5
5
  PyYAML
6
6
  wheel>=0.38.1
7
7
  intbitset
@@ -14,7 +14,7 @@ with open('requirements.txt', 'r', 'utf-8') as f:
14
14
  if __name__ == "__main__":
15
15
  setup(
16
16
  name='fosslight_source',
17
- version='2.2.0',
17
+ version='2.2.1',
18
18
  package_dir={"": "src"},
19
19
  packages=find_packages(where='src'),
20
20
  description='FOSSLight Source Scanner',
@@ -10,12 +10,9 @@ import fosslight_util.constant as constant
10
10
  from fosslight_util.get_pom_license import get_license_from_pom
11
11
  from ._license_matched import MatchedLicense
12
12
  from ._scan_item import SourceItem
13
- from ._scan_item import is_exclude_dir
14
- from ._scan_item import is_exclude_file
15
13
  from ._scan_item import replace_word
16
14
  from ._scan_item import is_notice_file
17
15
  from ._scan_item import is_manifest_file
18
- from ._scan_item import is_package_dir
19
16
  from typing import Tuple
20
17
 
21
18
  logger = logging.getLogger(constant.LOGGER_NAME)
@@ -83,8 +80,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
83
80
  msg = []
84
81
  scancode_file_item = []
85
82
  license_list = {} # Key :[license]+[matched_text], value: MatchedLicense()
86
- prev_dir = ""
87
- prev_dir_value = False
88
83
 
89
84
  if scancode_file_list:
90
85
  for file in scancode_file_list:
@@ -96,22 +91,11 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
96
91
  is_binary = file.get("is_binary", False)
97
92
  if "type" in file:
98
93
  is_dir = file["type"] == "directory"
99
- if is_dir:
100
- prev_dir_value = is_exclude_dir(file_path)
101
- prev_dir = file_path
102
-
103
94
  if not is_binary and not is_dir:
104
95
  licenses = file.get("licenses", [])
105
96
  copyright_list = file.get("copyrights", [])
106
97
 
107
98
  result_item = SourceItem(file_path)
108
- is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
109
- if is_pkg:
110
- result_item.source_name_or_path = pkg_path
111
- if not any(x.source_name_or_path == result_item.source_name_or_path for x in scancode_file_item):
112
- result_item.exclude = True
113
- scancode_file_item.append(result_item)
114
- continue
115
99
 
116
100
  if has_error and "scan_errors" in file:
117
101
  error_msg = file.get("scan_errors", [])
@@ -238,8 +222,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
238
222
  set(license_expression_list))
239
223
  result_item.comment = ','.join(license_expression_list)
240
224
 
241
- if is_exclude_file(file_path, prev_dir, prev_dir_value):
242
- result_item.exclude = True
243
225
  scancode_file_item.append(result_item)
244
226
  except Exception as ex:
245
227
  msg.append(f"Error Parsing item: {ex}")
@@ -271,17 +253,9 @@ def parsing_scancode_32_later(
271
253
  is_binary = file.get("is_binary", False)
272
254
  is_dir = file.get("type", "") == "directory"
273
255
  if (not file_path) or is_binary or is_dir:
256
+ logger.info(f"Skipping {file_path} because it is binary or directory")
274
257
  continue
275
-
276
258
  result_item = SourceItem(file_path)
277
- is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
278
- if is_pkg:
279
- result_item.source_name_or_path = pkg_path
280
- if not any(x.source_name_or_path == result_item.source_name_or_path for x in scancode_file_item):
281
- result_item.exclude = True
282
- scancode_file_item.append(result_item)
283
- continue
284
-
285
259
  if has_error:
286
260
  error_msg = file.get("scan_errors", [])
287
261
  if error_msg:
@@ -334,8 +308,6 @@ def parsing_scancode_32_later(
334
308
  license_list[lic_matched_key] = lic_info
335
309
  license_detected.append(found_lic)
336
310
  result_item.licenses = license_detected
337
-
338
- result_item.exclude = is_exclude_file(file_path)
339
311
  file_ext = os.path.splitext(file_path)[1].lower()
340
312
  is_source_file = file_ext and file_ext in SOURCE_EXTENSIONS
341
313
  result_item.is_license_text = is_notice_file(file_path) or (
@@ -3,12 +3,9 @@
3
3
  # Copyright (c) 2020 LG Electronics Inc.
4
4
  # SPDX-License-Identifier: Apache-2.0
5
5
 
6
- import os
7
6
  import logging
8
7
  import fosslight_util.constant as constant
9
8
  from ._scan_item import SourceItem
10
- from ._scan_item import is_exclude_file
11
- from ._scan_item import is_package_dir
12
9
  from ._scan_item import replace_word
13
10
  from typing import Tuple
14
11
 
@@ -18,7 +15,7 @@ SCANOSS_INFO_HEADER = ['No', 'Source Path', 'Component Declared', 'SPDX Tag',
18
15
  'Matched Rate (line number)', 'scanoss_fileURL']
19
16
 
20
17
 
21
- def parsing_extraInfo(scanned_result: dict) -> list:
18
+ def parsing_extra_info(scanned_result: dict) -> list:
22
19
  scanoss_extra_info = []
23
20
  for scan_item in scanned_result:
24
21
  license_w_source = scan_item.scanoss_reference
@@ -37,22 +34,14 @@ def parsing_extraInfo(scanned_result: dict) -> list:
37
34
  return scanoss_extra_info
38
35
 
39
36
 
40
- def parsing_scanResult(scanoss_report: dict, path_to_scan: str = "", path_to_exclude: list = []) -> Tuple[bool, list]:
37
+ def parsing_scan_result(scanoss_report: dict, excluded_files: set = None) -> Tuple[bool, list]:
41
38
  scanoss_file_item = []
42
- abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
43
39
 
44
40
  for file_path, findings in scanoss_report.items():
45
- abs_file_path = os.path.abspath(os.path.join(path_to_scan, file_path))
46
- if any(os.path.commonpath([abs_file_path, exclude_path]) == exclude_path for exclude_path in abs_path_to_exclude):
41
+ file_path_normalized = file_path.replace('\\', '/')
42
+ if file_path_normalized in excluded_files:
47
43
  continue
48
44
  result_item = SourceItem(file_path)
49
- is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path))
50
- if is_pkg:
51
- result_item.source_name_or_path = pkg_path
52
- if not any(x.source_name_or_path == result_item.source_name_or_path for x in scanoss_file_item):
53
- result_item.exclude = True
54
- scanoss_file_item.append(result_item)
55
- continue
56
45
 
57
46
  if 'id' in findings[0]:
58
47
  if "none" == findings[0]['id']:
@@ -86,9 +75,6 @@ def parsing_scanResult(scanoss_report: dict, path_to_scan: str = "", path_to_exc
86
75
  result_item.licenses = license_detected
87
76
  result_item.scanoss_reference = license_w_source
88
77
 
89
- if is_exclude_file(file_path):
90
- result_item.exclude = True
91
-
92
78
  if 'file_url' in findings[0]:
93
79
  result_item.fileURL = findings[0]['file_url']
94
80
  if 'matched' in findings[0]:
@@ -19,14 +19,6 @@ _notice_filename = ['licen[cs]e[s]?', 'notice[s]?', 'legal', 'copyright[s]?', 'c
19
19
  '[a,l]?gpl[-]?[1-3]?[.,-,_]?[0-1]?', 'mit', 'bsd[-]?[0-4]?', 'bsd[-]?[0-4][-]?clause[s]?',
20
20
  'apache[-,_]?[1-2]?[.,-,_]?[0-2]?']
21
21
  _manifest_filename = [r'.*\.pom$', r'package\.json$', r'setup\.py$', r'pubspec\.yaml$', r'.*\.podspec$', r'Cargo\.toml$']
22
- _exclude_filename = ["changelog", "config.guess", "config.sub", "changes", "ltmain.sh",
23
- "configure", "configure.ac", "depcomp", "compile", "missing", "makefile"]
24
- _exclude_extension = [".m4", ".in", ".po"]
25
- _exclude_directory = ["test", "tests", "doc", "docs"]
26
- _exclude_directory = [os.path.sep + dir_name +
27
- os.path.sep for dir_name in _exclude_directory]
28
- _exclude_directory.append("/.")
29
- _package_directory = ["node_modules", "venv", "Pods", "Carthage"]
30
22
  MAX_LICENSE_LENGTH = 200
31
23
  MAX_LICENSE_TOTAL_LENGTH = 600
32
24
  SUBSTRING_LICENSE_COMMENT = "Maximum character limit (License)"
@@ -209,39 +201,6 @@ class SourceItem(FileItem):
209
201
  return self.source_name_or_path == other.source_name_or_path
210
202
 
211
203
 
212
- def is_exclude_dir(dir_path: str) -> bool:
213
- if dir_path:
214
- dir_path = dir_path.lower()
215
- dir_path = dir_path if dir_path.endswith(
216
- os.path.sep) else dir_path + os.path.sep
217
- dir_path = dir_path if dir_path.startswith(
218
- os.path.sep) else os.path.sep + dir_path
219
- return any(dir_name in dir_path for dir_name in _exclude_directory)
220
- return False
221
-
222
-
223
- def is_exclude_file(file_path: str, prev_dir: str = None, prev_dir_exclude_value: bool = None) -> bool:
224
- file_path = file_path.lower()
225
- filename = os.path.basename(file_path)
226
- if os.path.splitext(filename)[1] in _exclude_extension:
227
- return True
228
- if filename.startswith('.') or filename in _exclude_filename:
229
- return True
230
-
231
- dir_path = os.path.dirname(file_path)
232
- if prev_dir is not None: # running ScanCode
233
- if dir_path == prev_dir:
234
- return prev_dir_exclude_value
235
- else:
236
- # There will be no execution of this else statement.
237
- # Because scancode json output results are sorted by path,
238
- # most of them will match the previous if statement.
239
- return is_exclude_dir(dir_path)
240
- else: # running SCANOSS
241
- return is_exclude_dir(dir_path)
242
- return False
243
-
244
-
245
204
  def is_notice_file(file_path: str) -> bool:
246
205
  pattern = r"({})(?<!w)".format("|".join(_notice_filename))
247
206
  filename = os.path.basename(file_path)
@@ -252,42 +211,3 @@ def is_manifest_file(file_path: str) -> bool:
252
211
  pattern = r"({})$".format("|".join(_manifest_filename))
253
212
  filename = os.path.basename(file_path)
254
213
  return bool(re.match(pattern, filename, re.IGNORECASE))
255
-
256
-
257
- def is_package_dir(dir_path: str) -> bool:
258
- # scancode and scanoss use '/' as path separator regardless of OS
259
- dir_path = dir_path.replace('\\', '/')
260
- path_parts = dir_path.split('/')
261
-
262
- for pkg_dir in _package_directory:
263
- if pkg_dir in path_parts:
264
- pkg_index = path_parts.index(pkg_dir)
265
- pkg_path = '/'.join(path_parts[:pkg_index + 1])
266
- return True, pkg_path
267
- return False, ""
268
-
269
-
270
- def _has_parent_in_exclude_list(rel_path: str, path_to_exclude: list) -> bool:
271
- path_parts = rel_path.replace('\\', '/').split('/')
272
- for i in range(1, len(path_parts)):
273
- parent_path = '/'.join(path_parts[:i])
274
- if parent_path in path_to_exclude:
275
- return True
276
- return False
277
-
278
-
279
- def get_excluded_paths(path_to_scan: str, custom_excluded_paths: list = []) -> list:
280
- path_to_exclude = custom_excluded_paths.copy()
281
- abs_path_to_scan = os.path.abspath(path_to_scan)
282
-
283
- for root, dirs, files in os.walk(path_to_scan):
284
- for dir_name in dirs:
285
- dir_path = os.path.join(root, dir_name)
286
- rel_path = os.path.relpath(dir_path, abs_path_to_scan)
287
- if not _has_parent_in_exclude_list(rel_path, path_to_exclude):
288
- if dir_name in _package_directory:
289
- path_to_exclude.append(rel_path)
290
- elif is_exclude_dir(rel_path):
291
- path_to_exclude.append(rel_path)
292
-
293
- return path_to_exclude
@@ -14,13 +14,12 @@ from datetime import datetime
14
14
  import fosslight_util.constant as constant
15
15
  from fosslight_util.set_log import init_log
16
16
  from fosslight_util.timer_thread import TimerThread
17
- from fosslight_util.exclude import excluding_files
18
17
  from ._help import print_version, print_help_msg_source_scanner
19
18
  from ._license_matched import get_license_list_to_print
20
19
  from fosslight_util.output_format import check_output_formats_v2, write_output_file
21
20
  from fosslight_util.correct import correct_with_yaml
22
21
  from .run_scancode import run_scan
23
- from ._scan_item import get_excluded_paths
22
+ from fosslight_util.exclude import get_excluded_paths
24
23
  from .run_scanoss import run_scanoss_py
25
24
  from .run_scanoss import get_scanoss_extra_info
26
25
  import yaml
@@ -38,6 +37,9 @@ MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Path', 'OSS Name',
38
37
  'OSS Version', 'License', 'Download Location',
39
38
  'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'license_reference']}
40
39
  SCANNER_TYPE = ['kb', 'scancode', 'scanoss', 'all']
40
+ EXCLUDE_FILENAME = ["changelog", "config.guess", "config.sub", "changes", "ltmain.sh",
41
+ "configure", "configure.ac", "depcomp", "compile", "missing", "Makefile"]
42
+ EXCLUDE_FILE_EXTENSION = [".m4", ".in", ".po"]
41
43
 
42
44
  logger = logging.getLogger(constant.LOGGER_NAME)
43
45
  warnings.filterwarnings("ignore", category=FutureWarning)
@@ -126,23 +128,6 @@ def main() -> None:
126
128
  sys.exit(1)
127
129
 
128
130
 
129
- def count_files(path_to_scan: str, path_to_exclude: list) -> Tuple[int, int]:
130
- total_files = 0
131
- excluded_files = 0
132
- abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
133
-
134
- for root, _, files in os.walk(path_to_scan):
135
- for file in files:
136
- file_path = os.path.join(root, file)
137
- abs_file_path = os.path.abspath(file_path)
138
- if any(os.path.commonpath([abs_file_path, exclude_path]) == exclude_path
139
- for exclude_path in abs_path_to_exclude):
140
- excluded_files += 1
141
- total_files += 1
142
-
143
- return total_files, excluded_files
144
-
145
-
146
131
  def create_report_file(
147
132
  _start_time: str, merged_result: list,
148
133
  license_list: list, scanoss_result: list,
@@ -150,7 +135,7 @@ def create_report_file(
150
135
  output_path: str = "", output_files: list = [],
151
136
  output_extensions: list = [], correct_mode: bool = True,
152
137
  correct_filepath: str = "", path_to_scan: str = "", path_to_exclude: list = [],
153
- formats: list = [], excluded_file_list: list = [], api_limit_exceed: bool = False
138
+ formats: list = [], api_limit_exceed: bool = False, files_count: int = 0
154
139
  ) -> 'ScannerItem':
155
140
  """
156
141
  Create report files for given scanned result.
@@ -209,7 +194,6 @@ def create_report_file(
209
194
 
210
195
  scan_item = ScannerItem(PKG_NAME, _start_time)
211
196
  scan_item.set_cover_pathinfo(path_to_scan, path_to_exclude)
212
- files_count, _ = count_files(path_to_scan, path_to_exclude)
213
197
  scan_item.set_cover_comment(f"Scanned files: {files_count}")
214
198
 
215
199
  if api_limit_exceed:
@@ -223,12 +207,6 @@ def create_report_file(
223
207
 
224
208
  if merged_result:
225
209
  sheet_list = {}
226
- # Remove results that are in excluding file list
227
- for i in range(len(merged_result) - 1, -1, -1): # Iterate from last to first
228
- item_path = merged_result[i].source_name_or_path # Assuming SourceItem has 'file_path' attribute
229
- if item_path in excluded_file_list:
230
- del merged_result[i] # Delete matching item
231
-
232
210
  scan_item.append_file_items(merged_result, PKG_NAME)
233
211
 
234
212
  if selected_scanner == 'scanoss':
@@ -365,15 +343,17 @@ def run_scanners(
365
343
 
366
344
  logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
367
345
  True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_scan, path_to_exclude)
368
- excluded_file_list = excluding_files(path_to_exclude, path_to_scan)
369
346
 
370
347
  if '.xlsx' not in output_extensions and print_matched_text:
371
348
  logger.warning("-m option is only available for excel.")
372
349
  print_matched_text = False
373
350
 
374
351
  if success:
375
- excluded_path_with_default_exclusion = get_excluded_paths(path_to_scan, path_to_exclude)
352
+ path_to_exclude_with_filename = path_to_exclude + EXCLUDE_FILENAME
353
+ excluded_path_with_default_exclusion, excluded_path_without_dot, excluded_files, cnt_file_except_skipped = (
354
+ get_excluded_paths(path_to_scan, path_to_exclude_with_filename, EXCLUDE_FILE_EXTENSION))
376
355
  logger.debug(f"Skipped paths: {excluded_path_with_default_exclusion}")
356
+
377
357
  if not selected_scanner:
378
358
  selected_scanner = 'all'
379
359
  if selected_scanner in ['scancode', 'all', 'kb']:
@@ -381,18 +361,20 @@ def run_scanners(
381
361
  write_json_file, num_cores, True,
382
362
  print_matched_text, formats, called_by_cli,
383
363
  time_out, correct_mode, correct_filepath,
384
- excluded_path_with_default_exclusion)
364
+ excluded_path_with_default_exclusion,
365
+ excluded_files)
366
+ excluded_files = set(excluded_files) if excluded_files else set()
385
367
  if selected_scanner in ['scanoss', 'all']:
386
368
  scanoss_result, api_limit_exceed = run_scanoss_py(path_to_scan, output_file_name, formats, True, write_json_file,
387
- num_cores, excluded_path_with_default_exclusion)
369
+ num_cores, excluded_path_with_default_exclusion, excluded_files)
388
370
  if selected_scanner in SCANNER_TYPE:
389
371
  run_kb = True if selected_scanner in ['kb', 'all'] else False
390
- spdx_downloads = get_spdx_downloads(path_to_scan, excluded_path_with_default_exclusion)
372
+ spdx_downloads = get_spdx_downloads(path_to_scan, excluded_files)
391
373
  merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads, path_to_scan, run_kb)
392
374
  scan_item = create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
393
375
  print_matched_text, output_path, output_files, output_extensions, correct_mode,
394
- correct_filepath, path_to_scan, path_to_exclude, formats, excluded_file_list,
395
- api_limit_exceed)
376
+ correct_filepath, path_to_scan, excluded_path_without_dot, formats,
377
+ api_limit_exceed, cnt_file_except_skipped)
396
378
  else:
397
379
  print_help_msg_source_scanner()
398
380
  result_log[RESULT_KEY] = "Unsupported scanner"
@@ -29,7 +29,8 @@ def run_scan(
29
29
  return_results: bool = False, need_license: bool = False,
30
30
  formats: list = [], called_by_cli: bool = False,
31
31
  time_out: int = 120, correct_mode: bool = True,
32
- correct_filepath: str = "", path_to_exclude: list = []
32
+ correct_filepath: str = "", path_to_exclude: list = [],
33
+ excluded_files: list = []
33
34
  ) -> Tuple[bool, str, list, list]:
34
35
  if not called_by_cli:
35
36
  global logger
@@ -90,39 +91,40 @@ def run_scan(
90
91
  exclude_path_normalized = os.path.normpath(exclude_path).replace("\\", "/")
91
92
 
92
93
  if exclude_path_normalized.endswith("/**"):
93
- exclude_path_normalized = exclude_path_normalized[:-3]
94
- elif exclude_path_normalized.endswith("**"):
95
- exclude_path_normalized = exclude_path_normalized.rstrip("*")
96
-
97
- if exclude_path_normalized.startswith("**/"):
98
- exclude_path_normalized = exclude_path_normalized[3:]
99
-
100
- full_exclude_path = os.path.join(abs_path_to_scan, exclude_path)
101
- is_dir = os.path.isdir(full_exclude_path)
102
- is_file = os.path.isfile(full_exclude_path)
103
- if is_dir:
104
- dir_name = os.path.basename(exclude_path_normalized.rstrip("/"))
105
- base_path = exclude_path_normalized.rstrip("/")
106
-
107
- if dir_name:
108
- total_files_to_excluded.append(dir_name)
109
- max_depth = 0
110
- for root, dirs, files in os.walk(full_exclude_path):
111
- depth = root[len(full_exclude_path):].count(os.sep)
112
- max_depth = max(max_depth, depth)
113
- for depth in range(1, max_depth + 2):
114
- pattern = base_path + "/*" * depth
115
- total_files_to_excluded.append(pattern)
94
+ base_dir = exclude_path_normalized[:-3].rstrip("/")
95
+ if base_dir:
96
+ full_exclude_path = os.path.join(abs_path_to_scan, base_dir)
97
+ if os.path.isdir(full_exclude_path):
98
+ total_files_to_excluded.append(base_dir)
99
+ total_files_to_excluded.append(exclude_path_normalized)
100
+ else:
101
+ total_files_to_excluded.append(exclude_path_normalized)
116
102
  else:
117
103
  total_files_to_excluded.append(exclude_path_normalized)
118
- elif is_file:
119
- total_files_to_excluded.append(exclude_path_normalized)
120
104
  else:
121
- if "/" in exclude_path_normalized:
122
- dir_name = os.path.basename(exclude_path_normalized.rstrip("/"))
123
- if dir_name:
124
- total_files_to_excluded.append(dir_name)
125
- total_files_to_excluded.append(exclude_path_normalized)
105
+ has_glob_chars = any(char in exclude_path_normalized for char in ['*', '?', '['])
106
+ if not has_glob_chars:
107
+ full_exclude_path = os.path.join(abs_path_to_scan, exclude_path_normalized)
108
+ is_dir = os.path.isdir(full_exclude_path)
109
+ is_file = os.path.isfile(full_exclude_path)
110
+ else:
111
+ is_dir = False
112
+ is_file = False
113
+
114
+ if is_dir:
115
+ base_path = exclude_path_normalized.rstrip("/")
116
+ if base_path:
117
+ total_files_to_excluded.append(base_path)
118
+ total_files_to_excluded.append(f"{base_path}/**")
119
+ else:
120
+ total_files_to_excluded.append(exclude_path_normalized)
121
+ elif is_file:
122
+ total_files_to_excluded.append(f"**/{exclude_path_normalized}")
123
+ else:
124
+ total_files_to_excluded.append(exclude_path_normalized)
125
+
126
+ if excluded_files:
127
+ total_files_to_excluded.extend(f"**/{file_path}" for file_path in excluded_files)
126
128
 
127
129
  total_files_to_excluded = sorted(list(set(total_files_to_excluded)))
128
130
  ignore_tuple = tuple(total_files_to_excluded)
@@ -12,8 +12,8 @@ from datetime import datetime
12
12
  import fosslight_util.constant as constant
13
13
  from fosslight_util.set_log import init_log
14
14
  from fosslight_util.output_format import check_output_formats_v2 # , write_output_file
15
- from ._parsing_scanoss_file import parsing_scanResult # scanoss
16
- from ._parsing_scanoss_file import parsing_extraInfo # scanoss
15
+ from ._parsing_scanoss_file import parsing_scan_result # scanoss
16
+ from ._parsing_scanoss_file import parsing_extra_info # scanoss
17
17
  import shutil
18
18
  from pathlib import Path
19
19
  from scanoss.scanner import Scanner, ScanType
@@ -28,11 +28,12 @@ SCANOSS_OUTPUT_FILE = "scanoss_raw_result.json"
28
28
 
29
29
 
30
30
  def get_scanoss_extra_info(scanned_result: dict) -> list:
31
- return parsing_extraInfo(scanned_result)
31
+ return parsing_extra_info(scanned_result)
32
32
 
33
33
 
34
- def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list = [], called_by_cli: bool = False,
35
- write_json_file: bool = False, num_threads: int = -1, path_to_exclude: list = []) -> list:
34
+ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list = [],
35
+ called_by_cli: bool = False, write_json_file: bool = False, num_threads: int = -1,
36
+ path_to_exclude: list = [], excluded_files: set = None) -> list:
36
37
  """
37
38
  Run scanoss.py for the given path.
38
39
 
@@ -72,7 +73,7 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
72
73
  try:
73
74
  scanner = Scanner(
74
75
  ignore_cert_errors=True,
75
- skip_folders=path_to_exclude,
76
+ skip_folders=list(path_to_exclude) if path_to_exclude else [],
76
77
  scan_output=output_json_file,
77
78
  scan_options=ScanType.SCAN_SNIPPETS.value,
78
79
  nb_threads=num_threads if num_threads > 0 else 10
@@ -86,30 +87,16 @@ def run_scanoss_py(path_to_scan: str, output_file_name: str = "", format: list =
86
87
  logger.debug(f"{captured_output}")
87
88
 
88
89
  if os.path.isfile(output_json_file):
89
- total_files_to_excluded = []
90
- if path_to_exclude:
91
- for path in path_to_exclude:
92
- path = os.path.join(path_to_scan, os.path.relpath(path, os.path.abspath(path_to_scan))) \
93
- if not os.path.isabs(path_to_scan) and os.path.isabs(path) else os.path.join(path_to_scan, path)
94
- if os.path.isdir(path):
95
- for root, _, files in os.walk(path):
96
- root = root[len(path_to_scan) + 1:]
97
- total_files_to_excluded.extend([os.path.normpath(os.path.join(root, file)).replace('\\', '/')
98
- for file in files])
99
- elif os.path.isfile(path):
100
- path = path[len(path_to_scan) + 1:]
101
- total_files_to_excluded.append(os.path.normpath(path).replace('\\', '/'))
102
-
103
90
  with open(output_json_file, "r") as st_json:
104
91
  st_python = json.load(st_json)
105
- for key_to_exclude in total_files_to_excluded:
92
+ for key_to_exclude in excluded_files:
106
93
  if key_to_exclude in st_python:
107
94
  del st_python[key_to_exclude]
108
95
  with open(output_json_file, 'w') as st_json:
109
96
  json.dump(st_python, st_json, indent=4)
110
97
  with open(output_json_file, "r") as st_json:
111
98
  st_python = json.load(st_json)
112
- scanoss_file_list = parsing_scanResult(st_python, path_to_scan, path_to_exclude)
99
+ scanoss_file_list = parsing_scan_result(st_python, excluded_files)
113
100
 
114
101
  except Exception as error:
115
102
  logger.debug(f"SCANOSS Parsing {path_to_scan}: {error}")
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright (c) 2023 LG Electronics Inc.
4
+ # SPDX-License-Identifier: Apache-2.0
5
+
6
+ import os
7
+ import logging
8
+ import re
9
+ import fosslight_util.constant as constant
10
+ import mmap
11
+
12
+ logger = logging.getLogger(constant.LOGGER_NAME)
13
+
14
+
15
+ def get_spdx_downloads(path_to_scan: str, path_to_exclude: set = None) -> dict:
16
+ download_dict = {}
17
+ find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
18
+ abs_path_to_scan = os.path.abspath(path_to_scan)
19
+
20
+ for root, dirs, files in os.walk(path_to_scan):
21
+ for file in files:
22
+ file_path = os.path.join(root, file)
23
+ rel_path_file = os.path.relpath(file_path, abs_path_to_scan).replace('\\', '/')
24
+ if rel_path_file in path_to_exclude:
25
+ continue
26
+ try:
27
+ if os.path.getsize(file_path) > 0:
28
+ with open(file_path, "r") as f:
29
+ with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
30
+ for word in find_word.findall(mmap_obj):
31
+ if rel_path_file in download_dict:
32
+ download_dict[rel_path_file].append(word.decode('utf-8'))
33
+ else:
34
+ download_dict[rel_path_file] = [word.decode('utf-8')]
35
+ except Exception as ex:
36
+ logger.warning(f"Failed to extract SPDX download location. {rel_path_file}, {ex}")
37
+ return download_dict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fosslight_source
3
- Version: 2.2.0
3
+ Version: 2.2.1
4
4
  Summary: FOSSLight Source Scanner
5
5
  Home-page: https://github.com/fosslight/fosslight_source_scanner
6
6
  Download-URL: https://github.com/fosslight/fosslight_source_scanner
@@ -17,7 +17,7 @@ License-File: LICENSE
17
17
  Requires-Dist: pyparsing
18
18
  Requires-Dist: scanoss>=1.18.0
19
19
  Requires-Dist: XlsxWriter
20
- Requires-Dist: fosslight_util>=2.1.31
20
+ Requires-Dist: fosslight_util>=2.1.34
21
21
  Requires-Dist: PyYAML
22
22
  Requires-Dist: wheel>=0.38.1
23
23
  Requires-Dist: intbitset
@@ -1,7 +1,7 @@
1
1
  pyparsing
2
2
  scanoss>=1.18.0
3
3
  XlsxWriter
4
- fosslight_util>=2.1.31
4
+ fosslight_util>=2.1.34
5
5
  PyYAML
6
6
  wheel>=0.38.1
7
7
  intbitset
@@ -1,50 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- # Copyright (c) 2023 LG Electronics Inc.
4
- # SPDX-License-Identifier: Apache-2.0
5
-
6
- import os
7
- import logging
8
- import re
9
- import fosslight_util.constant as constant
10
- import mmap
11
-
12
- logger = logging.getLogger(constant.LOGGER_NAME)
13
-
14
-
15
- def get_file_list(path_to_scan: str, path_to_exclude: list = []) -> list:
16
- file_list = []
17
- abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
18
- for root, dirs, files in os.walk(path_to_scan):
19
- for file in files:
20
- file_path = os.path.join(root, file)
21
- abs_file_path = os.path.abspath(file_path)
22
- if any(os.path.commonpath([abs_file_path, exclude_path]) == exclude_path
23
- for exclude_path in abs_path_to_exclude):
24
- continue
25
- file_list.append(file_path)
26
- return file_list
27
-
28
-
29
- def get_spdx_downloads(path_to_scan: str, path_to_exclude: list = []) -> dict:
30
- download_dict = {}
31
- find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
32
-
33
- file_list = get_file_list(path_to_scan, path_to_exclude)
34
-
35
- for file in file_list:
36
- try:
37
- rel_path_file = os.path.relpath(file, path_to_scan)
38
- # remove the path_to_scan from the file paths
39
- if os.path.getsize(file) > 0:
40
- with open(file, "r") as f:
41
- with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
42
- for word in find_word.findall(mmap_obj):
43
- if rel_path_file in download_dict:
44
- download_dict[rel_path_file].append(word.decode('utf-8'))
45
- else:
46
- download_dict[rel_path_file] = [word.decode('utf-8')]
47
- except Exception as ex:
48
- msg = str(ex)
49
- logger.warning(f"Failed to extract SPDX download location. {rel_path_file}, {msg}")
50
- return download_dict