codeaudit 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeaudit/__about__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Maikel Mardjan <mike@bm-support.org>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "1.5.0"
4
+ __version__ = "1.6.0"
@@ -19,6 +19,7 @@ from codeaudit.security_checks import perform_validations , ast_security_checks
19
19
  from codeaudit.totals import overview_per_file , get_statistics , overview_count , total_modules
20
20
  from codeaudit.checkmodules import get_all_modules , get_imported_modules_by_file , get_standard_library_modules , check_module_vulnerability
21
21
  from codeaudit.pypi_package_scan import get_pypi_download_info , get_package_source
22
+ from codeaudit.suppression import filter_sast_results
22
23
 
23
24
  from pathlib import Path
24
25
  import json
@@ -27,6 +28,7 @@ import pandas as pd
27
28
  import platform
28
29
  from collections import Counter
29
30
 
31
+
30
32
  import altair as alt
31
33
 
32
34
  def version():
@@ -35,7 +37,7 @@ def version():
35
37
  return {"name" : "Python_Code_Audit",
36
38
  "version" : ca_version}
37
39
 
38
- def filescan(input_path):
40
+ def filescan(input_path , nosec=False):
39
41
  """
40
42
  Scan a Python source file, a local directory, or a **PyPI package** from PyPI.org for
41
43
  security weaknesses and return the results as a JSON-serializable
@@ -102,14 +104,14 @@ def filescan(input_path):
102
104
  if file_path.is_dir(): #local directory scan
103
105
  package_name = get_filename_from_path(input_path)
104
106
  output |= {"package_name": package_name}
105
- scan_output = _codeaudit_directory_scan(input_path)
107
+ scan_output = _codeaudit_directory_scan(input_path, nosec_flag=nosec )
106
108
  output |= scan_output
107
109
  return output
108
110
  elif file_path.suffix.lower() == ".py" and file_path.is_file() and is_ast_parsable(input_path): #check on parseable single Python file
109
111
  # do a file check
110
112
  file_information = overview_per_file(input_path)
111
113
  module_information = get_modules(input_path) # modules per file
112
- scan_output = _codeaudit_scan(input_path)
114
+ scan_output = _codeaudit_scan(input_path , nosec_flag=nosec)
113
115
  file_output["0"] = file_information | module_information | scan_output #there is only 1 file, so it is stored under index "0" exactly as in a package scan; code that consumes the dict or JSON output can then handle a package and a single file the same way
114
116
  output |= { "file_security_info" : file_output}
115
117
  return output
@@ -122,7 +124,7 @@ def filescan(input_path):
122
124
  output |= {"package_name": package_name,
123
125
  "package_release": release}
124
126
  try:
125
- scan_output = _codeaudit_directory_scan(src_dir)
127
+ scan_output = _codeaudit_directory_scan(src_dir , nosec_flag=nosec)
126
128
  output |= scan_output
127
129
  finally:
128
130
  # Cleaning up temp directory
@@ -132,20 +134,24 @@ def filescan(input_path):
132
134
  # Its not a directory nor a valid Python file:
133
135
  return {"Error" : "File is not a *.py file, does not exist or is not a valid directory path towards a Python package."}
134
136
 
135
- def _codeaudit_scan(filename):
137
+ def _codeaudit_scan(filename , nosec_flag):
136
138
  """Internal helper function to do a SAST scan on a single file
137
139
  To scan a file, or Python package using the API interface, use the `filescan` API call!
138
140
  """
139
141
  #get the file name
140
- name_of_file = get_filename_from_path(filename)
141
- sast_data = perform_validations(filename)
142
+ name_of_file = get_filename_from_path(filename)
143
+ if not nosec_flag: #no filtering on reviewed items with markers in code
144
+ sast_data = perform_validations(filename)
145
+ else:
146
+ unfiltered_scan_output = perform_validations(filename) #scans for weaknesses in the file
147
+ sast_data = filter_sast_results(unfiltered_scan_output)
142
148
  sast_data_results = sast_data["result"]
143
149
  sast_result = dict(sorted(sast_data_results.items()))
144
150
  output = { "file_name" : name_of_file ,
145
151
  "sast_result": sast_result}
146
152
  return output
147
153
 
148
- def _codeaudit_directory_scan(input_path):
154
+ def _codeaudit_directory_scan(input_path , nosec_flag):
149
155
  """Performs a scan on a local directory
150
156
  Function is also used with scanning directory PyPI.org packages, since in that case a tmp directory is used
151
157
  """
@@ -160,7 +166,7 @@ def _codeaudit_directory_scan(input_path):
160
166
  for i,file in enumerate(files_to_check):
161
167
  file_information = overview_per_file(file)
162
168
  module_information = get_modules(file) # modules per file
163
- scan_output = _codeaudit_scan(file)
169
+ scan_output = _codeaudit_scan(file , nosec_flag )
164
170
  file_output[i] = file_information | module_information | scan_output
165
171
  output |= { "file_security_info" : file_output}
166
172
  return output
@@ -216,36 +222,90 @@ def read_input_file(filename):
216
222
  raise json.JSONDecodeError(f"Invalid JSON in file: {filename}", e.doc, e.pos)
217
223
 
218
224
 
219
- def get_construct_counts(input_file):
225
+
226
+
227
+
228
+ def get_weakness_counts(input_file, nosec=False):
220
229
  """
221
- Analyze a Python file or package(directory) and count occurrences of code constructs (aka weaknesses).
230
+ Analyze a Python file or package (directory) and count occurrences of code weaknesses.
222
231
 
223
- This function uses `filescan` API call to retrieve security-related information
224
- about the input file. This returns a dict. Then it counts how many times each code construct
225
- appears across all scanned files.
232
+ This function uses the `filescan` API call to retrieve security-related information
233
+ and aggregates the total number of occurrences per weakness construct.
226
234
 
227
235
  Args:
228
- input_file (str): Path to the file or directory(package) to scan.
236
+ input_file (str): Path to the file or directory (package) to scan.
237
+ nosec (bool): Whether to suppress findings marked with nosec comments.
229
238
 
230
239
  Returns:
231
240
  dict: A dictionary mapping each construct name (str) to the total
232
- number of occurrences (int) across all scanned files.
241
+ number of occurrences (int).
242
+
243
+ Raises:
244
+ ValueError: If the scan fails or returns an error result.
245
+ TypeError: If the scan result has an unexpected structure.
246
+ """
247
+ scan_result = filescan(input_file, nosec)
248
+
249
+ # Explicitly handle scan failure or unexpected return
250
+ if not isinstance(scan_result, dict):
251
+ raise ValueError("filescan() did not return a valid result dictionary")
252
+
253
+ if "Error" in scan_result:
254
+ raise ValueError(scan_result["Error"])
255
+
256
+ file_security_info = scan_result.get("file_security_info")
257
+ if not isinstance(file_security_info, dict):
258
+ # Valid scan, but no findings (e.g. empty or non-parsable input)
259
+ return {}
233
260
 
234
- Notes:
235
- - The `filescan` function is expected to return a dictionary with
236
- a 'file_security_info' key, containing per-file information.
237
- - Each file's 'sast_result' should be a dictionary mapping
238
- construct names to lists of occurrences.
239
- """
240
- scan_result = filescan(input_file)
241
261
  counter = Counter()
262
+
263
+ for file_info in file_security_info.values():
264
+ if not isinstance(file_info, dict):
265
+ continue
266
+
267
+ sast_result = file_info.get("sast_result", {})
268
+ if not isinstance(sast_result, dict):
269
+ continue
270
+
271
+ for construct, occurrences in sast_result.items():
272
+ if isinstance(occurrences, (list, tuple)):
273
+ counter[construct] += len(occurrences)
274
+
275
+ return dict(counter)
276
+
277
+
278
+
279
+ # def get_weakness_counts(input_file , nosec=False):
280
+ # """
281
+ # Analyze a Python file or package(directory) and count occurrences of code weaknesses.
282
+
283
+ # This function uses `filescan` API call to retrieve security-related information
284
+ # about the input file. This returns a dict. Then it counts how many times each code construct
285
+ # appears across all scanned files.
286
+
287
+ # Args:
288
+ # input_file (str): Path to the file or directory(package) to scan.
289
+
290
+ # Returns:
291
+ # dict: A dictionary mapping each construct name (str) to the total
292
+ # number of occurrences (int) across all scanned files.
293
+
294
+ # Notes:
295
+ # - The `filescan` function is expected to return a dictionary with
296
+ # a 'file_security_info' key, containing per-file information.
297
+ # - Each file's 'sast_result' should be a dictionary mapping
298
+ # construct names to lists of occurrences.
299
+ # """
300
+ # scan_result = filescan(input_file, nosec)
301
+ # counter = Counter()
242
302
 
243
- for file_info in scan_result.get('file_security_info', {}).values():
244
- sast_result = file_info.get('sast_result', {})
245
- for construct, occurence in sast_result.items(): #occurence is times the construct appears in a single file
246
- counter[construct] += len(occurence)
303
+ # for file_info in scan_result.get('file_security_info', {}).values():
304
+ # sast_result = file_info.get('sast_result', {})
305
+ # for construct, occurrence in sast_result.items(): #occurrence is times the construct appears in a single file
306
+ # counter[construct] += len(occurrence)
247
307
 
248
- return dict(counter)
308
+ # return dict(counter)
249
309
 
250
310
  def get_modules(filename):
251
311
  """Gets modules of a Python file """
@@ -47,6 +47,9 @@ Subprocess Usage,subprocess.call,High,Requires careful input validation to preve
47
47
  Subprocess Usage,subprocess.check_call,High,Requires careful input validation to prevent command injection vulnerabilities.
48
48
  Subprocess Usage,subprocess.Popen,Medium,Requires careful input validation to prevent command injection vulnerabilities.
49
49
  Subprocess Usage,subprocess.run,Medium,Requires careful input validation to prevent command injection vulnerabilities.
50
+ Subprocess Usage,subprocess.check_output,Medium,Requires careful input validation to prevent command injection vulnerabilities.
51
+ Subprocess Usage,subprocess.getstatusoutput,Medium,Requires careful input validation to prevent command injection vulnerabilities.
52
+ Subprocess Usage,subprocess.getoutput,Medium,Requires careful input validation to prevent command injection vulnerabilities.
50
53
  Tarfile Extraction,tarfile.TarFile,High,Vulnerable to path traversal attacks if used with untrusted archives.
51
54
  Base64 Encoding ,base64,Low,"Base64 encoding is not for security. It only visually hides data and provides no confidentiality. Often used to obfuscate malware in code."
52
55
  XML-RPC Client,xmlrpc.client,High,Vulnerable to denial-of-service via decompression bombs.
@@ -19,6 +19,7 @@ APP_KEY
19
19
  APP_SECRET
20
20
  AUTH
21
21
  auth_key
22
+ auth_password
22
23
  AUTH_SECRET
23
24
  auth_token
24
25
  AUTH_TOKEN
@@ -78,7 +78,7 @@ def find_constructs(source_code, constructs_to_detect):
78
78
  elif node.func.attr in ('input') and 'builtins' in core_modules: #catch obfuscating construct with builtins module
79
79
  construct = 'input'
80
80
  elif node.func.attr in ('compile') and 'builtins' in core_modules: #catch obfuscating construct with builtins module
81
- construct = 'compile'
81
+ construct = 'compile'
82
82
  elif isinstance(func, ast.Name):
83
83
  resolved = alias_map.get(func.id, func.id)
84
84
  if resolved in constructs_to_detect:
codeaudit/privacy_lint.py CHANGED
@@ -148,11 +148,11 @@ def match_secret(secrets, name, value):
148
148
  value_lower = str(value).lower()
149
149
 
150
150
  # Shorter secrets first to preserve original behavior
151
- for secret in sorted(secrets, key=len):
152
- pattern = re.compile(rf"\b{re.escape(secret)}\b")
151
+ for secret_tag in sorted(secrets, key=len):
152
+ pattern = re.compile(rf"\b{re.escape(secret_tag)}\b")
153
153
 
154
154
  if pattern.search(name_lower) or pattern.search(value_lower):
155
- return secret
155
+ return secret_tag
156
156
 
157
157
  return None
158
158
 
@@ -104,7 +104,7 @@ def get_package_source(url, nocxheaders=NOCX_HEADERS, nocxtimeout=10):
104
104
  f.write(content)
105
105
 
106
106
  with tarfile.open(tar_path, "r:gz") as tar:
107
- tar.extractall(path=temp_dir,filter='data') #Possible risks are mitigated as far as possible, see architecture notes.
107
+ tar.extractall(path=temp_dir,filter='data') # nosec Possible risks are mitigated as far as possible, see architecture notes.
108
108
 
109
109
  return temp_dir, tmpdir_obj # return both so caller controls lifetime
110
110
 
codeaudit/reporting.py CHANGED
@@ -16,6 +16,7 @@ Reporting functions for codeaudit
16
16
  import re
17
17
  import os
18
18
  from pathlib import Path
19
+ import sys
19
20
 
20
21
  import pandas as pd
21
22
  import html
@@ -30,10 +31,14 @@ from codeaudit.htmlhelpfunctions import json_to_html , dict_list_to_html_table
30
31
  from codeaudit import __version__
31
32
  from codeaudit.pypi_package_scan import get_pypi_download_info , get_package_source
32
33
  from codeaudit.privacy_lint import secret_scan , has_privacy_findings
34
+ from codeaudit.suppression import filter_sast_results
33
35
 
34
36
  from importlib.resources import files
35
37
 
36
38
 
39
+
40
+
41
+
37
42
  PYTHON_CODE_AUDIT_TEXT = '<a href="https://github.com/nocomplexity/codeaudit" target="_blank"><b>Python Code Audit</b></a>'
38
43
  DISCLAIMER_TEXT = (
39
44
  "<p><b>Disclaimer:</b> <i>This SAST tool "
@@ -41,6 +46,7 @@ DISCLAIMER_TEXT = (
41
46
  + " provides a powerful, automatic security analysis for Python source code. However, it's not a substitute for human review in combination with business knowledge. Undetected vulnerabilities may still exist.</i></p>"
42
47
  )
43
48
 
49
+ NOSEC_WARNING = '<p><b>INFO</b>: The --nosec flag is active. Security findings with in-line suppressions will be excluded from the report.</p>'
44
50
 
45
51
  SIMPLE_CSS_FILE = files('codeaudit') / 'simple.css'
46
52
 
@@ -207,30 +213,35 @@ def display_found_modules(modules_discovered):
207
213
  return output
208
214
 
209
215
 
210
- def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE):
216
+ def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE, nosec=False):
211
217
  """Scans Python source code or PyPI packages for security weaknesses.
212
-
213
218
  This function performs static application security testing (SAST) on a
214
- given input, which can be:
219
+ specified input. The input can be one of the following:
215
220
 
216
- - A local directory containing Python source code
217
- - A single local Python file
218
- - A package name hosted on PyPI.org
221
+ * A local directory containing Python source code
222
+ * A single local Python file
223
+ * The name of a package hosted on PyPI
219
224
 
220
- codeaudit filescan <pythonfile|package-name|directory> [reportname.html]
225
+ codeaudit filescan <pythonfile|package-name|directory> [reportname.html] [--nosec]
221
226
 
222
- Depending on the input type, the function analyzes the source code for
223
- potential security issues, generates an HTML report summarizing the
224
- findings, and writes the report to a static HTML file.
227
+ Based on the input type, the function analyzes the source code for potential
228
+ security issues, generates an HTML report summarizing the findings, and
229
+ writes the report to disk.
225
230
 
226
231
  If a PyPI package name is provided, the function downloads the source
227
- distribution (sdist), scans the extracted source code, and removes all
228
- temporary files after the scan completes.
232
+ distribution (sdist), extracts it to a temporary directory, scans the
233
+ extracted source code, and cleans up all temporary files after the scan
234
+ completes.
235
+
236
+ Examples:
229
237
 
230
- Example:
231
238
  Scan a local directory and write the report to ``report.html``::
232
239
 
233
- codeaudit filescan_/shitwork/custompythonmodule/
240
+ codeaudit filescan /path/to/custompythonmodule report.html
241
+
242
+ Scan a local directory::
243
+
244
+ codeaudit filescan /path/to/project
234
245
 
235
246
  Scan a single Python file::
236
247
 
@@ -238,31 +249,66 @@ def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE):
238
249
 
239
250
  Scan a package hosted on PyPI::
240
251
 
241
- codeaudit filescan linkaudit #A nice project to check broken links in markdown files
252
+ codeaudit filescan linkaudit
242
253
 
243
254
  codeaudit filescan requests
244
255
 
256
+
257
+ Specify an output report file::
258
+
259
+ codeaudit filescan /path/to/project report.html
260
+
261
+ Enable filtering of issues marked with ``#nosec`` or another marker on potential code weaknesses that are mitigated or known::
262
+
263
+ codeaudit filescan myexample.py --nosec
264
+
265
+ POSITIONAL ARGUMENTS
266
+ INPUT_PATH
267
+ Path to a local Python file or directory, or the name of a package available on PyPI.
268
+
269
+
270
+ FLAGS
271
+ -f, --filename=FILENAME
272
+ Default: 'codeaudit-report.html'
273
+ -n, --nosec=NOSEC
274
+ Default: False
275
+
276
+
245
277
  Args:
278
+
279
+ -f, --filename=FILENAME
280
+ Default: 'codeaudit-report.html'
281
+ Name (and optional path) of the HTML file to write the scan report to. The filename should use the ``.html`` extension. Defaults to ``DEFAULT_OUTPUT_FILE``.
282
+ -n, --nosec=NOSEC
283
+ Default: False
284
+ Whether to filter out issues marked as reviewed or ignored in the source code. Defaults to ``False``, no filtering.
285
+
246
286
  input_path (str): Path to a local Python file or directory, or the name
247
- of a package available on PyPI.org.
287
+ of a package available on PyPI.
248
288
  filename (str, optional): Name (and optional path) of the HTML file to
249
289
  write the scan report to. The filename should use the ``.html``
250
290
  extension. Defaults to ``DEFAULT_OUTPUT_FILE``.
291
+ nosec (bool, optional): Whether to filter out issues marked as reviewed
292
+ or ignored in the source code. Defaults to ``False``, no filtering.
251
293
 
252
294
  Returns:
253
- None. The function writes a static HTML security report to disk.
295
+ None: The function writes a static HTML security report to disk.
254
296
 
255
297
  Raises:
256
- None explicitly. Errors and invalid inputs are reported to stdout.
298
+ None: Errors and invalid inputs are reported to stdout.
257
299
  """
258
300
  # Check if the input is a valid directory or a single valid Python file
259
301
  # In case no local file or directory is found, check if the given input is a PyPI package name
260
302
  file_path = Path(input_path)
261
303
  if file_path.is_dir():
262
- directory_scan_report(input_path , filename ) #create a package aka directory scan report
304
+ directory_scan_report(input_path , nosec_flag=nosec, filename=filename) #create a package aka directory scan report
263
305
  elif file_path.suffix == ".py" and file_path.is_file() and is_ast_parsable(input_path):
264
306
  #create a sast file check report
265
- scan_output = perform_validations(input_path) #scans for weaknesses in the file
307
+ if not nosec: #no filtering on reviewed items with markers in code
308
+ scan_output = perform_validations(input_path) #scans for weaknesses in the file
309
+ else:
310
+ unfiltered_scan_output = perform_validations(input_path) #scans for weaknesses in the file
311
+ scan_output = filter_sast_results(unfiltered_scan_output)
266
312
  spy_output = secret_scan(input_path) #scans for secrets in the file
267
313
  file_report_html = single_file_report(input_path , scan_output)
268
314
  secrets_report_html = secrets_report(spy_output)
@@ -270,6 +316,8 @@ def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE):
270
316
  html_output = '<h1>Python Code Audit Report</h1>' #prepared to be embedded to display multiple reports, so <h2> used
271
317
  html_output += f'<h2>Security scan: {name_of_file}</h2>'
272
318
  html_output += '<p>' + f'Location of the file: {input_path} </p>'
319
+ if nosec:
320
+ html_output += NOSEC_WARNING
273
321
  html_output += file_report_html
274
322
  html_output += secrets_report_html
275
323
  html_output += '<br>'
@@ -285,8 +333,8 @@ def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE):
285
333
  if url is not None:
286
334
  print(url)
287
335
  print(release)
288
- src_dir, tmp_handle = get_package_source(url)
289
- directory_scan_report(src_dir , filename , package_name, release ) #create scan report for a package or directory
336
+ src_dir, tmp_handle = get_package_source(url)
337
+ directory_scan_report(src_dir , nosec_flag=nosec, filename=filename, package_name=package_name , release=release ) #create a package aka directory scan report
290
338
  # Cleaning up temp directory
291
339
  tmp_handle.cleanup() # deletes everything from temp directory
292
340
  else:
@@ -411,7 +459,7 @@ def single_file_report(filename , scan_output):
411
459
  return output
412
460
 
413
461
 
414
- def directory_scan_report(directory_to_scan , filename=DEFAULT_OUTPUT_FILE , package_name=None , release=None):
462
+ def directory_scan_report(directory_to_scan , nosec_flag, filename=DEFAULT_OUTPUT_FILE , package_name=None , release=None ):
415
463
  """Reports potential security issues for all Python files found in a directory.
416
464
 
417
465
  This function performs security validations on all files found in a specified directory.
@@ -444,12 +492,18 @@ def directory_scan_report(directory_to_scan , filename=DEFAULT_OUTPUT_FILE , pac
444
492
  else:
445
493
  output += f'<p>Below the result of the Codeaudit scan of the directory:<b> {name_of_package}</b></p>'
446
494
  output += f'<p>Total Python files found: <b>{len(files_to_check)}</b></p>'
495
+ if nosec_flag:
496
+ output += NOSEC_WARNING
447
497
  number_of_files = len(files_to_check)
448
498
  print(f'Number of files that are checked for security issues:{number_of_files}')
449
499
  printProgressBar(0, number_of_files, prefix='Progress:', suffix='Complete', length=50)
450
500
  for i,file_to_scan in enumerate(files_to_check):
451
- printProgressBar(i + 1, number_of_files, prefix='Progress:', suffix='Complete', length=50)
452
- scan_output = perform_validations(file_to_scan)
501
+ printProgressBar(i + 1, number_of_files, prefix='Progress:', suffix='Complete', length=50)
502
+ if not nosec_flag: #no filtering on reviewed items with markers in code
503
+ scan_output = perform_validations(file_to_scan) #scans for weaknesses in the file
504
+ else:
505
+ unfiltered_scan_output = perform_validations(file_to_scan) #scans for weaknesses in the file
506
+ scan_output = filter_sast_results(unfiltered_scan_output)
453
507
  spy_output = secret_scan(file_to_scan) #scans for secrets in the file
454
508
  data = scan_output["result"]
455
509
  if data or has_privacy_findings(spy_output):
@@ -599,64 +653,132 @@ def collect_issue_lines(filename, line):
599
653
  return code_lines
600
654
 
601
655
 
602
- def create_htmlfile(html_input,outputfile):
603
- """ Creates a clean html file based on html input given """
604
- # Read CSS from the file - So it is included in the reporting HTML file
656
+ def create_htmlfile(html_input, outputfile):
657
+ """Creates a clean html file based on html input given"""
658
+
659
+ output_path = Path(outputfile).expanduser().resolve()
660
+
661
+ # Validate output directory (CLI-friendly)
662
+ if not output_path.parent.is_dir():
663
+ print(
664
+ f"Error: output directory does not exist:\n {output_path.parent}",
665
+ file=sys.stderr,
666
+ )
667
+ sys.exit(1)
668
+
669
+ # Read CSS so it is included in the reporting HTML file
670
+ css_content = Path(SIMPLE_CSS_FILE).read_text(encoding="utf-8")
605
671
 
606
- with open(SIMPLE_CSS_FILE, 'r') as css_file:
607
- css_content = css_file.read()
608
672
  # Start building the HTML
609
673
  output = '<!DOCTYPE html><html lang="en-US"><head>'
610
674
  output += '<meta charset="UTF-8"/>'
611
675
  output += '<title>Python_Code_Audit_SecurityReport</title>'
612
- # Inline CSS inside <style> block
613
- output += f'<style>\n{css_content}\n</style>'
614
- output += '<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>' # needed for altair plots
615
- output += '<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>' # needed for altair plots
616
- output += '<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>' # needed for altair plots
676
+ output += f'<style>\n{css_content}\n</style>'
677
+ output += '<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>'
678
+ output += '<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>'
679
+ output += '<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>'
617
680
  output += '</head><body>'
618
681
  output += '<div class="container">'
619
682
  output += html_input
683
+
620
684
  now = datetime.datetime.now()
621
685
  timestamp_str = now.strftime("%Y-%m-%d %H:%M")
622
- code_audit_version = __version__
686
+ code_audit_version = __version__
687
+
623
688
  output += (
624
689
  f"<p>This Python security report was created on: <b>{timestamp_str}</b> with "
625
690
  + PYTHON_CODE_AUDIT_TEXT
626
691
  + f" version <b>{code_audit_version}</b></p>"
627
692
  )
693
+
628
694
  output += '<hr>'
629
- output += '<footer>'
695
+ output += '<footer>'
630
696
  output += (
631
697
  '<div class="footer-links">'
632
698
  'Check the <a href="https://nocomplexity.com/documents/codeaudit/intro.html" '
633
699
  'target="_blank">documentation</a> for help on found issues.<br>'
634
700
  'Codeaudit is made with <span class="heart">&#10084;</span> by cyber security '
635
- 'professionals who advocate for <a href="https://nocomplexity.com/simplify-security/" target="_blank">open simple security solutions</a>.<br>'
636
- '<a href="https://nocomplexity.com/documents/codeaudit/CONTRIBUTE.html" target="_blank">Join the community</a> and contribute to make this tool better!'
637
- "</div>"
701
+ 'professionals who advocate for <a href="https://nocomplexity.com/simplify-security/" '
702
+ 'target="_blank">open simple security solutions</a>.<br>'
703
+ '<a href="https://nocomplexity.com/documents/codeaudit/CONTRIBUTE.html" '
704
+ 'target="_blank">Join the community</a> and contribute to make this tool better!'
705
+ '</div>'
638
706
  )
639
- output += "</footer>"
640
- output += '</div>' #base container
707
+ output += '</footer>'
708
+ output += '</div>'
641
709
  output += '</body></html>'
642
- # Now create the HTML output file
643
- with open(outputfile, 'w') as f:
644
- f.write(output)
645
- current_directory = os.getcwd()
646
- # Get the directory of the output file (if any)
647
- directory_for_output = os.path.dirname(os.path.abspath(outputfile))
648
- filename_only = os.path.basename(outputfile)
649
- # Determine the effective directory to use in the file URL
650
- if not directory_for_output or directory_for_output == current_directory:
651
- file_url = f'file://{current_directory}/{filename_only}'
652
- else:
653
- file_url = f'file://{directory_for_output}/{filename_only}'
654
- # Print the result
710
+
711
+ # Write the HTML file
712
+ output_path.write_text(output, encoding="utf-8")
713
+
655
714
  print("\n=====================================================================")
656
- print(f'Code Audit report file created!\nPaste the line below directly into your browser bar:\n\t{file_url}\n')
715
+ print(
716
+ "Code Audit report file created!\n"
717
+ "Paste the line below directly into your browser bar:\n"
718
+ f"\t{output_path.as_uri()}\n"
719
+ )
657
720
  print("=====================================================================\n")
658
721
 
659
722
 
723
+
724
+ # def create_htmlfile(html_input,outputfile):
725
+ # """ Creates a clean html file based on html input given """
726
+ # # Read CSS from the file - So it is included in the reporting HTML file
727
+
728
+ # with open(SIMPLE_CSS_FILE, 'r') as css_file:
729
+ # css_content = css_file.read()
730
+ # # Start building the HTML
731
+ # output = '<!DOCTYPE html><html lang="en-US"><head>'
732
+ # output += '<meta charset="UTF-8"/>'
733
+ # output += '<title>Python_Code_Audit_SecurityReport</title>'
734
+ # # Inline CSS inside <style> block
735
+ # output += f'<style>\n{css_content}\n</style>'
736
+ # output += '<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>' # needed for altair plots
737
+ # output += '<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>' # needed for altair plots
738
+ # output += '<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>' # needed for altair plots
739
+ # output += '</head><body>'
740
+ # output += '<div class="container">'
741
+ # output += html_input
742
+ # now = datetime.datetime.now()
743
+ # timestamp_str = now.strftime("%Y-%m-%d %H:%M")
744
+ # code_audit_version = __version__
745
+ # output += (
746
+ # f"<p>This Python security report was created on: <b>{timestamp_str}</b> with "
747
+ # + PYTHON_CODE_AUDIT_TEXT
748
+ # + f" version <b>{code_audit_version}</b></p>"
749
+ # )
750
+ # output += '<hr>'
751
+ # output += '<footer>'
752
+ # output += (
753
+ # '<div class="footer-links">'
754
+ # 'Check the <a href="https://nocomplexity.com/documents/codeaudit/intro.html" '
755
+ # 'target="_blank">documentation</a> for help on found issues.<br>'
756
+ # 'Codeaudit is made with <span class="heart">&#10084;</span> by cyber security '
757
+ # 'professionals who advocate for <a href="https://nocomplexity.com/simplify-security/" target="_blank">open simple security solutions</a>.<br>'
758
+ # '<a href="https://nocomplexity.com/documents/codeaudit/CONTRIBUTE.html" target="_blank">Join the community</a> and contribute to make this tool better!'
759
+ # "</div>"
760
+ # )
761
+ # output += "</footer>"
762
+ # output += '</div>' #base container
763
+ # output += '</body></html>'
764
+ # # Now create the HTML output file
765
+ # with open(outputfile, 'w') as f:
766
+ # f.write(output)
767
+ # current_directory = os.getcwd()
768
+ # # Get the directory of the output file (if any)
769
+ # directory_for_output = os.path.dirname(os.path.abspath(outputfile))
770
+ # filename_only = os.path.basename(outputfile)
771
+ # # Determine the effective directory to use in the file URL
772
+ # if not directory_for_output or directory_for_output == current_directory:
773
+ # file_url = f'file://{current_directory}/{filename_only}'
774
+ # else:
775
+ # file_url = f'file://{directory_for_output}/{filename_only}'
776
+ # # Print the result
777
+ # print("\n=====================================================================")
778
+ # print(f'Code Audit report file created!\nPaste the line below directly into your browser bar:\n\t{file_url}\n')
779
+ # print("=====================================================================\n")
780
+
781
+
660
782
  def extract_altair_html(plot_html):
661
783
  match = re.search(r"<body[^>]*>(.*?)</body>", plot_html, re.DOTALL | re.IGNORECASE)
662
784
  if match:
@@ -49,9 +49,9 @@ def perform_validations(sourcefile):
49
49
 
50
50
  name_of_file = get_filename_from_path (sourcefile)
51
51
 
52
- result = {'Name file' : name_of_file ,
52
+ result = {'file_name' : name_of_file ,
53
53
  'file_location': sourcefile ,
54
- 'Checks done:' : constructs ,
54
+ 'checks_done:' : constructs ,
55
55
  'result': scan_result}
56
56
 
57
57
  return result
@@ -0,0 +1,233 @@
1
+ import ast
2
+ import tokenize
3
+ from collections import defaultdict
4
+ import re
5
+ import sys
6
+
7
def get_all_comments_by_line(filename):
    """
    Collect every real ``#`` comment of a Python source file, keyed by line.

    The file is tokenized once, so ``#`` characters that live inside string
    literals are not mistaken for comments. The file is opened with
    ``tokenize.open`` so its encoding is detected from a BOM or a PEP 263
    encoding declaration.

    Args:
        filename: Path of the Python source file to read.

    Returns:
        dict[int, str]: 1-based line number -> comment text with the leading
        ``#`` characters and surrounding whitespace removed. Lines without a
        non-empty comment are absent from the mapping.

    Raises:
        RuntimeError: If the file cannot be opened, decoded or tokenized.
            The underlying exception is chained as ``__cause__``.
    """
    comments_by_line = {}

    try:
        with tokenize.open(filename) as f:
            for token in tokenize.generate_tokens(f.readline):
                if token.type != tokenize.COMMENT:
                    continue
                text = token.string.lstrip("# \t").rstrip()
                if text:
                    # A comment runs to the end of its physical line, so there
                    # is at most one COMMENT token per line number.
                    comments_by_line[token.start[0]] = text

    except (OSError, UnicodeDecodeError, SyntaxError, tokenize.TokenError) as exc:
        # SyntaxError (and its subclasses IndentationError / TabError) is
        # raised by tokenize for inconsistent dedents and by tokenize.open
        # for an invalid encoding declaration; wrap it like the other
        # failures so callers only have to deal with one exception type.
        # Fail loudly with context instead of silently ignoring.
        raise RuntimeError(
            f"Failed to extract comments from {filename}"
        ) from exc

    return comments_by_line
32
+
33
+
34
+
35
+
36
+
37
+
38
def get_start_to_end_lines(filename):
    """
    Map every line on which an AST node starts to the last line it covers.

    The file is read and parsed once. When several nodes start on the same
    line, the largest ``end_lineno`` among them is kept.

    The file is opened with ``tokenize.open`` so that a UTF-8 BOM or a
    PEP 263 encoding declaration is honoured (the same way the comment
    extraction in this module reads the file); reading with a hard-coded
    ``utf-8`` codec would make such files fail to decode or parse.

    Args:
        filename: Path of the Python source file to analyse.

    Returns:
        dict[int, int]: start line -> highest end line (both 1-based).
        An empty dict is returned, and a diagnostic is printed to
        ``sys.stderr``, if the file cannot be read or parsed. This function
        does not raise for those failures.
    """
    # --- Step 1: read the source text -------------------------------------
    try:
        with tokenize.open(filename) as f:
            source = f.read()
    except FileNotFoundError:
        print(f"File not found: {filename}", file=sys.stderr)
        return {}
    except PermissionError:
        print(f"Permission denied: {filename}", file=sys.stderr)
        return {}
    except IsADirectoryError:
        print(f"Is a directory, not a file: {filename}", file=sys.stderr)
        return {}
    except OSError as e:
        print(f"OS error opening {filename}: {e}", file=sys.stderr)
        return {}
    except (SyntaxError, ValueError) as e:
        # SyntaxError: invalid encoding declaration detected by tokenize.open.
        # ValueError: includes UnicodeDecodeError raised while decoding.
        print(
            f"Cannot read {filename} properly: {type(e).__name__}: {e}",
            file=sys.stderr
        )
        return {}
    except Exception as e:
        print(
            f"Critical error while accessing {filename}: "
            f"{type(e).__name__}: {e}",
            file=sys.stderr
        )
        return {}

    # --- Step 2: parse the source into an AST ------------------------------
    try:
        tree = ast.parse(source, filename=filename)
    except SyntaxError as e:
        print(
            f"Syntax error in {filename} (line {e.lineno}): {e.msg}",
            file=sys.stderr
        )
        return {}
    except ValueError as e:
        print(
            f"Cannot read {filename} properly: {type(e).__name__}: {e}",
            file=sys.stderr
        )
        return {}
    except MemoryError:
        print(f"Out of memory while parsing {filename}", file=sys.stderr)
        return {}
    except Exception as e:
        print(
            f"Unexpected error parsing AST of {filename}: "
            f"{type(e).__name__}: {e}",
            file=sys.stderr
        )
        return {}

    # --- Step 3: record the widest span per start line ---------------------
    # NOTE(review): compound statements (if/for/while/with/def/class) span
    # their whole body, so a start line holding such a header maps to the end
    # of the block. Callers scanning comments over this range will also see
    # comments inside the body - confirm this is the intended reach.
    end_lines = {}
    for node in ast.walk(tree):
        # Not every node carries position info (e.g. Module, Load/Store).
        start = getattr(node, 'lineno', None)
        if start is None:
            continue

        # end_lineno can be missing or None; fall back to a one-line span.
        end = getattr(node, 'end_lineno', None) or start

        # Keep the maximum span for nodes starting on the same line.
        if start not in end_lines or end > end_lines[start]:
            end_lines[start] = end

    return end_lines
113
+
114
+
115
+ # def get_start_to_end_lines(filename):
116
+ # """
117
+ # Parse AST once and build mapping: start_line → highest end_line found for nodes
118
+ # starting on that line.
119
+ # """
120
+ # end_lines = {}
121
+
122
+ # try:
123
+ # with open(filename, 'r', encoding='utf-8') as f:
124
+ # source = f.read()
125
+ # tree = ast.parse(source)
126
+
127
+ # for node in ast.walk(tree):
128
+ # if not hasattr(node, 'lineno'):
129
+ # continue
130
+ # start = node.lineno
131
+ # end = getattr(node, 'end_lineno', start)
132
+ # # Take the maximum end line if multiple nodes start on same line
133
+ # if start not in end_lines or end > end_lines[start]:
134
+ # end_lines[start] = end
135
+ # except Exception:
136
+ # pass
137
+
138
+ # return end_lines
139
+
140
+
141
def is_suppressed(line, comments_by_line, start_to_end, match_func):
    """
    Tell whether the statement that starts at `line` carries a suppression.

    Every line from `line` up to and including the statement's end line
    (looked up in `start_to_end`, defaulting to `line` itself) is inspected.
    The comment text of each line (an empty string when the line has no
    comment) is handed to `match_func`; the first truthy answer wins.

    Returns:
        bool: True if any inspected comment matches, otherwise False.
    """
    last_line = start_to_end.get(line, line)
    return any(
        match_func(comments_by_line.get(candidate, ""))
        for candidate in range(line, last_line + 1)
    )
152
+
153
+
154
def filter_sast_results(sast_dict):
    """
    Build a copy of a scan result with suppressed findings stripped out.

    `sast_dict` must contain ``"file_location"`` (the scanned file) and may
    contain ``"result"``, a mapping whose list values hold the line numbers
    of findings. The source file is tokenized and parsed only once; a finding
    is dropped when a suppression marker is found in a comment anywhere on
    the lines of the statement starting at the finding's line.

    List entries that end up empty are removed together with their key;
    non-list entries are carried over untouched. When there is nothing to
    filter, a shallow copy of the input is returned as-is.
    """
    source_path = sast_dict["file_location"]
    findings = sast_dict.get("result", {})

    if not findings:
        return sast_dict.copy()

    # Every distinct line number that has at least one finding.
    flagged = {
        ln
        for entry in findings.values()
        if isinstance(entry, list)
        for ln in entry
    }
    if not flagged:
        return sast_dict.copy()

    # The file is read exactly once for comments and once for the AST.
    comments = get_all_comments_by_line(source_path)
    spans = get_start_to_end_lines(source_path)

    # Lines whose findings survive (no suppression marker found).
    surviving = {
        ln
        for ln in sorted(flagged)
        if not is_suppressed(ln, comments, spans, match_suppression_keyword)
    }

    pruned = {}
    for check, entry in findings.items():
        if not isinstance(entry, list):
            pruned[check] = entry
            continue
        remaining = [ln for ln in entry if ln in surviving]
        if remaining:
            pruned[check] = remaining

    outcome = sast_dict.copy()
    outcome["result"] = pruned
    return outcome
200
+
201
+
202
def match_suppression_keyword(comment_line):
    """
    Report whether a comment contains one of the known suppression markers.

    Matching is case-insensitive and works on whole tokens: the comment is
    lower-cased, leading ``#`` characters are stripped from each word, and
    the text is split on anything that is not a word character or a hyphen.

    Returns:
        bool: True if any marker equals one of the tokens; False otherwise,
        including for an empty or missing comment.
    """
    if not comment_line:
        return False

    markers = {
        marker.lower()
        for marker in (
            "nosec",
            "nosemgrep",
            "sast-ignore",
            "ignore-sast",
            "security-ignore",
            "ignore-security",
            "NOSONAR",
            "noqa",
            # False positive / risk handling
            "false-positive",
            "falsepositive",
            "risk-accepted",
            "security-accepted",
            "security-reviewed",
            "security-exception",
        )
    }

    cleaned_words = (
        piece.lstrip("#").lower()
        for piece in comment_line.split()
    )
    found = set(re.split(r"[^\w\-]+", " ".join(cleaned_words)))
    return not found.isdisjoint(markers)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeaudit
3
- Version: 1.5.0
3
+ Version: 1.6.0
4
4
  Summary: Simplified static security checks for Python
5
5
  Project-URL: Documentation, https://github.com/nocomplexity/codeaudit#readme
6
6
  Project-URL: Issues, https://github.com/nocomplexity/codeaudit/issues
@@ -0,0 +1,25 @@
1
+ codeaudit/__about__.py,sha256=EZ0swjOPnWsY4bG29vXRMJsA2zyCpDKGUv7nXcLLL5E,144
2
+ codeaudit/__init__.py,sha256=YGs6qU0BVHPGtXCS-vfBDLO4TOfJDLTWMgaFDTmi_Iw,157
3
+ codeaudit/altairplots.py,sha256=gBXN1_wxUmjzTNizvzbOeCKvUxpClGPdZmK7ICK1x68,4531
4
+ codeaudit/api_interfaces.py,sha256=6GGz7k1fuSkzEXGjoqavQCmawTh0PVQNglttzSArFWI,17573
5
+ codeaudit/api_reporting.py,sha256=W8eutTJ0d-TENbv5cCmAOfu4GEp_RwiQ4XU5FCmfkoI,1736
6
+ codeaudit/checkmodules.py,sha256=aiF34KO-9HZDRgVBtSwVFdeUxT5_Ka5VtmlfgoLgNVs,5582
7
+ codeaudit/codeaudit.py,sha256=g2HzRX6a3fckKUhyRrk6n3-5qNdVYtZRI1gqQ-QNl10,3775
8
+ codeaudit/complexitycheck.py,sha256=A3_a5v-U0YQr80pWQwSVvOsY_eQtqwNkQf9Txr9mNtQ,3722
9
+ codeaudit/filehelpfunctions.py,sha256=-5kIymEUcc7j0bRBS4XblvE3pbi3rWjkU5O2M_tinvM,4374
10
+ codeaudit/htmlhelpfunctions.py,sha256=-SMsyfF7TRIfJkrUqoJuh7AoG1RVrYFsZfFljoxVHXc,3246
11
+ codeaudit/issuevalidations.py,sha256=zf2Gr7KpyvA05K17IX05pQy-1oQWnbapVIvcUMcbNn8,6441
12
+ codeaudit/privacy_lint.py,sha256=Rcefen7RswwJWnoE-Vrr2iE3zFjNoE19qW_O7LjGfN4,10264
13
+ codeaudit/pypi_package_scan.py,sha256=dmk3xBUL0mZ5aCIc1fRVpuI1UIx1ejnOqfc4qB04748,4730
14
+ codeaudit/reporting.py,sha256=AHgkbKOaAjBSh2ePZFFqm-MWdb2ZYTMmcFvOJy1wdLQ,43298
15
+ codeaudit/security_checks.py,sha256=IuJMo99188TgJoYfTpMQiCs3Dchw4EvCGWuwh_Cds7k,2167
16
+ codeaudit/simple.css,sha256=H7KT61oXJkVr9qXVrC5ME_Zph9jI-uR2IxOsXG1xs5k,4013
17
+ codeaudit/suppression.py,sha256=zSLarg79pahStnXFklf_ERQvDXFgOr375BtPXEVSQjA,7060
18
+ codeaudit/totals.py,sha256=b6OkzcMdqGKPwuGBKrwAeCxBOJxHa5FHauGWnEb-6zM,6387
19
+ codeaudit/data/sastchecks.csv,sha256=dZDOgpVqFz3jPWWiLI-6CXE_SmOQ9Ay6N98NV72ay5w,10122
20
+ codeaudit/data/secretslist.txt,sha256=BoVX6bijqaL5g-2JRGGf0x-S8NhZWtt7fzovZ1NrEK8,1905
21
+ codeaudit-1.6.0.dist-info/METADATA,sha256=KMLuS8-HAhww_uVHYyEgWANkx5RZJTqcPDfxZgX5bC8,7814
22
+ codeaudit-1.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
23
+ codeaudit-1.6.0.dist-info/entry_points.txt,sha256=7w6I8zii62nJHIIF30CRP5g1z8enMqF1pZEDdlw4HcQ,55
24
+ codeaudit-1.6.0.dist-info/licenses/LICENSE.txt,sha256=-5gWaMGKJ54oX8TYP7oeg2zITdTapzyWl9PP0tispuA,34674
25
+ codeaudit-1.6.0.dist-info/RECORD,,
@@ -1,24 +0,0 @@
1
- codeaudit/__about__.py,sha256=m0MoVjbAY6gx2X7P9BlRpPZOet3Ry3xAdoXoKNHrJXk,144
2
- codeaudit/__init__.py,sha256=YGs6qU0BVHPGtXCS-vfBDLO4TOfJDLTWMgaFDTmi_Iw,157
3
- codeaudit/altairplots.py,sha256=gBXN1_wxUmjzTNizvzbOeCKvUxpClGPdZmK7ICK1x68,4531
4
- codeaudit/api_interfaces.py,sha256=zWJrLDM8b3b2-rN0gCoPdflEFMzKUz3M7PfXtXvDpd4,15358
5
- codeaudit/api_reporting.py,sha256=W8eutTJ0d-TENbv5cCmAOfu4GEp_RwiQ4XU5FCmfkoI,1736
6
- codeaudit/checkmodules.py,sha256=aiF34KO-9HZDRgVBtSwVFdeUxT5_Ka5VtmlfgoLgNVs,5582
7
- codeaudit/codeaudit.py,sha256=g2HzRX6a3fckKUhyRrk6n3-5qNdVYtZRI1gqQ-QNl10,3775
8
- codeaudit/complexitycheck.py,sha256=A3_a5v-U0YQr80pWQwSVvOsY_eQtqwNkQf9Txr9mNtQ,3722
9
- codeaudit/filehelpfunctions.py,sha256=-5kIymEUcc7j0bRBS4XblvE3pbi3rWjkU5O2M_tinvM,4374
10
- codeaudit/htmlhelpfunctions.py,sha256=-SMsyfF7TRIfJkrUqoJuh7AoG1RVrYFsZfFljoxVHXc,3246
11
- codeaudit/issuevalidations.py,sha256=-WdaXT_R-P9w0JbQpJ5ngVoVhG9Yee2ri0aH5SoC1Ao,6404
12
- codeaudit/privacy_lint.py,sha256=TNS_BnWFXv14PslK9mBsQLwt73Ujcn9FbI7TQSYT0k8,10252
13
- codeaudit/pypi_package_scan.py,sha256=yxCXrRvjc4r0YsJYHvHJuJTyHC5QZl3sRQp73akCXx8,4723
14
- codeaudit/reporting.py,sha256=s3OuiPj6au5oELz-kmI6n-8NooJXjqvBLWKs4tzEg7s,38269
15
- codeaudit/security_checks.py,sha256=wEO_A054zXmLccWGREi6cNADa4IgoOPxHsq-Je5iMIY,2167
16
- codeaudit/simple.css,sha256=H7KT61oXJkVr9qXVrC5ME_Zph9jI-uR2IxOsXG1xs5k,4013
17
- codeaudit/totals.py,sha256=b6OkzcMdqGKPwuGBKrwAeCxBOJxHa5FHauGWnEb-6zM,6387
18
- codeaudit/data/sastchecks.csv,sha256=fIcyZgymCtAluPta9fTEk6a9DJ2AGJczZYRPUIQuSag,9738
19
- codeaudit/data/secretslist.txt,sha256=2Jqt9B5UfcRNeNpys8okmXCn4SYkp9M3_rJrI-KXCbE,1891
20
- codeaudit-1.5.0.dist-info/METADATA,sha256=ZWeMEYTu4ASLGJU5l8Stk8GjMcogzAFDF6NEdFsFmeA,7814
21
- codeaudit-1.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
22
- codeaudit-1.5.0.dist-info/entry_points.txt,sha256=7w6I8zii62nJHIIF30CRP5g1z8enMqF1pZEDdlw4HcQ,55
23
- codeaudit-1.5.0.dist-info/licenses/LICENSE.txt,sha256=-5gWaMGKJ54oX8TYP7oeg2zITdTapzyWl9PP0tispuA,34674
24
- codeaudit-1.5.0.dist-info/RECORD,,