codeaudit 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeaudit/__about__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Maikel Mardjan <mike@bm-support.org>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "1.3.0"
4
+ __version__ = "1.4.0"
codeaudit/altairplots.py CHANGED
@@ -117,3 +117,29 @@ def issue_plot(input_dict):
117
117
  )
118
118
 
119
119
  return chart
120
+
121
+
122
def issue_overview(df):
    """
    Build an Altair donut (arc) chart summarising weakness counts.

    The input DataFrame must provide a 'call' and a 'count' column. The input
    is not modified: a copy is extended with a 'label' column of the form
    "<call> (<count>)", which is used for the colour legend so the counts are
    readable without hovering.

    Returns the Altair chart object.
    """
    # Work on a copy so the caller's DataFrame keeps its original columns.
    labelled = df.copy()
    labelled["label"] = labelled["call"] + " (" + labelled["count"].astype(str) + ")"

    # Slice size follows the count; slice colour follows the combined label.
    slice_size = alt.Theta("count:Q", title="Count")
    slice_colour = alt.Color("label:N", title="Calls (Count)")

    donut = alt.Chart(labelled).mark_arc(innerRadius=50, outerRadius=120)
    donut = donut.encode(
        theta=slice_size,
        color=slice_colour,
        tooltip=["call", "count"],
    )
    return donut.properties(
        title="Overview of Security Weaknesses",
        width=600,
        height=600,
    )
@@ -139,7 +139,7 @@ def read_input_file(filename):
139
139
 
140
140
  def get_construct_counts(input_file):
141
141
  """
142
- Analyze a scan result and count occurrences of code constructs (aka weaknesses).
142
+ Analyze a Python file or package(directory) and count occurrences of code constructs (aka weaknesses).
143
143
 
144
144
  This function uses `filescan` API call to retrieve security-related information
145
145
  about the input file. This returns a dict. Then it counts how many times each code construct
@@ -0,0 +1,36 @@
1
+ """
2
+ License GPLv3 or higher.
3
+
4
+ (C) 2025 Created by Maikel Mardjan - https://nocomplexity.com/
5
+
6
+ This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
7
+
8
+ This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
9
+
10
+ You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
11
+
12
+
13
+ Public API functions for Python Code Audit aka codeaudit on pypi.org
14
+
15
+ All reporting API functions are created based on the Code Audit JSON format that is used when scan results are stored using the `codeaudit.api_interfaces.save_to_json` call!
16
+
17
+ These API functions are on purpose opinionated for one goal: Keep things simple!
18
+ So all results are returned as Pandas Dataframe. This makes things easier for further processing!
19
+
20
+ """
21
+ import pandas as pd
22
+ from collections import Counter
23
+
24
def total_weaknesses(input_file):
    """Return the number of findings per code construct as a DataFrame.

    `input_file` is the scan result dict. For every entry under its
    'file_security_info' key, the 'sast_result' mapping is read; each value
    in that mapping is a collection of occurrences of the construct in one
    file, and its length is added to the construct's running total.

    Returns a DataFrame with the columns 'call' and 'count'.
    """
    per_file_entries = input_file.get('file_security_info', {}).values()

    tally = Counter()
    for entry in per_file_entries:
        findings = entry.get('sast_result', {})
        # Add this file's occurrence count for each construct to the totals.
        tally.update({name: len(hits) for name, hits in findings.items()})

    return pd.DataFrame(list(tally.items()), columns=['call', 'count'])
@@ -64,6 +64,8 @@ Shelve Usage,shelve.DbfilenameShelf,High,"The `shelve` module uses `pickle` inte
64
64
  Unsafe Deserialization: multiprocessing,connection.recv,High,"Uses pickle, which can execute arbitrary code when receiving data. "
65
65
  Unsafe Deserialization: multiprocessing,multiprocessing.connection.Connection,High,Relies on pickle; dangerous with untrusted data.
66
66
  Zipfile Extraction,zipfile.ZipFile,High,Vulnerable to path traversal attacks if used with untrusted archives.
67
+ Zstandard (zstd) decompression,compression.zstd.open,High,Risk of decompression bombs or resource exhaustion with untrusted data.
68
+ Zstandard (zstd) decompression,compression.zstd.decompress,High,Risk of decompression bombs or resource exhaustion with untrusted data.
67
69
  Gzip File Handling,gzip.open,Medium,Risk of decompression bombs or resource exhaustion with untrusted data.
68
70
  BZ2 File Handling,bz2.open,Medium,Decompressing untrusted data can lead to resource exhaustion attacks.
69
71
  BZ2 File Handling,bz2.BZ2File,Medium,Decompressing untrusted data can lead to resource exhaustion attacks.
@@ -0,0 +1,113 @@
1
+ """
2
+ License GPLv3 or higher.
3
+
4
+ (C) 2025 Created by Maikel Mardjan - https://nocomplexity.com/
5
+
6
+ This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
7
+
8
+ This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
9
+
10
+ You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
11
+
12
+
13
+ Public API functions for Python Code Audit aka codeaudit on pypi.org
14
+ """
15
+
16
+
17
+ import gzip
18
+ import zlib
19
+ import tarfile
20
+ import json
21
+ import tempfile
22
+
23
+ from urllib.request import Request, urlopen
24
+ from urllib.error import URLError, HTTPError
25
+
26
+ from codeaudit import __version__
27
+
28
# Default HTTP request headers used when downloading package sources.
NOCX_HEADERS = {
    "user-agent": f"Python Code Audit /{__version__} (https://github.com/nocomplexity/codeaudit)",
    "Accept": "text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8",
    # Only advertise encodings that get_package_source can actually decode
    # (gzip and deflate). Brotli ("br") has no decoder in this module, so
    # offering it would let a server answer with a body we must reject.
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}
35
+
36
+
37
def get_pypi_package_info(package_name, timeout=10):
    """Fetch the PyPI JSON metadata for a package.

    The JSON response is needed to find the download URL of the sdist.

    Parameters:
        package_name (str): Name to look up on PyPI.org. It is percent-encoded
            before it is placed in the URL, so arbitrary user input (for
            example a mistyped file path) cannot change the request path.
        timeout (float, optional): Seconds to wait for the server. Defaults to 10.

    Returns:
        dict: The parsed JSON document when the request succeeds.
        False: When PyPI answers with an HTTP error status (4xx/5xx), e.g.
            because no package with this name exists.
        None: When the request fails at network level or the response cannot
            be read or parsed as JSON.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    # Without quoting, names containing spaces or control characters make
    # urlopen raise http.client.InvalidURL (which is not a URLError), and
    # names containing '/' would address a different endpoint.
    url = f"https://pypi.org/pypi/{quote(str(package_name), safe='')}/json"

    try:
        with urlopen(url, timeout=timeout) as response:
            return json.load(response)
    except HTTPError:
        # urlopen raises HTTPError for 4xx/5xx instead of returning the
        # response. Error handling is kept simple: every HTTP error status is
        # treated as "no package with this name found on pypi.org".
        return False
    except URLError as e:
        print(f"Network error: {e}")
        return None
    except (OSError, ValueError) as e:
        # Read timeouts surface as OSError subclasses; malformed JSON raises
        # json.JSONDecodeError, which is a ValueError.
        print(f"Network error: {e}")
        return None
49
+
50
def get_pypi_download_info(package_name):
    """Retrieve the sdist download URL and version of a package's latest release.

    Uses the PyPI JSON API (https://docs.pypi.org/api/json/). The result is a
    nested dict with all published release info, so finding the correct sdist
    download URL needs some logic.

    Parameters:
        package_name (str): Name of the package on PyPI.org.

    Returns:
        dict: {"download_url": <str or None>, "release": <version str>}.
            "download_url" is None when the selected release has no file
            whose python_version is 'source'.
        False: When the package metadata could not be retrieved, or the
            package has no releases.
    """
    # Fetch once and reuse the result; every call is a network round-trip.
    data = get_pypi_package_info(package_name)
    if not data:
        # Package does not exist (False) or the lookup failed (None).
        return False

    releases = data.get('releases') or {}
    if not releases:
        return False

    # Prefer the version PyPI itself reports in info.version; the position of
    # a key in 'releases' is not a reliable indicator of the newest version.
    latest_version = (data.get('info') or {}).get('version')
    if latest_version in releases:
        release_item = (latest_version, releases[latest_version])
    else:
        # Fall back to the last listed release.
        release_item = list(releases.items())[-1]

    # We want the download URL of the source, so the *.tar.gz file.
    sdist_download_url = find_download_url(release_item, 'source')
    return {
        "download_url": sdist_download_url,
        "release": release_item[0],
    }
68
+
69
def find_download_url(data, source):
    """
    Look up a download URL in a PyPI release entry.

    `data` is a (version, files) pair; only the file list at index 1 is used.
    Each file entry is a dict. The 'url' value of the first entry whose
    'python_version' equals `source` is returned, or None when no entry
    matches.
    """
    release_files = data[1]
    matching_urls = (
        entry.get("url")
        for entry in release_files
        if entry.get("python_version") == source
    )
    # First hit wins; None signals that nothing matched.
    return next(matching_urls, None)
81
+
82
+
83
def get_package_source(url, nocxheaders=NOCX_HEADERS, nocxtimeout=10):
    """Download a package sdist and extract it so SAST scanning can be applied.

    The archive is downloaded into memory, written to a new temporary
    directory as 'package.tar.gz' and extracted into that same directory.

    Parameters:
        url (str): Download URL of the gzip-compressed tar archive.
        nocxheaders (dict, optional): HTTP request headers. Defaults to NOCX_HEADERS.
        nocxtimeout (float, optional): Timeout in seconds for the request. Defaults to 10.

    Returns:
        tuple: (temp_dir, tmpdir_obj) on success. `temp_dir` is the path of
            the temporary directory and `tmpdir_obj` the owning
            tempfile.TemporaryDirectory. The caller controls the lifetime and
            must clean up, e.g. with `tmpdir_obj.cleanup()`.
        None: When downloading or extracting fails. The error is printed and
            any temporary directory that was already created is removed.
    """
    tmpdir_obj = None
    try:
        request = Request(url, headers=nocxheaders or {})
        with urlopen(request, timeout=nocxtimeout) as response:
            content = response.read()
            content_encoding = response.headers.get("Content-Encoding")

        # Header values are case-insensitive; a missing header means the body
        # was sent as-is.
        encoding = (content_encoding or "identity").strip().lower()
        if encoding == "gzip":
            content = gzip.decompress(content)
        elif encoding == "deflate":
            try:
                # HTTP "deflate" is normally a zlib-wrapped stream ...
                content = zlib.decompress(content)
            except zlib.error:
                # ... but some servers send a raw deflate stream instead.
                content = zlib.decompress(content, -zlib.MAX_WBITS)
        elif encoding != "identity":
            raise ValueError(f"Unexpected content encoding: {content_encoding}")

        # No context manager is used here on purpose: the directory has to
        # outlive this function so the caller can scan the extracted files.
        tmpdir_obj = tempfile.TemporaryDirectory(prefix="codeaudit_")
        temp_dir = tmpdir_obj.name

        tar_path = f"{temp_dir}/package.tar.gz"
        with open(tar_path, "wb") as f:
            f.write(content)

        with tarfile.open(tar_path, "r:gz") as tar:
            # Possible risks are mitigated as far as possible with the 'data'
            # extraction filter, see architecture notes.
            tar.extractall(path=temp_dir, filter='data')

        return temp_dir, tmpdir_obj  # return both so caller controls lifetime

    except Exception as e:
        print(e)
        # Do not leave a half-populated temp directory behind on failure.
        if tmpdir_obj is not None:
            tmpdir_obj.cleanup()
        return None
codeaudit/reporting.py CHANGED
@@ -27,6 +27,9 @@ from codeaudit.totals import get_statistics , overview_count , overview_per_file
27
27
  from codeaudit.checkmodules import get_imported_modules , check_module_vulnerability , get_all_modules , get_imported_modules_by_file
28
28
  from codeaudit.htmlhelpfunctions import dict_to_html , json_to_html , dict_list_to_html_table
29
29
  from codeaudit import __version__
30
+ from codeaudit.pypi_package_scan import get_pypi_download_info , get_package_source
31
+
32
+ from codeaudit.api_interfaces import filescan
30
33
 
31
34
  from importlib.resources import files
32
35
 
@@ -104,7 +107,7 @@ def overview_report(directory, filename=DEFAULT_OUTPUT_FILE):
104
107
 
105
108
 
106
109
  def scan_report(input_path , filename=DEFAULT_OUTPUT_FILE):
107
- """Scans Python projects/files, reporting potential security weaknesses.
110
+ """Scans Python code or packages on PyPI.org on security weaknesses.
108
111
 
109
112
  This function performs security validations on the specified file or directory,
110
113
  formats the results into an HTML report, and writes the output to an HTML file.
@@ -119,7 +122,8 @@ def scan_report(input_path , filename=DEFAULT_OUTPUT_FILE):
119
122
  Returns:
120
123
  None - A HTML report is written as output
121
124
  """
122
- # Check if the input is a valid directory or a single valid Python file
125
+ # Check if the input is a valid directory or a single valid Python file
126
+ # In case no local file or directory is found, check if the given input is a PyPI package name
123
127
  file_path = Path(input_path)
124
128
  if file_path.is_dir():
125
129
  directory_scan_report(input_path , filename ) #create a package aka directory scan report
@@ -135,9 +139,22 @@ def scan_report(input_path , filename=DEFAULT_OUTPUT_FILE):
135
139
  html += '<br>'
136
140
  html += DISCLAIMER_TEXT
137
141
  create_htmlfile(html,filename)
142
+ elif get_pypi_download_info(input_path):
143
+ package_name = input_path #The variable input_path is now equal to the package name
144
+ print(f"Package: {package_name} exist on PyPI.org!")
145
+ print(f"Now SAST scanning package from the remote location: https://pypi.org/pypi/{package_name}")
146
+ pypi_data = get_pypi_download_info(package_name)
147
+ url = pypi_data['download_url']
148
+ release = pypi_data['release']
149
+ print(url)
150
+ print(release)
151
+ src_dir, tmp_handle = get_package_source(url)
152
+ directory_scan_report(src_dir , filename , package_name, release ) #create scan report for a package or directory
153
+ # Cleaning up temp directory
154
+ tmp_handle.cleanup() # deletes everything from temp directory
138
155
  else:
139
156
  #File is NOT a valid Python file, can not be parsed or directory is invalid.
140
- print(f"Error: '{input_path}' isn't a valid Python file or directory path.")
157
+ print(f"Error: '{input_path}' isn't a valid Python file, directory path to a package or a package on PyPI.org.")
141
158
 
142
159
 
143
160
 
@@ -169,14 +186,14 @@ def single_file_report(filename , scan_output):
169
186
  df = df.sort_values(by="line") # sort by line number
170
187
  html = f'<p>Number of potential security issues found: {number_of_issues}</p>'
171
188
  html += '<details>'
172
- html += '<summary>Click to see the details for found security issues.</summary>'
189
+ html += '<summary>Click to view identified security weaknesses.</summary>'
173
190
  html += df.to_html(escape=False,index=False)
174
191
  html += '</details>'
175
192
  file_overview = overview_per_file(filename)
176
193
  df_overview = pd.DataFrame([file_overview])
177
194
  html += '<br>'
178
- html += '<details>'
179
- html += f'<summary>Click to see details for file {filename}</summary>'
195
+ html += '<details>'
196
+ html += f'<summary>Click to see file details.</summary>'
180
197
  html += df_overview.to_html(escape=True,index=False)
181
198
  html += '</details>'
182
199
  #imported modules
@@ -190,7 +207,7 @@ def single_file_report(filename , scan_output):
190
207
  return html
191
208
 
192
209
 
193
- def directory_scan_report(directory_to_scan , filename=DEFAULT_OUTPUT_FILE):
210
+ def directory_scan_report(directory_to_scan , filename=DEFAULT_OUTPUT_FILE , package_name=None , release=None):
194
211
  """Reports potential security issues for all Python files found in a directory.
195
212
 
196
213
  This function performs security validations on all files found in a specified directory.
@@ -199,7 +216,7 @@ def directory_scan_report(directory_to_scan , filename=DEFAULT_OUTPUT_FILE):
199
216
  You can specify the name and directory for the generated HTML report.
200
217
 
201
218
  Parameters:
202
- file_to_scan (str) : The full path to the Python source file to be scanned.
219
+ directory_to_scan (str) : The full path to the Python source files to be scanned. Can be present in temp directory.
203
220
  filename (str, optional): The name of the HTML file to save the report to.
204
221
  Defaults to `DEFAULT_OUTPUT_FILE`.
205
222
 
@@ -216,8 +233,12 @@ def directory_scan_report(directory_to_scan , filename=DEFAULT_OUTPUT_FILE):
216
233
  files_to_check = collect_python_source_files(directory_to_scan)
217
234
  html += '<h2>Directory scan report</h2>'
218
235
  name_of_package = get_filename_from_path(directory_to_scan)
219
- html += f'<p>Below the result of the Codeaudit scan of the package or directory:<b> {name_of_package}</b></p>'
220
- html += f'<p>Total Python files found: {len(files_to_check)}</p>'
236
+ if package_name is not None:
237
+ #Use real package name and retrieved release info
238
+ html += f'<p>Below the result of the Codeaudit scan of the package - Release :<b> {package_name} - {release} </b></p>'
239
+ else:
240
+ html += f'<p>Below the result of the Codeaudit scan of the directory:<b> {name_of_package}</b></p>'
241
+ html += f'<p>Total Python files found: <b>{len(files_to_check)}</b></p>'
221
242
  number_of_files = len(files_to_check)
222
243
  print(f'Number of files that are checked for security issues:{number_of_files}')
223
244
  printProgressBar(0, number_of_files, prefix='Progress:', suffix='Complete', length=50)
@@ -229,7 +250,8 @@ def directory_scan_report(directory_to_scan , filename=DEFAULT_OUTPUT_FILE):
229
250
  file_report_html = single_file_report(file_to_scan , scan_output)
230
251
  name_of_file = get_filename_from_path(file_to_scan)
231
252
  html += f'<h3>Result for file {name_of_file}</h3>'
232
- html += '<p>' + f'Location of the file: {file_to_scan} </p>'
253
+ if package_name is None:
254
+ html += '<p>' + f'Location of the file: {file_to_scan} </p>'
233
255
  html += file_report_html
234
256
  else:
235
257
  file_name_with_no_issue = get_filename_from_path(file_to_scan)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeaudit
3
- Version: 1.3.0
3
+ Version: 1.4.0
4
4
  Summary: Simplified static security checks for Python
5
5
  Project-URL: Documentation, https://github.com/nocomplexity/codeaudit#readme
6
6
  Project-URL: Issues, https://github.com/nocomplexity/codeaudit/issues
@@ -1,20 +1,22 @@
1
- codeaudit/__about__.py,sha256=RyFrPByQ9BxQaBrv6YLqT39zRak3ZUofqavXgXK5WEk,144
1
+ codeaudit/__about__.py,sha256=-bxUcBaBrbGG65qQWVQjAxsqncjywKoDesQoM0JjIYc,144
2
2
  codeaudit/__init__.py,sha256=YGs6qU0BVHPGtXCS-vfBDLO4TOfJDLTWMgaFDTmi_Iw,157
3
- codeaudit/altairplots.py,sha256=pWW5ZQ8HGRjcwXEnME9d7OvM83P7a9-BtvMxj0RyEkE,3794
4
- codeaudit/api_interfaces.py,sha256=WhxFvGIxc6Jy-MVVJUPSC4f8ZZMHdcMhlTlnS-QARQM,11314
3
+ codeaudit/altairplots.py,sha256=gBXN1_wxUmjzTNizvzbOeCKvUxpClGPdZmK7ICK1x68,4531
4
+ codeaudit/api_interfaces.py,sha256=nnPhVPmodb-2MYCSiToT87uYZypMoX5oKg6WmLCjdiw,11336
5
+ codeaudit/api_reporting.py,sha256=W8eutTJ0d-TENbv5cCmAOfu4GEp_RwiQ4XU5FCmfkoI,1736
5
6
  codeaudit/checkmodules.py,sha256=aiF34KO-9HZDRgVBtSwVFdeUxT5_Ka5VtmlfgoLgNVs,5582
6
7
  codeaudit/codeaudit.py,sha256=yQ7SHx8b3Q9rMu8nCVyyuu3wJr3DlO-BuSIz2ZwJFGM,3426
7
8
  codeaudit/complexitycheck.py,sha256=A3_a5v-U0YQr80pWQwSVvOsY_eQtqwNkQf9Txr9mNtQ,3722
8
9
  codeaudit/filehelpfunctions.py,sha256=tx7HDCyTkZuw8YieXipQXM8iRfrDfIVZyKb7vjmkEFY,4358
9
10
  codeaudit/htmlhelpfunctions.py,sha256=-SMsyfF7TRIfJkrUqoJuh7AoG1RVrYFsZfFljoxVHXc,3246
10
11
  codeaudit/issuevalidations.py,sha256=-WdaXT_R-P9w0JbQpJ5ngVoVhG9Yee2ri0aH5SoC1Ao,6404
11
- codeaudit/reporting.py,sha256=2Dz9sGCCQXesDO7I_S7uZmrsWTVxDCYCo0AmQyAHoN4,22918
12
+ codeaudit/pypi_package_scan.py,sha256=JxgcCtJaVH9pAQ902gjmzgGGECt71r4gnFhqcZUy3OE,4872
13
+ codeaudit/reporting.py,sha256=eXlslslMgG_4ATFpTEi5wtgwTMOgngAi-38Fg-xaYN4,24299
12
14
  codeaudit/security_checks.py,sha256=wEO_A054zXmLccWGREi6cNADa4IgoOPxHsq-Je5iMIY,2167
13
15
  codeaudit/simple.css,sha256=7auhDAUwjdluFIyoCskl-Vfh503prXKqftQrmo0-e_g,3565
14
16
  codeaudit/totals.py,sha256=b6OkzcMdqGKPwuGBKrwAeCxBOJxHa5FHauGWnEb-6zM,6387
15
- codeaudit/data/sastchecks.csv,sha256=Iny33kbGe5HMQgNusZuXywEQgZcRLPqI5NXVbv02EYc,9476
16
- codeaudit-1.3.0.dist-info/METADATA,sha256=990kulWcqEs8xpRZOc0Fo2lv_oJciPnmSV2mu5yzy1U,7505
17
- codeaudit-1.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- codeaudit-1.3.0.dist-info/entry_points.txt,sha256=7w6I8zii62nJHIIF30CRP5g1z8enMqF1pZEDdlw4HcQ,55
19
- codeaudit-1.3.0.dist-info/licenses/LICENSE.txt,sha256=-5gWaMGKJ54oX8TYP7oeg2zITdTapzyWl9PP0tispuA,34674
20
- codeaudit-1.3.0.dist-info/RECORD,,
17
+ codeaudit/data/sastchecks.csv,sha256=fIcyZgymCtAluPta9fTEk6a9DJ2AGJczZYRPUIQuSag,9738
18
+ codeaudit-1.4.0.dist-info/METADATA,sha256=I-dT_-7IdlcdNEinahwY4CN-j6Ty8Q9wlrTbb84dqUk,7505
19
+ codeaudit-1.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
20
+ codeaudit-1.4.0.dist-info/entry_points.txt,sha256=7w6I8zii62nJHIIF30CRP5g1z8enMqF1pZEDdlw4HcQ,55
21
+ codeaudit-1.4.0.dist-info/licenses/LICENSE.txt,sha256=-5gWaMGKJ54oX8TYP7oeg2zITdTapzyWl9PP0tispuA,34674
22
+ codeaudit-1.4.0.dist-info/RECORD,,