codeaudit 1.4.2__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeaudit/__about__.py +1 -1
- codeaudit/data/secretslist.txt +135 -0
- codeaudit/filehelpfunctions.py +1 -1
- codeaudit/privacy_lint.py +292 -0
- codeaudit/reporting.py +322 -143
- codeaudit/simple.css +31 -5
- {codeaudit-1.4.2.dist-info → codeaudit-1.5.0.dist-info}/METADATA +7 -2
- {codeaudit-1.4.2.dist-info → codeaudit-1.5.0.dist-info}/RECORD +11 -9
- {codeaudit-1.4.2.dist-info → codeaudit-1.5.0.dist-info}/WHEEL +0 -0
- {codeaudit-1.4.2.dist-info → codeaudit-1.5.0.dist-info}/entry_points.txt +0 -0
- {codeaudit-1.4.2.dist-info → codeaudit-1.5.0.dist-info}/licenses/LICENSE.txt +0 -0
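codeaudit/__about__.py
CHANGED
[diff hunk for this file (+1 -1) is missing from this capture]
codeaudit/data/secretslist.txt
ADDED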
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
|
|
2
|
+
_KEY
|
|
3
|
+
_passwd
|
|
4
|
+
_PASSWORD
|
|
5
|
+
access_key
|
|
6
|
+
access_key_id
|
|
7
|
+
ACCESS_SECRET
|
|
8
|
+
ACCESS_TOKEN
|
|
9
|
+
AccountKey
|
|
10
|
+
AI21_API_KEY
|
|
11
|
+
ALIBABA_CLOUD_ACCESS_KEY_ID
|
|
12
|
+
ALIBABA_CLOUD_ACCESS_KEY_SECRET
|
|
13
|
+
ANTHROPIC_API_KEY
|
|
14
|
+
api_key
|
|
15
|
+
API_TOKEN
|
|
16
|
+
ApiKey
|
|
17
|
+
ApiSecret
|
|
18
|
+
APP_KEY
|
|
19
|
+
APP_SECRET
|
|
20
|
+
AUTH
|
|
21
|
+
auth_key
|
|
22
|
+
AUTH_SECRET
|
|
23
|
+
auth_token
|
|
24
|
+
AUTH_TOKEN
|
|
25
|
+
Authorization
|
|
26
|
+
AWS_ACCESS_KEY_ID
|
|
27
|
+
aws_account_id
|
|
28
|
+
aws_secret_access_key
|
|
29
|
+
AWS_SECRET_ACCESS_KEY
|
|
30
|
+
aws_session_token
|
|
31
|
+
AWS_SESSION_TOKEN
|
|
32
|
+
AZURE_OPENAI_API_KEY
|
|
33
|
+
AZURE_OPENAI_API_VERSION
|
|
34
|
+
AZURE_OPENAI_ENDPOINT
|
|
35
|
+
AzureStorageKey
|
|
36
|
+
BAIDU_API_KEY
|
|
37
|
+
BAIDU_SECRET_KEY
|
|
38
|
+
BASIC_AUTH
|
|
39
|
+
BEARER
|
|
40
|
+
BEARER_TOKEN
|
|
41
|
+
BEDROCK_REGION
|
|
42
|
+
CLIENT_ID
|
|
43
|
+
client_key
|
|
44
|
+
CLIENT_SECRET
|
|
45
|
+
ClientSecret
|
|
46
|
+
COHERE_API_KEY
|
|
47
|
+
CONNECTION_STRING
|
|
48
|
+
credential
|
|
49
|
+
credentials
|
|
50
|
+
CREDENTIALS_JSON
|
|
51
|
+
creds
|
|
52
|
+
CSRF_TOKEN
|
|
53
|
+
DASHSCOPE_API_KEY
|
|
54
|
+
DEEPSEEK_API_KEY
|
|
55
|
+
DEPLOY_KEY
|
|
56
|
+
encryptedPassword
|
|
57
|
+
ENCRYPTION_SECRET
|
|
58
|
+
EncryptionKey
|
|
59
|
+
FERNET_KEY
|
|
60
|
+
FIREWORKS_API_KEY
|
|
61
|
+
GCP_SERVICE_ACCOUNT_KEY
|
|
62
|
+
GEMINI_API_KEY
|
|
63
|
+
get_api_token
|
|
64
|
+
get_secret
|
|
65
|
+
get_token
|
|
66
|
+
GITHUB_TOKEN
|
|
67
|
+
GOOGLE_API_KEY
|
|
68
|
+
GOOGLE_API_KEY
|
|
69
|
+
HMAC_KEY
|
|
70
|
+
HUGGINGFACE_API_TOKEN
|
|
71
|
+
IBM_WATSONX_API_KEY
|
|
72
|
+
IBM_WATSONX_PROJECT_ID
|
|
73
|
+
ID_TOKEN
|
|
74
|
+
INTEGRATION_KEY
|
|
75
|
+
JWT_ACCESS_TOKEN
|
|
76
|
+
JWT_ALGORITHM
|
|
77
|
+
JWT_AUDIENCE
|
|
78
|
+
JWT_ISSUER
|
|
79
|
+
JWT_PRIVATE_KEY
|
|
80
|
+
JWT_PUBLIC_KEY
|
|
81
|
+
JWT_REFRESH_TOKEN
|
|
82
|
+
JWT_SECRET
|
|
83
|
+
JWT_SECRET_KEY
|
|
84
|
+
JWT_SIGNING_KEY
|
|
85
|
+
JWT_TOKEN
|
|
86
|
+
KEYFILE
|
|
87
|
+
KUBE_TOKEN
|
|
88
|
+
MASTER_KEY
|
|
89
|
+
MISTRAL_API_KEY
|
|
90
|
+
MLAB_PASS
|
|
91
|
+
MOONSHOT_API_KEY
|
|
92
|
+
NetworkCredential
|
|
93
|
+
NVIDIA_API_KEY
|
|
94
|
+
OAUTH_TOKEN
|
|
95
|
+
OLLAMA_API_BASE
|
|
96
|
+
OPENAI_API_KEY
|
|
97
|
+
OPENROUTER_API_KEY
|
|
98
|
+
OTEL_EXPORTER
|
|
99
|
+
PASSPHRASE
|
|
100
|
+
password
|
|
101
|
+
POSTGRES_PASSWORD
|
|
102
|
+
PPLX_API_KEY
|
|
103
|
+
PRIVATE_KEY
|
|
104
|
+
PRIVATE_TOKEN
|
|
105
|
+
REDIS_PASSWORD
|
|
106
|
+
REFRESH_TOKEN
|
|
107
|
+
REPLICATE_API_TOKEN
|
|
108
|
+
ROOT_PASSWORD
|
|
109
|
+
RSA_PRIVATE_KEY
|
|
110
|
+
SAS_TOKEN
|
|
111
|
+
secret
|
|
112
|
+
secret_key
|
|
113
|
+
secret_key_base
|
|
114
|
+
SECRET_TOKEN
|
|
115
|
+
SERVICE_ACCOUNT_KEY
|
|
116
|
+
SESSION_KEY
|
|
117
|
+
SIGNING_KEY
|
|
118
|
+
SILICONFLOW_API_KEY
|
|
119
|
+
SLACK_TOKEN
|
|
120
|
+
SMTP_PASSWORD
|
|
121
|
+
SSH_KEY
|
|
122
|
+
static_key
|
|
123
|
+
STRIPE_API_KEY
|
|
124
|
+
SYSTEM_PASSWORD
|
|
125
|
+
TENCENT_HUNYUAN_API_KEY
|
|
126
|
+
TLS_PRIVATE_KEY
|
|
127
|
+
TOGETHER_API_KEY
|
|
128
|
+
TOKEN
|
|
129
|
+
VAULT_TOKEN
|
|
130
|
+
WEBHOOK_SECRET
|
|
131
|
+
WEBHOOK_TOKEN
|
|
132
|
+
X_API_KEY
|
|
133
|
+
XAI_API_KEY
|
|
134
|
+
YI_API_KEY
|
|
135
|
+
ZHIPUAI_API_KEY
|
codeaudit/filehelpfunctions.py
CHANGED
|
@@ -24,7 +24,7 @@ def read_in_source_file(file_path):
|
|
|
24
24
|
|
|
25
25
|
if file_path.is_dir():
|
|
26
26
|
print(
|
|
27
|
-
"Error: The given path is a directory.\nUse 'codeaudit
|
|
27
|
+
"Error: The given path is a directory.\nUse 'codeaudit filescan' to security audit Python files in a directory or PyPI package.\nThe 'codeaudit modulescan' command works per file only, not on a directory.\nUse codeaudit -h for help"
|
|
28
28
|
)
|
|
29
29
|
sys.exit(1)
|
|
30
30
|
|
|
codeaudit/privacy_lint.py
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
from codeaudit.api_interfaces import version
|
|
2
|
+
from codeaudit.filehelpfunctions import get_filename_from_path , collect_python_source_files , is_ast_parsable , read_in_source_file
|
|
3
|
+
from codeaudit.pypi_package_scan import get_pypi_download_info , get_package_source
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import ast
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import datetime
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
from importlib.resources import files
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
SECRETS_LIST = files("codeaudit.data").joinpath("secretslist.txt")
|
|
16
|
+
|
|
17
|
+
def secret_scan(input_path):
    """Scan a Python file, a local directory, or a PyPI package for secrets.

    The input is resolved in this order:
      1. an existing local directory: every Python file found in it is scanned;
      2. an existing, AST-parsable ``*.py`` file: that single file is scanned;
      3. anything else is looked up as a package name on PyPI; when found, its
         source is fetched into a temporary directory, scanned, and removed.

    Args:
        input_path (str): Path to a local directory, path to a Python file,
            or the name of a PyPI package.

    Returns:
        dict: Version info and a ``generated_on`` timestamp merged with:
          - directory / PyPI package: ``package_name`` (plus
            ``package_release`` for PyPI) and the directory scan result
            (``file_privacy_check``, or ``Error`` when no files were found);
          - single file: ``file_name`` and ``file_privacy_check``, with the
            single result stored under key ``"0"``;
          - unusable input: a dict holding only an ``"Error"`` key.
    """
    file_path = Path(input_path)
    timestamp_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    output = version() | {"generated_on": timestamp_str}

    if file_path.is_dir():  # local directory scan
        output |= {"package_name": get_filename_from_path(input_path)}
        output |= _codeaudit_directory_spyscan(input_path)
        return output

    if (
        file_path.suffix.lower() == ".py"
        and file_path.is_file()
        and is_ast_parsable(input_path)
    ):
        name_of_file = get_filename_from_path(input_path)
        # A single file is stored under key "0" so that consumers can treat
        # the result of a file scan and a package scan the same way.
        output |= {
            "file_name": {"FileName": name_of_file},
            "file_privacy_check": {"0": spy_check(input_path)},
        }
        return output

    pypi_data = get_pypi_download_info(input_path)
    if pypi_data:
        package_name = input_path  # input_path is the package name here
        url = pypi_data["download_url"]
        release = pypi_data["release"]
        if url is None:
            # Previously this path fell through and returned None, which
            # callers expecting a dict could not handle.
            return {
                "Error": f"No downloadable source found on PyPI.org for package '{package_name}'."
            }
        src_dir, tmp_handle = get_package_source(url)
        output |= {"package_name": package_name, "package_release": release}
        try:
            output |= _codeaudit_directory_spyscan(src_dir)
        finally:
            tmp_handle.cleanup()  # always remove the temporary download directory
        return output

    # Neither a directory, a valid Python file, nor a known PyPI package.
    return {"Error" : "File is not a *.py file, does not exist or is not a valid directory path towards a Python package."}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def spy_check(file):
    """Scan one Python source file for secrets.

    Args:
        file: Path of the Python file to read and analyse.

    Returns:
        dict: ``file_name`` (as produced by ``get_filename_from_path``) and
        ``privacy_check_result`` (the findings from ``collect_secret_values``).
    """
    findings = collect_secret_values(read_in_source_file(file))
    return {
        "file_name": get_filename_from_path(file),
        "privacy_check_result": findings,
    }
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _codeaudit_directory_spyscan(input_path):
    """Run the secrets scan on every Python file in a directory.

    Also used for PyPI packages, which are unpacked into a temporary
    directory before this function is called.

    Args:
        input_path: Directory to search for Python source files.

    Returns:
        dict: ``{"file_privacy_check": {index: spy_check(file), ...}}`` with
        integer indexes starting at 0, or ``{"Error": message}`` when the
        directory contains no Python files.
    """
    files_to_check = collect_python_source_files(input_path)
    # The original tested ``len(...) > 1``, which wrongly reported a directory
    # holding exactly one Python file as containing none.
    if not files_to_check:
        return {"Error": f'Directory path {input_path} contains no Python files.'}

    file_output = {i: spy_check(file) for i, file in enumerate(files_to_check)}
    return {"file_privacy_check": file_output}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def load_secrets_list(filename=SECRETS_LIST):
    """Load the secret-name patterns used by the scanner.

    Args:
        filename: Either a plain path (``str`` / ``os.PathLike``) or an object
            with its own ``open`` method, such as the ``Traversable`` returned
            by ``importlib.resources.files``.

    Returns:
        list[str]: Lower-cased patterns in file order, without blank lines,
        without lines starting with ``#`` and without duplicates.
    """
    # A Traversable is not guaranteed to be a filesystem path (e.g. when the
    # package is imported from a zip archive), so prefer its own ``open``.
    opener = getattr(filename, "open", None)
    if callable(opener):
        stream = opener("r", encoding="utf-8")
    else:
        stream = open(filename, "r", encoding="utf-8")

    secrets_patterns = {}  # dict used as an ordered set
    with stream as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            # Lower-casing makes entries such as AUTH_TOKEN / auth_token
            # identical; keep only the first occurrence.
            secrets_patterns.setdefault(line.lower(), None)

    return list(secrets_patterns)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def match_secret(secrets, name, value):
    """Return the first secret pattern that occurs in ``name`` or ``value``.

    Matching is case-insensitive and whole-word (regex ``\\b`` boundaries).
    Because ``_`` counts as a word character, ``token`` does not match
    ``auth_token``; the list must contain ``auth_token`` itself.

    Patterns are tried from shortest to longest; for each pattern the name is
    checked first and then the value. The first pattern that hits wins.

    Args:
        secrets: Iterable of patterns, assumed to be lower-cased already.
        name: Variable, attribute or keyword name; converted with ``str``.
        value: Textual form of the assigned value, or ``None`` when there is
            no value. ``None`` is skipped rather than searched as ``"none"``.

    Returns:
        The matching pattern, or ``None`` when nothing matches.
    """
    candidates = [str(name).lower()]
    if value is not None:
        candidates.append(str(value).lower())

    # sorted() is stable, so equally long patterns keep their list order.
    for secret in sorted(secrets, key=len):
        pattern = re.compile(rf"\b{re.escape(secret)}\b")
        if any(pattern.search(text) for text in candidates):
            return secret

    return None
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def collect_secret_values(source_code, secrets_file=SECRETS_LIST):
    """Find assignments and keyword arguments that look like secrets.

    The source is parsed with ``ast`` and every node is inspected for:
      - targets of any node exposing a ``targets`` attribute (``ast.Assign``;
        note that ``ast.Delete`` exposes ``targets`` as well),
      - the target of annotated assignments (``ast.AnnAssign``),
      - named keyword arguments of calls.

    For each candidate, the name and a textual form of the value are checked
    with ``match_secret`` against the patterns from ``secrets_file``.

    Args:
        source_code (str): Python source text.
        secrets_file: Location of the pattern list, passed to
            ``load_secrets_list``.

    Returns:
        list[dict]: One dict per finding with keys ``lineno``, ``code`` (the
        matching line plus one line of context before and after) and
        ``matched`` (the pattern that hit), sorted by ``lineno``.

    Raises:
        SyntaxError: If ``source_code`` cannot be parsed by ``ast.parse``.
    """
    secrets = load_secrets_list(secrets_file)
    results = []

    # Split only on \n, \r\n and \r. str.splitlines() also splits on form feed
    # and several other characters that the Python parser does not treat as
    # line breaks, which would shift the context lines away from AST linenos.
    source_lines = re.split(r"\r\n|\r|\n", source_code)
    if source_lines and source_lines[-1] == "":
        source_lines.pop()  # a trailing newline does not start a new line

    # -------------------------
    # Helpers
    # -------------------------
    def is_os_environ(node):
        """True for a subscript of the form ``os.environ[...]``."""
        container = getattr(node, "value", None)
        return (
            getattr(container, "attr", None) == "environ"
            and getattr(getattr(container, "value", None), "id", None) == "os"
        )

    def get_target_repr(node):
        """Name for a target: plain id, or source text for attribute/subscript."""
        if hasattr(node, "id"):
            return node.id
        if hasattr(node, "attr") or hasattr(node, "slice"):
            return ast.unparse(node)
        return None

    def classify_value(node):
        """Reduce a value node to the text/constant that is searched."""
        if node is None:
            return None

        if isinstance(node, ast.Constant):
            return node.value

        if hasattr(node, "slice"):
            if is_os_environ(node):
                # os.environ["NAME"] -> "NAME"
                return getattr(node.slice, "value", None)
            return ast.unparse(node)

        # A call with a constant first argument, e.g. os.getenv("NAME") -> "NAME"
        if hasattr(node, "func") and getattr(node, "args", None):
            first_arg = node.args[0]
            if isinstance(first_arg, ast.Constant):
                return first_arg.value

        return ast.unparse(node)

    def get_original_line(node):
        """Return the node's line with one line of context on each side."""
        lineno = getattr(node, "lineno", None)
        if lineno is None:
            return None
        first = max(lineno - 2, 0)
        return "\n".join(line.rstrip() for line in source_lines[first:lineno + 1])

    def add_value(name, value_node, node):
        """Record a finding when the name or value matches a secret pattern."""
        value = classify_value(value_node)
        matched = match_secret(secrets, name, value)
        if matched is not None:
            results.append(
                {
                    "lineno": getattr(node, "lineno", None),
                    "code": get_original_line(node),
                    "matched": matched,
                }
            )

    # -------------------------
    # Walk all AST nodes
    # -------------------------
    tree = ast.parse(source_code)
    for node in ast.walk(tree):

        # Assignments (one finding per matching target)
        for target in getattr(node, "targets", []):
            name = get_target_repr(target)
            if name:
                add_value(name, getattr(node, "value", None), node)

        # Annotated assignments
        if isinstance(node, ast.AnnAssign):
            name = get_target_repr(node.target)
            if name:
                add_value(name, getattr(node, "value", None), node)

        # Function calls (named keyword arguments only; **kwargs has arg=None)
        if isinstance(node, ast.Call):
            for kw in node.keywords:
                if kw.arg:
                    add_value(kw.arg, kw.value, kw)

    return sorted(results, key=lambda item: item["lineno"])
|
|
269
|
+
|
|
270
|
+
def has_privacy_findings(data):
    """Tell whether a ``secret_scan`` result contains at least one finding.

    Args:
        data (dict): Scan result; the per-file results are read from its
            ``file_privacy_check`` mapping. A missing or ``None`` mapping
            (e.g. an error result) counts as "no findings".

    Returns:
        bool: True if any file has a non-empty ``privacy_check_result``.
    """
    files_scanned = data.get("file_privacy_check") or {}
    return any(
        bool(file_info.get("privacy_check_result"))
        for file_info in files_scanned.values()
    )
|
|
283
|
+
|
|
284
|
+
def count_privacy_check_results(data):
    """Count the secrets found in a result produced by ``secret_scan``.

    Works for single-file results (one entry under key ``"0"``) as well as
    directory/package results (entries under integer keys), by summing over
    every file entry instead of reading key ``"0"`` only.

    Args:
        data (dict): Scan result containing a ``file_privacy_check`` mapping.

    Returns:
        int: Total number of findings over all files; 0 when the result has
        no ``file_privacy_check`` mapping (e.g. an error result).
    """
    files_scanned = data.get("file_privacy_check") or {}
    return sum(
        len(file_info.get("privacy_check_result") or [])
        for file_info in files_scanned.values()
    )
|
codeaudit/reporting.py
CHANGED
|
@@ -18,6 +18,7 @@ import os
|
|
|
18
18
|
from pathlib import Path
|
|
19
19
|
|
|
20
20
|
import pandas as pd
|
|
21
|
+
import html
|
|
21
22
|
import datetime
|
|
22
23
|
|
|
23
24
|
from codeaudit.security_checks import perform_validations , ast_security_checks
|
|
@@ -25,15 +26,20 @@ from codeaudit.filehelpfunctions import get_filename_from_path , collect_python_
|
|
|
25
26
|
from codeaudit.altairplots import multi_bar_chart
|
|
26
27
|
from codeaudit.totals import get_statistics , overview_count , overview_per_file , total_modules
|
|
27
28
|
from codeaudit.checkmodules import get_imported_modules , check_module_vulnerability , get_all_modules , get_imported_modules_by_file
|
|
28
|
-
from codeaudit.htmlhelpfunctions import
|
|
29
|
+
from codeaudit.htmlhelpfunctions import json_to_html , dict_list_to_html_table
|
|
29
30
|
from codeaudit import __version__
|
|
30
31
|
from codeaudit.pypi_package_scan import get_pypi_download_info , get_package_source
|
|
31
|
-
|
|
32
|
-
from codeaudit.api_interfaces import filescan
|
|
32
|
+
from codeaudit.privacy_lint import secret_scan , has_privacy_findings
|
|
33
33
|
|
|
34
34
|
from importlib.resources import files
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
|
|
37
|
+
PYTHON_CODE_AUDIT_TEXT = '<a href="https://github.com/nocomplexity/codeaudit" target="_blank"><b>Python Code Audit</b></a>'
|
|
38
|
+
DISCLAIMER_TEXT = (
|
|
39
|
+
"<p><b>Disclaimer:</b> <i>This SAST tool "
|
|
40
|
+
+ PYTHON_CODE_AUDIT_TEXT
|
|
41
|
+
+ " provides a powerful, automatic security analysis for Python source code. However, it's not a substitute for human review in combination with business knowledge. Undetected vulnerabilities may still exist.</i></p>"
|
|
42
|
+
)
|
|
37
43
|
|
|
38
44
|
|
|
39
45
|
SIMPLE_CSS_FILE = files('codeaudit') / 'simple.css'
|
|
@@ -49,6 +55,9 @@ def overview_report(directory, filename=DEFAULT_OUTPUT_FILE):
|
|
|
49
55
|
- A local directory containing Python source files
|
|
50
56
|
- The name of a package hosted on PyPI.org
|
|
51
57
|
|
|
58
|
+
So:
|
|
59
|
+
codeaudit overview <package-name|directory> [reportname.html]
|
|
60
|
+
|
|
52
61
|
For PyPI packages, the source distribution (sdist) is downloaded,
|
|
53
62
|
extracted to a temporary directory, scanned, and removed after the report
|
|
54
63
|
is generated.
|
|
@@ -84,6 +93,7 @@ def overview_report(directory, filename=DEFAULT_OUTPUT_FILE):
|
|
|
84
93
|
package name.
|
|
85
94
|
"""
|
|
86
95
|
clean_up = False
|
|
96
|
+
advice = None
|
|
87
97
|
if os.path.exists(directory):
|
|
88
98
|
# Check if the path is actually a directory
|
|
89
99
|
if not os.path.isdir(directory):
|
|
@@ -102,6 +112,7 @@ def overview_report(directory, filename=DEFAULT_OUTPUT_FILE):
|
|
|
102
112
|
pypi_data = get_pypi_download_info(package_name)
|
|
103
113
|
url = pypi_data['download_url']
|
|
104
114
|
release = pypi_data['release']
|
|
115
|
+
advice = f'<p>👉 To perform a SAST scan on the source code, run:<pre><code class="language-python">codeaudit filescan {package_name}</code></pre></p>'
|
|
105
116
|
if url is not None:
|
|
106
117
|
print(f'Creating Python Code Audit overview for package:\n{url}')
|
|
107
118
|
src_dir, tmp_handle = get_package_source(url)
|
|
@@ -117,52 +128,84 @@ def overview_report(directory, filename=DEFAULT_OUTPUT_FILE):
|
|
|
117
128
|
df['Std-Modules'] = modules['Std-Modules']
|
|
118
129
|
df['External-Modules'] = modules['External-Modules']
|
|
119
130
|
overview_df = overview_count(df)
|
|
120
|
-
|
|
131
|
+
output = '<h1>' + f'Python Code Audit overview report' + '</h1><br>'
|
|
121
132
|
if clean_up:
|
|
122
|
-
|
|
123
|
-
|
|
133
|
+
output += f'<p>Codeaudit overview scan of package:<b> {package_name}</b></p>'
|
|
134
|
+
output += f'<p>Version:<b>{release}</b></p>'
|
|
124
135
|
else:
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
136
|
+
output += f'<p>Overview for the directory:<b> {directory}</b></p>'
|
|
137
|
+
output += f'<h2>Summary</h2>'
|
|
138
|
+
output += overview_df.to_html(escape=True,index=False)
|
|
139
|
+
output += '<br><br>'
|
|
129
140
|
security_based_on_max_complexity = overview_df.loc[0,'Maximum_Complexity']
|
|
130
141
|
if security_based_on_max_complexity > 40:
|
|
131
|
-
|
|
142
|
+
output += '<p>Based on the maximum found complexity in a source file: Security concern rate is <b>❌ HIGH</b>.'
|
|
132
143
|
else:
|
|
133
|
-
|
|
144
|
+
output += '<p>Based on the maximum found complexity in a source file: Security concern rate is <b>✅ LOW</b>.'
|
|
134
145
|
security_based_on_loc = overview_df.loc[0,'Number_Of_Lines']
|
|
135
146
|
if security_based_on_loc > 2000:
|
|
136
|
-
|
|
147
|
+
output += '<p>Based on the total Lines of Code (LoC) : Security concern rate is <b>❌ HIGH</b>.'
|
|
137
148
|
else:
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
## Module overview
|
|
141
|
-
modules_discovered = get_all_modules(directory)
|
|
149
|
+
output += '<p>Based on the total Lines of Code (LoC) : Security concern rate is <b>✅ LOW</b>.'
|
|
150
|
+
output += '<br>'
|
|
151
|
+
## Module overview
|
|
152
|
+
modules_discovered = get_all_modules(directory)
|
|
142
153
|
if clean_up:
|
|
143
154
|
tmp_handle.cleanup() #Clean up tmp directory if overview is created directly from PyPI package
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
html += '<summary>Click to see the report details.</summary>'
|
|
155
|
+
output += '<details>'
|
|
156
|
+
output += '<summary>View all discovered modules.</summary>'
|
|
157
|
+
output += display_found_modules(modules_discovered)
|
|
158
|
+
output += '</details>'
|
|
159
|
+
output += f'<h2>Detailed overview per source file</h2>'
|
|
160
|
+
output += '<details>'
|
|
161
|
+
output += '<summary>View the report details.</summary>'
|
|
152
162
|
df_plot = pd.DataFrame(result) # again make the df from the result variable
|
|
153
|
-
|
|
154
|
-
|
|
163
|
+
output += df_plot.to_html(escape=True,index=False)
|
|
164
|
+
output += '</details>'
|
|
155
165
|
# I now want only a plot for LoC, so drop other columns from Dataframe
|
|
156
166
|
df_plot = pd.DataFrame(result) # again make the df from the result variable
|
|
157
167
|
df_plot = df_plot.drop(columns=['FilePath'])
|
|
158
168
|
plot = multi_bar_chart(df_plot)
|
|
159
169
|
plot_html = plot.to_html()
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
170
|
+
output += '<br><br>'
|
|
171
|
+
output += '<h2>Visual Overview</h2>'
|
|
172
|
+
output += extract_altair_html(plot_html)
|
|
173
|
+
output += '<p><b>💬 Advice:</b></p>'
|
|
174
|
+
if advice is not None and advice != "":
|
|
175
|
+
output += advice
|
|
176
|
+
else:
|
|
177
|
+
output += f'<p>👉 To perform a SAST scan on the source code, run:<pre><code class="language-python">codeaudit filescan {directory}</code></pre></p>'
|
|
178
|
+
create_htmlfile(output,filename)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def display_found_modules(modules_discovered):
    """Format discovered Python modules as an HTML fragment.

    Args:
        modules_discovered (dict): Must contain the keys ``core_modules`` and
            ``imported_modules``, each an iterable of module names.

    Returns:
        str: Two headed ``<ul>`` lists, the standard-library modules first,
        then the imported external packages.

    Raises:
        KeyError: If one of the two expected keys is missing.
    """
    from html import escape  # local import: keeps the block self-contained

    def as_html_list(modules):
        """Render names as a <ul>; names are escaped as they come from scanned code."""
        items = "\n".join(f" <li>{escape(str(module))}</li>" for module in modules)
        return "<ul>\n" + items + "\n</ul>"

    core_modules = modules_discovered["core_modules"]
    external_modules = modules_discovered["imported_modules"]
    return (
        "<p><b>Used Python Standard libraries:</b></p>"
        + as_html_list(core_modules)
        + "<p><b>Imported libraries (packages):</b></p>"
        + as_html_list(external_modules)
    )
|
|
208
|
+
|
|
166
209
|
|
|
167
210
|
def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE):
|
|
168
211
|
"""Scans Python source code or PyPI packages for security weaknesses.
|
|
@@ -174,6 +217,8 @@ def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE):
|
|
|
174
217
|
- A single local Python file
|
|
175
218
|
- A package name hosted on PyPI.org
|
|
176
219
|
|
|
220
|
+
codeaudit filescan <pythonfile|package-name|directory> [reportname.html]
|
|
221
|
+
|
|
177
222
|
Depending on the input type, the function analyzes the source code for
|
|
178
223
|
potential security issues, generates an HTML report summarizing the
|
|
179
224
|
findings, and writes the report to a static HTML file.
|
|
@@ -217,16 +262,19 @@ def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE):
|
|
|
217
262
|
directory_scan_report(input_path , filename ) #create a package aka directory scan report
|
|
218
263
|
elif file_path.suffix == ".py" and file_path.is_file() and is_ast_parsable(input_path):
|
|
219
264
|
#create a sast file check report
|
|
220
|
-
scan_output = perform_validations(input_path)
|
|
221
|
-
|
|
265
|
+
scan_output = perform_validations(input_path) #scans for weaknesses in the file
|
|
266
|
+
spy_output = secret_scan(input_path) #scans for secrets in the file
|
|
267
|
+
file_report_html = single_file_report(input_path , scan_output)
|
|
268
|
+
secrets_report_html = secrets_report(spy_output)
|
|
222
269
|
name_of_file = get_filename_from_path(input_path)
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
270
|
+
html_output = '<h1>Python Code Audit Report</h1>' #prepared to be embedded to display multiple reports, so <h2> used
|
|
271
|
+
html_output += f'<h2>Security scan: {name_of_file}</h2>'
|
|
272
|
+
html_output += '<p>' + f'Location of the file: {input_path} </p>'
|
|
273
|
+
html_output += file_report_html
|
|
274
|
+
html_output += secrets_report_html
|
|
275
|
+
html_output += '<br>'
|
|
276
|
+
html_output += DISCLAIMER_TEXT
|
|
277
|
+
create_htmlfile(html_output,filename)
|
|
230
278
|
elif get_pypi_download_info(input_path):
|
|
231
279
|
package_name = input_path #The variable input_path is now equal to the package name
|
|
232
280
|
print(f"Package: {package_name} exist on PyPI.org!")
|
|
@@ -247,8 +295,70 @@ def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE):
|
|
|
247
295
|
else:
|
|
248
296
|
#File is NOT a valid Python file, can not be parsed or directory is invalid.
|
|
249
297
|
print(f"Error: '{input_path}' isn't a valid Python file, directory path to a package or a package on PyPI.org.")
|
|
250
|
-
|
|
251
|
-
|
|
298
|
+
|
|
299
|
+
def secrets_report(spy_output):
    """Build the HTML report section for the secrets scan of one input.

    Args:
        spy_output (dict): Result of ``secret_scan``; it is passed to
            ``has_privacy_findings`` and ``pylint_reporting``.

    Returns:
        str: When findings exist, a warning paragraph followed by a
        collapsible ``<details>`` element holding the findings table.
        Otherwise a single paragraph stating that the risk is low.
    """
    if not has_privacy_findings(spy_output):
        return '<br><p>✅ No Logic for connecting to remote services found. Risk of data exfiltration to external systems is <b>low</b>.</p>'

    # escape=False: the code column already contains prepared HTML markup.
    findings_table = pylint_reporting(spy_output).to_html(escape=False,index=False)
    sections = [
        '<br><p>⚠️ <b>External Egress Risk</b>: Possible API keys or logic for connecting to remote services found.</p>',
        '<details>',
        '<summary>View detailed analysis for suspected locations where secrets are found or used in the code.</summary>',
        findings_table,
        '</details>',
        '<br>',
    ]
    return ''.join(sections)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def pylint_reporting(result):
    """Turn the findings of a secrets scan into a DataFrame for the report.

    Args:
        result (dict): Scan result; findings are read from each entry's
            ``privacy_check_result`` list under ``file_privacy_check``.

    Returns:
        pandas.DataFrame: Columns ``line``, ``found`` and ``code``. ``code`` is
        HTML: the snippet is escaped, newlines become ``<br>`` and the result
        is wrapped in ``<pre><code>``. The frame is empty (same columns) when
        there are no findings.
    """
    rows = []

    for item in (result.get("file_privacy_check") or {}).values():
        for entry in item.get("privacy_check_result", []):
            # Escape first so that source text can never inject markup.
            escaped_code = html.escape(entry["code"] or "")
            # Done outside the f-string: a backslash inside an f-string
            # replacement field is a SyntaxError before Python 3.12.
            code_lines = escaped_code.replace("\n", "<br>")
            rows.append({
                "lineno": entry["lineno"],
                "matched": entry["matched"],
                "code": f'<pre><code class="language-python">{code_lines}</code></pre>',
            })

    df = pd.DataFrame(rows, columns=["lineno", "matched", "code"])
    # Rename to UI-friendly column names.
    return df.rename(columns={"lineno": "line", "matched": "found"})
|
|
361
|
+
|
|
252
362
|
|
|
253
363
|
def single_file_report(filename , scan_output):
|
|
254
364
|
"""Function to DRY for a codescan when used for single for CLI or within a directory scan"""
|
|
@@ -275,28 +385,30 @@ def single_file_report(filename , scan_output):
|
|
|
275
385
|
df['code'] = df['code'].str.replace(r'\n', '<br>', regex=True) # to convert \n to \\n for display
|
|
276
386
|
df['validation'] = df['validation'].apply(replace_second_dot) #Make the validation column smaller - this is the simplest way! without using styling options from Pandas!
|
|
277
387
|
df = df[["line", "validation", "severity", "info", "code"]] # reorder the columns before converting to html
|
|
278
|
-
df = df.sort_values(by="line") # sort by line number
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
388
|
+
df = df.sort_values(by="line") # sort by line number
|
|
389
|
+
if number_of_issues > 0:
|
|
390
|
+
output = f'<p>⚠️ <b>{number_of_issues}</b> potential <b>security issues</b> found!</p>'
|
|
391
|
+
output += '<details>'
|
|
392
|
+
output += '<summary>View identified security weaknesses.</summary>'
|
|
393
|
+
output += df.to_html(escape=False,index=False)
|
|
394
|
+
output += '</details>'
|
|
395
|
+
output += '<br>'
|
|
396
|
+
else:
|
|
397
|
+
output = '' # No weaknesses found, no message, since privacy breaches may be present.
|
|
284
398
|
file_overview = overview_per_file(filename)
|
|
285
|
-
df_overview = pd.DataFrame([file_overview])
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
html += '<details>'
|
|
294
|
-
html += '<summary>Click to see details for used modules in this file.</summary>'
|
|
399
|
+
df_overview = pd.DataFrame([file_overview])
|
|
400
|
+
output += '<details>'
|
|
401
|
+
output += f'<summary>View detailed analysis of security relevant file details.</summary>'
|
|
402
|
+
output += df_overview.to_html(escape=True,index=False)
|
|
403
|
+
output += '</details>'
|
|
404
|
+
output += '<br>'
|
|
405
|
+
output += '<details>'
|
|
406
|
+
output += '<summary>View used modules in this file.</summary>'
|
|
295
407
|
modules_found = get_imported_modules_by_file(filename)
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
return
|
|
408
|
+
output += display_found_modules(modules_found)
|
|
409
|
+
output += f'<p>To check for <b>reported vulnerabilities</b> in external modules used by this file, use the command:<br><div class="code-box">codeaudit modulescan {filename}</div><br></p>'
|
|
410
|
+
output += '</details>'
|
|
411
|
+
return output
|
|
300
412
|
|
|
301
413
|
|
|
302
414
|
def directory_scan_report(directory_to_scan , filename=DEFAULT_OUTPUT_FILE , package_name=None , release=None):
|
|
@@ -321,101 +433,160 @@ def directory_scan_report(directory_to_scan , filename=DEFAULT_OUTPUT_FILE , pac
|
|
|
321
433
|
exit(1)
|
|
322
434
|
|
|
323
435
|
collection_ok_files = [] # create a collection of files with no issues found
|
|
324
|
-
|
|
436
|
+
output = '<h1>Python Code Audit Report</h1>'
|
|
325
437
|
files_to_check = collect_python_source_files(directory_to_scan)
|
|
326
|
-
|
|
438
|
+
output += '<h2>Directory scan report</h2>'
|
|
327
439
|
name_of_package = get_filename_from_path(directory_to_scan)
|
|
328
440
|
if package_name is not None:
|
|
329
441
|
#Use real package name and retrieved release info
|
|
330
|
-
|
|
331
|
-
|
|
442
|
+
output += f'<p>Below the result of the Codeaudit scan of (Package name - Release):</p>'
|
|
443
|
+
output += f'<p><b> {package_name} - {release} </b></p>'
|
|
332
444
|
else:
|
|
333
|
-
|
|
334
|
-
|
|
445
|
+
output += f'<p>Below the result of the Codeaudit scan of the directory:<b> {name_of_package}</b></p>'
|
|
446
|
+
output += f'<p>Total Python files found: <b>{len(files_to_check)}</b></p>'
|
|
335
447
|
number_of_files = len(files_to_check)
|
|
336
448
|
print(f'Number of files that are checked for security issues:{number_of_files}')
|
|
337
449
|
printProgressBar(0, number_of_files, prefix='Progress:', suffix='Complete', length=50)
|
|
338
450
|
for i,file_to_scan in enumerate(files_to_check):
|
|
339
451
|
printProgressBar(i + 1, number_of_files, prefix='Progress:', suffix='Complete', length=50)
|
|
340
452
|
scan_output = perform_validations(file_to_scan)
|
|
453
|
+
spy_output = secret_scan(file_to_scan) #scans for secrets in the file
|
|
341
454
|
data = scan_output["result"]
|
|
342
|
-
if data:
|
|
455
|
+
if data or has_privacy_findings(spy_output):
|
|
343
456
|
file_report_html = single_file_report(file_to_scan , scan_output)
|
|
344
457
|
name_of_file = get_filename_from_path(file_to_scan)
|
|
345
|
-
|
|
458
|
+
output += f'<h3>Security scan: {name_of_file}</h3>'
|
|
346
459
|
if package_name is None:
|
|
347
|
-
|
|
348
|
-
|
|
460
|
+
output += '<p>' + f'Location of the file: {file_to_scan} </p>'
|
|
461
|
+
output += file_report_html
|
|
462
|
+
secrets_report_html = secrets_report(spy_output)
|
|
463
|
+
output += secrets_report_html
|
|
349
464
|
else:
|
|
350
465
|
file_name_with_no_issue = get_filename_from_path(file_to_scan)
|
|
351
466
|
collection_ok_files.append({'filename' : file_name_with_no_issue ,
|
|
352
|
-
'directory': file_to_scan})
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
467
|
+
'directory': file_to_scan})
|
|
468
|
+
output += '<h2>Files in directory with no security issues</h2>'
|
|
469
|
+
output += f'<p>✅ Total Python files <b>without</b> detected security issues: {len(collection_ok_files)}</p>'
|
|
470
|
+
output += '<p>The Python files with no security issues <b>detected</b> by codeaudit are:<p>'
|
|
471
|
+
output += dict_list_to_html_table(collection_ok_files)
|
|
472
|
+
output += '<br>'
|
|
358
473
|
if package_name is not None:
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
create_htmlfile(
|
|
474
|
+
output += f'<p><b>Note:</b><i>Since this check is done on a package on PyPI.org, the temporary local directories are deleted. To examine the package in detail, you should download the sources locally and run the command:<code>codeaudit filescan</code> again.</i></p>'
|
|
475
|
+
output += '<p><b>Disclaimer:</b><i>This scan only evaluates Python files. Please note that security vulnerabilities may also exist in other files associated with the Python module.</i></p>'
|
|
476
|
+
output += DISCLAIMER_TEXT
|
|
477
|
+
create_htmlfile(output,filename)
|
|
478
|
+
|
|
363
479
|
|
|
364
480
|
def report_module_information(inputfile, reportname=DEFAULT_OUTPUT_FILE):
    """
    Generate a report on known vulnerabilities in Python modules and packages.

    This function analyzes a single Python file to identify imported
    external modules and checks those modules against the OSV vulnerability
    database. The collected results are written to a static HTML report.

    If the input refers to a valid PyPI package name instead of a local Python
    file, the function generates a vulnerability report directly for that
    package.

    While processing modules, progress information is printed to standard
    output.

    Example:
        Generate a module vulnerability report for a Python file::

            codeaudit modulescan <pythonfile>|<package> [yourreportname.html]

            codeaudit modulescan mypythonfile.py

    Args:
        inputfile (str): Path to a Python source file (*.py) to analyze, or the
            name of a package available on PyPI.
        reportname (str, optional): Name (and optional path) of the HTML file to
            write the vulnerability report to. The filename should use the
            ``.html`` extension. Defaults to ``DEFAULT_OUTPUT_FILE``.

    Returns:
        None: The function writes a static HTML report to disk.

    Raises:
        SystemExit: If the input is a directory, or is neither a valid Python
            file nor a valid PyPI package. An explanatory message is printed
            to standard output before exiting.
    """
    html_output = '<h1>Python Code Audit Report</h1>'
    file_path = Path(inputfile)
    if file_path.is_dir():
        # Directories are handled by other commands; modulescan is per file/package.
        print("codeaudit modulescan only works on single python files (*.py) or packages present on PyPI.org")
        print("See codeaudit modulescan -h or check the manual https://codeaudit.nocomplexity.com")
        exit(1)
    elif file_path.suffix == ".py" and file_path.is_file() and is_ast_parsable(inputfile):
        source = read_in_source_file(inputfile)
        used_modules = get_imported_modules(source)
        external_modules = used_modules['imported_modules']
        module_count = len(external_modules)
        # Initial call to print 0% progress.
        # NOTE(review): module_count can be 0 here; confirm printProgressBar
        # copes with total == 0.
        printProgressBar(0, module_count, prefix='Progress:', suffix='Complete', length=50)
        html_output += '<h2>Module scan report</h2>'
        html_output += f'<p>Security information for file: <b>{inputfile}</b></p>'
        html_output += f'<p>Total Dependencies Scanned: {module_count} </p>'
        if external_modules:
            # Collapsible list of everything that is about to be checked.
            html_output += '<details>'
            html_output += '<summary>View scanned module dependencies(imported packages).</summary>'
            html_output += "<ul>\n" + "\n".join(f"  <li>{module}</li>" for module in external_modules) + "\n</ul>"
            html_output += '</details>'
            # Now vuln info per external module, in the order the modules were collected.
            html_output += '<h3>Vulnerability information for detected modules</h3>'
            for i, module in enumerate(external_modules):
                printProgressBar(i + 1, module_count, prefix='Progress:', suffix='Complete', length=50)
                html_output += module_vulnerability_check(module) + '<br>'
        else:
            # Close the paragraph explicitly so the following markup is not
            # swallowed into it by the browser's implied-end-tag handling.
            html_output += '<p>✅ No external modules found!</p>'
        html_output += f'<br><p>💡 To check for <b>security weaknesses</b> in this file, use the command:<div class="code-box">codeaudit filescan {inputfile}</div><br></p>'
        html_output += '<br>' + DISCLAIMER_TEXT
        create_htmlfile(html_output, reportname)
    elif get_pypi_download_info(inputfile):
        package_name = inputfile  # The input variable is now equal to the package name
        html_output += '<h2>Package scan report for known vulnerabilities</h2>'
        html_output += module_vulnerability_check(package_name)
        html_output += f'<br><p>💡 To check for <b>security weaknesses</b> in this package, use the command:<div class="code-box">codeaudit filescan {package_name}</div><br></p>'
        html_output += '<br>' + DISCLAIMER_TEXT
        create_htmlfile(html_output, reportname)
    else:
        # File is NOT a valid Python file, or package does not exist on PyPI.
        print(f"Error: '{inputfile}' isn't a valid Python file(*.py), or a valid package on PyPI.org.")
        exit(1)
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def module_vulnerability_check(module):
    """
    Return the HTML fragment describing known vulnerabilities of one module.

    The module name is looked up with ``check_module_vulnerability``. The
    fragment that comes back depends on the outcome of that lookup:
    - An empty (falsy) result yields a single success paragraph.
    - Any other result is rendered with ``json_to_html`` and wrapped in a
      collapsible HTML <details> element whose summary names the package.

    Args:
        module (str): Name of the Python package/module to check.

    Returns:
        str: HTML string representing the vulnerability scan result for the module.
    """
    findings = check_module_vulnerability(module)

    # Guard clause: nothing reported, so only the success message is needed.
    # A SAST scan of the package itself is intentionally not attempted here;
    # that is left to a manual filescan of the package.
    if not findings:
        return f"<p>✅ No known vulnerabilities found for package: <b>{module}</b>.</p>"

    fragments = (
        "<details>",
        f"<summary>⚠️ View vulnerability information for package <b>{module}</b>.</summary>",
        json_to_html(findings),
        "</details>",
    )
    return "".join(fragments)
|
|
419
590
|
|
|
420
591
|
|
|
421
592
|
def collect_issue_lines(filename, line):
|
|
@@ -431,13 +602,13 @@ def collect_issue_lines(filename, line):
|
|
|
431
602
|
def create_htmlfile(html_input,outputfile):
|
|
432
603
|
""" Creates a clean html file based on html input given """
|
|
433
604
|
# Read CSS from the file - So it is included in the reporting HTML file
|
|
434
|
-
|
|
605
|
+
|
|
435
606
|
with open(SIMPLE_CSS_FILE, 'r') as css_file:
|
|
436
607
|
css_content = css_file.read()
|
|
437
608
|
# Start building the HTML
|
|
438
609
|
output = '<!DOCTYPE html><html lang="en-US"><head>'
|
|
439
610
|
output += '<meta charset="UTF-8"/>'
|
|
440
|
-
output += '<title>
|
|
611
|
+
output += '<title>Python_Code_Audit_SecurityReport</title>'
|
|
441
612
|
# Inline CSS inside <style> block
|
|
442
613
|
output += f'<style>\n{css_content}\n</style>'
|
|
443
614
|
output += '<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>' # needed for altair plots
|
|
@@ -449,11 +620,23 @@ def create_htmlfile(html_input,outputfile):
|
|
|
449
620
|
now = datetime.datetime.now()
|
|
450
621
|
timestamp_str = now.strftime("%Y-%m-%d %H:%M")
|
|
451
622
|
code_audit_version = __version__
|
|
452
|
-
output +=
|
|
623
|
+
output += (
|
|
624
|
+
f"<p>This Python security report was created on: <b>{timestamp_str}</b> with "
|
|
625
|
+
+ PYTHON_CODE_AUDIT_TEXT
|
|
626
|
+
+ f" version <b>{code_audit_version}</b></p>"
|
|
627
|
+
)
|
|
453
628
|
output += '<hr>'
|
|
454
|
-
output +=
|
|
455
|
-
output +=
|
|
456
|
-
|
|
629
|
+
output += '<footer>'
|
|
630
|
+
output += (
|
|
631
|
+
'<div class="footer-links">'
|
|
632
|
+
'Check the <a href="https://nocomplexity.com/documents/codeaudit/intro.html" '
|
|
633
|
+
'target="_blank">documentation</a> for help on found issues.<br>'
|
|
634
|
+
'Codeaudit is made with <span class="heart">❤</span> by cyber security '
|
|
635
|
+
'professionals who advocate for <a href="https://nocomplexity.com/simplify-security/" target="_blank">open simple security solutions</a>.<br>'
|
|
636
|
+
'<a href="https://nocomplexity.com/documents/codeaudit/CONTRIBUTE.html" target="_blank">Join the community</a> and contribute to make this tool better!'
|
|
637
|
+
"</div>"
|
|
638
|
+
)
|
|
639
|
+
output += "</footer>"
|
|
457
640
|
output += '</div>' #base container
|
|
458
641
|
output += '</body></html>'
|
|
459
642
|
# Now create the HTML output file
|
|
@@ -474,10 +657,6 @@ def create_htmlfile(html_input,outputfile):
|
|
|
474
657
|
print("=====================================================================\n")
|
|
475
658
|
|
|
476
659
|
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
660
|
def extract_altair_html(plot_html):
|
|
482
661
|
match = re.search(r"<body[^>]*>(.*?)</body>", plot_html, re.DOTALL | re.IGNORECASE)
|
|
483
662
|
if match:
|
|
@@ -562,17 +741,17 @@ def report_implemented_tests(filename=DEFAULT_OUTPUT_FILE):
|
|
|
562
741
|
df_checks = ast_security_checks()
|
|
563
742
|
df_checks['construct'] = df_checks['construct'].apply(replace_second_dot) #Make the validation column smaller - this is the simplest way! without using styling options from Pandas!
|
|
564
743
|
df_checks_sorted = df_checks.sort_values(by='construct')
|
|
565
|
-
|
|
744
|
+
output = '<h1>Python Code Audit Implemented validations</h1>' #prepared to be embedded to display multiple reports, so <h2> used
|
|
566
745
|
number_of_test = len(df_checks)
|
|
567
746
|
|
|
568
|
-
|
|
747
|
+
output += df_checks_sorted.to_html(escape=False,index=False)
|
|
569
748
|
code_audit_version = __version__
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
create_htmlfile(
|
|
749
|
+
output += '<br>'
|
|
750
|
+
output += f'<p>Number of implemented security validations:<b>{number_of_test}</b></p>'
|
|
751
|
+
output += f'<p>Version of codeaudit: <b>{code_audit_version}</b>'
|
|
752
|
+
output += '<p>Because Python and cybersecurity are constantly changing, issue reports <b>SHOULD</b> specify the codeaudit version used.</p>'
|
|
753
|
+
output += DISCLAIMER_TEXT
|
|
754
|
+
create_htmlfile(output,filename)
|
|
576
755
|
|
|
577
756
|
|
|
578
757
|
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█', printEnd="\r"):
|
codeaudit/simple.css
CHANGED
|
@@ -24,7 +24,7 @@ p {
|
|
|
24
24
|
|
|
25
25
|
/* Body base styles */
|
|
26
26
|
body {
|
|
27
|
-
font-family: Arial, Helvetica, sans-serif;
|
|
27
|
+
font-family: Inter, Roboto, Arial, Helvetica, sans-serif;
|
|
28
28
|
background-color: #FFFFFF;
|
|
29
29
|
color: #333;
|
|
30
30
|
line-height: 1.6;
|
|
@@ -175,11 +175,37 @@ pre {
|
|
|
175
175
|
}
|
|
176
176
|
|
|
177
177
|
footer {
|
|
178
|
-
background-color: #
|
|
179
|
-
color:
|
|
178
|
+
background-color: #E6E6E6; /* nocx grey background */
|
|
179
|
+
color: #555; /* Softer text color for better readability */
|
|
180
180
|
text-align: center;
|
|
181
|
-
padding: 10px;
|
|
182
|
-
|
|
181
|
+
padding: 30px 10px;
|
|
182
|
+
margin-top: 10px;
|
|
183
|
+
border-top: 1px solid #eee;
|
|
184
|
+
font-size: 14px;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
.footer-links {
|
|
189
|
+
margin-top: 10px;
|
|
190
|
+
line-height: 2;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
.footer-links a {
|
|
194
|
+
color: #ff0000;
|
|
195
|
+
font-weight: 500;
|
|
196
|
+
transition: color 0.2s;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
.footer-links a:hover {
|
|
200
|
+
color: #cc0000;
|
|
201
|
+
text-decoration: underline;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
.heart {
|
|
205
|
+
color: #ff0000;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
|
|
183
209
|
|
|
184
210
|
.json-display {
|
|
185
211
|
background-color: #2d2d2d; /* dark gray background */
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeaudit
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.5.0
|
|
4
4
|
Summary: Simplified static security checks for Python
|
|
5
5
|
Project-URL: Documentation, https://github.com/nocomplexity/codeaudit#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/nocomplexity/codeaudit/issues
|
|
@@ -64,6 +64,10 @@ Python Code Audit has the following features:
|
|
|
64
64
|
|
|
65
65
|
* **Inline Issue Reporting**: Shows potential security issues with line numbers and code snippets.
|
|
66
66
|
|
|
67
|
+
|
|
68
|
+
* **External Egress Detection**: Identifies embedded API keys and logic that enables communication with remote services, helping uncover hidden data exfiltration paths.
|
|
69
|
+
|
|
70
|
+
|
|
67
71
|
* **HTML Reports**: All output is saved in simple, static HTML reports viewable in any browser.
|
|
68
72
|
|
|
69
73
|
|
|
@@ -100,6 +104,7 @@ This will show all commands:
|
|
|
100
104
|
|
|
101
105
|
Python Code Audit - A modern Python security source code analyzer based on distrust.
|
|
102
106
|
|
|
107
|
+
|
|
103
108
|
Commands to evaluate Python source code:
|
|
104
109
|
Usage: codeaudit COMMAND <directory|package> [report.html]
|
|
105
110
|
|
|
@@ -108,7 +113,7 @@ Depending on the command, you must specify a local directory, a Python file, or
|
|
|
108
113
|
Commands:
|
|
109
114
|
overview Generates an overview report of code complexity and security indicators.
|
|
110
115
|
filescan Scans Python source code or PyPI packages for security weaknesses.
|
|
111
|
-
modulescan
|
|
116
|
+
modulescan Generate a report on known vulnerabilities in Python modules and packages.
|
|
112
117
|
checks Creates an HTML report of all implemented security checks.
|
|
113
118
|
version Prints the module version. Or use codeaudit [-v] [--v] [-version] or [--version].
|
|
114
119
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
codeaudit/__about__.py,sha256=
|
|
1
|
+
codeaudit/__about__.py,sha256=m0MoVjbAY6gx2X7P9BlRpPZOet3Ry3xAdoXoKNHrJXk,144
|
|
2
2
|
codeaudit/__init__.py,sha256=YGs6qU0BVHPGtXCS-vfBDLO4TOfJDLTWMgaFDTmi_Iw,157
|
|
3
3
|
codeaudit/altairplots.py,sha256=gBXN1_wxUmjzTNizvzbOeCKvUxpClGPdZmK7ICK1x68,4531
|
|
4
4
|
codeaudit/api_interfaces.py,sha256=zWJrLDM8b3b2-rN0gCoPdflEFMzKUz3M7PfXtXvDpd4,15358
|
|
@@ -6,17 +6,19 @@ codeaudit/api_reporting.py,sha256=W8eutTJ0d-TENbv5cCmAOfu4GEp_RwiQ4XU5FCmfkoI,17
|
|
|
6
6
|
codeaudit/checkmodules.py,sha256=aiF34KO-9HZDRgVBtSwVFdeUxT5_Ka5VtmlfgoLgNVs,5582
|
|
7
7
|
codeaudit/codeaudit.py,sha256=g2HzRX6a3fckKUhyRrk6n3-5qNdVYtZRI1gqQ-QNl10,3775
|
|
8
8
|
codeaudit/complexitycheck.py,sha256=A3_a5v-U0YQr80pWQwSVvOsY_eQtqwNkQf9Txr9mNtQ,3722
|
|
9
|
-
codeaudit/filehelpfunctions.py,sha256
|
|
9
|
+
codeaudit/filehelpfunctions.py,sha256=-5kIymEUcc7j0bRBS4XblvE3pbi3rWjkU5O2M_tinvM,4374
|
|
10
10
|
codeaudit/htmlhelpfunctions.py,sha256=-SMsyfF7TRIfJkrUqoJuh7AoG1RVrYFsZfFljoxVHXc,3246
|
|
11
11
|
codeaudit/issuevalidations.py,sha256=-WdaXT_R-P9w0JbQpJ5ngVoVhG9Yee2ri0aH5SoC1Ao,6404
|
|
12
|
+
codeaudit/privacy_lint.py,sha256=TNS_BnWFXv14PslK9mBsQLwt73Ujcn9FbI7TQSYT0k8,10252
|
|
12
13
|
codeaudit/pypi_package_scan.py,sha256=yxCXrRvjc4r0YsJYHvHJuJTyHC5QZl3sRQp73akCXx8,4723
|
|
13
|
-
codeaudit/reporting.py,sha256=
|
|
14
|
+
codeaudit/reporting.py,sha256=s3OuiPj6au5oELz-kmI6n-8NooJXjqvBLWKs4tzEg7s,38269
|
|
14
15
|
codeaudit/security_checks.py,sha256=wEO_A054zXmLccWGREi6cNADa4IgoOPxHsq-Je5iMIY,2167
|
|
15
|
-
codeaudit/simple.css,sha256=
|
|
16
|
+
codeaudit/simple.css,sha256=H7KT61oXJkVr9qXVrC5ME_Zph9jI-uR2IxOsXG1xs5k,4013
|
|
16
17
|
codeaudit/totals.py,sha256=b6OkzcMdqGKPwuGBKrwAeCxBOJxHa5FHauGWnEb-6zM,6387
|
|
17
18
|
codeaudit/data/sastchecks.csv,sha256=fIcyZgymCtAluPta9fTEk6a9DJ2AGJczZYRPUIQuSag,9738
|
|
18
|
-
codeaudit
|
|
19
|
-
codeaudit-1.
|
|
20
|
-
codeaudit-1.
|
|
21
|
-
codeaudit-1.
|
|
22
|
-
codeaudit-1.
|
|
19
|
+
codeaudit/data/secretslist.txt,sha256=2Jqt9B5UfcRNeNpys8okmXCn4SYkp9M3_rJrI-KXCbE,1891
|
|
20
|
+
codeaudit-1.5.0.dist-info/METADATA,sha256=ZWeMEYTu4ASLGJU5l8Stk8GjMcogzAFDF6NEdFsFmeA,7814
|
|
21
|
+
codeaudit-1.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
22
|
+
codeaudit-1.5.0.dist-info/entry_points.txt,sha256=7w6I8zii62nJHIIF30CRP5g1z8enMqF1pZEDdlw4HcQ,55
|
|
23
|
+
codeaudit-1.5.0.dist-info/licenses/LICENSE.txt,sha256=-5gWaMGKJ54oX8TYP7oeg2zITdTapzyWl9PP0tispuA,34674
|
|
24
|
+
codeaudit-1.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|