codeaudit 1.4.2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeaudit/__about__.py +1 -1
- codeaudit/api_interfaces.py +88 -28
- codeaudit/data/sastchecks.csv +3 -0
- codeaudit/data/secretslist.txt +136 -0
- codeaudit/filehelpfunctions.py +1 -1
- codeaudit/issuevalidations.py +1 -1
- codeaudit/privacy_lint.py +292 -0
- codeaudit/pypi_package_scan.py +1 -1
- codeaudit/reporting.py +491 -190
- codeaudit/security_checks.py +2 -2
- codeaudit/simple.css +31 -5
- codeaudit/suppression.py +233 -0
- {codeaudit-1.4.2.dist-info → codeaudit-1.6.0.dist-info}/METADATA +7 -2
- codeaudit-1.6.0.dist-info/RECORD +25 -0
- codeaudit-1.4.2.dist-info/RECORD +0 -22
- {codeaudit-1.4.2.dist-info → codeaudit-1.6.0.dist-info}/WHEEL +0 -0
- {codeaudit-1.4.2.dist-info → codeaudit-1.6.0.dist-info}/entry_points.txt +0 -0
- {codeaudit-1.4.2.dist-info → codeaudit-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
codeaudit/__about__.py
CHANGED
codeaudit/api_interfaces.py
CHANGED
|
@@ -19,6 +19,7 @@ from codeaudit.security_checks import perform_validations , ast_security_checks
|
|
|
19
19
|
from codeaudit.totals import overview_per_file , get_statistics , overview_count , total_modules
|
|
20
20
|
from codeaudit.checkmodules import get_all_modules , get_imported_modules_by_file , get_standard_library_modules , check_module_vulnerability
|
|
21
21
|
from codeaudit.pypi_package_scan import get_pypi_download_info , get_package_source
|
|
22
|
+
from codeaudit.suppression import filter_sast_results
|
|
22
23
|
|
|
23
24
|
from pathlib import Path
|
|
24
25
|
import json
|
|
@@ -27,6 +28,7 @@ import pandas as pd
|
|
|
27
28
|
import platform
|
|
28
29
|
from collections import Counter
|
|
29
30
|
|
|
31
|
+
|
|
30
32
|
import altair as alt
|
|
31
33
|
|
|
32
34
|
def version():
|
|
@@ -35,7 +37,7 @@ def version():
|
|
|
35
37
|
return {"name" : "Python_Code_Audit",
|
|
36
38
|
"version" : ca_version}
|
|
37
39
|
|
|
38
|
-
def filescan(input_path):
|
|
40
|
+
def filescan(input_path , nosec=False):
|
|
39
41
|
"""
|
|
40
42
|
Scan a Python source file, a local directory, or a **PyPI package** from PyPI.org for
|
|
41
43
|
security weaknesses and return the results as a JSON-serializable
|
|
@@ -102,14 +104,14 @@ def filescan(input_path):
|
|
|
102
104
|
if file_path.is_dir(): #local directory scan
|
|
103
105
|
package_name = get_filename_from_path(input_path)
|
|
104
106
|
output |= {"package_name": package_name}
|
|
105
|
-
scan_output = _codeaudit_directory_scan(input_path)
|
|
107
|
+
scan_output = _codeaudit_directory_scan(input_path, nosec_flag=nosec )
|
|
106
108
|
output |= scan_output
|
|
107
109
|
return output
|
|
108
110
|
elif file_path.suffix.lower() == ".py" and file_path.is_file() and is_ast_parsable(input_path): #check on parseable single Python file
|
|
109
111
|
# do a file check
|
|
110
112
|
file_information = overview_per_file(input_path)
|
|
111
113
|
module_information = get_modules(input_path) # modules per file
|
|
112
|
-
scan_output = _codeaudit_scan(input_path)
|
|
114
|
+
scan_output = _codeaudit_scan(input_path , nosec_flag=nosec)
|
|
113
115
|
file_output["0"] = file_information | module_information | scan_output #there is only 1 file , so index 0 equals as for package to make functionality that use the output that works on the dict or json can equal for a package or a single file!
|
|
114
116
|
output |= { "file_security_info" : file_output}
|
|
115
117
|
return output
|
|
@@ -122,7 +124,7 @@ def filescan(input_path):
|
|
|
122
124
|
output |= {"package_name": package_name,
|
|
123
125
|
"package_release": release}
|
|
124
126
|
try:
|
|
125
|
-
scan_output = _codeaudit_directory_scan(src_dir)
|
|
127
|
+
scan_output = _codeaudit_directory_scan(src_dir , nosec_flag=nosec)
|
|
126
128
|
output |= scan_output
|
|
127
129
|
finally:
|
|
128
130
|
# Cleaning up temp directory
|
|
@@ -132,20 +134,24 @@ def filescan(input_path):
|
|
|
132
134
|
# It's neither a directory nor a valid Python file:
|
|
133
135
|
return {"Error" : "File is not a *.py file, does not exist or is not a valid directory path towards a Python package."}
|
|
134
136
|
|
|
135
|
-
def _codeaudit_scan(filename):
|
|
137
|
+
def _codeaudit_scan(filename , nosec_flag):
|
|
136
138
|
"""Internal helper function to do a SAST scan on a single file
|
|
137
139
|
To scan a file, or Python package using the API interface, use the `filescan` API call!
|
|
138
140
|
"""
|
|
139
141
|
#get the file name
|
|
140
|
-
name_of_file = get_filename_from_path(filename)
|
|
141
|
-
|
|
142
|
+
name_of_file = get_filename_from_path(filename)
|
|
143
|
+
if not nosec_flag: #no filtering on reviewed items with markers in code
|
|
144
|
+
sast_data = perform_validations(filename)
|
|
145
|
+
else:
|
|
146
|
+
unfiltered_scan_output = perform_validations(filename) #scans for weaknesses in the file
|
|
147
|
+
sast_data = filter_sast_results(unfiltered_scan_output)
|
|
142
148
|
sast_data_results = sast_data["result"]
|
|
143
149
|
sast_result = dict(sorted(sast_data_results.items()))
|
|
144
150
|
output = { "file_name" : name_of_file ,
|
|
145
151
|
"sast_result": sast_result}
|
|
146
152
|
return output
|
|
147
153
|
|
|
148
|
-
def _codeaudit_directory_scan(input_path):
|
|
154
|
+
def _codeaudit_directory_scan(input_path , nosec_flag):
|
|
149
155
|
"""Performs a scan on a local directory
|
|
150
156
|
Function is also used with scanning directory PyPI.org packages, since in that case a tmp directory is used
|
|
151
157
|
"""
|
|
@@ -160,7 +166,7 @@ def _codeaudit_directory_scan(input_path):
|
|
|
160
166
|
for i,file in enumerate(files_to_check):
|
|
161
167
|
file_information = overview_per_file(file)
|
|
162
168
|
module_information = get_modules(file) # modules per file
|
|
163
|
-
scan_output = _codeaudit_scan(file)
|
|
169
|
+
scan_output = _codeaudit_scan(file , nosec_flag )
|
|
164
170
|
file_output[i] = file_information | module_information | scan_output
|
|
165
171
|
output |= { "file_security_info" : file_output}
|
|
166
172
|
return output
|
|
@@ -216,36 +222,90 @@ def read_input_file(filename):
|
|
|
216
222
|
raise json.JSONDecodeError(f"Invalid JSON in file: {filename}", e.doc, e.pos)
|
|
217
223
|
|
|
218
224
|
|
|
219
|
-
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def get_weakness_counts(input_file, nosec=False):
|
|
220
229
|
"""
|
|
221
|
-
Analyze a Python file or package(directory) and count occurrences of code
|
|
230
|
+
Analyze a Python file or package (directory) and count occurrences of code weaknesses.
|
|
222
231
|
|
|
223
|
-
This function uses `filescan` API call to retrieve security-related information
|
|
224
|
-
|
|
225
|
-
appears across all scanned files.
|
|
232
|
+
This function uses the `filescan` API call to retrieve security-related information
|
|
233
|
+
and aggregates the total number of occurrences per weakness construct.
|
|
226
234
|
|
|
227
235
|
Args:
|
|
228
|
-
input_file (str): Path to the file or directory(package) to scan.
|
|
236
|
+
input_file (str): Path to the file or directory (package) to scan.
|
|
237
|
+
nosec (bool): Whether to suppress findings marked with nosec comments.
|
|
229
238
|
|
|
230
239
|
Returns:
|
|
231
240
|
dict: A dictionary mapping each construct name (str) to the total
|
|
232
|
-
number of occurrences (int)
|
|
241
|
+
number of occurrences (int).
|
|
242
|
+
|
|
243
|
+
Raises:
|
|
244
|
+
ValueError: If the scan fails or returns an error result.
|
|
245
|
+
TypeError: If the scan result has an unexpected structure.
|
|
246
|
+
"""
|
|
247
|
+
scan_result = filescan(input_file, nosec)
|
|
248
|
+
|
|
249
|
+
# Explicitly handle scan failure or unexpected return
|
|
250
|
+
if not isinstance(scan_result, dict):
|
|
251
|
+
raise ValueError("filescan() did not return a valid result dictionary")
|
|
252
|
+
|
|
253
|
+
if "Error" in scan_result:
|
|
254
|
+
raise ValueError(scan_result["Error"])
|
|
255
|
+
|
|
256
|
+
file_security_info = scan_result.get("file_security_info")
|
|
257
|
+
if not isinstance(file_security_info, dict):
|
|
258
|
+
# Valid scan, but no findings (e.g. empty or non-parsable input)
|
|
259
|
+
return {}
|
|
233
260
|
|
|
234
|
-
Notes:
|
|
235
|
-
- The `filescan` function is expected to return a dictionary with
|
|
236
|
-
a 'file_security_info' key, containing per-file information.
|
|
237
|
-
- Each file's 'sast_result' should be a dictionary mapping
|
|
238
|
-
construct names to lists of occurrences.
|
|
239
|
-
"""
|
|
240
|
-
scan_result = filescan(input_file)
|
|
241
261
|
counter = Counter()
|
|
262
|
+
|
|
263
|
+
for file_info in file_security_info.values():
|
|
264
|
+
if not isinstance(file_info, dict):
|
|
265
|
+
continue
|
|
266
|
+
|
|
267
|
+
sast_result = file_info.get("sast_result", {})
|
|
268
|
+
if not isinstance(sast_result, dict):
|
|
269
|
+
continue
|
|
270
|
+
|
|
271
|
+
for construct, occurrences in sast_result.items():
|
|
272
|
+
if isinstance(occurrences, (list, tuple)):
|
|
273
|
+
counter[construct] += len(occurrences)
|
|
274
|
+
|
|
275
|
+
return dict(counter)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
# def get_weakness_counts(input_file , nosec=False):
|
|
280
|
+
# """
|
|
281
|
+
# Analyze a Python file or package(directory) and count occurrences of code weaknesses.
|
|
282
|
+
|
|
283
|
+
# This function uses `filescan` API call to retrieve security-related information
|
|
284
|
+
# about the input file. This returns a dict. Then it counts how many times each code construct
|
|
285
|
+
# appears across all scanned files.
|
|
286
|
+
|
|
287
|
+
# Args:
|
|
288
|
+
# input_file (str): Path to the file or directory(package) to scan.
|
|
289
|
+
|
|
290
|
+
# Returns:
|
|
291
|
+
# dict: A dictionary mapping each construct name (str) to the total
|
|
292
|
+
# number of occurrences (int) across all scanned files.
|
|
293
|
+
|
|
294
|
+
# Notes:
|
|
295
|
+
# - The `filescan` function is expected to return a dictionary with
|
|
296
|
+
# a 'file_security_info' key, containing per-file information.
|
|
297
|
+
# - Each file's 'sast_result' should be a dictionary mapping
|
|
298
|
+
# construct names to lists of occurrences.
|
|
299
|
+
# """
|
|
300
|
+
# scan_result = filescan(input_file, nosec)
|
|
301
|
+
# counter = Counter()
|
|
242
302
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
303
|
+
# for file_info in scan_result.get('file_security_info', {}).values():
|
|
304
|
+
# sast_result = file_info.get('sast_result', {})
|
|
305
|
+
# for construct, occurrence in sast_result.items(): #occurrence is times the construct appears in a single file
|
|
306
|
+
# counter[construct] += len(occurrence)
|
|
247
307
|
|
|
248
|
-
|
|
308
|
+
# return dict(counter)
|
|
249
309
|
|
|
250
310
|
def get_modules(filename):
|
|
251
311
|
"""Gets modules of a Python file """
|
codeaudit/data/sastchecks.csv
CHANGED
|
@@ -47,6 +47,9 @@ Subprocess Usage,subprocess.call,High,Requires careful input validation to preve
|
|
|
47
47
|
Subprocess Usage,subprocess.check_call,High,Requires careful input validation to prevent command injection vulnerabilities.
|
|
48
48
|
Subprocess Usage,subprocess.Popen,Medium,Requires careful input validation to prevent command injection vulnerabilities.
|
|
49
49
|
Subprocess Usage,subprocess.run,Medium,Requires careful input validation to prevent command injection vulnerabilities.
|
|
50
|
+
Subprocess Usage,subprocess.check_output,Medium,Requires careful input validation to prevent command injection vulnerabilities.
|
|
51
|
+
Subprocess Usage,subprocess.getstatusoutput,Medium,Requires careful input validation to prevent command injection vulnerabilities.
|
|
52
|
+
Subprocess Usage,subprocess.getoutput,Medium,Requires careful input validation to prevent command injection vulnerabilities.
|
|
50
53
|
Tarfile Extraction,tarfile.TarFile,High,Vulnerable to path traversal attacks if used with untrusted archives.
|
|
51
54
|
Base64 Encoding ,base64,Low,"Base64 encoding is not for security. It only visually hides data and provides no confidentiality. Often used to obfuscate malware in code."
|
|
52
55
|
XML-RPC Client,xmlrpc.client,High,Vulnerable to denial-of-service via decompression bombs.
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
|
|
2
|
+
_KEY
|
|
3
|
+
_passwd
|
|
4
|
+
_PASSWORD
|
|
5
|
+
access_key
|
|
6
|
+
access_key_id
|
|
7
|
+
ACCESS_SECRET
|
|
8
|
+
ACCESS_TOKEN
|
|
9
|
+
AccountKey
|
|
10
|
+
AI21_API_KEY
|
|
11
|
+
ALIBABA_CLOUD_ACCESS_KEY_ID
|
|
12
|
+
ALIBABA_CLOUD_ACCESS_KEY_SECRET
|
|
13
|
+
ANTHROPIC_API_KEY
|
|
14
|
+
api_key
|
|
15
|
+
API_TOKEN
|
|
16
|
+
ApiKey
|
|
17
|
+
ApiSecret
|
|
18
|
+
APP_KEY
|
|
19
|
+
APP_SECRET
|
|
20
|
+
AUTH
|
|
21
|
+
auth_key
|
|
22
|
+
auth_password
|
|
23
|
+
AUTH_SECRET
|
|
24
|
+
auth_token
|
|
25
|
+
AUTH_TOKEN
|
|
26
|
+
Authorization
|
|
27
|
+
AWS_ACCESS_KEY_ID
|
|
28
|
+
aws_account_id
|
|
29
|
+
aws_secret_access_key
|
|
30
|
+
AWS_SECRET_ACCESS_KEY
|
|
31
|
+
aws_session_token
|
|
32
|
+
AWS_SESSION_TOKEN
|
|
33
|
+
AZURE_OPENAI_API_KEY
|
|
34
|
+
AZURE_OPENAI_API_VERSION
|
|
35
|
+
AZURE_OPENAI_ENDPOINT
|
|
36
|
+
AzureStorageKey
|
|
37
|
+
BAIDU_API_KEY
|
|
38
|
+
BAIDU_SECRET_KEY
|
|
39
|
+
BASIC_AUTH
|
|
40
|
+
BEARER
|
|
41
|
+
BEARER_TOKEN
|
|
42
|
+
BEDROCK_REGION
|
|
43
|
+
CLIENT_ID
|
|
44
|
+
client_key
|
|
45
|
+
CLIENT_SECRET
|
|
46
|
+
ClientSecret
|
|
47
|
+
COHERE_API_KEY
|
|
48
|
+
CONNECTION_STRING
|
|
49
|
+
credential
|
|
50
|
+
credentials
|
|
51
|
+
CREDENTIALS_JSON
|
|
52
|
+
creds
|
|
53
|
+
CSRF_TOKEN
|
|
54
|
+
DASHSCOPE_API_KEY
|
|
55
|
+
DEEPSEEK_API_KEY
|
|
56
|
+
DEPLOY_KEY
|
|
57
|
+
encryptedPassword
|
|
58
|
+
ENCRYPTION_SECRET
|
|
59
|
+
EncryptionKey
|
|
60
|
+
FERNET_KEY
|
|
61
|
+
FIREWORKS_API_KEY
|
|
62
|
+
GCP_SERVICE_ACCOUNT_KEY
|
|
63
|
+
GEMINI_API_KEY
|
|
64
|
+
get_api_token
|
|
65
|
+
get_secret
|
|
66
|
+
get_token
|
|
67
|
+
GITHUB_TOKEN
|
|
68
|
+
GOOGLE_API_KEY
|
|
69
|
+
GOOGLE_API_KEY
|
|
70
|
+
HMAC_KEY
|
|
71
|
+
HUGGINGFACE_API_TOKEN
|
|
72
|
+
IBM_WATSONX_API_KEY
|
|
73
|
+
IBM_WATSONX_PROJECT_ID
|
|
74
|
+
ID_TOKEN
|
|
75
|
+
INTEGRATION_KEY
|
|
76
|
+
JWT_ACCESS_TOKEN
|
|
77
|
+
JWT_ALGORITHM
|
|
78
|
+
JWT_AUDIENCE
|
|
79
|
+
JWT_ISSUER
|
|
80
|
+
JWT_PRIVATE_KEY
|
|
81
|
+
JWT_PUBLIC_KEY
|
|
82
|
+
JWT_REFRESH_TOKEN
|
|
83
|
+
JWT_SECRET
|
|
84
|
+
JWT_SECRET_KEY
|
|
85
|
+
JWT_SIGNING_KEY
|
|
86
|
+
JWT_TOKEN
|
|
87
|
+
KEYFILE
|
|
88
|
+
KUBE_TOKEN
|
|
89
|
+
MASTER_KEY
|
|
90
|
+
MISTRAL_API_KEY
|
|
91
|
+
MLAB_PASS
|
|
92
|
+
MOONSHOT_API_KEY
|
|
93
|
+
NetworkCredential
|
|
94
|
+
NVIDIA_API_KEY
|
|
95
|
+
OAUTH_TOKEN
|
|
96
|
+
OLLAMA_API_BASE
|
|
97
|
+
OPENAI_API_KEY
|
|
98
|
+
OPENROUTER_API_KEY
|
|
99
|
+
OTEL_EXPORTER
|
|
100
|
+
PASSPHRASE
|
|
101
|
+
password
|
|
102
|
+
POSTGRES_PASSWORD
|
|
103
|
+
PPLX_API_KEY
|
|
104
|
+
PRIVATE_KEY
|
|
105
|
+
PRIVATE_TOKEN
|
|
106
|
+
REDIS_PASSWORD
|
|
107
|
+
REFRESH_TOKEN
|
|
108
|
+
REPLICATE_API_TOKEN
|
|
109
|
+
ROOT_PASSWORD
|
|
110
|
+
RSA_PRIVATE_KEY
|
|
111
|
+
SAS_TOKEN
|
|
112
|
+
secret
|
|
113
|
+
secret_key
|
|
114
|
+
secret_key_base
|
|
115
|
+
SECRET_TOKEN
|
|
116
|
+
SERVICE_ACCOUNT_KEY
|
|
117
|
+
SESSION_KEY
|
|
118
|
+
SIGNING_KEY
|
|
119
|
+
SILICONFLOW_API_KEY
|
|
120
|
+
SLACK_TOKEN
|
|
121
|
+
SMTP_PASSWORD
|
|
122
|
+
SSH_KEY
|
|
123
|
+
static_key
|
|
124
|
+
STRIPE_API_KEY
|
|
125
|
+
SYSTEM_PASSWORD
|
|
126
|
+
TENCENT_HUNYUAN_API_KEY
|
|
127
|
+
TLS_PRIVATE_KEY
|
|
128
|
+
TOGETHER_API_KEY
|
|
129
|
+
TOKEN
|
|
130
|
+
VAULT_TOKEN
|
|
131
|
+
WEBHOOK_SECRET
|
|
132
|
+
WEBHOOK_TOKEN
|
|
133
|
+
X_API_KEY
|
|
134
|
+
XAI_API_KEY
|
|
135
|
+
YI_API_KEY
|
|
136
|
+
ZHIPUAI_API_KEY
|
codeaudit/filehelpfunctions.py
CHANGED
|
@@ -24,7 +24,7 @@ def read_in_source_file(file_path):
|
|
|
24
24
|
|
|
25
25
|
if file_path.is_dir():
|
|
26
26
|
print(
|
|
27
|
-
"Error: The given path is a directory.\nUse 'codeaudit
|
|
27
|
+
"Error: The given path is a directory.\nUse 'codeaudit filescan' to security audit Python files in a directory or PyPI package.\nThe 'codeaudit modulescan' command works per file only, not on a directory.\nUse codeaudit -h for help"
|
|
28
28
|
)
|
|
29
29
|
sys.exit(1)
|
|
30
30
|
|
codeaudit/issuevalidations.py
CHANGED
|
@@ -78,7 +78,7 @@ def find_constructs(source_code, constructs_to_detect):
|
|
|
78
78
|
elif node.func.attr in ('input') and 'builtins' in core_modules: #catch obfuscating construct with builtins module
|
|
79
79
|
construct = 'input'
|
|
80
80
|
elif node.func.attr in ('compile') and 'builtins' in core_modules: #catch obfuscating construct with builtins module
|
|
81
|
-
construct = 'compile'
|
|
81
|
+
construct = 'compile'
|
|
82
82
|
elif isinstance(func, ast.Name):
|
|
83
83
|
resolved = alias_map.get(func.id, func.id)
|
|
84
84
|
if resolved in constructs_to_detect:
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
from codeaudit.api_interfaces import version
|
|
2
|
+
from codeaudit.filehelpfunctions import get_filename_from_path , collect_python_source_files , is_ast_parsable , read_in_source_file
|
|
3
|
+
from codeaudit.pypi_package_scan import get_pypi_download_info , get_package_source
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import ast
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import datetime
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
from importlib.resources import files
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
SECRETS_LIST = files("codeaudit.data").joinpath("secretslist.txt")
|
|
16
|
+
|
|
17
|
+
def secret_scan(input_path):
    """Scan a Python file, a local directory, or a PyPI package for secret-like names.

    The input is resolved in this order:

    1. An existing directory: every Python file found in it is scanned.
    2. An existing, AST-parsable ``*.py`` file: that single file is scanned.
    3. Anything else is looked up with ``get_pypi_download_info``; when a
       download URL is available the package source is fetched into a
       temporary directory, scanned, and the temporary directory is removed
       again.

    Args:
        input_path (str): Path to a local directory, path to a Python file,
            or the name of a PyPI package.

    Returns:
        dict: The output of ``version()`` plus a ``"generated_on"`` timestamp
        (``YYYY-MM-DD HH:MM``, local time), extended with:

        - directory: ``"package_name"`` and the result of the directory scan
          (``"file_privacy_check"``, or ``"Error"`` when that scan reports one);
        - single file: ``"file_name"`` (``{"FileName": ...}``) and
          ``"file_privacy_check"`` with the single result stored under key
          ``"0"`` so consumers can treat files and packages alike;
        - PyPI package: ``"package_name"``, ``"package_release"`` and the
          result of the directory scan.

        When the input cannot be resolved, or a PyPI package has no download
        URL, a dictionary with only an ``"Error"`` key is returned.
    """
    file_path = Path(input_path)
    timestamp_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    output = version() | {"generated_on": timestamp_str}

    # Local directory scan.
    if file_path.is_dir():
        output |= {"package_name": get_filename_from_path(input_path)}
        output |= _codeaudit_directory_spyscan(input_path)
        return output

    # Single, parsable Python file.
    if file_path.suffix.lower() == ".py" and file_path.is_file() and is_ast_parsable(input_path):
        name_dict = {"FileName": get_filename_from_path(input_path)}
        # Only one file, but it is stored under index "0" so the structure
        # matches the one produced for a package.
        file_output = {"0": spy_check(input_path)}
        output |= {"file_name": name_dict,
                   "file_privacy_check": file_output}
        return output

    # Neither a directory nor a Python file: treat the input as a PyPI package name.
    pypi_data = get_pypi_download_info(input_path)
    if pypi_data:
        url = pypi_data['download_url']
        release = pypi_data['release']
        if url is None:
            # Previously this path fell through and returned None; report it
            # explicitly so callers always receive a dictionary.
            return {"Error": f"No download URL available for PyPI package {input_path}."}

        src_dir, tmp_handle = get_package_source(url)
        output |= {"package_name": input_path,
                   "package_release": release}
        try:
            output |= _codeaudit_directory_spyscan(src_dir)
        finally:
            # Always remove the downloaded sources, even when the scan fails.
            tmp_handle.cleanup()
        return output

    return {"Error" : "File is not a *.py file, does not exist or is not a valid directory path towards a Python package."}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def spy_check(file):
    """Scan one source file for secret-like names and label the findings.

    Args:
        file: Path of the Python source file to read and scan.

    Returns:
        dict: ``{"file_name": <name derived from the path>,
        "privacy_check_result": <findings from collect_secret_values>}``.
    """
    findings = collect_secret_values(read_in_source_file(file))
    return {
        "file_name": get_filename_from_path(file),
        "privacy_check_result": findings,
    }
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _codeaudit_directory_spyscan(input_path):
    """Run the secret scan on every Python file found under a directory.

    Also used for PyPI packages, which are unpacked into a temporary
    directory before being scanned.

    Args:
        input_path: Directory to search for Python source files.

    Returns:
        dict: ``{"file_privacy_check": {index: spy_check(file), ...}}`` with
        one entry per collected file (indexed from 0), or
        ``{"Error": <message>}`` when no Python files were collected.
    """
    files_to_check = collect_python_source_files(input_path)
    if not files_to_check:
        return {"Error": f'Directory path {input_path} contains no Python files.'}

    # A directory holding exactly one Python file is a valid scan target too;
    # the former ``> 1`` check reported it as containing no Python files.
    file_output = {i: spy_check(file) for i, file in enumerate(files_to_check)}
    return {"file_privacy_check": file_output}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def load_secrets_list(filename=SECRETS_LIST):
    """Read the secret-name patterns from a text file.

    Each line is stripped of surrounding whitespace; blank lines and lines
    starting with ``#`` are skipped. The remaining entries are lowercased.

    Args:
        filename: File to read (UTF-8); defaults to ``SECRETS_LIST``.

    Returns:
        list[str]: Lowercased patterns in file order.
    """
    with open(filename, "r", encoding="utf-8") as handle:
        stripped = (raw.strip() for raw in handle)
        return [
            entry.lower()
            for entry in stripped
            if entry and not entry.startswith("#")
        ]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def match_secret(secrets, name, value):
    """Return the first secret pattern found as a whole word in name or value.

    ``name`` and ``value`` are converted with ``str()`` and lowercased, so
    ``secrets`` is expected to hold lowercase patterns already. Patterns are
    tried from shortest to longest; the first one that matches on regex word
    boundaries (``\\b``) in either the name or the value is returned.

    Args:
        secrets: Iterable of lowercase pattern strings.
        name: Identifier (or its textual representation) to inspect.
        value: Associated value to inspect; any object, it is stringified.

    Returns:
        The matching pattern, or None when nothing matches.
    """
    haystacks = (str(name).lower(), str(value).lower())

    # Shortest pattern first, so the most generic tag is the one reported.
    for candidate in sorted(secrets, key=len):
        word = rf"\b{re.escape(candidate)}\b"
        if any(re.search(word, text) for text in haystacks):
            return candidate

    return None
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def collect_secret_values(source_code, secrets_file=SECRETS_LIST):
    """Find assignments and keyword arguments whose name or value looks like a secret.

    The source is parsed with ``ast`` and every node is visited. A finding is
    recorded when ``match_secret`` matches a pattern from ``secrets_file``
    against the target/keyword name or against a textual form of the value.

    Inspected constructs:
        - any node with a ``targets`` attribute (``ast.Assign``; note that
          ``ast.Delete`` also has ``targets`` and is visited with value None),
        - annotated assignments (``ast.AnnAssign``),
        - named keyword arguments of calls.

    Args:
        source_code (str): Python source text.
        secrets_file: File with secret-name patterns, read with
            ``load_secrets_list``.

    Returns:
        list[dict]: Findings sorted by ``"lineno"``, each with ``"lineno"``,
        ``"code"`` (the line plus its neighbouring lines) and ``"matched"``
        (the pattern that matched).
    """
    secrets = load_secrets_list(secrets_file)
    source_lines = source_code.splitlines()
    results = []

    # -------------------------
    # Helpers
    # -------------------------
    def get_constant(node):
        # Value of a constant-like node, or None when it has no `value`.
        return getattr(node, "value", None)

    def is_os_environ(node):
        # True for a subscript whose base is the attribute `os.environ`.
        base = getattr(node, "value", None)
        if getattr(base, "attr", None) != "environ":
            return False
        return getattr(getattr(base, "value", None), "id", None) == "os"

    def get_target_repr(node):
        # Plain names are returned as-is; attributes and subscripts as source text.
        if hasattr(node, "id"):
            return node.id
        if hasattr(node, "attr") or hasattr(node, "slice"):
            return ast.unparse(node)
        return None

    def classify_value(node):
        # Reduce a value node to something match_secret can stringify.
        if node is None:
            return None
        if isinstance(node, ast.Constant):
            return node.value
        # os.environ["X"] -> "X"
        if hasattr(node, "slice") and is_os_environ(node):
            return get_constant(node.slice)
        # f("X", ...) -> "X" when the first positional argument is a constant
        if hasattr(node, "func") and getattr(node, "args", None):
            head = node.args[0]
            if isinstance(head, ast.Constant):
                return head.value
        # Everything else is represented by its source text.
        return ast.unparse(node)

    def get_original_line(node):
        # Up to three lines of context: the one before, the node's line, the one after.
        lineno = getattr(node, "lineno", None)
        if lineno is None:
            return None

        picks = []
        if lineno > 1:
            picks.append(lineno - 2)
        if 1 <= lineno <= len(source_lines):
            picks.append(lineno - 1)
        if lineno < len(source_lines):
            picks.append(lineno)

        return "\n".join(source_lines[idx].rstrip() for idx in picks)

    def add_value(name, value_node, node):
        # Record a finding only when a secret pattern matches.
        matched = match_secret(secrets, name, classify_value(value_node))
        if matched is None:
            return
        results.append(
            {
                "lineno": getattr(node, "lineno", None),
                "code": get_original_line(node),
                "matched": matched,
            }
        )

    # -------------------------
    # Walk all AST nodes
    # -------------------------
    for node in ast.walk(ast.parse(source_code)):

        # Assignments (any node that carries `targets`)
        for target in getattr(node, "targets", []):
            target_name = get_target_repr(target)
            if target_name:
                add_value(target_name, getattr(node, "value", None), node)

        # Annotated assignments
        if isinstance(node, ast.AnnAssign):
            target_name = get_target_repr(node.target)
            if target_name:
                add_value(target_name, getattr(node, "value", None), node)

        # Function calls (keyword arguments only; `**kwargs` has no arg name)
        if isinstance(node, ast.Call):
            for keyword in node.keywords:
                if keyword.arg:
                    add_value(keyword.arg, keyword.value, keyword)

    return sorted(results, key=lambda item: item["lineno"])
|
|
269
|
+
|
|
270
|
+
def has_privacy_findings(data):
    """Tell whether a scan result contains at least one finding.

    Args:
        data (dict): Result dictionary with an optional
            ``"file_privacy_check"`` mapping of per-file results.

    Returns:
        bool: True if any per-file entry has a non-empty
        ``"privacy_check_result"``, otherwise False (also when the
        ``"file_privacy_check"`` key is absent).
    """
    scanned = data.get("file_privacy_check", {})
    # An empty or missing result list is falsy, so truthiness is sufficient.
    return any(info.get("privacy_check_result") for info in scanned.values())
|
|
283
|
+
|
|
284
|
+
def count_privacy_check_results(data):
    """Count the findings in a result dictionary produced by ``secret_scan``.

    Findings are summed over every entry of ``"file_privacy_check"``, so the
    count is correct for single-file results (stored under key ``"0"``) as
    well as for directory or package results (stored under integer indexes).

    Args:
        data (dict): Result dictionary; entries may lack a
            ``"privacy_check_result"`` list.

    Returns:
        int: Total number of findings; 0 when ``"file_privacy_check"`` is
        absent (for example for an error result).
    """
    scanned = data.get("file_privacy_check", {})
    return sum(
        len(info.get("privacy_check_result") or ())
        for info in scanned.values()
    )
|
codeaudit/pypi_package_scan.py
CHANGED
|
@@ -104,7 +104,7 @@ def get_package_source(url, nocxheaders=NOCX_HEADERS, nocxtimeout=10):
|
|
|
104
104
|
f.write(content)
|
|
105
105
|
|
|
106
106
|
with tarfile.open(tar_path, "r:gz") as tar:
|
|
107
|
-
tar.extractall(path=temp_dir,filter='data') #Possible risks are mitigated as far as possible, see architecture notes.
|
|
107
|
+
tar.extractall(path=temp_dir,filter='data') # nosec Possible risks are mitigated as far as possible, see architecture notes.
|
|
108
108
|
|
|
109
109
|
return temp_dir, tmpdir_obj # return both so caller controls lifetime
|
|
110
110
|
|