codeaudit 1.4.2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeaudit/__about__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Maikel Mardjan <mike@bm-support.org>
2
2
  #
3
3
  # SPDX-License-Identifier: GPL-3.0-or-later
4
- __version__ = "1.4.2"
4
+ __version__ = "1.6.0"
@@ -19,6 +19,7 @@ from codeaudit.security_checks import perform_validations , ast_security_checks
19
19
  from codeaudit.totals import overview_per_file , get_statistics , overview_count , total_modules
20
20
  from codeaudit.checkmodules import get_all_modules , get_imported_modules_by_file , get_standard_library_modules , check_module_vulnerability
21
21
  from codeaudit.pypi_package_scan import get_pypi_download_info , get_package_source
22
+ from codeaudit.suppression import filter_sast_results
22
23
 
23
24
  from pathlib import Path
24
25
  import json
@@ -27,6 +28,7 @@ import pandas as pd
27
28
  import platform
28
29
  from collections import Counter
29
30
 
31
+
30
32
  import altair as alt
31
33
 
32
34
  def version():
@@ -35,7 +37,7 @@ def version():
35
37
  return {"name" : "Python_Code_Audit",
36
38
  "version" : ca_version}
37
39
 
38
- def filescan(input_path):
40
+ def filescan(input_path , nosec=False):
39
41
  """
40
42
  Scan a Python source file, a local directory, or a **PyPI package** from PyPI.org for
41
43
  security weaknesses and return the results as a JSON-serializable
@@ -102,14 +104,14 @@ def filescan(input_path):
102
104
  if file_path.is_dir(): #local directory scan
103
105
  package_name = get_filename_from_path(input_path)
104
106
  output |= {"package_name": package_name}
105
- scan_output = _codeaudit_directory_scan(input_path)
107
+ scan_output = _codeaudit_directory_scan(input_path, nosec_flag=nosec )
106
108
  output |= scan_output
107
109
  return output
108
110
  elif file_path.suffix.lower() == ".py" and file_path.is_file() and is_ast_parsable(input_path): #check on parseable single Python file
109
111
  # do a file check
110
112
  file_information = overview_per_file(input_path)
111
113
  module_information = get_modules(input_path) # modules per file
112
- scan_output = _codeaudit_scan(input_path)
114
+ scan_output = _codeaudit_scan(input_path , nosec_flag=nosec)
113
115
  file_output["0"] = file_information | module_information | scan_output #there is only 1 file , so index 0 equals as for package to make functionality that use the output that works on the dict or json can equal for a package or a single file!
114
116
  output |= { "file_security_info" : file_output}
115
117
  return output
@@ -122,7 +124,7 @@ def filescan(input_path):
122
124
  output |= {"package_name": package_name,
123
125
  "package_release": release}
124
126
  try:
125
- scan_output = _codeaudit_directory_scan(src_dir)
127
+ scan_output = _codeaudit_directory_scan(src_dir , nosec_flag=nosec)
126
128
  output |= scan_output
127
129
  finally:
128
130
  # Cleaning up temp directory
@@ -132,20 +134,24 @@ def filescan(input_path):
132
134
  # Its not a directory nor a valid Python file:
133
135
  return {"Error" : "File is not a *.py file, does not exist or is not a valid directory path towards a Python package."}
134
136
 
135
- def _codeaudit_scan(filename):
137
+ def _codeaudit_scan(filename , nosec_flag):
136
138
  """Internal helper function to do a SAST scan on a single file
137
139
  To scan a file, or Python package using the API interface, use the `filescan` API call!
138
140
  """
139
141
  #get the file name
140
- name_of_file = get_filename_from_path(filename)
141
- sast_data = perform_validations(filename)
142
+ name_of_file = get_filename_from_path(filename)
143
+ if not nosec_flag: #no filtering on reviewed items with markers in code
144
+ sast_data = perform_validations(filename)
145
+ else:
146
+ unfiltered_scan_output = perform_validations(filename) #scans for weaknesses in the file
147
+ sast_data = filter_sast_results(unfiltered_scan_output)
142
148
  sast_data_results = sast_data["result"]
143
149
  sast_result = dict(sorted(sast_data_results.items()))
144
150
  output = { "file_name" : name_of_file ,
145
151
  "sast_result": sast_result}
146
152
  return output
147
153
 
148
- def _codeaudit_directory_scan(input_path):
154
+ def _codeaudit_directory_scan(input_path , nosec_flag):
149
155
  """Performs a scan on a local directory
150
156
  Function is also used with scanning directory PyPI.org packages, since in that case a tmp directory is used
151
157
  """
@@ -160,7 +166,7 @@ def _codeaudit_directory_scan(input_path):
160
166
  for i,file in enumerate(files_to_check):
161
167
  file_information = overview_per_file(file)
162
168
  module_information = get_modules(file) # modules per file
163
- scan_output = _codeaudit_scan(file)
169
+ scan_output = _codeaudit_scan(file , nosec_flag )
164
170
  file_output[i] = file_information | module_information | scan_output
165
171
  output |= { "file_security_info" : file_output}
166
172
  return output
@@ -216,36 +222,90 @@ def read_input_file(filename):
216
222
  raise json.JSONDecodeError(f"Invalid JSON in file: {filename}", e.doc, e.pos)
217
223
 
218
224
 
219
- def get_construct_counts(input_file):
225
+
226
+
227
+
228
+ def get_weakness_counts(input_file, nosec=False):
220
229
  """
221
- Analyze a Python file or package(directory) and count occurrences of code constructs (aka weaknesses).
230
+ Analyze a Python file or package (directory) and count occurrences of code weaknesses.
222
231
 
223
- This function uses `filescan` API call to retrieve security-related information
224
- about the input file. This returns a dict. Then it counts how many times each code construct
225
- appears across all scanned files.
232
+ This function uses the `filescan` API call to retrieve security-related information
233
+ and aggregates the total number of occurrences per weakness construct.
226
234
 
227
235
  Args:
228
- input_file (str): Path to the file or directory(package) to scan.
236
+ input_file (str): Path to the file or directory (package) to scan.
237
+ nosec (bool): Whether to suppress findings marked with nosec comments.
229
238
 
230
239
  Returns:
231
240
  dict: A dictionary mapping each construct name (str) to the total
232
- number of occurrences (int) across all scanned files.
241
+ number of occurrences (int).
242
+
243
+ Raises:
244
+ ValueError: If the scan fails or returns an error result.
245
+ TypeError: If the scan result has an unexpected structure.
246
+ """
247
+ scan_result = filescan(input_file, nosec)
248
+
249
+ # Explicitly handle scan failure or unexpected return
250
+ if not isinstance(scan_result, dict):
251
+ raise ValueError("filescan() did not return a valid result dictionary")
252
+
253
+ if "Error" in scan_result:
254
+ raise ValueError(scan_result["Error"])
255
+
256
+ file_security_info = scan_result.get("file_security_info")
257
+ if not isinstance(file_security_info, dict):
258
+ # Valid scan, but no findings (e.g. empty or non-parsable input)
259
+ return {}
233
260
 
234
- Notes:
235
- - The `filescan` function is expected to return a dictionary with
236
- a 'file_security_info' key, containing per-file information.
237
- - Each file's 'sast_result' should be a dictionary mapping
238
- construct names to lists of occurrences.
239
- """
240
- scan_result = filescan(input_file)
241
261
  counter = Counter()
262
+
263
+ for file_info in file_security_info.values():
264
+ if not isinstance(file_info, dict):
265
+ continue
266
+
267
+ sast_result = file_info.get("sast_result", {})
268
+ if not isinstance(sast_result, dict):
269
+ continue
270
+
271
+ for construct, occurrences in sast_result.items():
272
+ if isinstance(occurrences, (list, tuple)):
273
+ counter[construct] += len(occurrences)
274
+
275
+ return dict(counter)
276
+
277
+
278
+
279
+ # def get_weakness_counts(input_file , nosec=False):
280
+ # """
281
+ # Analyze a Python file or package(directory) and count occurrences of code weaknesses.
282
+
283
+ # This function uses `filescan` API call to retrieve security-related information
284
+ # about the input file. This returns a dict. Then it counts how many times each code construct
285
+ # appears across all scanned files.
286
+
287
+ # Args:
288
+ # input_file (str): Path to the file or directory(package) to scan.
289
+
290
+ # Returns:
291
+ # dict: A dictionary mapping each construct name (str) to the total
292
+ # number of occurrences (int) across all scanned files.
293
+
294
+ # Notes:
295
+ # - The `filescan` function is expected to return a dictionary with
296
+ # a 'file_security_info' key, containing per-file information.
297
+ # - Each file's 'sast_result' should be a dictionary mapping
298
+ # construct names to lists of occurrences.
299
+ # """
300
+ # scan_result = filescan(input_file, nosec)
301
+ # counter = Counter()
242
302
 
243
- for file_info in scan_result.get('file_security_info', {}).values():
244
- sast_result = file_info.get('sast_result', {})
245
- for construct, occurence in sast_result.items(): #occurence is times the construct appears in a single file
246
- counter[construct] += len(occurence)
303
+ # for file_info in scan_result.get('file_security_info', {}).values():
304
+ # sast_result = file_info.get('sast_result', {})
305
+ # for construct, occurrence in sast_result.items(): #occurrence is times the construct appears in a single file
306
+ # counter[construct] += len(occurrence)
247
307
 
248
- return dict(counter)
308
+ # return dict(counter)
249
309
 
250
310
  def get_modules(filename):
251
311
  """Gets modules of a Python file """
@@ -47,6 +47,9 @@ Subprocess Usage,subprocess.call,High,Requires careful input validation to preve
47
47
  Subprocess Usage,subprocess.check_call,High,Requires careful input validation to prevent command injection vulnerabilities.
48
48
  Subprocess Usage,subprocess.Popen,Medium,Requires careful input validation to prevent command injection vulnerabilities.
49
49
  Subprocess Usage,subprocess.run,Medium,Requires careful input validation to prevent command injection vulnerabilities.
50
+ Subprocess Usage,subprocess.check_output,Medium,Requires careful input validation to prevent command injection vulnerabilities.
51
+ Subprocess Usage,subprocess.getstatusoutput,Medium,Requires careful input validation to prevent command injection vulnerabilities.
52
+ Subprocess Usage,subprocess.getoutput,Medium,Requires careful input validation to prevent command injection vulnerabilities.
50
53
  Tarfile Extraction,tarfile.TarFile,High,Vulnerable to path traversal attacks if used with untrusted archives.
51
54
  Base64 Encoding ,base64,Low,"Base64 encoding is not for security. It only visually hides data and provides no confidentiality. Often used to obfuscate malware in code."
52
55
  XML-RPC Client,xmlrpc.client,High,Vulnerable to denial-of-service via decompression bombs.
@@ -0,0 +1,136 @@
1
+
2
+ _KEY
3
+ _passwd
4
+ _PASSWORD
5
+ access_key
6
+ access_key_id
7
+ ACCESS_SECRET
8
+ ACCESS_TOKEN
9
+ AccountKey
10
+ AI21_API_KEY
11
+ ALIBABA_CLOUD_ACCESS_KEY_ID
12
+ ALIBABA_CLOUD_ACCESS_KEY_SECRET
13
+ ANTHROPIC_API_KEY
14
+ api_key
15
+ API_TOKEN
16
+ ApiKey
17
+ ApiSecret
18
+ APP_KEY
19
+ APP_SECRET
20
+ AUTH
21
+ auth_key
22
+ auth_password
23
+ AUTH_SECRET
24
+ auth_token
25
+ AUTH_TOKEN
26
+ Authorization
27
+ AWS_ACCESS_KEY_ID
28
+ aws_account_id
29
+ aws_secret_access_key
30
+ AWS_SECRET_ACCESS_KEY
31
+ aws_session_token
32
+ AWS_SESSION_TOKEN
33
+ AZURE_OPENAI_API_KEY
34
+ AZURE_OPENAI_API_VERSION
35
+ AZURE_OPENAI_ENDPOINT
36
+ AzureStorageKey
37
+ BAIDU_API_KEY
38
+ BAIDU_SECRET_KEY
39
+ BASIC_AUTH
40
+ BEARER
41
+ BEARER_TOKEN
42
+ BEDROCK_REGION
43
+ CLIENT_ID
44
+ client_key
45
+ CLIENT_SECRET
46
+ ClientSecret
47
+ COHERE_API_KEY
48
+ CONNECTION_STRING
49
+ credential
50
+ credentials
51
+ CREDENTIALS_JSON
52
+ creds
53
+ CSRF_TOKEN
54
+ DASHSCOPE_API_KEY
55
+ DEEPSEEK_API_KEY
56
+ DEPLOY_KEY
57
+ encryptedPassword
58
+ ENCRYPTION_SECRET
59
+ EncryptionKey
60
+ FERNET_KEY
61
+ FIREWORKS_API_KEY
62
+ GCP_SERVICE_ACCOUNT_KEY
63
+ GEMINI_API_KEY
64
+ get_api_token
65
+ get_secret
66
+ get_token
67
+ GITHUB_TOKEN
68
+ GOOGLE_API_KEY
69
+ GOOGLE_API_KEY
70
+ HMAC_KEY
71
+ HUGGINGFACE_API_TOKEN
72
+ IBM_WATSONX_API_KEY
73
+ IBM_WATSONX_PROJECT_ID
74
+ ID_TOKEN
75
+ INTEGRATION_KEY
76
+ JWT_ACCESS_TOKEN
77
+ JWT_ALGORITHM
78
+ JWT_AUDIENCE
79
+ JWT_ISSUER
80
+ JWT_PRIVATE_KEY
81
+ JWT_PUBLIC_KEY
82
+ JWT_REFRESH_TOKEN
83
+ JWT_SECRET
84
+ JWT_SECRET_KEY
85
+ JWT_SIGNING_KEY
86
+ JWT_TOKEN
87
+ KEYFILE
88
+ KUBE_TOKEN
89
+ MASTER_KEY
90
+ MISTRAL_API_KEY
91
+ MLAB_PASS
92
+ MOONSHOT_API_KEY
93
+ NetworkCredential
94
+ NVIDIA_API_KEY
95
+ OAUTH_TOKEN
96
+ OLLAMA_API_BASE
97
+ OPENAI_API_KEY
98
+ OPENROUTER_API_KEY
99
+ OTEL_EXPORTER
100
+ PASSPHRASE
101
+ password
102
+ POSTGRES_PASSWORD
103
+ PPLX_API_KEY
104
+ PRIVATE_KEY
105
+ PRIVATE_TOKEN
106
+ REDIS_PASSWORD
107
+ REFRESH_TOKEN
108
+ REPLICATE_API_TOKEN
109
+ ROOT_PASSWORD
110
+ RSA_PRIVATE_KEY
111
+ SAS_TOKEN
112
+ secret
113
+ secret_key
114
+ secret_key_base
115
+ SECRET_TOKEN
116
+ SERVICE_ACCOUNT_KEY
117
+ SESSION_KEY
118
+ SIGNING_KEY
119
+ SILICONFLOW_API_KEY
120
+ SLACK_TOKEN
121
+ SMTP_PASSWORD
122
+ SSH_KEY
123
+ static_key
124
+ STRIPE_API_KEY
125
+ SYSTEM_PASSWORD
126
+ TENCENT_HUNYUAN_API_KEY
127
+ TLS_PRIVATE_KEY
128
+ TOGETHER_API_KEY
129
+ TOKEN
130
+ VAULT_TOKEN
131
+ WEBHOOK_SECRET
132
+ WEBHOOK_TOKEN
133
+ X_API_KEY
134
+ XAI_API_KEY
135
+ YI_API_KEY
136
+ ZHIPUAI_API_KEY
@@ -24,7 +24,7 @@ def read_in_source_file(file_path):
24
24
 
25
25
  if file_path.is_dir():
26
26
  print(
27
- "Error: The given path is a directory.\nUse 'codeaudit directoryscan' to audit all Python files in a directory.\nThe 'codeaudit modulescan' command works per file only, not on a directory.\nUse codeaudit -h for help"
27
+ "Error: The given path is a directory.\nUse 'codeaudit filescan' to security audit Python files in a directory or PyPI package.\nThe 'codeaudit modulescan' command works per file only, not on a directory.\nUse codeaudit -h for help"
28
28
  )
29
29
  sys.exit(1)
30
30
 
@@ -78,7 +78,7 @@ def find_constructs(source_code, constructs_to_detect):
78
78
  elif node.func.attr in ('input') and 'builtins' in core_modules: #catch obfuscating construct with builtins module
79
79
  construct = 'input'
80
80
  elif node.func.attr in ('compile') and 'builtins' in core_modules: #catch obfuscating construct with builtins module
81
- construct = 'compile'
81
+ construct = 'compile'
82
82
  elif isinstance(func, ast.Name):
83
83
  resolved = alias_map.get(func.id, func.id)
84
84
  if resolved in constructs_to_detect:
@@ -0,0 +1,292 @@
1
+ from codeaudit.api_interfaces import version
2
+ from codeaudit.filehelpfunctions import get_filename_from_path , collect_python_source_files , is_ast_parsable , read_in_source_file
3
+ from codeaudit.pypi_package_scan import get_pypi_download_info , get_package_source
4
+
5
+
6
+ import ast
7
+ from pathlib import Path
8
+ import datetime
9
+ import re
10
+
11
+
12
+ from importlib.resources import files
13
+
14
+
15
+ SECRETS_LIST = files("codeaudit.data").joinpath("secretslist.txt")
16
+
17
def secret_scan(input_path):
    """Scan a Python file, a local directory or a PyPI package for possible secrets.

    The input is tried in this order:
      1. an existing local directory,
      2. an existing, AST-parsable ``.py`` file,
      3. a package name for which ``get_pypi_download_info`` returns data.

    Args:
        input_path (str): Path to a local directory, path to a Python file,
            or the name of a PyPI package.

    Returns:
        dict: Version information and a ``generated_on`` timestamp, extended with:
            - directory / PyPI package: ``package_name`` (and ``package_release``
              for PyPI) plus the output of ``_codeaudit_directory_spyscan``;
            - single file: ``file_name`` and ``file_privacy_check`` with the
              single result stored under key ``"0"``, so consumers can treat a
              file result like a package result.
        A dict with only an ``"Error"`` key is returned when the input matches
        none of the three cases, or when PyPI offers no download URL.
    """
    file_path = Path(input_path)
    timestamp_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    output = version() | {"generated_on": timestamp_str}

    # Local directory scan.
    if file_path.is_dir():
        output |= {"package_name": get_filename_from_path(input_path)}
        output |= _codeaudit_directory_spyscan(input_path)
        return output

    # Single parsable Python file.
    if file_path.suffix.lower() == ".py" and file_path.is_file() and is_ast_parsable(input_path):
        name_of_file = get_filename_from_path(input_path)
        output |= {
            "file_name": {"FileName": name_of_file},
            "file_privacy_check": {"0": spy_check(input_path)},
        }
        return output

    # Otherwise input_path is interpreted as a PyPI package name.
    if (pypi_data := get_pypi_download_info(input_path)):
        url = pypi_data['download_url']
        release = pypi_data['release']
        if url is None:
            # Without this branch the function would fall through and return
            # None, although callers are promised a dictionary.
            return {"Error": f"No downloadable source found on PyPI for package {input_path}."}
        src_dir, tmp_handle = get_package_source(url)
        output |= {"package_name": input_path,
                   "package_release": release}
        try:
            output |= _codeaudit_directory_spyscan(src_dir)
        finally:
            # Always remove the temporary directory with the unpacked package.
            tmp_handle.cleanup()
        return output

    # Neither a directory, a valid Python file nor a known PyPI package.
    return {"Error" : "File is not a *.py file, does not exist or is not a valid directory path towards a Python package."}
87
+
88
+
89
def spy_check(file):
    """Run the secret check on one Python file.

    Args:
        file: Path of the file; it is read with ``read_in_source_file``.

    Returns:
        dict: ``file_name`` (from ``get_filename_from_path``) and
        ``privacy_check_result`` (the output of ``collect_secret_values``).
    """
    # Read and analyse first, so the helpers run in the original order.
    findings = collect_secret_values(read_in_source_file(file))
    return {
        "file_name": get_filename_from_path(file),
        "privacy_check_result": findings,
    }
97
+
98
+
99
def _codeaudit_directory_spyscan(input_path):
    """Run the secret check on every Python file collected from a directory.

    Also used for PyPI packages, which are unpacked into a temporary
    directory before scanning.

    Args:
        input_path: Directory handed to ``collect_python_source_files``.

    Returns:
        dict: ``{"file_privacy_check": {index: result, ...}}`` where ``index``
        is the integer position of the file in the collected list and
        ``result`` is the output of ``spy_check``; or ``{"Error": message}``
        when no Python files were collected.
    """
    files_to_check = collect_python_source_files(input_path)
    # Any non-empty collection is scanned: a directory holding exactly one
    # Python file must not be reported as containing no Python files.
    if not files_to_check:
        return {"Error": f'Directory path {input_path} contains no Python files.'}
    return {
        "file_privacy_check": {
            index: spy_check(path) for index, path in enumerate(files_to_check)
        }
    }
114
+
115
+
116
def load_secrets_list(filename=SECRETS_LIST):
    """Read the secret tags from a text file.

    Args:
        filename: File to read as UTF-8; defaults to ``SECRETS_LIST``.

    Returns:
        list[str]: Every line, stripped of surrounding whitespace and
        lowercased, in file order. Blank lines and lines starting with
        ``#`` are left out.
    """
    with open(filename, "r", encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        return [
            entry.lower()  # tags are compared against lowercased text
            for entry in stripped_lines
            if entry and not entry.startswith("#")
        ]
131
+
132
+
133
def match_secret(secrets, name, value):
    """Return the first secret tag that occurs as a whole word in name or value.

    ``name`` and ``value`` are converted with ``str()`` and lowercased; the
    tags in ``secrets`` are expected to be lowercase already. Tags are tried
    from shortest to longest (ties keep their input order), and for each tag
    the name is checked before the value.

    "Whole word" follows the regex ``\\b`` rule, where letters, digits and the
    underscore are word characters: ``password`` matches ``user.password``
    but not ``db_password``.

    Args:
        secrets: Iterable of lowercase secret tags.
        name: Identifier (or its text representation) to inspect.
        value: Value (or its text representation) to inspect.

    Returns:
        The matching tag, or None when no tag matches.
    """
    haystacks = (str(name).lower(), str(value).lower())

    for tag in sorted(secrets, key=len):
        whole_word = re.compile(rf"\b{re.escape(tag)}\b")
        if any(whole_word.search(text) for text in haystacks):
            return tag

    return None
158
+
159
+
160
def collect_secret_values(source_code, secrets_file=SECRETS_LIST):
    """Find assignments and keyword arguments that match a known secret tag.

    The source is parsed with ``ast.parse``. For every assignment target,
    annotated assignment and call keyword argument, the name and a text form
    of the value are passed to ``match_secret``.

    Args:
        source_code (str): Python source text.
        secrets_file: File with secret tags, read via ``load_secrets_list``.

    Returns:
        list[dict]: One entry per match, ordered by line number, with keys
        ``lineno``, ``code`` (the matching line together with the line before
        and after it, when present) and ``matched`` (the matching tag).
    """
    secret_tags = load_secrets_list(secrets_file)
    findings = []
    text_lines = source_code.splitlines()

    def environ_subscript(sub):
        """Tell whether *sub* subscripts ``os.environ``."""
        holder = getattr(sub, "value", None)
        if getattr(holder, "attr", None) != "environ":
            return False
        return getattr(getattr(holder, "value", None), "id", None) == "os"

    def target_name(target):
        """Return a printable name for a supported target, otherwise None."""
        if isinstance(target, ast.Name):
            return target.id
        if isinstance(target, (ast.Attribute, ast.Subscript)):
            return ast.unparse(target)
        return None  # e.g. tuple targets are not inspected

    def describe_value(expr):
        """Reduce a value expression to the constant or source text to match on."""
        if expr is None:
            return None
        if isinstance(expr, ast.Constant):
            return expr.value
        if isinstance(expr, ast.Subscript):
            if environ_subscript(expr):
                # os.environ["NAME"] -> "NAME"
                return getattr(expr.slice, "value", None)
            return ast.unparse(expr)
        if isinstance(expr, ast.Call) and expr.args:
            # A call is represented by its first positional constant, if any.
            leading = expr.args[0]
            if isinstance(leading, ast.Constant):
                return leading.value
        return ast.unparse(expr)

    def context_for(node):
        """Return the node's source line with its direct neighbours."""
        lineno = getattr(node, "lineno", None)
        if lineno is None:
            return None
        total = len(text_lines)
        snippet = []
        if lineno > 1:  # line before
            snippet.append(text_lines[lineno - 2].rstrip())
        if 1 <= lineno <= total:  # the line itself
            snippet.append(text_lines[lineno - 1].rstrip())
        if lineno < total:  # line after
            snippet.append(text_lines[lineno].rstrip())
        return "\n".join(snippet)

    def record(name, value_node, origin):
        """Store a finding when name or value matches a secret tag."""
        tag = match_secret(secret_tags, name, describe_value(value_node))
        if tag is None:
            return
        findings.append(
            {
                "lineno": getattr(origin, "lineno", None),
                "code": context_for(origin),
                "matched": tag,
            }
        )

    for node in ast.walk(ast.parse(source_code)):
        # Every node exposing ``targets`` is inspected here.
        # NOTE(review): ``ast.Delete`` also has ``targets``, so ``del x`` is
        # checked with a value of None — confirm this is intended.
        for target in getattr(node, "targets", []):
            label = target_name(target)
            if label:
                record(label, getattr(node, "value", None), node)

        # Annotated assignments carry a single ``target``.
        if isinstance(node, ast.AnnAssign):
            label = target_name(node.target)
            if label:
                record(label, getattr(node, "value", None), node)

        # Calls: only named keyword arguments (``**kwargs`` has no ``arg``).
        if isinstance(node, ast.Call):
            for keyword in node.keywords:
                if keyword.arg:
                    record(keyword.arg, keyword.value, keyword)

    findings.sort(key=lambda entry: entry["lineno"])
    return findings
269
+
270
def has_privacy_findings(data):
    """Tell whether a scan result contains at least one finding.

    Args:
        data (dict): Scan result; per-file entries are read from the
            ``file_privacy_check`` key (treated as empty when absent).

    Returns:
        bool: True when any file entry has a non-empty
        ``privacy_check_result``, otherwise False.
    """
    per_file = data.get("file_privacy_check", {})
    return any(
        bool(entry.get("privacy_check_result")) for entry in per_file.values()
    )
283
+
284
def count_privacy_check_results(data):
    """Count the findings in a scan result.

    Findings are summed over every entry under ``file_privacy_check``, so the
    count is correct for a single-file result (one entry under key ``"0"``)
    as well as for directory or package results, whose entries use other keys.

    Args:
        data (dict): Scan result with per-file entries under
            ``file_privacy_check``; each entry may hold a
            ``privacy_check_result`` list.

    Returns:
        int: Total number of findings; 0 when ``data`` holds no file entries
        (for example an error result).
    """
    per_file = data.get("file_privacy_check") or {}
    return sum(
        len(entry.get("privacy_check_result") or ())
        for entry in per_file.values()
    )
@@ -104,7 +104,7 @@ def get_package_source(url, nocxheaders=NOCX_HEADERS, nocxtimeout=10):
104
104
  f.write(content)
105
105
 
106
106
  with tarfile.open(tar_path, "r:gz") as tar:
107
- tar.extractall(path=temp_dir,filter='data') #Possible risks are mitigated as far as possible, see architecture notes.
107
+ tar.extractall(path=temp_dir,filter='data') # nosec Possible risks are mitigated as far as possible, see architecture notes.
108
108
 
109
109
  return temp_dir, tmpdir_obj # return both so caller controls lifetime
110
110