cisco-ai-skill-scanner 1.0.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
- cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
- skillanalyzer/__init__.py +45 -0
- skillanalyzer/_version.py +34 -0
- skillanalyzer/api/__init__.py +25 -0
- skillanalyzer/api/api.py +34 -0
- skillanalyzer/api/api_cli.py +78 -0
- skillanalyzer/api/api_server.py +634 -0
- skillanalyzer/api/router.py +527 -0
- skillanalyzer/cli/__init__.py +25 -0
- skillanalyzer/cli/cli.py +816 -0
- skillanalyzer/config/__init__.py +26 -0
- skillanalyzer/config/config.py +149 -0
- skillanalyzer/config/config_parser.py +122 -0
- skillanalyzer/config/constants.py +85 -0
- skillanalyzer/core/__init__.py +24 -0
- skillanalyzer/core/analyzers/__init__.py +75 -0
- skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
- skillanalyzer/core/analyzers/base.py +53 -0
- skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
- skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
- skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
- skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
- skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
- skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
- skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
- skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
- skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
- skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
- skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
- skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
- skillanalyzer/core/analyzers/static.py +1105 -0
- skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
- skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
- skillanalyzer/core/exceptions.py +77 -0
- skillanalyzer/core/loader.py +377 -0
- skillanalyzer/core/models.py +300 -0
- skillanalyzer/core/reporters/__init__.py +26 -0
- skillanalyzer/core/reporters/json_reporter.py +65 -0
- skillanalyzer/core/reporters/markdown_reporter.py +209 -0
- skillanalyzer/core/reporters/sarif_reporter.py +246 -0
- skillanalyzer/core/reporters/table_reporter.py +195 -0
- skillanalyzer/core/rules/__init__.py +19 -0
- skillanalyzer/core/rules/patterns.py +165 -0
- skillanalyzer/core/rules/yara_scanner.py +157 -0
- skillanalyzer/core/scanner.py +437 -0
- skillanalyzer/core/static_analysis/__init__.py +27 -0
- skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
- skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
- skillanalyzer/core/static_analysis/context_extractor.py +742 -0
- skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
- skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
- skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
- skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
- skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
- skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
- skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
- skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
- skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
- skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
- skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
- skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
- skillanalyzer/core/static_analysis/types/__init__.py +36 -0
- skillanalyzer/data/__init__.py +30 -0
- skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
- skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
- skillanalyzer/data/prompts/llm_response_schema.json +71 -0
- skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
- skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
- skillanalyzer/data/prompts/unified_response_schema.md +97 -0
- skillanalyzer/data/rules/signatures.yaml +440 -0
- skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
- skillanalyzer/data/yara_rules/code_execution.yara +61 -0
- skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
- skillanalyzer/data/yara_rules/command_injection.yara +54 -0
- skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
- skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
- skillanalyzer/data/yara_rules/script_injection.yara +83 -0
- skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
- skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
- skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
- skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
- skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
- skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
- skillanalyzer/hooks/__init__.py +21 -0
- skillanalyzer/hooks/pre_commit.py +450 -0
- skillanalyzer/threats/__init__.py +25 -0
- skillanalyzer/threats/threats.py +480 -0
- skillanalyzer/utils/__init__.py +28 -0
- skillanalyzer/utils/command_utils.py +129 -0
- skillanalyzer/utils/di_container.py +154 -0
- skillanalyzer/utils/file_utils.py +86 -0
- skillanalyzer/utils/logging_config.py +96 -0
- skillanalyzer/utils/logging_utils.py +71 -0
skillanalyzer/core/static_analysis/cfg/builder.py

@@ -0,0 +1,439 @@
# Copyright 2026 Cisco Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0

"""Control Flow Graph (CFG) builder for dataflow analysis.

Builds control flow graphs from Python ASTs to enable accurate dataflow
analysis through control structures (if/else, loops, functions).
"""

import ast
import logging
from typing import Any, Generic, TypeVar

from ..parser.python_parser import PythonParser

T = TypeVar("T")


class CFGNode:
    """Control Flow Graph node."""

    def __init__(self, node_id: int, ast_node: Any, label: str = "") -> None:
        """Initialize CFG node.

        Args:
            node_id: Unique node ID
            ast_node: Associated AST node
            label: Optional label
        """
        self.id = node_id
        self.ast_node = ast_node
        self.label = label
        self.predecessors: list[CFGNode] = []
        self.successors: list[CFGNode] = []

    def __repr__(self) -> str:
        """String representation."""
        return f"CFGNode({self.id}, {self.label})"


class ControlFlowGraph:
    """Control Flow Graph."""

    def __init__(self) -> None:
        """Initialize CFG."""
        self.nodes: list[CFGNode] = []
        self.entry: CFGNode | None = None
        self.exit: CFGNode | None = None
        self._node_counter = 0

    def create_node(self, ast_node: Any, label: str = "") -> CFGNode:
        """Create a new CFG node.

        Args:
            ast_node: AST node
            label: Optional label

        Returns:
            New CFG node
        """
        node = CFGNode(self._node_counter, ast_node, label)
        self._node_counter += 1
        self.nodes.append(node)
        return node

    def add_edge(self, from_node: CFGNode, to_node: CFGNode) -> None:
        """Add an edge between two nodes.

        Args:
            from_node: Source node
            to_node: Target node
        """
        from_node.successors.append(to_node)
        to_node.predecessors.append(from_node)

    def get_successors(self, node: CFGNode) -> list[CFGNode]:
        """Get successor nodes.

        Args:
            node: CFG node

        Returns:
            List of successor nodes
        """
        return node.successors

    def get_predecessors(self, node: CFGNode) -> list[CFGNode]:
        """Get predecessor nodes.

        Args:
            node: CFG node

        Returns:
            List of predecessor nodes
        """
        return node.predecessors


class DataFlowAnalyzer(Generic[T]):
    """Generic dataflow analysis framework."""

    def __init__(self, parser: PythonParser) -> None:
        """Initialize dataflow analyzer.

        Args:
            parser: Python parser instance
        """
        self.parser = parser
        self.cfg: ControlFlowGraph | None = None
        self.in_facts: dict[int, T] = {}
        self.out_facts: dict[int, T] = {}
        self.logger = logging.getLogger(__name__)

    def build_cfg(self) -> ControlFlowGraph:
        """Build Control Flow Graph from AST.

        Returns:
            Control Flow Graph
        """
        # Get AST from parser (PythonParser uses self.tree)
        ast_root = getattr(self.parser, "tree", None)
        if not ast_root:
            self.logger.warning("Cannot build CFG: no AST available. Call parser.parse() first.")
            return ControlFlowGraph()

        # Clear old state when building a new CFG to prevent state leakage
        self.in_facts.clear()
        self.out_facts.clear()

        cfg = ControlFlowGraph()
        self._build_python_cfg(ast_root, cfg)
        self.cfg = cfg
        return cfg

    def _build_python_cfg(self, node: ast.AST, cfg: ControlFlowGraph) -> CFGNode:
        """Build CFG for Python AST.

        Args:
            node: Python AST node
            cfg: Control Flow Graph

        Returns:
            Last CFG node created
        """
        if isinstance(node, ast.Module):
            entry = cfg.create_node(node, "entry")
            cfg.entry = entry

            current = entry
            for stmt in node.body:
                next_node = self._build_python_cfg(stmt, cfg)
                cfg.add_edge(current, next_node)
                current = next_node

            exit_node = cfg.create_node(node, "exit")
            cfg.exit = exit_node
            cfg.add_edge(current, exit_node)

            return exit_node

        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # Build CFG for function body
            entry = cfg.create_node(node, "func_entry")
            if not cfg.entry:
                cfg.entry = entry

            current = entry
            for stmt in node.body:
                next_node = self._build_python_cfg(stmt, cfg)
                cfg.add_edge(current, next_node)
                current = next_node

            exit_node = cfg.create_node(node, "func_exit")
            if not cfg.exit:
                cfg.exit = exit_node
            cfg.add_edge(current, exit_node)

            return exit_node

        elif isinstance(node, ast.If):
            cond_node = cfg.create_node(node.test, "if_cond")

            then_entry = cfg.create_node(node, "then_entry")
            cfg.add_edge(cond_node, then_entry)

            then_current = then_entry
            for stmt in node.body:
                next_node = self._build_python_cfg(stmt, cfg)
                cfg.add_edge(then_current, next_node)
                then_current = next_node

            if node.orelse:
                else_entry = cfg.create_node(node, "else_entry")
                cfg.add_edge(cond_node, else_entry)

                else_current = else_entry
                for stmt in node.orelse:
                    next_node = self._build_python_cfg(stmt, cfg)
                    cfg.add_edge(else_current, next_node)
                    else_current = next_node

                merge = cfg.create_node(node, "if_merge")
                cfg.add_edge(then_current, merge)
                cfg.add_edge(else_current, merge)
                return merge
            else:
                merge = cfg.create_node(node, "if_merge")
                cfg.add_edge(then_current, merge)
                cfg.add_edge(cond_node, merge)
                return merge

        elif isinstance(node, ast.While):
            cond_node = cfg.create_node(node.test, "while_cond")

            body_entry = cfg.create_node(node, "while_body")
            cfg.add_edge(cond_node, body_entry)

            body_current = body_entry
            for stmt in node.body:
                next_node = self._build_python_cfg(stmt, cfg)
                cfg.add_edge(body_current, next_node)
                body_current = next_node

            cfg.add_edge(body_current, cond_node)

            exit_node = cfg.create_node(node, "while_exit")
            cfg.add_edge(cond_node, exit_node)

            return exit_node

        elif isinstance(node, ast.For):
            iter_node = cfg.create_node(node.iter, "for_iter")

            body_entry = cfg.create_node(node, "for_body")
            cfg.add_edge(iter_node, body_entry)

            body_current = body_entry
            for stmt in node.body:
                next_node = self._build_python_cfg(stmt, cfg)
                cfg.add_edge(body_current, next_node)
                body_current = next_node

            cfg.add_edge(body_current, iter_node)

            exit_node = cfg.create_node(node, "for_exit")
            cfg.add_edge(iter_node, exit_node)

            return exit_node

        elif isinstance(node, ast.Try):
            # Handle try/except/finally blocks
            try_entry = cfg.create_node(node, "try_entry")
            current = try_entry

            # Try block
            for stmt in node.body:
                next_node = self._build_python_cfg(stmt, cfg)
                cfg.add_edge(current, next_node)
                current = next_node

            # Exception handlers
            if node.handlers:
                for handler in node.handlers:
                    handler_entry = cfg.create_node(handler, "except_entry")
                    cfg.add_edge(try_entry, handler_entry)
                    handler_current = handler_entry
                    for stmt in handler.body:
                        next_node = self._build_python_cfg(stmt, cfg)
                        cfg.add_edge(handler_current, next_node)
                        handler_current = next_node
                    # Merge exception handlers back
                    cfg.add_edge(handler_current, current)

            # Finally block
            if node.finalbody:
                finally_entry = cfg.create_node(node, "finally_entry")
                cfg.add_edge(current, finally_entry)
                finally_current = finally_entry
                for stmt in node.finalbody:
                    next_node = self._build_python_cfg(stmt, cfg)
                    cfg.add_edge(finally_current, next_node)
                    finally_current = next_node
                return finally_current

            return current

        else:
            return cfg.create_node(node, type(node).__name__)

    def analyze(self, initial_fact: T, forward: bool = True, max_iteration_multiplier: int = 1000) -> None:
        """Run dataflow analysis using worklist algorithm.

        Args:
            initial_fact: Initial dataflow fact
            forward: True for forward analysis, False for backward
            max_iteration_multiplier: Base multiplier for max iterations (default: 1000, increased from 500)
                Max iterations = CFG nodes * effective_multiplier
                Adaptive limits based on CFG size to handle complex files
        """
        if not self.cfg:
            self.build_cfg()

        if not self.cfg or not self.cfg.nodes:
            return

        # Clear facts dictionaries to ensure clean state (defensive programming)
        # This prevents any potential state leakage from previous analyses
        self.in_facts.clear()
        self.out_facts.clear()

        for node in self.cfg.nodes:
            self.in_facts[node.id] = initial_fact
            self.out_facts[node.id] = initial_fact

        worklist = list(self.cfg.nodes)
        in_worklist = {node.id for node in worklist}

        iteration_count = 0
        cfg_size = len(self.cfg.nodes)
        # Adaptive limit: Use higher multiplier for larger CFGs
        # Very small CFGs (< 20 nodes): 1000x multiplier (10k max iterations)
        # Small CFGs (20-50 nodes): 800x multiplier
        # Medium CFGs (50-100 nodes): 600x multiplier
        # Large CFGs (100-200 nodes): 400x multiplier
        # Very large CFGs (> 200 nodes): 300x multiplier (but still allows 60k+ iterations)
        if cfg_size < 20:
            effective_multiplier = max_iteration_multiplier  # 1000
        elif cfg_size < 50:
            effective_multiplier = int(max_iteration_multiplier * 0.8)  # 800
        elif cfg_size < 100:
            effective_multiplier = int(max_iteration_multiplier * 0.6)  # 600
        elif cfg_size < 200:
            effective_multiplier = int(max_iteration_multiplier * 0.4)  # 400
        else:
            effective_multiplier = int(max_iteration_multiplier * 0.3)  # 300
        max_iterations = cfg_size * effective_multiplier  # Safety limit

        while worklist:
            iteration_count += 1

            # Safety check to prevent infinite loops
            if iteration_count > max_iterations:
                # Log at debug level to reduce noise - this is expected for complex files
                # The analysis still completes, it just stops early at the safety limit
                self.logger.debug(
                    f"Dataflow analysis exceeded max iterations ({max_iterations:,} iterations, "
                    f"CFG size: {cfg_size} nodes). Analysis stopped at safety limit. "
                    f"This is normal for complex control flow and analysis may be incomplete."
                )
                break

            node = worklist.pop(0)
            in_worklist.discard(node.id)

            if forward:
                pred_facts = [self.out_facts[pred.id] for pred in node.predecessors]
                if pred_facts:
                    in_fact = self.merge(pred_facts)
                else:
                    in_fact = initial_fact

                self.in_facts[node.id] = in_fact

                out_fact = self.transfer(node, in_fact)

                if out_fact != self.out_facts[node.id]:
                    self.out_facts[node.id] = out_fact

                    for succ in node.successors:
                        if succ.id not in in_worklist:
                            worklist.append(succ)
                            in_worklist.add(succ.id)
            else:
                succ_facts = [self.in_facts[succ.id] for succ in node.successors]
                if succ_facts:
                    out_fact = self.merge(succ_facts)
                else:
                    out_fact = initial_fact

                self.out_facts[node.id] = out_fact

                in_fact = self.transfer(node, out_fact)

                if in_fact != self.in_facts[node.id]:
                    self.in_facts[node.id] = in_fact

                    for pred in node.predecessors:
                        if pred.id not in in_worklist:
                            worklist.append(pred)
                            in_worklist.add(pred.id)

    def transfer(self, node: CFGNode, in_fact: T) -> T:
        """Transfer function for dataflow analysis.

        Args:
            node: CFG node
            in_fact: Input dataflow fact

        Returns:
            Output dataflow fact
        """
        return in_fact

    def merge(self, facts: list[T]) -> T:
        """Merge multiple dataflow facts.

        Args:
            facts: List of facts to merge

        Returns:
            Merged fact
        """
        if facts:
            return facts[0]
        raise NotImplementedError("merge must be implemented by subclass")

    def get_reaching_definitions(self, node: CFGNode) -> T:
        """Get reaching definitions at a node.

        Args:
            node: CFG node

        Returns:
            Dataflow fact
        """
        return self.in_facts.get(node.id, self.in_facts.get(0) if self.in_facts else None)  # type: ignore