codepathfinder 1.2.0__py3-none-manylinux_2_17_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codepathfinder/__init__.py +48 -0
- codepathfinder/bin/pathfinder +0 -0
- codepathfinder/cli/__init__.py +204 -0
- codepathfinder/config.py +92 -0
- codepathfinder/dataflow.py +193 -0
- codepathfinder/decorators.py +158 -0
- codepathfinder/ir.py +107 -0
- codepathfinder/logic.py +101 -0
- codepathfinder/matchers.py +243 -0
- codepathfinder/presets.py +135 -0
- codepathfinder/propagation.py +250 -0
- codepathfinder-1.2.0.dist-info/METADATA +111 -0
- codepathfinder-1.2.0.dist-info/RECORD +33 -0
- codepathfinder-1.2.0.dist-info/WHEEL +5 -0
- codepathfinder-1.2.0.dist-info/entry_points.txt +2 -0
- codepathfinder-1.2.0.dist-info/licenses/LICENSE +661 -0
- codepathfinder-1.2.0.dist-info/top_level.txt +2 -0
- rules/__init__.py +36 -0
- rules/container_combinators.py +209 -0
- rules/container_decorators.py +223 -0
- rules/container_ir.py +104 -0
- rules/container_matchers.py +230 -0
- rules/container_programmatic.py +115 -0
- rules/python/__init__.py +0 -0
- rules/python/deserialization/__init__.py +0 -0
- rules/python/deserialization/pickle_loads.py +479 -0
- rules/python/django/__init__.py +0 -0
- rules/python/django/sql_injection.py +355 -0
- rules/python/flask/__init__.py +0 -0
- rules/python/flask/debug_mode.py +374 -0
- rules/python/injection/__init__.py +0 -0
- rules/python_decorators.py +177 -0
- rules/python_ir.py +80 -0
codepathfinder/ir.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""
|
|
2
|
+
JSON Intermediate Representation (IR) for pathfinder DSL.
|
|
3
|
+
|
|
4
|
+
The Python DSL serializes to JSON IR, which the Go executor consumes.
|
|
5
|
+
This enables language-agnostic pattern definitions (future: JS, Rust DSLs).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from typing import Any, Dict, Protocol
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class IRType(Enum):
    """Tags written into the ``"type"`` field of a JSON IR node.

    Every member's value is the literal string that appears in the
    serialized IR, so the values must not be changed casually.
    """

    # Leaf matchers.
    CALL_MATCHER = "call_matcher"
    VARIABLE_MATCHER = "variable_matcher"

    # Source-to-sink flow node.
    DATAFLOW = "dataflow"

    # Boolean combinators over other nodes.
    LOGIC_AND = "logic_and"
    LOGIC_OR = "logic_or"
    LOGIC_NOT = "logic_not"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class MatcherIR(Protocol):
    """Structural interface shared by every matcher.

    Any object that provides a compatible ``to_ir()`` method satisfies this
    protocol; no inheritance from this class is required.
    """

    def to_ir(self) -> Dict[str, Any]:
        """Return this matcher as a JSON IR dictionary."""
        ...
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def serialize_ir(matcher: MatcherIR) -> Dict[str, Any]:
    """
    Convert a matcher object into its JSON IR dictionary.

    Args:
        matcher: Object expected to provide a ``to_ir()`` method.

    Returns:
        The value returned by ``matcher.to_ir()``.

    Raises:
        AttributeError: If ``matcher`` has no ``to_ir`` attribute.
    """
    # Happy path first: delegate straight to the matcher.
    if hasattr(matcher, "to_ir"):
        return matcher.to_ir()

    kind = type(matcher).__name__
    raise AttributeError(f"{kind} must implement to_ir() method")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def validate_ir(ir: Dict[str, Any]) -> bool:
    """
    Validate the structure of a single JSON IR node.

    Args:
        ir: Candidate JSON IR dictionary.

    Returns:
        True if valid, False otherwise. Input that is not a dictionary is
        reported as invalid (False) instead of raising.

    Validates:
        - ``ir`` is a dictionary with a "type" field naming a valid IRType
        - call_matcher: non-empty "patterns" list and boolean "wildcard"
        - variable_matcher: non-empty "pattern" string and boolean "wildcard"
        - dataflow: non-empty "sources" and "sinks" lists, "sanitizers" and
          "propagation" lists, and "scope" equal to "local" or "global"

    Logic nodes (logic_and / logic_or / logic_not) are only checked for a
    valid "type"; their children are not inspected here.
    """
    # Guard against None, strings, lists, etc.: membership tests and
    # subscripting on those would raise instead of returning False.
    if not isinstance(ir, dict) or "type" not in ir:
        return False

    try:
        ir_type = IRType(ir["type"])
    except ValueError:
        return False

    def nonempty(key: str, expected: type) -> bool:
        """True when ir[key] exists, has the expected type and is non-empty."""
        return key in ir and isinstance(ir[key], expected) and len(ir[key]) > 0

    def has_type(key: str, expected: type) -> bool:
        """True when ir[key] exists and has the expected type."""
        return key in ir and isinstance(ir[key], expected)

    # Type-specific validation
    if ir_type == IRType.CALL_MATCHER:
        return nonempty("patterns", list) and has_type("wildcard", bool)

    if ir_type == IRType.VARIABLE_MATCHER:
        return nonempty("pattern", str) and has_type("wildcard", bool)

    if ir_type == IRType.DATAFLOW:
        return (
            nonempty("sources", list)
            and nonempty("sinks", list)
            and has_type("sanitizers", list)
            and has_type("propagation", list)
            and "scope" in ir
            and ir["scope"] in ["local", "global"]
        )

    return True
|
codepathfinder/logic.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Logic operators for combining matchers."""
|
|
2
|
+
|
|
3
|
+
from typing import Union
|
|
4
|
+
from .matchers import CallMatcher, VariableMatcher
|
|
5
|
+
from .dataflow import DataflowMatcher
|
|
6
|
+
from .ir import IRType
|
|
7
|
+
|
|
8
|
+
# Anything that may be passed to a logic combinator: the leaf matchers plus
# the combinators themselves (quoted because they are defined further down).
MatcherType = Union[
    CallMatcher, VariableMatcher, DataflowMatcher,
    "AndOperator", "OrOperator", "NotOperator",
]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AndOperator:
    """
    Conjunction of matchers - every child must match.

    Example:
        And(calls("eval"), variable("user_input"))
        # Matches code that has BOTH eval calls AND user_input variable
    """

    def __init__(self, *matchers: MatcherType):
        """Store the operands; fewer than two raises ValueError."""
        operand_count = len(matchers)
        if operand_count < 2:
            raise ValueError("And() requires at least 2 matchers")
        self.matchers = [*matchers]

    def to_ir(self) -> dict:
        """Serialize to a ``logic_and`` node holding each child's IR in order."""
        node = {"type": IRType.LOGIC_AND.value}
        node["matchers"] = [child.to_ir() for child in self.matchers]
        return node

    def __repr__(self) -> str:
        count = len(self.matchers)
        return f"And({count} matchers)"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class OrOperator:
    """
    Disjunction of matchers - one matching child is enough.

    Example:
        Or(calls("eval"), calls("exec"))
        # Matches code with eval OR exec
    """

    def __init__(self, *matchers: MatcherType):
        """Store the operands; fewer than two raises ValueError."""
        operand_count = len(matchers)
        if operand_count < 2:
            raise ValueError("Or() requires at least 2 matchers")
        self.matchers = [*matchers]

    def to_ir(self) -> dict:
        """Serialize to a ``logic_or`` node holding each child's IR in order."""
        node = {"type": IRType.LOGIC_OR.value}
        node["matchers"] = [child.to_ir() for child in self.matchers]
        return node

    def __repr__(self) -> str:
        count = len(self.matchers)
        return f"Or({count} matchers)"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class NotOperator:
    """
    Negation of a single matcher - the wrapped matcher must NOT match.

    Example:
        Not(calls("test_*"))
        # Matches code that does NOT call test_* functions
    """

    def __init__(self, matcher: MatcherType):
        """Remember the matcher being negated."""
        self.matcher = matcher

    def to_ir(self) -> dict:
        """Serialize to a ``logic_not`` node wrapping the child's IR."""
        node = {"type": IRType.LOGIC_NOT.value}
        node["matcher"] = self.matcher.to_ir()
        return node

    def __repr__(self) -> str:
        return f"Not({self.matcher!r})"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# Public API
|
|
89
|
+
def And(*matchers: MatcherType) -> AndOperator:
    """Build an AND combinator over the given matchers."""
    combinator = AndOperator(*matchers)
    return combinator
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def Or(*matchers: MatcherType) -> OrOperator:
    """Build an OR combinator over the given matchers."""
    combinator = OrOperator(*matchers)
    return combinator
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def Not(matcher: MatcherType) -> NotOperator:
    """Build a NOT combinator around the given matcher."""
    combinator = NotOperator(matcher)
    return combinator
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core matchers for the pathfinder Python DSL.
|
|
3
|
+
|
|
4
|
+
These matchers generate JSON IR for the Go executor.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Optional, Union, List, Any
|
|
8
|
+
from .ir import IRType
|
|
9
|
+
|
|
10
|
+
# A single literal argument value, or a list of literal values.
ArgumentValue = Union[str, int, float, bool, List[Union[str, int, float, bool]]]


class CallMatcher:
    """
    Matches function/method calls with optional argument constraints.

    Examples:
        calls("eval")                              # Exact match
        calls("eval", "exec")                      # Multiple patterns
        calls("request.*")                         # Wildcard (any request.* call)
        calls("*.json")                            # Wildcard (any *.json call)
        calls("app.run", match_name={"debug": True})  # Keyword argument matching
        calls("open", match_position={1: "w"})        # Positional argument matching
        calls("socket.bind", match_position={"0[0]": "0.0.0.0"})  # Tuple indexing
        calls("connect", match_position={"0[0]": "192.168.*"})    # Wildcard + tuple
    """

    def __init__(
        self,
        *patterns: str,
        match_position: Optional[Dict[Union[int, str], ArgumentValue]] = None,
        match_name: Optional[Dict[str, ArgumentValue]] = None,
    ):
        """
        Args:
            *patterns: Function names to match. Supports wildcards (*).
            match_position: Match positional arguments by index or tuple index.
                Keys are ints (plain position) or strings (tuple index).
                Examples: {0: "value"}, {1: ["a", "b"]}, {"0[0]": "0.0.0.0"}
            match_name: Match named/keyword arguments {name: value}

        Position indexing:
            - Simple: {0: "value"} matches first argument
            - Tuple: {"0[0]": "value"} matches first element of first argument tuple
            - Wildcard: {"0[0]": "192.168.*"} matches with wildcard pattern

        Raises:
            ValueError: If no patterns provided or pattern is empty
        """
        if not patterns:
            raise ValueError("calls() requires at least one pattern")

        if any(not isinstance(p, str) or not p for p in patterns):
            raise ValueError("All patterns must be non-empty strings")

        self.patterns = list(patterns)
        # Pattern-level wildcard: true when any function-name pattern has "*".
        self.wildcard = any("*" in p for p in patterns)
        # Copy the constraint dicts (as is already done for patterns) so that
        # later mutation of the caller's dicts cannot change this matcher.
        self.match_position = dict(match_position) if match_position else {}
        self.match_name = dict(match_name) if match_name else {}

    def _make_constraint(self, value: ArgumentValue) -> Dict[str, Any]:
        """
        Create an argument constraint from a value.

        The wildcard flag computed here depends only on ``value``: it is True
        when a string value (or any string inside a list value) contains
        "*" or "?". The function-name wildcard (self.wildcard) is not
        consulted here; to_ir() applies it afterwards.

        Args:
            value: The argument value or list of values

        Returns:
            Dictionary with 'value' and 'wildcard' keys
        """
        candidates = value if isinstance(value, list) else [value]
        has_wildcard = any(
            isinstance(v, str) and ("*" in v or "?" in v) for v in candidates
        )
        return {"value": value, "wildcard": has_wildcard}

    def _ir_constraint(self, value: ArgumentValue) -> Dict[str, Any]:
        """Build the constraint emitted in the IR for one argument value."""
        constraint = self._make_constraint(value)
        # A wildcard function-name pattern forces the wildcard flag on every
        # argument constraint, regardless of the value's own content.
        if self.wildcard:
            constraint["wildcard"] = True
        return constraint

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        "positionalArgs" / "keywordArgs" are only present when the matching
        constraints were supplied. Positional keys are converted to strings.

        Returns:
            {
                "type": "call_matcher",
                "patterns": ["eval", "exec"],
                "wildcard": false,
                "matchMode": "any",
                "keywordArgs": { "debug": {"value": true, "wildcard": false} },
                "positionalArgs": { "0": {"value": "0.0.0.0", "wildcard": false} }
            }
        """
        ir: Dict[str, Any] = {
            "type": IRType.CALL_MATCHER.value,
            # Copy so that editing the returned IR cannot mutate the matcher.
            "patterns": list(self.patterns),
            "wildcard": self.wildcard,
            "matchMode": "any",
        }

        # Add positional argument constraints
        if self.match_position:
            ir["positionalArgs"] = {
                str(pos): self._ir_constraint(value)
                for pos, value in self.match_position.items()
            }

        # Add keyword argument constraints
        if self.match_name:
            ir["keywordArgs"] = {
                name: self._ir_constraint(value)
                for name, value in self.match_name.items()
            }

        return ir

    def __repr__(self) -> str:
        # Only the patterns are shown; argument constraints are omitted.
        patterns_str = ", ".join(f'"{p}"' for p in self.patterns)
        return f"calls({patterns_str})"
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class VariableMatcher:
    """
    Matcher for variable references, selected by name.

    Examples:
        variable("user_input")  # Exact match
        variable("user_*")      # Wildcard prefix
        variable("*_id")        # Wildcard suffix
    """

    def __init__(self, pattern: str):
        """
        Args:
            pattern: Name pattern for the variable; "*" acts as a wildcard.

        Raises:
            ValueError: If pattern is empty or not a string
        """
        if not (pattern and isinstance(pattern, str)):
            raise ValueError("variable() requires a non-empty string pattern")

        self.pattern = pattern
        # Flag the pattern as a wildcard when it contains at least one "*".
        self.wildcard = pattern.find("*") != -1

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        Returns:
            {
                "type": "variable_matcher",
                "pattern": "user_input",
                "wildcard": false
            }
        """
        node = {"type": IRType.VARIABLE_MATCHER.value}
        node["pattern"] = self.pattern
        node["wildcard"] = self.wildcard
        return node

    def __repr__(self) -> str:
        name = self.pattern
        return f'variable("{name}")'
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# Public API
|
|
182
|
+
def calls(
    *patterns: str,
    match_position: Optional[Dict[Union[int, str], ArgumentValue]] = None,
    match_name: Optional[Dict[str, ArgumentValue]] = None,
) -> CallMatcher:
    """
    Create a matcher for function/method calls with optional argument constraints.

    Args:
        *patterns: Function names to match (supports wildcards)
        match_position: Match positional arguments {position: value}. The
            position is an int index (e.g. 0) or a string tuple index
            (e.g. "0[0]" for the first element of the first argument).
        match_name: Match named/keyword arguments {name: value}

    Returns:
        CallMatcher instance. Its repr shows only the patterns, not the
        argument constraints.

    Examples:
        >>> calls("eval")
        calls("eval")

        >>> calls("request.GET", "request.POST")
        calls("request.GET", "request.POST")

        >>> calls("urllib.*")
        calls("urllib.*")

        >>> calls("app.run", match_name={"debug": True})
        calls("app.run")

        >>> calls("socket.bind", match_position={0: "0.0.0.0"})
        calls("socket.bind")

        >>> calls("socket.bind", match_position={"0[0]": "0.0.0.0"})
        calls("socket.bind")

        >>> calls("yaml.load", match_position={1: ["Loader", "UnsafeLoader"]})
        calls("yaml.load")

        >>> calls("chmod", match_position={1: "0o7*"})
        calls("chmod")

        >>> calls("app.run", match_position={0: "localhost"}, match_name={"debug": True})
        calls("app.run")
    """
    return CallMatcher(*patterns, match_position=match_position, match_name=match_name)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def variable(pattern: str) -> VariableMatcher:
    """
    Build a VariableMatcher for the given name pattern.

    Args:
        pattern: Variable name pattern (supports wildcards)

    Returns:
        VariableMatcher instance

    Examples:
        >>> variable("user_input")
        variable("user_input")

        >>> variable("*_id")
        variable("*_id")
    """
    matcher = VariableMatcher(pattern)
    return matcher
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Propagation presets for common use cases.
|
|
3
|
+
|
|
4
|
+
Presets bundle propagation primitives for convenience.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List
|
|
8
|
+
from .propagation import propagates, PropagationPrimitive
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PropagationPresets:
    """
    Common propagation bundles.

    Developers can use presets instead of manually listing primitives.
    Every preset returns a freshly built list on each call.
    """

    @staticmethod
    def minimal() -> List[PropagationPrimitive]:
        """
        Bare minimum propagation (fastest, but the most false negatives).

        Covers:
        - Variable assignments
        - Function arguments

        Coverage: ~40% of real-world flows
        Performance: Fastest (minimal overhead)
        False negatives: Higher (misses return values, strings)

        Use when:
        - Performance is critical
        - You only care about direct variable flows

        Example:
            flows(
                from_sources=calls("request.GET"),
                to_sinks=calls("eval"),
                propagates_through=PropagationPresets.minimal(),
                scope="local"
            )
        """
        return [
            propagates.assignment(),
            propagates.function_args(),
        ]

    @staticmethod
    def standard() -> List[PropagationPrimitive]:
        """
        Recommended default (good balance).

        Covers:
        - Phase 1: assignment, function_args, function_returns
        - Phase 2: string_concat, string_format

        Coverage: ~75-80% of real-world flows
        Performance: Good (moderate overhead)
        False negatives: Lower

        Use when:
        - General-purpose taint analysis
        - OWASP Top 10 detection
        - Good balance of coverage and performance

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("execute"),
                propagates_through=PropagationPresets.standard(),
                scope="global"
            )
        """
        return [
            propagates.assignment(),
            propagates.function_args(),
            propagates.function_returns(),
            propagates.string_concat(),
            propagates.string_format(),
        ]

    @staticmethod
    def comprehensive() -> List[PropagationPrimitive]:
        """
        All MVP primitives (Phase 1 + Phase 2).

        NOTE: For MVP, this returns exactly the same primitives as standard().

        Covers:
        - All standard() primitives

        Coverage: ~80% of real-world flows
        Performance: Moderate
        False negatives: Low

        Use when:
        - Maximum coverage within MVP scope
        - Willing to accept moderate performance overhead

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("eval"),
                propagates_through=PropagationPresets.comprehensive(),
                scope="global"
            )
        """
        return PropagationPresets.standard()  # For MVP, comprehensive = standard

    @staticmethod
    def exhaustive() -> List[PropagationPrimitive]:
        """
        All primitives (Phase 1-6, POST-MVP).

        NOTE: For MVP, this is same as comprehensive(), which in turn is the
        same as standard(). The figures below describe the planned POST-MVP
        behavior, not what is returned today.
        Post-MVP will include collections, control flow, OOP, advanced.

        Coverage: ~95% of real-world flows (POST-MVP)
        Performance: Slower (comprehensive analysis)
        False negatives: Minimal

        Use when:
        - Maximum security coverage required
        - Performance is not a concern
        - Production-critical code

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("execute"),
                propagates_through=PropagationPresets.exhaustive(),
                scope="global"
            )
        """
        # MVP: same as comprehensive
        # POST-MVP: will include Phase 3-6 primitives
        return PropagationPresets.comprehensive()
|