codepathfinder 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codepathfinder/__init__.py +48 -0
- codepathfinder/config.py +92 -0
- codepathfinder/dataflow.py +193 -0
- codepathfinder/decorators.py +104 -0
- codepathfinder/ir.py +107 -0
- codepathfinder/logic.py +101 -0
- codepathfinder/matchers.py +243 -0
- codepathfinder/presets.py +135 -0
- codepathfinder/propagation.py +250 -0
- codepathfinder-1.1.0.dist-info/METADATA +87 -0
- codepathfinder-1.1.0.dist-info/RECORD +14 -0
- codepathfinder-1.1.0.dist-info/WHEEL +5 -0
- codepathfinder-1.1.0.dist-info/licenses/LICENSE +661 -0
- codepathfinder-1.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core matchers for the pathfinder Python DSL.
|
|
3
|
+
|
|
4
|
+
These matchers generate JSON IR for the Go executor.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Optional, Union, List, Any
|
|
8
|
+
from .ir import IRType
|
|
9
|
+
|
|
10
|
+
# Value accepted by an argument constraint: one scalar (str/int/float/bool)
# or a list of such scalars.
ArgumentValue = Union[str, int, float, bool, List[Union[str, int, float, bool]]]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CallMatcher:
    """
    Matches function/method calls with optional argument constraints.

    Examples:
        calls("eval")                                             # Exact match
        calls("eval", "exec")                                     # Multiple patterns
        calls("request.*")                                        # Wildcard (any request.* call)
        calls("*.json")                                           # Wildcard (any *.json call)
        calls("app.run", match_name={"debug": True})              # Keyword argument matching
        calls("open", match_position={1: "w"})                    # Positional argument matching
        calls("socket.bind", match_position={"0[0]": "0.0.0.0"})  # Tuple indexing
        calls("connect", match_position={"0[0]": "192.168.*"})    # Wildcard + tuple
    """

    def __init__(
        self,
        *patterns: str,
        match_position: Optional[Dict[Union[int, str], ArgumentValue]] = None,
        match_name: Optional[Dict[str, ArgumentValue]] = None,
    ):
        """
        Args:
            *patterns: Function names to match. Supports wildcards (*).
            match_position: Match positional arguments by index or tuple index.
                Keys are either an int position or a string such as "0[0]".
                Examples: {0: "value"}, {1: ["a", "b"]}, {"0[0]": "0.0.0.0"}
            match_name: Match named/keyword arguments {name: value}

        Position indexing:
            - Simple: {0: "value"} matches first argument
            - Tuple: {"0[0]": "value"} matches first element of first argument tuple
            - Wildcard: {"0[0]": "192.168.*"} matches with wildcard pattern

        Raises:
            ValueError: If no patterns provided or pattern is empty
        """
        if not patterns:
            raise ValueError("calls() requires at least one pattern")

        if any(not p or not isinstance(p, str) for p in patterns):
            raise ValueError("All patterns must be non-empty strings")

        self.patterns = list(patterns)
        # Only "*" marks a call pattern as a wildcard pattern.
        self.wildcard = any("*" in p for p in patterns)
        self.match_position = match_position or {}
        self.match_name = match_name or {}

    def _make_constraint(self, value: ArgumentValue) -> Dict[str, Any]:
        """
        Create an argument constraint from a value.

        Automatically detects wildcard characters ("*" or "?") in string
        values, or in any string element of a list value.

        Args:
            value: The argument value or list of values

        Returns:
            Dictionary with 'value' and 'wildcard' keys
        """
        # NOTE: The flag computed here looks only at the argument value
        # (e.g., "192.168.*"). It does not consider the call patterns;
        # to_ir() additionally forces the flag to True when any call pattern
        # contains "*" (see _argument_constraint).
        has_wildcard = False
        if isinstance(value, str) and ("*" in value or "?" in value):
            has_wildcard = True
        elif isinstance(value, list):
            has_wildcard = any(
                isinstance(v, str) and ("*" in v or "?" in v) for v in value
            )

        return {"value": value, "wildcard": has_wildcard}

    def _argument_constraint(self, value: ArgumentValue) -> Dict[str, Any]:
        """Build the constraint emitted in the IR for one argument value."""
        constraint = self._make_constraint(value)
        # Propagate wildcard flag from pattern to argument constraints
        if self.wildcard:
            constraint["wildcard"] = True
        return constraint

    def to_ir(self) -> Dict[str, Any]:
        """
        Serialize to JSON IR for Go executor.

        The "positionalArgs" and "keywordArgs" keys are only present when the
        corresponding constraints were supplied. Positional keys are converted
        to strings with str().

        Returns:
            {
                "type": "call_matcher",
                "patterns": ["eval", "exec"],
                "wildcard": false,
                "matchMode": "any",
                "keywordArgs": { "debug": {"value": true, "wildcard": false} },
                "positionalArgs": { "0": {"value": "0.0.0.0", "wildcard": false} }
            }
        """
        ir = {
            "type": IRType.CALL_MATCHER.value,
            "patterns": self.patterns,
            "wildcard": self.wildcard,
            "matchMode": "any",
        }

        # Add positional argument constraints
        if self.match_position:
            ir["positionalArgs"] = {
                str(pos): self._argument_constraint(value)
                for pos, value in self.match_position.items()
            }

        # Add keyword argument constraints
        if self.match_name:
            ir["keywordArgs"] = {
                name: self._argument_constraint(value)
                for name, value in self.match_name.items()
            }

        return ir

    def __repr__(self) -> str:
        # Argument constraints are intentionally not part of the repr; only
        # the call patterns are shown.
        patterns_str = ", ".join(f'"{p}"' for p in self.patterns)
        return f"calls({patterns_str})"
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class VariableMatcher:
    """
    Matcher that selects variable references by their name.

    Examples:
        variable("user_input")  # Exact match
        variable("user_*")      # Wildcard prefix
        variable("*_id")        # Wildcard suffix
    """

    def __init__(self, pattern: str):
        """
        Args:
            pattern: Name pattern for the variable; "*" acts as a wildcard.

        Raises:
            ValueError: If pattern is empty or is not a string
        """
        usable = pattern and isinstance(pattern, str)
        if not usable:
            raise ValueError("variable() requires a non-empty string pattern")

        self.pattern = pattern
        # The pattern counts as a wildcard pattern as soon as it holds a "*".
        self.wildcard = pattern.find("*") != -1

    def to_ir(self) -> dict:
        """
        Produce the JSON IR consumed by the Go executor.

        Returns:
            {
                "type": "variable_matcher",
                "pattern": "user_input",
                "wildcard": false
            }
        """
        ir = {"type": IRType.VARIABLE_MATCHER.value}
        ir["pattern"] = self.pattern
        ir["wildcard"] = self.wildcard
        return ir

    def __repr__(self) -> str:
        return 'variable("{}")'.format(self.pattern)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# Public API
|
|
182
|
+
def calls(
    *patterns: str,
    match_position: Optional[Dict[Union[int, str], ArgumentValue]] = None,
    match_name: Optional[Dict[str, ArgumentValue]] = None,
) -> CallMatcher:
    """
    Create a matcher for function/method calls with optional argument constraints.

    Args:
        *patterns: Function names to match (supports wildcards)
        match_position: Match positional arguments {position: value}. The
            position is an int index or a tuple-index string such as "0[0]".
        match_name: Match named/keyword arguments {name: value}

    Returns:
        CallMatcher instance

    Raises:
        ValueError: If no patterns are given or a pattern is not a
            non-empty string (raised by CallMatcher).

    Examples:
        >>> calls("eval")
        calls("eval")

        >>> calls("request.GET", "request.POST")
        calls("request.GET", "request.POST")

        >>> calls("urllib.*")
        calls("urllib.*")

        >>> calls("app.run", match_name={"debug": True})
        calls("app.run")

        >>> calls("socket.bind", match_position={0: "0.0.0.0"})
        calls("socket.bind")

        >>> calls("socket.bind", match_position={"0[0]": "0.0.0.0"})
        calls("socket.bind")

        >>> calls("yaml.load", match_position={1: ["Loader", "UnsafeLoader"]})
        calls("yaml.load")

        >>> calls("chmod", match_position={1: "0o7*"})
        calls("chmod")

        >>> calls("app.run", match_position={0: "localhost"}, match_name={"debug": True})
        calls("app.run")
    """
    return CallMatcher(*patterns, match_position=match_position, match_name=match_name)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def variable(pattern: str) -> VariableMatcher:
    """
    Build a matcher that selects variable references by name.

    Args:
        pattern: Name pattern for the variable (wildcards are supported)

    Returns:
        A new VariableMatcher for the pattern

    Examples:
        >>> variable("user_input")
        variable("user_input")

        >>> variable("*_id")
        variable("*_id")
    """
    matcher = VariableMatcher(pattern)
    return matcher
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Propagation presets for common use cases.
|
|
3
|
+
|
|
4
|
+
Presets bundle propagation primitives for convenience.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List
|
|
8
|
+
from .propagation import propagates, PropagationPrimitive
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PropagationPresets:
    """
    Common propagation bundles.

    Developers can use presets instead of manually listing primitives.
    Every preset returns a freshly built list of primitives.
    """

    @staticmethod
    def minimal() -> List[PropagationPrimitive]:
        """
        Bare minimum propagation (fastest, most false negatives).

        Covers:
        - Variable assignments
        - Function arguments

        Coverage: ~40% of real-world flows
        Performance: Fastest (minimal overhead)
        False negatives: Higher (misses return values, strings)

        Use when:
        - Performance is critical
        - You only care about direct variable flows

        Example:
            flows(
                from_sources=calls("request.GET"),
                to_sinks=calls("eval"),
                propagates_through=PropagationPresets.minimal(),
                scope="local"
            )
        """
        return [
            propagates.assignment(),
            propagates.function_args(),
        ]

    @staticmethod
    def standard() -> List[PropagationPrimitive]:
        """
        Recommended default (good balance).

        Covers:
        - Phase 1: assignment, function_args, function_returns
        - Phase 2: string_concat, string_format

        Coverage: ~75-80% of real-world flows
        Performance: Good (moderate overhead)
        False negatives: Lower

        Use when:
        - General-purpose taint analysis
        - OWASP Top 10 detection
        - Good balance of coverage and performance

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("execute"),
                propagates_through=PropagationPresets.standard(),
                scope="global"
            )
        """
        return [
            propagates.assignment(),
            propagates.function_args(),
            propagates.function_returns(),
            propagates.string_concat(),
            propagates.string_format(),
        ]

    @staticmethod
    def comprehensive() -> List[PropagationPrimitive]:
        """
        All MVP primitives (Phase 1 + Phase 2).

        NOTE: For MVP, this returns exactly the same primitives as standard().

        Covers:
        - All standard() primitives

        Coverage: ~80% of real-world flows
        Performance: Moderate
        False negatives: Low

        Use when:
        - Maximum coverage within MVP scope
        - Willing to accept moderate performance overhead

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("eval"),
                propagates_through=PropagationPresets.comprehensive(),
                scope="global"
            )
        """
        return PropagationPresets.standard()  # For MVP, comprehensive = standard

    @staticmethod
    def exhaustive() -> List[PropagationPrimitive]:
        """
        All primitives (Phase 1-6, POST-MVP).

        NOTE: For MVP, this is same as comprehensive().
        Post-MVP will include collections, control flow, OOP, advanced.

        Coverage: ~95% of real-world flows (POST-MVP)
        Performance: Slower (comprehensive analysis)
        False negatives: Minimal

        Use when:
        - Maximum security coverage required
        - Performance is not a concern
        - Production-critical code

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("execute"),
                propagates_through=PropagationPresets.exhaustive(),
                scope="global"
            )
        """
        # MVP: same as comprehensive
        # POST-MVP: will include Phase 3-6 primitives
        return PropagationPresets.comprehensive()
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Taint propagation primitives for dataflow analysis.
|
|
3
|
+
|
|
4
|
+
These primitives define HOW taint propagates through code constructs.
|
|
5
|
+
Developers specify which primitives to enable via propagates_through parameter.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Dict, Any, List, Optional
|
|
9
|
+
from enum import Enum
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PropagationType(Enum):
    """
    Every kind of propagation primitive, grouped by rollout phase.

    Phase 1 (MVP):
        ASSIGNMENT, FUNCTION_ARGS, FUNCTION_RETURNS

    Phase 2 (MVP):
        STRING_CONCAT, STRING_FORMAT

    Phase 3-6 (Post-MVP):
        Collections, control flow, OOP, advanced
    """

    # --- Phase 1: bare minimum (MVP) ---
    ASSIGNMENT = "assignment"
    FUNCTION_ARGS = "function_args"
    FUNCTION_RETURNS = "function_returns"

    # --- Phase 2: string operations (MVP) ---
    STRING_CONCAT = "string_concat"
    STRING_FORMAT = "string_format"

    # --- Phase 3: collections (post-MVP) ---
    LIST_APPEND = "list_append"
    LIST_EXTEND = "list_extend"
    DICT_VALUES = "dict_values"
    DICT_UPDATE = "dict_update"
    SET_ADD = "set_add"

    # --- Phase 4: control flow (post-MVP) ---
    IF_CONDITION = "if_condition"
    FOR_ITERATION = "for_iteration"
    WHILE_CONDITION = "while_condition"
    SWITCH_CASE = "switch_case"

    # --- Phase 5: OOP (post-MVP) ---
    ATTRIBUTE_ASSIGNMENT = "attribute_assignment"
    METHOD_CALL = "method_call"
    CONSTRUCTOR = "constructor"

    # --- Phase 6: advanced (post-MVP) ---
    COMPREHENSION = "comprehension"
    LAMBDA_CAPTURE = "lambda_capture"
    YIELD_STMT = "yield_stmt"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class PropagationPrimitive:
    """
    Base class for propagation primitives.

    A primitive stands for exactly ONE way taint can flow through code.
    """

    def __init__(
        self, prim_type: PropagationType, metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Args:
            prim_type: Which kind of propagation this primitive represents
            metadata: Extra configuration; an empty dict is used when omitted
        """
        self.type = prim_type
        self.metadata = metadata if metadata else {}

    def to_ir(self) -> Dict[str, Any]:
        """
        Produce the JSON IR form of this primitive.

        Returns:
            {
                "type": "assignment",
                "metadata": {}
            }
        """
        ir: Dict[str, Any] = {"type": self.type.value}
        ir["metadata"] = self.metadata
        return ir

    def __repr__(self) -> str:
        return "propagates.{}()".format(self.type.value)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class propagates:
    """
    Namespace grouping the factory functions for taint propagation primitives.

    Usage:
        propagates.assignment()
        propagates.function_args()
        propagates.function_returns()
    """

    # ===== PHASE 1: BARE MINIMUM (MVP) =====

    @staticmethod
    def assignment() -> PropagationPrimitive:
        """
        Taint flows through variable assignment.

        Patterns matched:
            x = tainted              # Simple assignment
            a = b = tainted          # Chained assignment
            x, y = tainted, safe     # Tuple unpacking (x is tainted)

        This is the MOST COMMON propagation pattern (~40% of all flows).

        Examples:
            user_input = request.GET.get("id")  # source
            query = user_input                  # PROPAGATES via assignment
            cursor.execute(query)               # sink

        Returns:
            PropagationPrimitive for assignment
        """
        kind = PropagationType.ASSIGNMENT
        return PropagationPrimitive(kind)

    @staticmethod
    def function_args() -> PropagationPrimitive:
        """
        Taint flows through function arguments.

        Patterns matched:
            func(tainted)            # Positional argument
            func(arg=tainted)        # Keyword argument
            func(*tainted)           # Args unpacking
            func(**tainted)          # Kwargs unpacking

        Critical for inter-procedural analysis (~30% of flows).

        Examples:
            user_input = request.GET.get("id")  # source
            process_data(user_input)            # PROPAGATES via function_args
            def process_data(data):
                execute(data)                   # sink (data is tainted)

        Returns:
            PropagationPrimitive for function arguments
        """
        kind = PropagationType.FUNCTION_ARGS
        return PropagationPrimitive(kind)

    @staticmethod
    def function_returns() -> PropagationPrimitive:
        """
        Taint flows through return values.

        Patterns matched:
            return tainted                    # Direct return
            return tainted if cond else safe  # Conditional return
            return [tainted, safe]            # Return list containing tainted

        Essential for functions that transform tainted data (~20% of flows).

        Examples:
            def get_user_id():
                user_input = request.GET.get("id")  # source
                return user_input                   # PROPAGATES via return

            query = get_user_id()                   # query is now tainted
            execute(query)                          # sink

        Returns:
            PropagationPrimitive for function returns
        """
        kind = PropagationType.FUNCTION_RETURNS
        return PropagationPrimitive(kind)

    # ===== PHASE 2: STRING OPERATIONS (MVP) =====

    @staticmethod
    def string_concat() -> PropagationPrimitive:
        """
        Taint flows through string concatenation.

        Patterns matched:
            result = tainted + "suffix"      # Right concat
            result = "prefix" + tainted      # Left concat
            result = tainted + safe + more   # Mixed concat

        Critical for SQL/Command injection where queries are built via concat (~10% of flows).

        Examples:
            user_id = request.GET.get("id")                      # source
            query = "SELECT * FROM users WHERE id = " + user_id  # PROPAGATES via string_concat
            cursor.execute(query)                                # sink

        Returns:
            PropagationPrimitive for string concatenation
        """
        kind = PropagationType.STRING_CONCAT
        return PropagationPrimitive(kind)

    @staticmethod
    def string_format() -> PropagationPrimitive:
        """
        Taint flows through string formatting.

        Patterns matched:
            f"{tainted}"                     # f-string
            "{}".format(tainted)             # str.format()
            "%s" % tainted                   # % formatting
            "{name}".format(name=tainted)    # Named placeholders

        Critical for SQL injection where ORM methods use format() (~8% of flows).

        Examples:
            user_id = request.GET.get("id")                      # source
            query = f"SELECT * FROM users WHERE id = {user_id}"  # PROPAGATES via string_format
            cursor.execute(query)                                # sink

        Returns:
            PropagationPrimitive for string formatting
        """
        kind = PropagationType.STRING_FORMAT
        return PropagationPrimitive(kind)

    # ===== PHASE 3-6: POST-MVP =====
    # Will be implemented in post-MVP PRs
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def create_propagation_list(
    primitives: List[PropagationPrimitive],
) -> List[Dict[str, Any]]:
    """
    Serialize propagation primitives to their JSON IR form, keeping order.

    Args:
        primitives: PropagationPrimitive objects to serialize

    Returns:
        One JSON IR dictionary per primitive, in input order

    Example:
        >>> prims = [propagates.assignment(), propagates.function_args()]
        >>> create_propagation_list(prims)
        [
            {"type": "assignment", "metadata": {}},
            {"type": "function_args", "metadata": {}}
        ]
    """
    serialized: List[Dict[str, Any]] = []
    for primitive in primitives:
        serialized.append(primitive.to_ir())
    return serialized
|