codepathfinder 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,243 @@
1
+ """
2
+ Core matchers for the pathfinder Python DSL.
3
+
4
+ These matchers generate JSON IR for the Go executor.
5
+ """
6
+
7
+ from typing import Dict, Optional, Union, List, Any
8
+ from .ir import IRType
9
+
10
ArgumentValue = Union[str, int, float, bool, List[Union[str, int, float, bool]]]


class CallMatcher:
    """
    Matches function/method calls with optional argument constraints.

    Examples:
        calls("eval")                                             # Exact match
        calls("eval", "exec")                                     # Multiple patterns
        calls("request.*")                                        # Wildcard (any request.* call)
        calls("*.json")                                           # Wildcard (any *.json call)
        calls("app.run", match_name={"debug": True})              # Keyword argument matching
        calls("open", match_position={1: "w"})                    # Positional argument matching
        calls("socket.bind", match_position={"0[0]": "0.0.0.0"})  # Tuple indexing
        calls("connect", match_position={"0[0]": "192.168.*"})    # Wildcard + tuple
    """

    def __init__(
        self,
        *patterns: str,
        match_position: Optional[Dict[Union[int, str], ArgumentValue]] = None,
        match_name: Optional[Dict[str, ArgumentValue]] = None,
    ):
        """
        Args:
            *patterns: Function names to match. Supports wildcards (*).
            match_position: Match positional arguments by index or tuple index.
                Examples: {0: "value"}, {1: ["a", "b"]}, {"0[0]": "0.0.0.0"}
            match_name: Match named/keyword arguments {name: value}

        Position indexing:
            - Simple: {0: "value"} matches first argument
            - Tuple: {"0[0]": "value"} matches first element of first argument tuple
            - Wildcard: {"0[0]": "192.168.*"} matches with wildcard pattern

        Raises:
            ValueError: If no patterns provided or pattern is empty
        """
        if not patterns:
            raise ValueError("calls() requires at least one pattern")

        if any(not p or not isinstance(p, str) for p in patterns):
            raise ValueError("All patterns must be non-empty strings")

        self.patterns = list(patterns)
        # Pattern-level wildcard flag: set when any function-name pattern has "*".
        self.wildcard = any("*" in p for p in patterns)
        # Shallow-copy the constraint mappings so that later mutation of the
        # caller's dicts cannot silently change this matcher.
        self.match_position = dict(match_position) if match_position else {}
        self.match_name = dict(match_name) if match_name else {}

    def _make_constraint(self, value: ArgumentValue) -> Dict[str, Any]:
        """
        Create an argument constraint from a value.

        The 'wildcard' flag computed here depends only on the value: it is
        True when the value is a string containing "*" or "?", or a list
        holding at least one such string. The pattern-level flag
        (self.wildcard) is not consulted here; to_ir() applies it afterwards.

        Args:
            value: The argument value or list of values

        Returns:
            Dictionary with 'value' and 'wildcard' keys
        """
        # Treat a scalar as a one-element list so both shapes share one check.
        candidates = value if isinstance(value, list) else [value]
        has_wildcard = any(
            isinstance(v, str) and ("*" in v or "?" in v) for v in candidates
        )
        return {"value": value, "wildcard": has_wildcard}

    def _argument_constraint(self, value: ArgumentValue) -> Dict[str, Any]:
        """Build the serialized constraint for one argument, as emitted by to_ir()."""
        constraint = self._make_constraint(value)
        # When any call pattern contains "*", the argument constraint is
        # emitted with wildcard=True even if the value itself has no wildcard
        # characters. This keeps the existing serialized output unchanged.
        # NOTE(review): confirm the executor expects this propagation.
        if self.wildcard:
            constraint["wildcard"] = True
        return constraint

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        "positionalArgs" and "keywordArgs" are only present when the
        corresponding constraints were supplied. Positional keys are
        converted to strings.

        Returns:
            {
                "type": "call_matcher",
                "patterns": ["eval", "exec"],
                "wildcard": false,
                "matchMode": "any",
                "keywordArgs": { "debug": {"value": true, "wildcard": false} },
                "positionalArgs": { "0": {"value": "0.0.0.0", "wildcard": false} }
            }
        """
        ir = {
            "type": IRType.CALL_MATCHER.value,
            # Copy so that edits to the returned IR do not alter the matcher.
            "patterns": list(self.patterns),
            "wildcard": self.wildcard,
            "matchMode": "any",
        }

        # Add positional argument constraints
        if self.match_position:
            ir["positionalArgs"] = {
                str(pos): self._argument_constraint(value)
                for pos, value in self.match_position.items()
            }

        # Add keyword argument constraints
        if self.match_name:
            ir["keywordArgs"] = {
                name: self._argument_constraint(value)
                for name, value in self.match_name.items()
            }

        return ir

    def __repr__(self) -> str:
        # Only the patterns are shown; argument constraints are omitted.
        patterns_str = ", ".join(f'"{p}"' for p in self.patterns)
        return f"calls({patterns_str})"
134
+
135
+
136
class VariableMatcher:
    """
    Matches variable references by name.

    Examples:
        variable("user_input")  # Exact match
        variable("user_*")      # Wildcard prefix
        variable("*_id")        # Wildcard suffix
    """

    def __init__(self, pattern: str):
        """
        Args:
            pattern: Variable name pattern. Supports wildcards (*).

        Raises:
            ValueError: If pattern is empty or is not a string
        """
        if not pattern or not isinstance(pattern, str):
            raise ValueError("variable() requires a non-empty string pattern")

        self.pattern = pattern
        # Only "*" marks a pattern as a wildcard here.
        self.wildcard = "*" in pattern

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        Returns:
            {
                "type": "variable_matcher",
                "pattern": "user_input",
                "wildcard": false
            }
        """
        return {
            "type": IRType.VARIABLE_MATCHER.value,
            "pattern": self.pattern,
            "wildcard": self.wildcard,
        }

    def __repr__(self) -> str:
        return f'variable("{self.pattern}")'
179
+
180
+
181
+ # Public API
182
def calls(
    *patterns: str,
    match_position: Optional[Dict[Union[int, str], ArgumentValue]] = None,
    match_name: Optional[Dict[str, ArgumentValue]] = None,
) -> CallMatcher:
    """
    Create a matcher for function/method calls with optional argument constraints.

    All arguments are forwarded unchanged to CallMatcher.

    Args:
        *patterns: Function names to match (supports wildcards)
        match_position: Match positional arguments {position: value}. The
            position is an integer index (0) or a tuple-index string ("0[0]").
        match_name: Match named/keyword arguments {name: value}

    Returns:
        CallMatcher instance

    Raises:
        ValueError: If no patterns are given or a pattern is not a
            non-empty string (raised by CallMatcher)

    Examples:
        >>> calls("eval")
        calls("eval")

        >>> calls("request.GET", "request.POST")
        calls("request.GET", "request.POST")

        >>> calls("urllib.*")
        calls("urllib.*")

        >>> calls("app.run", match_name={"debug": True})
        calls("app.run")

        >>> calls("socket.bind", match_position={0: "0.0.0.0"})
        calls("socket.bind")

        >>> calls("yaml.load", match_position={1: ["Loader", "UnsafeLoader"]})
        calls("yaml.load")

        >>> calls("chmod", match_position={1: "0o7*"})
        calls("chmod")

        >>> calls("app.run", match_position={0: "localhost"}, match_name={"debug": True})
        calls("app.run")
    """
    return CallMatcher(*patterns, match_position=match_position, match_name=match_name)
224
+
225
+
226
def variable(pattern: str) -> VariableMatcher:
    """
    Create a matcher for variable references.

    Args:
        pattern: Variable name pattern (supports wildcards)

    Returns:
        VariableMatcher instance

    Raises:
        ValueError: If pattern is empty or not a string (raised by
            VariableMatcher)

    Examples:
        >>> variable("user_input")
        variable("user_input")

        >>> variable("*_id")
        variable("*_id")
    """
    return VariableMatcher(pattern)
@@ -0,0 +1,135 @@
1
+ """
2
+ Propagation presets for common use cases.
3
+
4
+ Presets bundle propagation primitives for convenience.
5
+ """
6
+
7
+ from typing import List
8
+ from .propagation import propagates, PropagationPrimitive
9
+
10
+
11
class PropagationPresets:
    """
    Common propagation bundles.

    Developers can use presets instead of manually listing primitives.
    Every preset builds and returns a new list on each call.
    """

    @staticmethod
    def minimal() -> List[PropagationPrimitive]:
        """
        Bare minimum propagation (fastest, but misses the most flows).

        Covers:
        - Variable assignments
        - Function arguments

        Coverage: ~40% of real-world flows
        Performance: Fastest (minimal overhead)
        False negatives: Higher (misses return values, strings)

        Use when:
        - Performance is critical
        - You only care about direct variable flows

        Example:
            flows(
                from_sources=calls("request.GET"),
                to_sinks=calls("eval"),
                propagates_through=PropagationPresets.minimal(),
                scope="local"
            )
        """
        return [
            propagates.assignment(),
            propagates.function_args(),
        ]

    @staticmethod
    def standard() -> List[PropagationPrimitive]:
        """
        Recommended default (good balance).

        Covers:
        - Phase 1: assignment, function_args, function_returns
        - Phase 2: string_concat, string_format

        Coverage: ~75-80% of real-world flows
        Performance: Good (moderate overhead)
        False negatives: Lower

        Use when:
        - General-purpose taint analysis
        - OWASP Top 10 detection
        - Good balance of coverage and performance

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("execute"),
                propagates_through=PropagationPresets.standard(),
                scope="global"
            )
        """
        return [
            propagates.assignment(),
            propagates.function_args(),
            propagates.function_returns(),
            propagates.string_concat(),
            propagates.string_format(),
        ]

    @staticmethod
    def comprehensive() -> List[PropagationPrimitive]:
        """
        All MVP primitives (Phase 1 + Phase 2).

        Currently returns exactly what standard() returns.

        Covers:
        - All standard() primitives

        Coverage: ~80% of real-world flows
        Performance: Moderate
        False negatives: Low

        Use when:
        - Maximum coverage within MVP scope
        - Willing to accept moderate performance overhead

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("eval"),
                propagates_through=PropagationPresets.comprehensive(),
                scope="global"
            )
        """
        return PropagationPresets.standard()  # For MVP, comprehensive = standard

    @staticmethod
    def exhaustive() -> List[PropagationPrimitive]:
        """
        All primitives (Phase 1-6, POST-MVP).

        NOTE: For MVP, this is same as comprehensive().
        Post-MVP will include collections, control flow, OOP, advanced.

        Coverage: ~95% of real-world flows (POST-MVP)
        Performance: Slower (comprehensive analysis)
        False negatives: Minimal

        Use when:
        - Maximum security coverage required
        - Performance is not a concern
        - Production-critical code

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("execute"),
                propagates_through=PropagationPresets.exhaustive(),
                scope="global"
            )
        """
        # MVP: same as comprehensive
        # POST-MVP: will include Phase 3-6 primitives
        return PropagationPresets.comprehensive()
@@ -0,0 +1,250 @@
1
+ """
2
+ Taint propagation primitives for dataflow analysis.
3
+
4
+ These primitives define HOW taint propagates through code constructs.
5
+ Developers specify which primitives to enable via propagates_through parameter.
6
+ """
7
+
8
+ from typing import Dict, Any, List, Optional
9
+ from enum import Enum
10
+
11
+
12
class PropagationType(Enum):
    """
    Enum of all propagation primitive types.

    Each member's value is the string used as the "type" field when a
    primitive is serialized to JSON IR.

    Phase 1 (MVP):
        ASSIGNMENT, FUNCTION_ARGS, FUNCTION_RETURNS

    Phase 2 (MVP):
        STRING_CONCAT, STRING_FORMAT

    Phase 3-6 (Post-MVP):
        Collections, control flow, OOP, advanced

    Phase 1 and Phase 2 members have factory methods on the ``propagates``
    namespace in this module. Phase 3-6 members are declared here but have
    no factory methods yet.
    """

    # ===== PHASE 1: BARE MINIMUM (MVP) =====
    ASSIGNMENT = "assignment"
    FUNCTION_ARGS = "function_args"
    FUNCTION_RETURNS = "function_returns"

    # ===== PHASE 2: STRING OPERATIONS (MVP) =====
    STRING_CONCAT = "string_concat"
    STRING_FORMAT = "string_format"

    # ===== PHASE 3: COLLECTIONS (POST-MVP) =====
    LIST_APPEND = "list_append"
    LIST_EXTEND = "list_extend"
    DICT_VALUES = "dict_values"
    DICT_UPDATE = "dict_update"
    SET_ADD = "set_add"

    # ===== PHASE 4: CONTROL FLOW (POST-MVP) =====
    IF_CONDITION = "if_condition"
    FOR_ITERATION = "for_iteration"
    WHILE_CONDITION = "while_condition"
    SWITCH_CASE = "switch_case"

    # ===== PHASE 5: OOP (POST-MVP) =====
    ATTRIBUTE_ASSIGNMENT = "attribute_assignment"
    METHOD_CALL = "method_call"
    CONSTRUCTOR = "constructor"

    # ===== PHASE 6: ADVANCED (POST-MVP) =====
    COMPREHENSION = "comprehension"
    LAMBDA_CAPTURE = "lambda_capture"
    YIELD_STMT = "yield_stmt"
57
+
58
+
59
class PropagationPrimitive:
    """
    Base class for propagation primitives.

    Each primitive describes ONE way taint can flow through code.
    """

    def __init__(
        self, prim_type: PropagationType, metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Args:
            prim_type: The type of propagation
            metadata: Optional additional configuration. A shallow copy is
                stored, so later changes to the caller's dict do not affect
                this primitive.
        """
        self.type = prim_type
        self.metadata = dict(metadata) if metadata else {}

    def to_ir(self) -> Dict[str, Any]:
        """
        Serialize to JSON IR.

        The "metadata" entry is a shallow copy, so editing the returned
        dictionary does not modify this primitive.

        Returns:
            {
                "type": "assignment",
                "metadata": {}
            }
        """
        return {
            "type": self.type.value,
            "metadata": dict(self.metadata),
        }

    def __repr__(self) -> str:
        # Metadata is not included in the repr.
        return f"propagates.{self.type.value}()"
94
+
95
+
96
class propagates:
    """
    Namespace for taint propagation primitives.

    Every method is a static factory that returns a new PropagationPrimitive
    for one PropagationType member.

    Usage:
        propagates.assignment()
        propagates.function_args()
        propagates.function_returns()
        propagates.string_concat()
        propagates.string_format()
    """

    # ===== PHASE 1: BARE MINIMUM (MVP) =====

    @staticmethod
    def assignment() -> PropagationPrimitive:
        """
        Taint propagates through variable assignment.

        Patterns matched:
            x = tainted                    # Simple assignment
            a = b = tainted                # Chained assignment
            x, y = tainted, safe           # Tuple unpacking (x is tainted)

        This is the MOST COMMON propagation pattern (~40% of all flows).

        Examples:
            user_input = request.GET.get("id")  # source
            query = user_input                  # PROPAGATES via assignment
            cursor.execute(query)               # sink

        Returns:
            PropagationPrimitive for assignment
        """
        return PropagationPrimitive(PropagationType.ASSIGNMENT)

    @staticmethod
    def function_args() -> PropagationPrimitive:
        """
        Taint propagates through function arguments.

        Patterns matched:
            func(tainted)                  # Positional argument
            func(arg=tainted)              # Keyword argument
            func(*tainted)                 # Args unpacking
            func(**tainted)                # Kwargs unpacking

        Critical for inter-procedural analysis (~30% of flows).

        Examples:
            user_input = request.GET.get("id")  # source
            process_data(user_input)            # PROPAGATES via function_args
            def process_data(data):
                execute(data)                   # sink (data is tainted)

        Returns:
            PropagationPrimitive for function arguments
        """
        return PropagationPrimitive(PropagationType.FUNCTION_ARGS)

    @staticmethod
    def function_returns() -> PropagationPrimitive:
        """
        Taint propagates through return values.

        Patterns matched:
            return tainted                     # Direct return
            return tainted if cond else safe   # Conditional return
            return [tainted, safe]             # Return list containing tainted

        Essential for functions that transform tainted data (~20% of flows).

        Examples:
            def get_user_id():
                user_input = request.GET.get("id")  # source
                return user_input                   # PROPAGATES via return

            query = get_user_id()                   # query is now tainted
            execute(query)                          # sink

        Returns:
            PropagationPrimitive for function returns
        """
        return PropagationPrimitive(PropagationType.FUNCTION_RETURNS)

    # ===== PHASE 2: STRING OPERATIONS (MVP) =====

    @staticmethod
    def string_concat() -> PropagationPrimitive:
        """
        Taint propagates through string concatenation.

        Patterns matched:
            result = tainted + "suffix"        # Right concat
            result = "prefix" + tainted        # Left concat
            result = tainted + safe + more     # Mixed concat

        Critical for SQL/Command injection where queries are built via concat (~10% of flows).

        Examples:
            user_id = request.GET.get("id")                      # source
            query = "SELECT * FROM users WHERE id = " + user_id  # PROPAGATES via string_concat
            cursor.execute(query)                                # sink

        Returns:
            PropagationPrimitive for string concatenation
        """
        return PropagationPrimitive(PropagationType.STRING_CONCAT)

    @staticmethod
    def string_format() -> PropagationPrimitive:
        """
        Taint propagates through string formatting.

        Patterns matched:
            f"{tainted}"                       # f-string
            "{}".format(tainted)               # str.format()
            "%s" % tainted                     # % formatting
            "{name}".format(name=tainted)      # Named placeholders

        Critical for SQL injection where ORM methods use format() (~8% of flows).

        Examples:
            user_id = request.GET.get("id")                      # source
            query = f"SELECT * FROM users WHERE id = {user_id}"  # PROPAGATES via string_format
            cursor.execute(query)                                # sink

        Returns:
            PropagationPrimitive for string formatting
        """
        return PropagationPrimitive(PropagationType.STRING_FORMAT)

    # ===== PHASE 3-6: POST-MVP =====
    # Will be implemented in post-MVP PRs
228
+
229
+
230
def create_propagation_list(
    primitives: List[PropagationPrimitive],
) -> List[Dict[str, Any]]:
    """
    Convert a list of propagation primitives to JSON IR.

    Calls to_ir() on each primitive and keeps the input order. An empty
    input yields an empty list.

    Args:
        primitives: List of PropagationPrimitive objects

    Returns:
        List of JSON IR dictionaries

    Example:
        >>> prims = [propagates.assignment(), propagates.function_args()]
        >>> create_propagation_list(prims)
        [{'type': 'assignment', 'metadata': {}}, {'type': 'function_args', 'metadata': {}}]
    """
    return [prim.to_ir() for prim in primitives]