codepathfinder 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of codepathfinder might be problematic. Click here for more details.
- codepathfinder/__init__.py +48 -0
- codepathfinder/config.py +92 -0
- codepathfinder/dataflow.py +193 -0
- codepathfinder/decorators.py +104 -0
- codepathfinder/ir.py +107 -0
- codepathfinder/logic.py +101 -0
- codepathfinder/matchers.py +148 -0
- codepathfinder/presets.py +135 -0
- codepathfinder/propagation.py +250 -0
- codepathfinder-1.0.0.dist-info/METADATA +87 -0
- codepathfinder-1.0.0.dist-info/RECORD +14 -0
- codepathfinder-1.0.0.dist-info/WHEEL +5 -0
- codepathfinder-1.0.0.dist-info/licenses/LICENSE +661 -0
- codepathfinder-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core matchers for the pathfinder Python DSL.
|
|
3
|
+
|
|
4
|
+
These matchers generate JSON IR for the Go executor.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .ir import IRType
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CallMatcher:
    """
    Matches function/method calls in the callgraph.

    Holds one or more name patterns and serializes them to the JSON IR
    consumed by the Go executor. A matcher is flagged as a wildcard
    matcher when at least one of its patterns contains ``*``.

    Examples:
        calls("eval")                    # Exact match
        calls("eval", "exec")            # Multiple patterns
        calls("request.*")               # Wildcard (any request.* call)
        calls("*.json")                  # Wildcard (any *.json call)
    """

    def __init__(self, *patterns: str):
        """
        Args:
            *patterns: Function names to match. Supports wildcards (*).

        Raises:
            ValueError: If no patterns are provided, or if any pattern is
                empty or is not a string
        """
        if not patterns:
            raise ValueError("calls() requires at least one pattern")

        # Check the type BEFORE the truthiness: evaluating ``not p`` on an
        # arbitrary object may itself raise (objects with an ambiguous or
        # failing __bool__), which would leak an exception other than the
        # documented ValueError.
        if any(not isinstance(p, str) or not p for p in patterns):
            raise ValueError("All patterns must be non-empty strings")

        self.patterns = list(patterns)
        self.wildcard = any("*" in p for p in self.patterns)

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        The returned ``patterns`` list is a copy, so callers may modify
        the IR without changing this matcher.

        Returns:
            {
                "type": "call_matcher",
                "patterns": ["eval", "exec"],
                "wildcard": false,
                "match_mode": "any"  # matches if ANY pattern matches
            }
        """
        return {
            "type": IRType.CALL_MATCHER.value,
            # Copy: do not hand out a reference to internal state.
            "patterns": list(self.patterns),
            "wildcard": self.wildcard,
            "match_mode": "any",
        }

    def __repr__(self) -> str:
        """Return the DSL expression that would build this matcher."""
        patterns_str = ", ".join(f'"{p}"' for p in self.patterns)
        return f"calls({patterns_str})"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class VariableMatcher:
    """
    Matches variable references by name.

    Holds a single name pattern and serializes it to the JSON IR consumed
    by the Go executor. The matcher is flagged as a wildcard matcher when
    the pattern contains ``*``.

    Examples:
        variable("user_input")           # Exact match
        variable("user_*")               # Wildcard prefix
        variable("*_id")                 # Wildcard suffix
    """

    def __init__(self, pattern: str):
        """
        Args:
            pattern: Variable name pattern. Supports wildcards (*).

        Raises:
            ValueError: If pattern is empty or is not a string
        """
        # Check the type BEFORE the truthiness: evaluating ``not pattern``
        # on an arbitrary object may itself raise, which would leak an
        # exception other than the documented ValueError.
        if not isinstance(pattern, str) or not pattern:
            raise ValueError("variable() requires a non-empty string pattern")

        self.pattern = pattern
        self.wildcard = "*" in pattern

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        Returns:
            {
                "type": "variable_matcher",
                "pattern": "user_input",
                "wildcard": false
            }
        """
        return {
            "type": IRType.VARIABLE_MATCHER.value,
            "pattern": self.pattern,
            "wildcard": self.wildcard,
        }

    def __repr__(self) -> str:
        """Return the DSL expression that would build this matcher."""
        return f'variable("{self.pattern}")'
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# Public API
|
|
108
|
+
def calls(*patterns: str) -> CallMatcher:
    """
    Build a CallMatcher for the given function/method name patterns.

    Args:
        *patterns: One or more function names to match; ``*`` acts as a
            wildcard

    Returns:
        A new CallMatcher holding the patterns

    Examples:
        >>> calls("eval")
        calls("eval")

        >>> calls("request.GET", "request.POST")
        calls("request.GET", "request.POST")

        >>> calls("urllib.*")
        calls("urllib.*")
    """
    # Validation (non-empty, string-only patterns) happens in CallMatcher.
    matcher = CallMatcher(*patterns)
    return matcher
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def variable(pattern: str) -> VariableMatcher:
    """
    Build a VariableMatcher for the given variable name pattern.

    Args:
        pattern: Variable name to match; ``*`` acts as a wildcard

    Returns:
        A new VariableMatcher holding the pattern

    Examples:
        >>> variable("user_input")
        variable("user_input")

        >>> variable("*_id")
        variable("*_id")
    """
    # Validation (non-empty string pattern) happens in VariableMatcher.
    matcher = VariableMatcher(pattern)
    return matcher
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Propagation presets for common use cases.
|
|
3
|
+
|
|
4
|
+
Presets bundle propagation primitives for convenience.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List
|
|
8
|
+
from .propagation import propagates, PropagationPrimitive
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PropagationPresets:
    """
    Common propagation bundles.

    Each preset returns a freshly built list of propagation primitives,
    so developers can pass a preset instead of manually listing
    primitives.
    """

    @staticmethod
    def minimal() -> List[PropagationPrimitive]:
        """
        Bare minimum propagation (fastest, but most false negatives).

        Covers:
        - Variable assignments
        - Function arguments

        Coverage: ~40% of real-world flows
        Performance: Fastest (minimal overhead)
        False negatives: Higher (misses return values, strings)

        Use when:
        - Performance is critical
        - You only care about direct variable flows

        Example:
            flows(
                from_sources=calls("request.GET"),
                to_sinks=calls("eval"),
                propagates_through=PropagationPresets.minimal(),
                scope="local"
            )
        """
        return [
            propagates.assignment(),
            propagates.function_args(),
        ]

    @staticmethod
    def standard() -> List[PropagationPrimitive]:
        """
        Recommended default (good balance).

        Covers:
        - Phase 1: assignment, function_args, function_returns
        - Phase 2: string_concat, string_format

        Coverage: ~75-80% of real-world flows
        Performance: Good (moderate overhead)
        False negatives: Lower

        Use when:
        - General-purpose taint analysis
        - OWASP Top 10 detection
        - Good balance of coverage and performance

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("execute"),
                propagates_through=PropagationPresets.standard(),
                scope="global"
            )
        """
        return [
            propagates.assignment(),
            propagates.function_args(),
            propagates.function_returns(),
            propagates.string_concat(),
            propagates.string_format(),
        ]

    @staticmethod
    def comprehensive() -> List[PropagationPrimitive]:
        """
        All MVP primitives (Phase 1 + Phase 2).

        NOTE: For MVP this returns exactly the standard() list, so the
        figures below are the same as for standard().

        Covers:
        - All standard() primitives

        Coverage: ~80% of real-world flows
        Performance: Moderate
        False negatives: Low

        Use when:
        - Maximum coverage within MVP scope
        - Willing to accept moderate performance overhead

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("eval"),
                propagates_through=PropagationPresets.comprehensive(),
                scope="global"
            )
        """
        return PropagationPresets.standard()  # For MVP, comprehensive = standard

    @staticmethod
    def exhaustive() -> List[PropagationPrimitive]:
        """
        All primitives (Phase 1-6, POST-MVP).

        NOTE: For MVP, this is same as comprehensive(), which in turn
        returns the standard() list. The figures below describe the
        planned post-MVP behavior, not what is returned today.
        Post-MVP will include collections, control flow, OOP, advanced.

        Coverage: ~95% of real-world flows (POST-MVP)
        Performance: Slower (comprehensive analysis)
        False negatives: Minimal

        Use when:
        - Maximum security coverage required
        - Performance is not a concern
        - Production-critical code

        Example:
            flows(
                from_sources=calls("request.*"),
                to_sinks=calls("execute"),
                propagates_through=PropagationPresets.exhaustive(),
                scope="global"
            )
        """
        # MVP: same as comprehensive
        # POST-MVP: will include Phase 3-6 primitives
        return PropagationPresets.comprehensive()
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Taint propagation primitives for dataflow analysis.
|
|
3
|
+
|
|
4
|
+
These primitives define HOW taint propagates through code constructs.
|
|
5
|
+
Developers specify which primitives to enable via propagates_through parameter.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Dict, Any, List, Optional
|
|
9
|
+
from enum import Enum
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PropagationType(Enum):
    """
    Identifiers for every kind of taint propagation primitive.

    Each member's value is the string written into the ``type`` field of
    a primitive's JSON IR. Members are grouped by rollout phase:

    Phases 1-2 (MVP):
        Assignment, function arguments/returns, string concatenation
        and string formatting

    Phases 3-6 (Post-MVP):
        Collections, control flow, OOP, advanced
    """

    # --- Phase 1 (MVP): bare minimum ---
    ASSIGNMENT = "assignment"
    FUNCTION_ARGS = "function_args"
    FUNCTION_RETURNS = "function_returns"

    # --- Phase 2 (MVP): string operations ---
    STRING_CONCAT = "string_concat"
    STRING_FORMAT = "string_format"

    # --- Phase 3 (post-MVP): collections ---
    LIST_APPEND = "list_append"
    LIST_EXTEND = "list_extend"
    DICT_VALUES = "dict_values"
    DICT_UPDATE = "dict_update"
    SET_ADD = "set_add"

    # --- Phase 4 (post-MVP): control flow ---
    IF_CONDITION = "if_condition"
    FOR_ITERATION = "for_iteration"
    WHILE_CONDITION = "while_condition"
    SWITCH_CASE = "switch_case"

    # --- Phase 5 (post-MVP): OOP ---
    ATTRIBUTE_ASSIGNMENT = "attribute_assignment"
    METHOD_CALL = "method_call"
    CONSTRUCTOR = "constructor"

    # --- Phase 6 (post-MVP): advanced ---
    COMPREHENSION = "comprehension"
    LAMBDA_CAPTURE = "lambda_capture"
    YIELD_STMT = "yield_stmt"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class PropagationPrimitive:
    """
    Base class for propagation primitives.

    Each primitive describes ONE way taint can flow through code. It
    pairs a propagation type with an optional metadata dictionary and
    serializes both to JSON IR.
    """

    def __init__(
        self, prim_type: "PropagationType", metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Args:
            prim_type: The type of propagation
            metadata: Optional additional configuration. The mapping is
                shallow-copied, so later changes to the caller's dict do
                not affect this primitive.
        """
        self.type = prim_type
        # Own a private copy instead of aliasing the caller's dict.
        self.metadata = dict(metadata) if metadata else {}

    def to_ir(self) -> Dict[str, Any]:
        """
        Serialize to JSON IR.

        The returned ``metadata`` is a shallow copy, so adding or
        replacing keys in the IR does not change this primitive.

        Returns:
            {
                "type": "assignment",
                "metadata": {}
            }
        """
        return {
            "type": self.type.value,
            # Copy: do not hand out a reference to internal state.
            "metadata": dict(self.metadata),
        }

    def __repr__(self) -> str:
        """Return the DSL expression that would build this primitive."""
        return f"propagates.{self.type.value}()"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class propagates:
    """
    Namespace of factory methods for taint propagation primitives.

    Every method takes no arguments and returns a new
    PropagationPrimitive of the corresponding PropagationType.

    Usage:
        propagates.assignment()
        propagates.function_args()
        propagates.function_returns()
    """

    # ===== PHASE 1: BARE MINIMUM (MVP) =====

    @staticmethod
    def assignment() -> PropagationPrimitive:
        """
        Taint flows from the right-hand side of an assignment to its target.

        Patterns matched:
            x = tainted                      # Simple assignment
            a = b = tainted                  # Chained assignment
            x, y = tainted, safe             # Tuple unpacking (x is tainted)

        This is the MOST COMMON propagation pattern (~40% of all flows).

        Examples:
            user_input = request.GET.get("id")  # source
            query = user_input                   # PROPAGATES via assignment
            cursor.execute(query)                # sink

        Returns:
            PropagationPrimitive of type ASSIGNMENT
        """
        primitive = PropagationPrimitive(PropagationType.ASSIGNMENT)
        return primitive

    @staticmethod
    def function_args() -> PropagationPrimitive:
        """
        Taint flows from a call's arguments into the called function.

        Patterns matched:
            func(tainted)                    # Positional argument
            func(arg=tainted)                # Keyword argument
            func(*tainted)                   # Args unpacking
            func(**tainted)                  # Kwargs unpacking

        Critical for inter-procedural analysis (~30% of flows).

        Examples:
            user_input = request.GET.get("id")  # source
            process_data(user_input)             # PROPAGATES via function_args
            def process_data(data):
                execute(data)                    # sink (data is tainted)

        Returns:
            PropagationPrimitive of type FUNCTION_ARGS
        """
        primitive = PropagationPrimitive(PropagationType.FUNCTION_ARGS)
        return primitive

    @staticmethod
    def function_returns() -> PropagationPrimitive:
        """
        Taint flows from a returned value back to the caller.

        Patterns matched:
            return tainted                   # Direct return
            return tainted if cond else safe # Conditional return
            return [tainted, safe]           # Return list containing tainted

        Essential for functions that transform tainted data (~20% of flows).

        Examples:
            def get_user_id():
                user_input = request.GET.get("id")  # source
                return user_input                    # PROPAGATES via return

            query = get_user_id()                    # query is now tainted
            execute(query)                           # sink

        Returns:
            PropagationPrimitive of type FUNCTION_RETURNS
        """
        primitive = PropagationPrimitive(PropagationType.FUNCTION_RETURNS)
        return primitive

    # ===== PHASE 2: STRING OPERATIONS (MVP) =====

    @staticmethod
    def string_concat() -> PropagationPrimitive:
        """
        Taint flows through string concatenation into the result.

        Patterns matched:
            result = tainted + "suffix"      # Right concat
            result = "prefix" + tainted      # Left concat
            result = tainted + safe + more   # Mixed concat

        Critical for SQL/Command injection where queries are built via concat (~10% of flows).

        Examples:
            user_id = request.GET.get("id")                      # source
            query = "SELECT * FROM users WHERE id = " + user_id  # PROPAGATES via string_concat
            cursor.execute(query)                                # sink

        Returns:
            PropagationPrimitive of type STRING_CONCAT
        """
        primitive = PropagationPrimitive(PropagationType.STRING_CONCAT)
        return primitive

    @staticmethod
    def string_format() -> PropagationPrimitive:
        """
        Taint flows through string formatting into the formatted result.

        Patterns matched:
            f"{tainted}"                     # f-string
            "{}".format(tainted)             # str.format()
            "%s" % tainted                   # % formatting
            "{name}".format(name=tainted)    # Named placeholders

        Critical for SQL injection where ORM methods use format() (~8% of flows).

        Examples:
            user_id = request.GET.get("id")                      # source
            query = f"SELECT * FROM users WHERE id = {user_id}"  # PROPAGATES via string_format
            cursor.execute(query)                                # sink

        Returns:
            PropagationPrimitive of type STRING_FORMAT
        """
        primitive = PropagationPrimitive(PropagationType.STRING_FORMAT)
        return primitive

    # ===== PHASE 3-6: POST-MVP =====
    # No factory methods yet; to be added in post-MVP PRs
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def create_propagation_list(
    primitives: List[PropagationPrimitive],
) -> List[Dict[str, Any]]:
    """
    Serialize propagation primitives to their JSON IR form.

    Calls ``to_ir()`` on each primitive, preserving input order.

    Args:
        primitives: List of PropagationPrimitive objects

    Returns:
        List of JSON IR dictionaries, one per primitive

    Example:
        >>> prims = [propagates.assignment(), propagates.function_args()]
        >>> create_propagation_list(prims)
        [
            {"type": "assignment", "metadata": {}},
            {"type": "function_args", "metadata": {}}
        ]
    """
    serialized: List[Dict[str, Any]] = []
    for primitive in primitives:
        serialized.append(primitive.to_ir())
    return serialized
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codepathfinder
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Python DSL for code-pathfinder security patterns
|
|
5
|
+
Home-page: https://github.com/shivasurya/code-pathfinder
|
|
6
|
+
Author: code-pathfinder contributors
|
|
7
|
+
License: AGPL-3.0
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Security
|
|
18
|
+
Classifier: Topic :: Software Development :: Testing
|
|
19
|
+
Requires-Python: >=3.8
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
25
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
26
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
27
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
28
|
+
Dynamic: home-page
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
Dynamic: requires-python
|
|
31
|
+
|
|
32
|
+
# Code-Pathfinder Python DSL
|
|
33
|
+
|
|
34
|
+
Python DSL for defining security patterns in Code Pathfinder - an open-source security suite combining structural code analysis with AI-powered vulnerability detection.
|
|
35
|
+
|
|
36
|
+
**Project Goals:**
|
|
37
|
+
- Real-time IDE integration bringing security insights directly into your editor
|
|
38
|
+
- AI-assisted analysis leveraging LLMs to understand context and identify vulnerabilities
|
|
39
|
+
- Unified workflow coverage from local development to CI/CD pipelines
|
|
40
|
+
- Flexible reporting supporting DefectDojo, GitHub Advanced Security, SARIF, and other platforms
|
|
41
|
+
|
|
42
|
+
**Documentation**: https://codepathfinder.dev/
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install codepathfinder
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Quick Example
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from codepathfinder import rule, flows, calls
|
|
54
|
+
from codepathfinder.presets import PropagationPresets
|
|
55
|
+
|
|
56
|
+
@rule(id="sql-injection", severity="critical", cwe="CWE-89")
|
|
57
|
+
def detect_sql_injection():
|
|
58
|
+
"""Detects SQL injection vulnerabilities"""
|
|
59
|
+
return flows(
|
|
60
|
+
from_sources=calls("request.GET", "request.POST"),
|
|
61
|
+
to_sinks=calls("execute", "executemany"),
|
|
62
|
+
sanitized_by=calls("quote_sql"),
|
|
63
|
+
propagates_through=PropagationPresets.standard(),
|
|
64
|
+
scope="global"
|
|
65
|
+
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Features
|
|
69
|
+
|
|
70
|
+
- **Matchers**: `calls()`, `variable()` for pattern matching
|
|
71
|
+
- **Dataflow Analysis**: `flows()` for source-to-sink taint tracking
|
|
72
|
+
- **Propagation**: Explicit propagation primitives (assignment, function args, returns, string concatenation, string formatting)
|
|
73
|
+
- **Logic Operators**: `And()`, `Or()`, `Not()` for complex rules
|
|
74
|
+
- **JSON IR**: Serializes to JSON for Go executor integration
|
|
75
|
+
|
|
76
|
+
## Documentation
|
|
77
|
+
|
|
78
|
+
For detailed documentation, visit https://codepathfinder.dev/
|
|
79
|
+
|
|
80
|
+
## Requirements
|
|
81
|
+
|
|
82
|
+
- Python 3.8+
|
|
83
|
+
- No external dependencies (stdlib only!)
|
|
84
|
+
|
|
85
|
+
## License
|
|
86
|
+
|
|
87
|
+
AGPL-3.0 - GNU Affero General Public License v3
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
codepathfinder/__init__.py,sha256=rcfHf6nq7bOBDRLEb-ZDBlsFpCxrvie6J2IYYwxUKWY,1194
|
|
2
|
+
codepathfinder/config.py,sha256=jx1Q5QnX2zJKKhai6ISwFIWh7h9M4o06bgZpyieGx98,2473
|
|
3
|
+
codepathfinder/dataflow.py,sha256=H2X3uCc4Srl5WzmjmAeICJggUFSZnNhn1WbrWP7g8Cc,6815
|
|
4
|
+
codepathfinder/decorators.py,sha256=pkvHhf2TLHu1-Gjlqwu718yaIPsPZ4JiSSM2EReshg8,2870
|
|
5
|
+
codepathfinder/ir.py,sha256=K0YfGSFZyysDRd8B-o9gnyou5R3EbwApPsK3qSjmDSE,2837
|
|
6
|
+
codepathfinder/logic.py,sha256=cA76-mhE_A7WmWQtZtufZWxMKSrI4Bt7avJRWi20ud4,2418
|
|
7
|
+
codepathfinder/matchers.py,sha256=o3vINaXOnVVMtxSVYHCbtkID3uDY_Hjcfvma547luwc,3787
|
|
8
|
+
codepathfinder/presets.py,sha256=_EU2WNtMY5PfY1iRcoZuiLkzKRddvtdn6H8tSy1dzGw,3914
|
|
9
|
+
codepathfinder/propagation.py,sha256=yz1ODauUD0hnzDjPWfTIdQojWcvkYbwrnvou4C9Fy6U,7695
|
|
10
|
+
codepathfinder-1.0.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
11
|
+
codepathfinder-1.0.0.dist-info/METADATA,sha256=yoE0QafGRO_HJYvzfnQF_kflon-_Bd4I_7mCQw1jnWY,2936
|
|
12
|
+
codepathfinder-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
+
codepathfinder-1.0.0.dist-info/top_level.txt,sha256=Ll603QFZoCmFBDISN1VT5QHmodZsgNiPs00voNqpOZ4,15
|
|
14
|
+
codepathfinder-1.0.0.dist-info/RECORD,,
|