codepathfinder 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of codepathfinder might be problematic. Click here for more details.
- codepathfinder/__init__.py +48 -0
- codepathfinder/config.py +92 -0
- codepathfinder/dataflow.py +193 -0
- codepathfinder/decorators.py +104 -0
- codepathfinder/ir.py +107 -0
- codepathfinder/logic.py +101 -0
- codepathfinder/matchers.py +148 -0
- codepathfinder/presets.py +135 -0
- codepathfinder/propagation.py +250 -0
- codepathfinder-1.0.0.dist-info/METADATA +87 -0
- codepathfinder-1.0.0.dist-info/RECORD +14 -0
- codepathfinder-1.0.0.dist-info/WHEEL +5 -0
- codepathfinder-1.0.0.dist-info/licenses/LICENSE +661 -0
- codepathfinder-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
codepathfinder - Python DSL for static analysis security patterns
|
|
3
|
+
|
|
4
|
+
Examples:
|
|
5
|
+
Basic matchers:
|
|
6
|
+
>>> from codepathfinder import calls, variable
|
|
7
|
+
>>> calls("eval")
|
|
8
|
+
>>> variable("user_input")
|
|
9
|
+
|
|
10
|
+
Rule definition:
    >>> from codepathfinder import rule, calls
    >>> @rule(id="test", severity="high")
    ... def detect_eval():
    ...     return calls("eval")
|
|
15
|
+
|
|
16
|
+
Dataflow analysis:
|
|
17
|
+
>>> from codepathfinder import flows, calls, propagates
|
|
18
|
+
>>> flows(
|
|
19
|
+
... from_sources=calls("request.GET"),
|
|
20
|
+
... to_sinks=calls("execute"),
|
|
21
|
+
... propagates_through=[propagates.assignment()]
|
|
22
|
+
... )
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
# Package version string; it is also listed in __all__ below.
__version__ = "1.0.0"

# Re-export the DSL entry points so they can be imported from the package root.
from .matchers import calls, variable
from .decorators import rule
from .dataflow import flows
from .propagation import propagates
from .presets import PropagationPresets
from .config import set_default_propagation, set_default_scope
from .logic import And, Or, Not

# Names exported by `from codepathfinder import *`.
__all__ = [
    "calls",
    "variable",
    "rule",
    "flows",
    "propagates",
    "PropagationPresets",
    "set_default_propagation",
    "set_default_scope",
    "And",
    "Or",
    "Not",
    "__version__",
]
|
codepathfinder/config.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Global configuration for codepathfinder DSL.
|
|
3
|
+
|
|
4
|
+
Allows setting default propagation, scope, etc.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
from .propagation import PropagationPrimitive
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PathfinderConfig:
    """Singleton configuration for codepathfinder.

    Every call to ``PathfinderConfig()`` returns the same object, so values
    written through one reference are visible through all others.

    Attributes:
        default_propagation: Propagation primitives handed out as the default.
        default_scope: Default analysis scope, either "local" or "global".
    """

    # The one shared instance, created lazily by __new__.
    _instance: Optional["PathfinderConfig"] = None
    # Class-level fallbacks; the setters below shadow them on the instance.
    _default_propagation: List["PropagationPrimitive"] = []
    _default_scope: str = "global"

    def __new__(cls):
        """Create the shared instance on first use and return it afterwards."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @property
    def default_propagation(self) -> List["PropagationPrimitive"]:
        """Get default propagation primitives."""
        return self._default_propagation

    @default_propagation.setter
    def default_propagation(self, value: List["PropagationPrimitive"]):
        """Set default propagation primitives.

        The iterable is copied into a new list so that later changes to the
        caller's own list do not silently alter the global default.
        """
        self._default_propagation = list(value)

    @property
    def default_scope(self) -> str:
        """Get default scope."""
        return self._default_scope

    @default_scope.setter
    def default_scope(self, value: str):
        """Set default scope.

        Raises:
            ValueError: If value is neither "local" nor "global".
        """
        if value not in ("local", "global"):
            raise ValueError(f"scope must be 'local' or 'global', got '{value}'")
        self._default_scope = value
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Global config instance: the module-level functions below read and write
# the defaults through this object.
_config = PathfinderConfig()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def set_default_propagation(primitives: List[PropagationPrimitive]) -> None:
    """
    Install the propagation primitives used as the global default.

    The value is written to the module's shared config object; flows() calls
    that omit propagates_through pick it up from there.

    Args:
        primitives: List of PropagationPrimitive objects

    Example:
        set_default_propagation(PropagationPresets.standard())

        # No propagates_through given, so the default set above is used
        flows(
            from_sources=calls("request.GET"),
            to_sinks=calls("eval"),
        )
    """
    shared = _config
    shared.default_propagation = primitives
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def set_default_scope(scope: str) -> None:
    """
    Choose the analysis scope used as the global default.

    Args:
        scope: "local" or "global"

    Raises:
        ValueError: If the shared config rejects the value, i.e. it is
            neither "local" nor "global".

    Example:
        set_default_scope("local")
    """
    shared = _config
    shared.default_scope = scope
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_default_propagation() -> List[PropagationPrimitive]:
    """Return the propagation primitives currently held by the shared config."""
    current = _config.default_propagation
    return current
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_default_scope() -> str:
    """Return the scope currently held by the shared config."""
    current = _config.default_scope
    return current
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dataflow matcher for taint analysis.
|
|
3
|
+
|
|
4
|
+
The flows() function is the core of OWASP Top 10 pattern detection.
|
|
5
|
+
It describes how tainted data flows from sources to sinks.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import List, Optional, Union
|
|
9
|
+
from .matchers import CallMatcher
|
|
10
|
+
from .propagation import PropagationPrimitive, create_propagation_list
|
|
11
|
+
from .ir import IRType
|
|
12
|
+
from .config import get_default_propagation, get_default_scope
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DataflowMatcher:
    """
    Matches tainted data flows from sources to sinks.

    This is the primary matcher for security vulnerabilities like:
        - SQL Injection (A03:2021)
        - Command Injection (A03:2021)
        - SSRF (A10:2021)
        - Path Traversal (A01:2021)
        - Insecure Deserialization (A08:2021)

    Attributes:
        sources: List of matchers for taint sources (e.g., user input)
        sinks: List of matchers for dangerous sinks (e.g., eval, execute)
        sanitizers: List of sanitizer matchers (empty when none were given)
        propagates_through: List of propagation primitives. This is a private
            copy, so editing it changes neither the caller's list nor the
            global default.
        scope: "local" (same function) or "global" (cross-function)
    """

    def __init__(
        self,
        from_sources: Union[CallMatcher, List[CallMatcher]],
        to_sinks: Union[CallMatcher, List[CallMatcher]],
        sanitized_by: Optional[Union[CallMatcher, List[CallMatcher]]] = None,
        propagates_through: Optional[List[PropagationPrimitive]] = None,
        scope: Optional[str] = None,
    ):
        """
        Args:
            from_sources: Source matcher(s) - where taint originates
            to_sinks: Sink matcher(s) - dangerous functions
            sanitized_by: Optional sanitizer matcher(s)
            propagates_through: List of propagation primitives. When None,
                the global default from get_default_propagation() is used.
            scope: "local" (intra-procedural) or "global" (inter-procedural).
                When None, the global default from get_default_scope() is used.

        Raises:
            ValueError: If no source or no sink is given, or if scope is
                neither "local" nor "global".

        Examples:
            # SQL Injection
            flows(
                from_sources=calls("request.GET", "request.POST"),
                to_sinks=calls("execute", "executemany"),
                sanitized_by=calls("quote_sql"),
                propagates_through=[
                    propagates.assignment(),
                    propagates.function_args(),
                ],
                scope="global"
            )
        """
        # Validate sources
        self.sources = self._as_list(from_sources)
        if not self.sources:
            raise ValueError("flows() requires at least one source")

        # Validate sinks
        self.sinks = self._as_list(to_sinks)
        if not self.sinks:
            raise ValueError("flows() requires at least one sink")

        # Sanitizers are optional; an empty list means "none"
        self.sanitizers = self._as_list(sanitized_by)

        # Use the global default if not specified. Copy in either case:
        # storing the default list itself would let a later edit of
        # matcher.propagates_through change the default for every other matcher.
        if propagates_through is None:
            propagates_through = get_default_propagation()
        self.propagates_through = list(propagates_through)

        # Validate scope (use global default if not specified)
        if scope is None:
            scope = get_default_scope()
        if scope not in ("local", "global"):
            raise ValueError(f"scope must be 'local' or 'global', got '{scope}'")
        self.scope = scope

    @staticmethod
    def _as_list(matchers) -> list:
        """Return a new list for a single CallMatcher, an iterable, or a falsy value."""
        if isinstance(matchers, CallMatcher):
            return [matchers]
        if not matchers:
            return []
        return list(matchers)

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        Returns:
            {
                "type": "dataflow",
                "sources": [
                    {"type": "call_matcher", "patterns": ["request.GET"], ...}
                ],
                "sinks": [
                    {"type": "call_matcher", "patterns": ["execute"], ...}
                ],
                "sanitizers": [
                    {"type": "call_matcher", "patterns": ["quote_sql"], ...}
                ],
                "propagation": [
                    {"type": "assignment", "metadata": {}},
                    {"type": "function_args", "metadata": {}}
                ],
                "scope": "global"
            }
        """
        return {
            "type": IRType.DATAFLOW.value,
            "sources": [src.to_ir() for src in self.sources],
            "sinks": [sink.to_ir() for sink in self.sinks],
            "sanitizers": [san.to_ir() for san in self.sanitizers],
            "propagation": create_propagation_list(self.propagates_through),
            "scope": self.scope,
        }

    def __repr__(self) -> str:
        """Summarize the matcher by element counts and scope."""
        src_count = len(self.sources)
        sink_count = len(self.sinks)
        prop_count = len(self.propagates_through)
        return (
            f"flows(sources={src_count}, sinks={sink_count}, "
            f"propagation={prop_count}, scope='{self.scope}')"
        )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# Public API
|
|
143
|
+
def flows(
    from_sources: Union[CallMatcher, List[CallMatcher]],
    to_sinks: Union[CallMatcher, List[CallMatcher]],
    sanitized_by: Optional[Union[CallMatcher, List[CallMatcher]]] = None,
    propagates_through: Optional[List[PropagationPrimitive]] = None,
    scope: Optional[str] = None,
) -> DataflowMatcher:
    """
    Build a DataflowMatcher describing a taint flow from sources to sinks.

    All arguments are forwarded unchanged to DataflowMatcher, which performs
    the validation and applies its fallbacks for omitted values.

    Args:
        from_sources: Where taint originates (e.g., user input)
        to_sinks: Dangerous functions that consume tainted data
        sanitized_by: Optional functions that neutralize taint
        propagates_through: How taint flows between sources and sinks
        scope: "local" or "global" analysis

    Returns:
        DataflowMatcher instance

    Examples:
        >>> from codepathfinder import flows, calls, propagates
        >>>
        >>> # SQL Injection
        >>> flows(
        ...     from_sources=calls("request.GET"),
        ...     to_sinks=calls("execute"),
        ...     propagates_through=[propagates.assignment()]
        ... )
        >>>
        >>> # Command Injection with sanitization
        >>> flows(
        ...     from_sources=calls("request.POST"),
        ...     to_sinks=calls("os.system", "subprocess.call"),
        ...     sanitized_by=calls("shlex.quote"),
        ...     propagates_through=[
        ...         propagates.assignment(),
        ...         propagates.function_args()
        ...     ],
        ...     scope="global"
        ... )
    """
    matcher_kwargs = {
        "from_sources": from_sources,
        "to_sinks": to_sinks,
        "sanitized_by": sanitized_by,
        "propagates_through": propagates_through,
        "scope": scope,
    }
    return DataflowMatcher(**matcher_kwargs)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Decorators for pathfinder rule definitions.
|
|
3
|
+
|
|
4
|
+
The @rule decorator marks functions as security patterns.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Callable, Optional
|
|
8
|
+
from .ir import serialize_ir
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Rule:
    """
    A security rule: a matcher-producing function plus its metadata.

    Attributes:
        id: Unique rule identifier (e.g., "sqli-001")
        name: Taken from the wrapped function's __name__
        severity: critical | high | medium | low
        cwe: CWE identifier (e.g., "CWE-89")
        owasp: OWASP category (e.g., "A03:2021")
        description: The wrapped function's docstring, or "" when it has none
        func: The wrapped function; calling it yields the matcher/combinator
    """

    def __init__(
        self,
        id: str,
        severity: str,
        func: Callable,
        cwe: Optional[str] = None,
        owasp: Optional[str] = None,
    ):
        # Identification and classification supplied by the caller
        self.id = id
        self.severity = severity
        self.cwe = cwe
        self.owasp = owasp
        # Details derived from the wrapped function itself
        self.func = func
        self.name = func.__name__
        self.description = func.__doc__ or ""

    def execute(self) -> dict:
        """
        Run the rule function and serialize its result to JSON IR.

        Returns:
            {
                "rule": {
                    "id": "sqli-001",
                    "name": "detect_sql_injection",
                    "severity": "critical",
                    "cwe": "CWE-89",
                    "owasp": "A03:2021",
                    "description": "Detects SQL injection vulnerabilities"
                },
                "matcher": {
                    "type": "call_matcher",
                    "patterns": ["execute"],
                    "wildcard": false
                }
            }
        """
        produced = self.func()
        metadata = {
            "id": self.id,
            "name": self.name,
            "severity": self.severity,
            "cwe": self.cwe,
            "owasp": self.owasp,
            # Surrounding whitespace of the docstring is dropped on output only
            "description": self.description.strip(),
        }
        return {"rule": metadata, "matcher": serialize_ir(produced)}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def rule(
    id: str,
    severity: str,
    cwe: Optional[str] = None,
    owasp: Optional[str] = None,
) -> Callable[[Callable], Rule]:
    """
    Decorator factory that turns a function into a Rule.

    The decorated name is bound to the resulting Rule object, not to the
    original function.

    Args:
        id: Unique rule identifier
        severity: critical | high | medium | low
        cwe: Optional CWE identifier
        owasp: Optional OWASP category

    Returns:
        Decorator function

    Example:
        @rule(id="code-injection", severity="critical", cwe="CWE-94")
        def detect_code_injection():
            '''Detects code injection via eval'''
            return calls("eval", "exec")
    """

    def decorator(func: Callable) -> Rule:
        wrapped = Rule(id=id, severity=severity, func=func, cwe=cwe, owasp=owasp)
        return wrapped

    return decorator
|
codepathfinder/ir.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""
|
|
2
|
+
JSON Intermediate Representation (IR) for pathfinder DSL.
|
|
3
|
+
|
|
4
|
+
The Python DSL serializes to JSON IR, which the Go executor consumes.
|
|
5
|
+
This enables language-agnostic pattern definitions (future: JS, Rust DSLs).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from typing import Any, Dict, Protocol
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class IRType(Enum):
    """Values used in the "type" field of IR nodes."""

    # Leaf matchers
    CALL_MATCHER = "call_matcher"
    VARIABLE_MATCHER = "variable_matcher"

    # Source-to-sink taint flow
    DATAFLOW = "dataflow"

    # Boolean combinators
    LOGIC_AND = "logic_and"
    LOGIC_OR = "logic_or"
    LOGIC_NOT = "logic_not"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class MatcherIR(Protocol):
    """Structural type for matchers: anything that provides to_ir()."""

    def to_ir(self) -> Dict[str, Any]:
        """Return this matcher as a JSON IR dictionary."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def serialize_ir(matcher: MatcherIR) -> Dict[str, Any]:
    """
    Turn a matcher into its JSON IR by calling its to_ir() method.

    Args:
        matcher: Any object implementing MatcherIR protocol

    Returns:
        Whatever matcher.to_ir() returns (a JSON-serializable dictionary
        for conforming matchers)

    Raises:
        AttributeError: If matcher has no to_ir attribute
    """
    if hasattr(matcher, "to_ir"):
        return matcher.to_ir()

    # Name the offending type so the caller can see what was passed in.
    kind = type(matcher).__name__
    raise AttributeError(f"{kind} must implement to_ir() method")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _has_list(ir: Dict[str, Any], key: str, allow_empty: bool = False) -> bool:
    """Return True when ir[key] exists, is a list and (unless allow_empty) has items."""
    if key not in ir or not isinstance(ir[key], list):
        return False
    return allow_empty or len(ir[key]) > 0


def _has_wildcard_flag(ir: Dict[str, Any]) -> bool:
    """Return True when ir carries a boolean "wildcard" field."""
    return "wildcard" in ir and isinstance(ir["wildcard"], bool)


def validate_ir(ir: Dict[str, Any]) -> bool:
    """
    Validate JSON IR structure.

    The check is shallow: only the fields of the given node are inspected,
    nested matcher nodes are not validated recursively.

    Args:
        ir: JSON IR dictionary

    Returns:
        True if valid, False otherwise

    Validates:
        - "type" field exists and is a valid IRType value
        - call_matcher: non-empty "patterns" list and boolean "wildcard"
        - variable_matcher: non-empty "pattern" string and boolean "wildcard"
        - dataflow: non-empty "sources" and "sinks" lists, "sanitizers" and
          "propagation" lists, "scope" of "local" or "global"
        - logic_and / logic_or: non-empty "matchers" list
        - logic_not: "matcher" dictionary
    """
    if "type" not in ir:
        return False

    try:
        ir_type = IRType(ir["type"])
    except ValueError:
        return False

    # Type-specific validation
    if ir_type == IRType.CALL_MATCHER:
        return _has_list(ir, "patterns") and _has_wildcard_flag(ir)

    if ir_type == IRType.VARIABLE_MATCHER:
        return (
            "pattern" in ir
            and isinstance(ir["pattern"], str)
            and len(ir["pattern"]) > 0
            and _has_wildcard_flag(ir)
        )

    if ir_type == IRType.DATAFLOW:
        return (
            _has_list(ir, "sources")
            and _has_list(ir, "sinks")
            and _has_list(ir, "sanitizers", allow_empty=True)
            and _has_list(ir, "propagation", allow_empty=True)
            and "scope" in ir
            and ir["scope"] in ("local", "global")
        )

    # The combinators previously fell through to "valid" without any check,
    # so a logic node missing its operands was accepted.
    if ir_type in (IRType.LOGIC_AND, IRType.LOGIC_OR):
        return _has_list(ir, "matchers")

    if ir_type == IRType.LOGIC_NOT:
        return "matcher" in ir and isinstance(ir["matcher"], dict)

    return True
|
codepathfinder/logic.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Logic operators for combining matchers."""
|
|
2
|
+
|
|
3
|
+
from typing import Union
|
|
4
|
+
from .matchers import CallMatcher, VariableMatcher
|
|
5
|
+
from .dataflow import DataflowMatcher
|
|
6
|
+
from .ir import IRType
|
|
7
|
+
|
|
8
|
+
# Type alias for anything accepted as an operand of the logic operators.
# The operator classes are given as strings (forward references) because
# they are defined further down in this module.
MatcherType = Union[
    CallMatcher,
    VariableMatcher,
    DataflowMatcher,
    "AndOperator",
    "OrOperator",
    "NotOperator",
]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AndOperator:
    """
    Conjunction of matchers: every operand has to match.

    Example:
        And(calls("eval"), variable("user_input"))
        # Matches code that has BOTH eval calls AND user_input variable
    """

    def __init__(self, *matchers: MatcherType):
        operands = list(matchers)
        # A conjunction of fewer than two operands is rejected outright.
        if len(operands) < 2:
            raise ValueError("And() requires at least 2 matchers")
        self.matchers = operands

    def to_ir(self) -> dict:
        """Serialize as a logic_and node wrapping each operand's IR."""
        node = {"type": IRType.LOGIC_AND.value}
        node["matchers"] = [operand.to_ir() for operand in self.matchers]
        return node

    def __repr__(self) -> str:
        count = len(self.matchers)
        return f"And({count} matchers)"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class OrOperator:
    """
    Disjunction of matchers: one matching operand is enough.

    Example:
        Or(calls("eval"), calls("exec"))
        # Matches code with eval OR exec
    """

    def __init__(self, *matchers: MatcherType):
        operands = list(matchers)
        # A disjunction of fewer than two operands is rejected outright.
        if len(operands) < 2:
            raise ValueError("Or() requires at least 2 matchers")
        self.matchers = operands

    def to_ir(self) -> dict:
        """Serialize as a logic_or node wrapping each operand's IR."""
        node = {"type": IRType.LOGIC_OR.value}
        node["matchers"] = [operand.to_ir() for operand in self.matchers]
        return node

    def __repr__(self) -> str:
        count = len(self.matchers)
        return f"Or({count} matchers)"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class NotOperator:
    """
    Negation of a single matcher: the operand must NOT match.

    Example:
        Not(calls("test_*"))
        # Matches code that does NOT call test_* functions
    """

    def __init__(self, matcher: MatcherType):
        self.matcher = matcher

    def to_ir(self) -> dict:
        """Serialize as a logic_not node wrapping the operand's IR."""
        node = {"type": IRType.LOGIC_NOT.value}
        node["matcher"] = self.matcher.to_ir()
        return node

    def __repr__(self) -> str:
        return f"Not({self.matcher!r})"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# Public API
|
|
89
|
+
def And(*matchers: MatcherType) -> AndOperator:
    """Build an AndOperator from the given matchers."""
    combined = AndOperator(*matchers)
    return combined
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def Or(*matchers: MatcherType) -> OrOperator:
    """Build an OrOperator from the given matchers."""
    combined = OrOperator(*matchers)
    return combined
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def Not(matcher: MatcherType) -> NotOperator:
    """Build a NotOperator around the given matcher."""
    negated = NotOperator(matcher)
    return negated
|