codepathfinder 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of codepathfinder might be problematic. Click here for more details.

@@ -0,0 +1,48 @@
1
+ """
2
+ codepathfinder - Python DSL for static analysis security patterns
3
+
4
+ Examples:
5
+ Basic matchers:
6
+ >>> from codepathfinder import calls, variable
7
+ >>> calls("eval")
8
+ >>> variable("user_input")
9
+
10
+ Rule definition:
11
+ >>> from codepathfinder import rule, calls
12
+ >>> @rule(id="test", severity="high")
13
+ ... def detect_eval():
14
+ ...     return calls("eval")
15
+
16
+ Dataflow analysis:
17
+ >>> from codepathfinder import flows, calls, propagates
18
+ >>> flows(
19
+ ... from_sources=calls("request.GET"),
20
+ ... to_sinks=calls("execute"),
21
+ ... propagates_through=[propagates.assignment()]
22
+ ... )
23
+ """
24
+
25
# Version string of this package release; also exported through __all__.
__version__ = "1.0.0"

# Re-export the DSL building blocks from their submodules so callers can
# write ``from codepathfinder import calls, flows, ...``.
from .matchers import calls, variable
from .decorators import rule
from .dataflow import flows
from .propagation import propagates
from .presets import PropagationPresets
from .config import set_default_propagation, set_default_scope
from .logic import And, Or, Not

# Names made available by ``from codepathfinder import *``.
__all__ = [
    "calls",
    "variable",
    "rule",
    "flows",
    "propagates",
    "PropagationPresets",
    "set_default_propagation",
    "set_default_scope",
    "And",
    "Or",
    "Not",
    "__version__",
]
@@ -0,0 +1,92 @@
1
+ """
2
+ Global configuration for codepathfinder DSL.
3
+
4
+ Allows setting default propagation, scope, etc.
5
+ """
6
+
7
+ from typing import List, Optional
8
+ from .propagation import PropagationPrimitive
9
+
10
+
11
class PathfinderConfig:
    """Singleton configuration for codepathfinder.

    Every call to ``PathfinderConfig()`` returns the same object, so a value
    assigned through one reference is visible through all others.

    Attributes:
        default_propagation: Propagation primitives used as the default.
            Starts out empty.
        default_scope: "local" or "global". Starts out as "global".
    """

    # Accepted values for default_scope.
    _VALID_SCOPES = ("local", "global")

    _instance: Optional["PathfinderConfig"] = None
    # Class-level fallback; never mutated because the property below only
    # hands out copies and the setter rebinds the attribute on the instance.
    _default_propagation: List["PropagationPrimitive"] = []
    _default_scope: str = "global"

    def __new__(cls):
        # Create the instance on first use and reuse it afterwards.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @property
    def default_propagation(self) -> List["PropagationPrimitive"]:
        """Return a copy of the default propagation primitives.

        A copy is returned so that callers mutating the result cannot
        change the stored default by accident.
        """
        return list(self._default_propagation)

    @default_propagation.setter
    def default_propagation(self, value: List["PropagationPrimitive"]):
        """Store a copy of *value* as the default propagation primitives.

        Copying decouples the stored default from the caller's list, so later
        changes to that list do not leak into the global configuration.

        Raises:
            TypeError: If *value* is not iterable.
        """
        self._default_propagation = list(value)

    @property
    def default_scope(self) -> str:
        """Return the default scope ("local" or "global")."""
        return self._default_scope

    @default_scope.setter
    def default_scope(self, value: str):
        """Set the default scope.

        Raises:
            ValueError: If *value* is neither "local" nor "global".
        """
        if value not in self._VALID_SCOPES:
            raise ValueError(f"scope must be 'local' or 'global', got '{value}'")
        self._default_scope = value


# Global config instance
_config = PathfinderConfig()
48
+
49
+
50
def set_default_propagation(primitives: List[PropagationPrimitive]) -> None:
    """
    Install the package-wide default propagation primitives.

    The value is assigned to the module-level configuration object. A
    flows() call that omits propagates_through picks this default up.

    Args:
        primitives: List of PropagationPrimitive objects

    Example:
        set_default_propagation(PropagationPresets.standard())

        # propagates_through is omitted, so the default set above applies
        flows(
            from_sources=calls("request.GET"),
            to_sinks=calls("eval"),
        )
    """
    _config.default_propagation = primitives
70
+
71
+
72
def set_default_scope(scope: str) -> None:
    """
    Install the package-wide default analysis scope.

    Args:
        scope: "local" or "global"

    Raises:
        ValueError: If scope is any other value (raised by the
            configuration object's default_scope setter).

    Example:
        set_default_scope("local")
    """
    _config.default_scope = scope
83
+
84
+
85
def get_default_propagation() -> List[PropagationPrimitive]:
    """Return the default propagation primitives held by the global config."""
    current = _config.default_propagation
    return current
88
+
89
+
90
def get_default_scope() -> str:
    """Return the default scope held by the global config."""
    current = _config.default_scope
    return current
@@ -0,0 +1,193 @@
1
+ """
2
+ Dataflow matcher for taint analysis.
3
+
4
+ The flows() function is the core of OWASP Top 10 pattern detection.
5
+ It describes how tainted data flows from sources to sinks.
6
+ """
7
+
8
+ from typing import List, Optional, Union
9
+ from .matchers import CallMatcher
10
+ from .propagation import PropagationPrimitive, create_propagation_list
11
+ from .ir import IRType
12
+ from .config import get_default_propagation, get_default_scope
13
+
14
+
15
class DataflowMatcher:
    """
    Matches tainted data flows from sources to sinks.

    This is the primary matcher for security vulnerabilities like:
    - SQL Injection (A03:2021)
    - Command Injection (A03:2021)
    - SSRF (A10:2021)
    - Path Traversal (A01:2021)
    - Insecure Deserialization (A08:2021)

    Attributes:
        sources: Matchers for taint sources (e.g., user input)
        sinks: Matchers for dangerous sinks (e.g., eval, execute)
        sanitizers: Matchers for sanitizer functions (empty if none given)
        propagates_through: List of propagation primitives; the global
            default from the config module when not passed explicitly
        scope: "local" (same function) or "global" (cross-function)
    """

    def __init__(
        self,
        from_sources: Union[CallMatcher, List[CallMatcher]],
        to_sinks: Union[CallMatcher, List[CallMatcher]],
        sanitized_by: Optional[Union[CallMatcher, List[CallMatcher]]] = None,
        propagates_through: Optional[List[PropagationPrimitive]] = None,
        scope: Optional[str] = None,
    ):
        """
        Args:
            from_sources: Source matcher(s) - where taint originates
            to_sinks: Sink matcher(s) - dangerous functions
            sanitized_by: Optional sanitizer matcher(s)
            propagates_through: List of propagation primitives
                (default: None = use get_default_propagation())
            scope: "local" (intra-procedural) or "global" (inter-procedural)
                (default: None = use get_default_scope())

        Raises:
            ValueError: If sources or sinks are empty, or scope is not
                "local" or "global".

        Examples:
            # SQL Injection
            flows(
                from_sources=calls("request.GET", "request.POST"),
                to_sinks=calls("execute", "executemany"),
                sanitized_by=calls("quote_sql"),
                propagates_through=[
                    propagates.assignment(),
                    propagates.function_args(),
                ],
                scope="global"
            )
        """
        # Sources and sinks are mandatory; a single matcher is wrapped.
        sources = self._as_list(from_sources)
        if not sources:
            raise ValueError("flows() requires at least one source")
        self.sources = sources

        sinks = self._as_list(to_sinks)
        if not sinks:
            raise ValueError("flows() requires at least one sink")
        self.sinks = sinks

        # Sanitizers are optional; None means "no sanitizers".
        self.sanitizers = [] if sanitized_by is None else self._as_list(sanitized_by)

        # Fall back to the global default when no propagation list is given.
        if propagates_through is None:
            propagates_through = get_default_propagation()
        # Copy list input so this matcher does not share state with the
        # caller's list or with the global default list.
        self.propagates_through = self._as_list(propagates_through)

        # Fall back to the global default scope, then validate either way.
        if scope is None:
            scope = get_default_scope()
        if scope not in ("local", "global"):
            raise ValueError(f"scope must be 'local' or 'global', got '{scope}'")
        self.scope = scope

    @staticmethod
    def _as_list(value):
        """Wrap a single CallMatcher in a list; copy list/tuple input.

        Any other value is returned unchanged so that unusual inputs keep
        behaving exactly as they did before the copy was introduced.
        """
        if isinstance(value, CallMatcher):
            return [value]
        if isinstance(value, (list, tuple)):
            return list(value)
        return value

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        Returns:
            {
                "type": "dataflow",
                "sources": [
                    {"type": "call_matcher", "patterns": ["request.GET"], ...}
                ],
                "sinks": [
                    {"type": "call_matcher", "patterns": ["execute"], ...}
                ],
                "sanitizers": [
                    {"type": "call_matcher", "patterns": ["quote_sql"], ...}
                ],
                "propagation": [
                    {"type": "assignment", "metadata": {}},
                    {"type": "function_args", "metadata": {}}
                ],
                "scope": "global"
            }
        """
        return {
            "type": IRType.DATAFLOW.value,
            "sources": [src.to_ir() for src in self.sources],
            "sinks": [sink.to_ir() for sink in self.sinks],
            "sanitizers": [san.to_ir() for san in self.sanitizers],
            "propagation": create_propagation_list(self.propagates_through),
            "scope": self.scope,
        }

    def __repr__(self) -> str:
        # Summarise by counts rather than dumping every nested matcher.
        src_count = len(self.sources)
        sink_count = len(self.sinks)
        prop_count = len(self.propagates_through)
        return (
            f"flows(sources={src_count}, sinks={sink_count}, "
            f"propagation={prop_count}, scope='{self.scope}')"
        )
140
+
141
+
142
+ # Public API
143
def flows(
    from_sources: Union[CallMatcher, List[CallMatcher]],
    to_sinks: Union[CallMatcher, List[CallMatcher]],
    sanitized_by: Optional[Union[CallMatcher, List[CallMatcher]]] = None,
    propagates_through: Optional[List[PropagationPrimitive]] = None,
    scope: Optional[str] = None,
) -> DataflowMatcher:
    """
    Build a DataflowMatcher describing a source-to-sink taint flow.

    All arguments are forwarded unchanged to DataflowMatcher, which performs
    the validation and applies defaults.

    Args:
        from_sources: Where taint originates (e.g., user input)
        to_sinks: Dangerous functions that consume tainted data
        sanitized_by: Optional functions that neutralize taint
        propagates_through: How taint flows; when omitted, DataflowMatcher
            falls back to the global default propagation list
        scope: "local" or "global" analysis; when omitted, DataflowMatcher
            falls back to the global default scope

    Returns:
        DataflowMatcher instance

    Examples:
        >>> from codepathfinder import flows, calls, propagates
        >>>
        >>> # SQL Injection
        >>> flows(
        ...     from_sources=calls("request.GET"),
        ...     to_sinks=calls("execute"),
        ...     propagates_through=[propagates.assignment()]
        ... )
        >>>
        >>> # Command Injection with sanitization
        >>> flows(
        ...     from_sources=calls("request.POST"),
        ...     to_sinks=calls("os.system", "subprocess.call"),
        ...     sanitized_by=calls("shlex.quote"),
        ...     propagates_through=[
        ...         propagates.assignment(),
        ...         propagates.function_args()
        ...     ],
        ...     scope="global"
        ... )
    """
    matcher = DataflowMatcher(
        from_sources=from_sources,
        to_sinks=to_sinks,
        sanitized_by=sanitized_by,
        propagates_through=propagates_through,
        scope=scope,
    )
    return matcher
@@ -0,0 +1,104 @@
1
+ """
2
+ Decorators for pathfinder rule definitions.
3
+
4
+ The @rule decorator marks functions as security patterns.
5
+ """
6
+
7
+ from typing import Callable, Optional
8
+ from .ir import serialize_ir
9
+
10
+
11
class Rule:
    """
    A security rule: a matcher-producing function plus its metadata.

    Attributes:
        id: Unique rule identifier (e.g., "sqli-001")
        name: Name of the wrapped function
        severity: critical | high | medium | low
        cwe: CWE identifier (e.g., "CWE-89"), or None
        owasp: OWASP category (e.g., "A03:2021"), or None
        description: The wrapped function's docstring, or "" if it has none
        func: The wrapped function; calling it yields the matcher
    """

    def __init__(
        self,
        id: str,
        severity: str,
        func: Callable,
        cwe: Optional[str] = None,
        owasp: Optional[str] = None,
    ):
        # Metadata supplied directly by the caller.
        self.id = id
        self.severity = severity
        self.cwe = cwe
        self.owasp = owasp

        # Metadata derived from the wrapped function.
        self.func = func
        self.name = func.__name__
        docstring = func.__doc__
        self.description = docstring if docstring else ""

    def execute(self) -> dict:
        """
        Call the rule function and return the rule as a JSON IR dictionary.

        Returns:
            {
                "rule": {
                    "id": "sqli-001",
                    "name": "detect_sql_injection",
                    "severity": "critical",
                    "cwe": "CWE-89",
                    "owasp": "A03:2021",
                    "description": "Detects SQL injection vulnerabilities"
                },
                "matcher": {
                    "type": "call_matcher",
                    "patterns": ["execute"],
                    "wildcard": false
                }
            }
        """
        # Run the user's function first; it produces the matcher to serialize.
        produced = self.func()

        metadata = {
            "id": self.id,
            "name": self.name,
            "severity": self.severity,
            "cwe": self.cwe,
            "owasp": self.owasp,
            "description": self.description.strip(),
        }
        return {"rule": metadata, "matcher": serialize_ir(produced)}
74
+
75
+
76
def rule(
    id: str,
    severity: str,
    cwe: Optional[str] = None,
    owasp: Optional[str] = None,
) -> Callable[[Callable], Rule]:
    """
    Build a decorator that turns a function into a Rule.

    The decorated name is bound to the resulting Rule object, not to the
    original function; the function is kept on the Rule as ``func``.

    Args:
        id: Unique rule identifier
        severity: critical | high | medium | low
        cwe: Optional CWE identifier
        owasp: Optional OWASP category

    Returns:
        Decorator function

    Example:
        @rule(id="code-injection", severity="critical", cwe="CWE-94")
        def detect_code_injection():
            '''Detects code injection via eval'''
            return calls("eval", "exec")
    """

    def decorator(func: Callable) -> Rule:
        wrapped = Rule(func=func, id=id, severity=severity, cwe=cwe, owasp=owasp)
        return wrapped

    return decorator
codepathfinder/ir.py ADDED
@@ -0,0 +1,107 @@
1
+ """
2
+ JSON Intermediate Representation (IR) for pathfinder DSL.
3
+
4
+ The Python DSL serializes to JSON IR, which the Go executor consumes.
5
+ This enables language-agnostic pattern definitions (future: JS, Rust DSLs).
6
+ """
7
+
8
+ from enum import Enum
9
+ from typing import Any, Dict, Protocol
10
+
11
+
12
class IRType(Enum):
    """IR node types for different matchers and combinators.

    Each member's value is the string looked up from an IR dictionary's
    "type" field by validate_ir().
    """

    CALL_MATCHER = "call_matcher"
    VARIABLE_MATCHER = "variable_matcher"
    DATAFLOW = "dataflow"  # written by DataflowMatcher.to_ir()
    LOGIC_AND = "logic_and"  # written by AndOperator.to_ir()
    LOGIC_OR = "logic_or"  # written by OrOperator.to_ir()
    LOGIC_NOT = "logic_not"  # written by NotOperator.to_ir()
21
+
22
+
23
class MatcherIR(Protocol):
    """Protocol for all matcher types (duck typing).

    Any object with a ``to_ir()`` method satisfies this protocol
    structurally; no inheritance from this class is required.
    """

    def to_ir(self) -> Dict[str, Any]:
        """Serialize to JSON IR dictionary."""
        ...
29
+
30
+
31
def serialize_ir(matcher: MatcherIR) -> Dict[str, Any]:
    """
    Convert a matcher into its JSON IR dictionary.

    Args:
        matcher: Any object implementing MatcherIR protocol

    Returns:
        Whatever the matcher's to_ir() method returns

    Raises:
        AttributeError: If matcher has no to_ir attribute
    """
    if hasattr(matcher, "to_ir"):
        return matcher.to_ir()

    raise AttributeError(f"{type(matcher).__name__} must implement to_ir() method")
48
+
49
+
50
def validate_ir(ir: Dict[str, Any]) -> bool:
    """
    Validate JSON IR structure.

    Args:
        ir: JSON IR dictionary

    Returns:
        True if valid, False otherwise (including when *ir* is not a dict)

    Validates:
        - "type" field exists and is valid IRType
        - call_matcher: non-empty "patterns" list and boolean "wildcard"
        - variable_matcher: non-empty "pattern" string and boolean "wildcard"
        - dataflow: non-empty "sources" and "sinks" lists, a "sanitizers"
          list, a "propagation" list, and "scope" of "local" or "global";
          every source, sink and sanitizer must itself be valid IR
        - logic_and / logic_or: "matchers" list of at least 2 valid IR nodes
        - logic_not: "matcher" holding one valid IR node
    """
    # Non-dict input cannot be an IR node; report it as invalid rather than
    # letting the membership test or the subscript below raise.
    if not isinstance(ir, dict) or "type" not in ir:
        return False

    try:
        ir_type = IRType(ir["type"])
    except ValueError:
        return False

    def has_bool(key: str) -> bool:
        # Missing keys yield None, which is not a bool.
        return isinstance(ir.get(key), bool)

    def has_valid_children(key: str, minimum: int) -> bool:
        # The field must be a list of at least `minimum` valid IR nodes.
        children = ir.get(key)
        return (
            isinstance(children, list)
            and len(children) >= minimum
            and all(validate_ir(child) for child in children)
        )

    # Type-specific validation
    if ir_type == IRType.CALL_MATCHER:
        patterns = ir.get("patterns")
        return isinstance(patterns, list) and len(patterns) > 0 and has_bool("wildcard")

    if ir_type == IRType.VARIABLE_MATCHER:
        pattern = ir.get("pattern")
        return isinstance(pattern, str) and len(pattern) > 0 and has_bool("wildcard")

    if ir_type == IRType.DATAFLOW:
        return (
            has_valid_children("sources", 1)
            and has_valid_children("sinks", 1)
            and has_valid_children("sanitizers", 0)
            # Propagation entries use their own type names, not IRType
            # values, so only the container is checked here.
            and isinstance(ir.get("propagation"), list)
            and ir.get("scope") in ("local", "global")
        )

    if ir_type in (IRType.LOGIC_AND, IRType.LOGIC_OR):
        # And()/Or() refuse fewer than two operands, so the IR must match.
        return has_valid_children("matchers", 2)

    if ir_type == IRType.LOGIC_NOT:
        return validate_ir(ir.get("matcher"))

    # Any IRType without specific rules is accepted on its "type" alone.
    return True
@@ -0,0 +1,101 @@
1
+ """Logic operators for combining matchers."""
2
+
3
+ from typing import Union
4
+ from .matchers import CallMatcher, VariableMatcher
5
+ from .dataflow import DataflowMatcher
6
+ from .ir import IRType
7
+
8
# Type alias used to annotate the operands of the logic operators in this
# module. The operator classes are given as strings because they are defined
# further down, after this alias is evaluated.
MatcherType = Union[
    CallMatcher,
    VariableMatcher,
    DataflowMatcher,
    "AndOperator",
    "OrOperator",
    "NotOperator",
]
16
+
17
+
18
class AndOperator:
    """
    Conjunction of matchers - every operand has to match.

    Example:
        And(calls("eval"), variable("user_input"))
        # Matches code that has BOTH eval calls AND user_input variable
    """

    def __init__(self, *matchers: MatcherType):
        operands = list(matchers)
        # A conjunction of fewer than two operands is rejected outright.
        if len(operands) < 2:
            raise ValueError("And() requires at least 2 matchers")
        self.matchers = operands

    def to_ir(self) -> dict:
        """Serialize this node and each operand to an IR dictionary."""
        node_type = IRType.LOGIC_AND.value
        children = [operand.to_ir() for operand in self.matchers]
        return {"type": node_type, "matchers": children}

    def __repr__(self) -> str:
        count = len(self.matchers)
        return f"And({count} matchers)"
40
+
41
+
42
class OrOperator:
    """
    Disjunction of matchers - one matching operand is enough.

    Example:
        Or(calls("eval"), calls("exec"))
        # Matches code with eval OR exec
    """

    def __init__(self, *matchers: MatcherType):
        operands = list(matchers)
        # A disjunction of fewer than two operands is rejected outright.
        if len(operands) < 2:
            raise ValueError("Or() requires at least 2 matchers")
        self.matchers = operands

    def to_ir(self) -> dict:
        """Serialize this node and each operand to an IR dictionary."""
        node_type = IRType.LOGIC_OR.value
        children = [operand.to_ir() for operand in self.matchers]
        return {"type": node_type, "matchers": children}

    def __repr__(self) -> str:
        count = len(self.matchers)
        return f"Or({count} matchers)"
64
+
65
+
66
class NotOperator:
    """
    Negation of a single matcher - the operand must NOT match.

    Example:
        Not(calls("test_*"))
        # Matches code that does NOT call test_* functions
    """

    def __init__(self, matcher: MatcherType):
        self.matcher = matcher

    def to_ir(self) -> dict:
        """Serialize this node and its operand to an IR dictionary."""
        node_type = IRType.LOGIC_NOT.value
        child = self.matcher.to_ir()
        return {"type": node_type, "matcher": child}

    def __repr__(self) -> str:
        return f"Not({self.matcher!r})"
86
+
87
+
88
+ # Public API
89
def And(*matchers: MatcherType) -> AndOperator:
    """Build an AndOperator over the given matchers."""
    combinator = AndOperator(*matchers)
    return combinator
92
+
93
+
94
def Or(*matchers: MatcherType) -> OrOperator:
    """Build an OrOperator over the given matchers."""
    combinator = OrOperator(*matchers)
    return combinator
97
+
98
+
99
def Not(matcher: MatcherType) -> NotOperator:
    """Build a NotOperator around the given matcher."""
    combinator = NotOperator(matcher)
    return combinator