codepathfinder 1.2.0__py3-none-manylinux_2_17_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codepathfinder/__init__.py +48 -0
- codepathfinder/bin/pathfinder +0 -0
- codepathfinder/cli/__init__.py +204 -0
- codepathfinder/config.py +92 -0
- codepathfinder/dataflow.py +193 -0
- codepathfinder/decorators.py +158 -0
- codepathfinder/ir.py +107 -0
- codepathfinder/logic.py +101 -0
- codepathfinder/matchers.py +243 -0
- codepathfinder/presets.py +135 -0
- codepathfinder/propagation.py +250 -0
- codepathfinder-1.2.0.dist-info/METADATA +111 -0
- codepathfinder-1.2.0.dist-info/RECORD +33 -0
- codepathfinder-1.2.0.dist-info/WHEEL +5 -0
- codepathfinder-1.2.0.dist-info/entry_points.txt +2 -0
- codepathfinder-1.2.0.dist-info/licenses/LICENSE +661 -0
- codepathfinder-1.2.0.dist-info/top_level.txt +2 -0
- rules/__init__.py +36 -0
- rules/container_combinators.py +209 -0
- rules/container_decorators.py +223 -0
- rules/container_ir.py +104 -0
- rules/container_matchers.py +230 -0
- rules/container_programmatic.py +115 -0
- rules/python/__init__.py +0 -0
- rules/python/deserialization/__init__.py +0 -0
- rules/python/deserialization/pickle_loads.py +479 -0
- rules/python/django/__init__.py +0 -0
- rules/python/django/sql_injection.py +355 -0
- rules/python/flask/__init__.py +0 -0
- rules/python/flask/debug_mode.py +374 -0
- rules/python/injection/__init__.py +0 -0
- rules/python_decorators.py +177 -0
- rules/python_ir.py +80 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
codepathfinder - Python DSL for static analysis security patterns

Examples:
    Basic matchers:
        >>> from codepathfinder import calls, variable
        >>> calls("eval")
        >>> variable("user_input")

    Rule definition:
        >>> from codepathfinder import rule, calls
        >>> @rule(id="test", severity="high")
        ... def detect_eval():
        ...     return calls("eval")

    Dataflow analysis:
        >>> from codepathfinder import flows, calls, propagates
        >>> flows(
        ...     from_sources=calls("request.GET"),
        ...     to_sinks=calls("execute"),
        ...     propagates_through=[propagates.assignment()]
        ... )
"""

__version__ = "1.2.0"

from .matchers import calls, variable
from .decorators import rule
from .dataflow import flows
from .propagation import propagates
from .presets import PropagationPresets
from .config import set_default_propagation, set_default_scope
from .logic import And, Or, Not

# Names re-exported as the package's public API.
__all__ = [
    "calls",
    "variable",
    "rule",
    "flows",
    "propagates",
    "PropagationPresets",
    "set_default_propagation",
    "set_default_scope",
    "And",
    "Or",
    "Not",
    "__version__",
]
|
|
Binary file
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""CLI wrapper for pathfinder binary.
|
|
2
|
+
|
|
3
|
+
This module provides the entry point for the `pathfinder` command.
|
|
4
|
+
It locates and executes the bundled Go binary, passing through all arguments.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import subprocess
|
|
10
|
+
import platform
|
|
11
|
+
import urllib.request
|
|
12
|
+
import tarfile
|
|
13
|
+
import zipfile
|
|
14
|
+
import tempfile
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from codepathfinder import __version__
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_binary_name() -> str:
    """Return the file name of the pathfinder executable on this platform.

    Windows (``sys.platform == "win32"``) uses an ``.exe`` suffix; every
    other platform uses the bare name.
    """
    return "pathfinder.exe" if sys.platform == "win32" else "pathfinder"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_binary_path() -> Path:
    """Get path to the pathfinder binary.

    Priority:
        1. Bundled binary in package (platform wheels)
        2. Binary in PATH (for development or manual install)
        3. Download on first use (fallback for source installs)

    A PATH hit that is the script currently running is ignored: this module
    is itself the entry point of the ``pathfinder`` command, so returning it
    would make the wrapper launch itself again instead of the real binary.

    Returns:
        Path to the executable binary

    Raises:
        RuntimeError: If binary cannot be found or downloaded
    """
    import shutil

    binary_name = get_binary_name()

    # 1. Bundled binary: this file lives in <package>/cli/, the binary in
    #    <package>/bin/.
    bin_dir = Path(__file__).parent.parent / "bin"
    bundled_binary = bin_dir / binary_name
    if bundled_binary.exists() and os.access(bundled_binary, os.X_OK):
        return bundled_binary

    # 2. PATH lookup (development mode or manual install), skipping ourselves.
    path_binary = shutil.which("pathfinder")
    if path_binary and not _is_current_script(path_binary):
        return Path(path_binary)

    # 3. Download on first use (source distribution fallback)
    return _download_binary(bin_dir, binary_name)


def _is_current_script(candidate: str) -> bool:
    """Return True when *candidate* is the script that is currently running."""
    if not sys.argv or not sys.argv[0]:
        return False
    try:
        running = Path(sys.argv[0]).resolve()
        found = Path(candidate).resolve()
    except (OSError, RuntimeError):
        # Unresolvable paths cannot be proven identical; treat as different.
        return False
    if found == running:
        return True
    # Launchers may differ from argv[0] only by an extension (e.g. ".exe").
    return found.parent == running.parent and found.stem == running.stem
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _is_musl() -> bool:
|
|
63
|
+
"""Detect if running on musl libc (Alpine Linux, etc.)."""
|
|
64
|
+
try:
|
|
65
|
+
result = subprocess.run(["ldd", "--version"], capture_output=True, text=True)
|
|
66
|
+
return "musl" in result.stderr.lower() or "musl" in result.stdout.lower()
|
|
67
|
+
except Exception:
|
|
68
|
+
try:
|
|
69
|
+
with open("/etc/os-release") as f:
|
|
70
|
+
content = f.read().lower()
|
|
71
|
+
return "alpine" in content
|
|
72
|
+
except Exception:
|
|
73
|
+
return False
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _get_platform_string() -> str:
    """Build the ``<os>-<arch>[-musl]`` string used in release archive names.

    Recognised machines: x86_64/amd64 (-> "amd64") and aarch64/arm64/armv8l
    (-> "arm64"). Recognised systems: linux, darwin, windows. On Linux the
    suffix "-musl" is appended when ``_is_musl()`` reports musl libc.

    Raises:
        RuntimeError: If the machine or the operating system is not recognised.
    """
    machine = platform.machine().lower()
    system = platform.system().lower()

    arch = {
        "x86_64": "amd64",
        "amd64": "amd64",
        "aarch64": "arm64",
        "arm64": "arm64",
        "armv8l": "arm64",
    }.get(machine)
    if not arch:
        raise RuntimeError(
            f"Unsupported architecture: {machine}\n"
            "Supported: x86_64, aarch64/arm64\n"
            "Download manually from: https://github.com/shivasurya/code-pathfinder/releases"
        )

    # Supported OS names map to themselves.
    os_name = system if system in ("linux", "darwin", "windows") else None
    if not os_name:
        raise RuntimeError(
            f"Unsupported operating system: {system}\n"
            "Supported: Linux, macOS, Windows\n"
            "Download manually from: https://github.com/shivasurya/code-pathfinder/releases"
        )

    suffix = "-musl" if os_name == "linux" and _is_musl() else ""
    return f"{os_name}-{arch}{suffix}"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _download_binary(bin_dir: Path, binary_name: str) -> Path:
    """Download binary for current platform from GitHub releases.

    The release archive is fetched into a temporary file, the pathfinder
    executable is read out of it and written to ``bin_dir / binary_name``.
    The temporary archive is removed whether or not the download succeeds.

    Args:
        bin_dir: Directory to store the binary
        binary_name: Name of the binary file

    Returns:
        Path to the downloaded binary

    Raises:
        RuntimeError: If the platform is unsupported, the download fails, or
            the archive does not contain the binary
    """
    platform_str = _get_platform_string()
    archive_ext = ".zip" if sys.platform == "win32" else ".tar.gz"

    url = (
        f"https://github.com/shivasurya/code-pathfinder/releases/download/"
        f"v{__version__}/pathfinder-{platform_str}{archive_ext}"
    )

    print(f"Downloading pathfinder binary for {platform_str}...", file=sys.stderr)

    bin_dir.mkdir(parents=True, exist_ok=True)
    binary_path = bin_dir / binary_name

    archive_path = None
    try:
        # Reserve a temp file name, then close it so urlretrieve can write to it.
        with tempfile.NamedTemporaryFile(suffix=archive_ext, delete=False) as tmp:
            archive_path = tmp.name
        urllib.request.urlretrieve(url, archive_path)

        payload = _read_binary_from_archive(archive_path, archive_ext)
        if payload is None:
            # Without this the caller would get a path to a missing file.
            raise RuntimeError("archive does not contain the pathfinder binary")
        binary_path.write_bytes(payload)
    except Exception as e:
        raise RuntimeError(
            f"Failed to download pathfinder binary from {url}: {e}\n"
            f"You can manually download from: "
            f"https://github.com/shivasurya/code-pathfinder/releases"
        ) from e
    finally:
        if archive_path is not None:
            try:
                os.unlink(archive_path)
            except OSError:
                pass  # best-effort cleanup of the temporary archive

    if sys.platform != "win32":
        os.chmod(binary_path, 0o755)

    print(f"Binary installed to: {binary_path}", file=sys.stderr)
    return binary_path


def _read_binary_from_archive(archive_path: str, archive_ext: str):
    """Return the executable's bytes from the archive, or None if absent."""
    if archive_ext == ".tar.gz":
        with tarfile.open(archive_path, "r:gz") as tar:
            for member in tar.getmembers():
                # Only regular files: a directory named "pathfinder" must not match.
                if member.isfile() and (
                    member.name == "pathfinder" or member.name.endswith("/pathfinder")
                ):
                    src = tar.extractfile(member)
                    if src is not None:
                        with src:
                            return src.read()
        return None

    with zipfile.ZipFile(archive_path, "r") as zip_ref:
        for name in zip_ref.namelist():
            if name == "pathfinder.exe" or name.endswith("/pathfinder.exe"):
                return zip_ref.read(name)
    return None
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def main():
    """Run the pathfinder binary with this process's arguments.

    Exits with status 2 (after printing the error to stderr) when the binary
    cannot be located; otherwise exits with the binary's own return code.
    """
    try:
        executable = get_binary_path()
    except RuntimeError as err:
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(2)

    command = [str(executable), *sys.argv[1:]]
    completed = subprocess.run(command)
    sys.exit(completed.returncode)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
if __name__ == "__main__":
|
|
204
|
+
main()
|
codepathfinder/config.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Global configuration for codepathfinder DSL.
|
|
3
|
+
|
|
4
|
+
Allows setting default propagation, scope, etc.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
from .propagation import PropagationPrimitive
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PathfinderConfig:
    """Settings holder for codepathfinder; every instantiation yields the same object."""

    # The single shared instance, created on first instantiation.
    _instance: Optional["PathfinderConfig"] = None
    # Defaults used until a setter replaces them.
    _default_propagation: List[PropagationPrimitive] = []
    _default_scope: str = "global"

    def __new__(cls):
        existing = cls._instance
        if existing is None:
            existing = cls._instance = super().__new__(cls)
        return existing

    @property
    def default_propagation(self) -> List[PropagationPrimitive]:
        """Propagation primitives currently configured as the default."""
        return self._default_propagation

    @default_propagation.setter
    def default_propagation(self, value: List[PropagationPrimitive]):
        """Replace the default propagation primitives (stored as given)."""
        self._default_propagation = value

    @property
    def default_scope(self) -> str:
        """Scope currently configured as the default."""
        return self._default_scope

    @default_scope.setter
    def default_scope(self, value: str):
        """Replace the default scope; raises ValueError unless 'local' or 'global'."""
        if value not in ("local", "global"):
            raise ValueError(f"scope must be 'local' or 'global', got '{value}'")
        self._default_scope = value
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Global config instance
|
|
47
|
+
_config = PathfinderConfig()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def set_default_propagation(primitives: List[PropagationPrimitive]) -> None:
    """
    Store *primitives* as the global default propagation.

    The value is written to the module's shared config object and is what
    get_default_propagation() returns afterwards.

    Args:
        primitives: List of PropagationPrimitive objects

    Example:
        set_default_propagation(PropagationPresets.standard())

        # flows() called without propagates_through now uses standard()
        flows(
            from_sources=calls("request.GET"),
            to_sinks=calls("eval"),
        )
    """
    _config.default_propagation = primitives
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def set_default_scope(scope: str) -> None:
    """
    Store *scope* as the global default scope.

    Args:
        scope: "local" or "global"

    Raises:
        ValueError: If scope is neither "local" nor "global"
            (raised by the config object's setter)

    Example:
        set_default_scope("local")
    """
    _config.default_scope = scope
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_default_propagation() -> List[PropagationPrimitive]:
    """Return the propagation primitives held by the shared config object."""
    current = _config.default_propagation
    return current
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_default_scope() -> str:
    """Return the scope held by the shared config object."""
    current = _config.default_scope
    return current
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dataflow matcher for taint analysis.
|
|
3
|
+
|
|
4
|
+
The flows() function is the core of OWASP Top 10 pattern detection.
|
|
5
|
+
It describes how tainted data flows from sources to sinks.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import List, Optional, Union
|
|
9
|
+
from .matchers import CallMatcher
|
|
10
|
+
from .propagation import PropagationPrimitive, create_propagation_list
|
|
11
|
+
from .ir import IRType
|
|
12
|
+
from .config import get_default_propagation, get_default_scope
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DataflowMatcher:
    """
    Describes a taint flow from source matchers to sink matchers.

    Attributes:
        sources: Matchers for taint sources (e.g., user input)
        sinks: Matchers for dangerous sinks (e.g., eval, execute)
        sanitizers: Matchers for sanitizer functions (empty list when none given)
        propagates_through: Propagation primitives; the configured global
            default when the argument is omitted
        scope: "local" (same function) or "global" (cross-function)
    """

    def __init__(
        self,
        from_sources: Union[CallMatcher, List[CallMatcher]],
        to_sinks: Union[CallMatcher, List[CallMatcher]],
        sanitized_by: Optional[Union[CallMatcher, List[CallMatcher]]] = None,
        propagates_through: Optional[List[PropagationPrimitive]] = None,
        scope: Optional[str] = None,
    ):
        """
        Args:
            from_sources: Source matcher(s) - where taint originates
            to_sinks: Sink matcher(s) - dangerous functions
            sanitized_by: Optional sanitizer matcher(s)
            propagates_through: List of propagation primitives
                (None = use get_default_propagation())
            scope: "local" (intra-procedural) or "global" (inter-procedural)
                (None = use get_default_scope())

        Raises:
            ValueError: If sources or sinks are empty, or scope is not
                "local" / "global"

        Examples:
            # SQL Injection
            flows(
                from_sources=calls("request.GET", "request.POST"),
                to_sinks=calls("execute", "executemany"),
                sanitized_by=calls("quote_sql"),
                propagates_through=[
                    propagates.assignment(),
                    propagates.function_args(),
                ],
                scope="global"
            )
        """
        # A bare CallMatcher is wrapped in a list; anything else is kept as given.
        sources = [from_sources] if isinstance(from_sources, CallMatcher) else from_sources
        if not sources:
            raise ValueError("flows() requires at least one source")
        self.sources = sources

        sinks = [to_sinks] if isinstance(to_sinks, CallMatcher) else to_sinks
        if not sinks:
            raise ValueError("flows() requires at least one sink")
        self.sinks = sinks

        if sanitized_by is None:
            sanitizers = []
        elif isinstance(sanitized_by, CallMatcher):
            sanitizers = [sanitized_by]
        else:
            sanitizers = sanitized_by
        self.sanitizers = sanitizers

        # Omitted propagation falls back to the globally configured default.
        self.propagates_through = (
            get_default_propagation() if propagates_through is None else propagates_through
        )

        # Omitted scope falls back to the globally configured default.
        chosen_scope = get_default_scope() if scope is None else scope
        if chosen_scope not in ("local", "global"):
            raise ValueError(f"scope must be 'local' or 'global', got '{chosen_scope}'")
        self.scope = chosen_scope

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        Returns:
            A dict with keys "type", "sources", "sinks", "sanitizers",
            "propagation" and "scope", e.g.
            {
                "type": "dataflow",
                "sources": [{"type": "call_matcher", "patterns": ["request.GET"], ...}],
                "sinks": [{"type": "call_matcher", "patterns": ["execute"], ...}],
                "sanitizers": [{"type": "call_matcher", "patterns": ["quote_sql"], ...}],
                "propagation": [{"type": "assignment", "metadata": {}}],
                "scope": "global"
            }
        """
        ir_type = IRType.DATAFLOW.value
        source_irs = [matcher.to_ir() for matcher in self.sources]
        sink_irs = [matcher.to_ir() for matcher in self.sinks]
        sanitizer_irs = [matcher.to_ir() for matcher in self.sanitizers]
        propagation_ir = create_propagation_list(self.propagates_through)
        return {
            "type": ir_type,
            "sources": source_irs,
            "sinks": sink_irs,
            "sanitizers": sanitizer_irs,
            "propagation": propagation_ir,
            "scope": self.scope,
        }

    def __repr__(self) -> str:
        counts = (
            f"sources={len(self.sources)}, sinks={len(self.sinks)}, "
            f"propagation={len(self.propagates_through)}"
        )
        return f"flows({counts}, scope='{self.scope}')"
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# Public API
|
|
143
|
+
def flows(
    from_sources: Union[CallMatcher, List[CallMatcher]],
    to_sinks: Union[CallMatcher, List[CallMatcher]],
    sanitized_by: Optional[Union[CallMatcher, List[CallMatcher]]] = None,
    propagates_through: Optional[List[PropagationPrimitive]] = None,
    scope: Optional[str] = None,
) -> DataflowMatcher:
    """
    Build a DataflowMatcher from the given arguments.

    All arguments are forwarded unchanged to DataflowMatcher, which performs
    the validation and applies the global defaults.

    Args:
        from_sources: Where taint originates (e.g., user input)
        to_sinks: Dangerous functions that consume tainted data
        sanitized_by: Optional functions that neutralize taint
        propagates_through: How taint flows (None = global default)
        scope: "local" or "global" analysis (None = global default)

    Returns:
        DataflowMatcher instance

    Examples:
        >>> from codepathfinder import flows, calls, propagates
        >>>
        >>> # SQL Injection
        >>> flows(
        ...     from_sources=calls("request.GET"),
        ...     to_sinks=calls("execute"),
        ...     propagates_through=[propagates.assignment()]
        ... )
        >>>
        >>> # Command Injection with sanitization
        >>> flows(
        ...     from_sources=calls("request.POST"),
        ...     to_sinks=calls("os.system", "subprocess.call"),
        ...     sanitized_by=calls("shlex.quote"),
        ...     propagates_through=[
        ...         propagates.assignment(),
        ...         propagates.function_args()
        ...     ],
        ...     scope="global"
        ... )
    """
    return DataflowMatcher(from_sources, to_sinks, sanitized_by, propagates_through, scope)
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Decorators for pathfinder rule definitions.
|
|
3
|
+
|
|
4
|
+
The @rule decorator marks functions as security patterns.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import atexit
|
|
8
|
+
import json
|
|
9
|
+
import sys
|
|
10
|
+
from typing import Callable, Optional, List
|
|
11
|
+
from .ir import serialize_ir
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Global registry for auto-execution
|
|
15
|
+
_rule_registry: List["Rule"] = []
|
|
16
|
+
_auto_execute_enabled = False
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _enable_auto_execute() -> None:
    """
    Arrange for all registered rules to be printed as JSON at interpreter exit.

    Only the first call has an effect: it sets the module-level flag and
    registers an atexit handler. Later calls return without doing anything.
    """
    global _auto_execute_enabled
    if not _auto_execute_enabled:
        _auto_execute_enabled = True

        def _emit_registered_rules():
            """Execute every registered rule and print the results as one JSON array."""
            if _rule_registry:
                payload = [entry.execute() for entry in _rule_registry]
                # Written to stdout so the Go loader can capture it.
                print(json.dumps(payload))

        atexit.register(_emit_registered_rules)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _register_rule(rule_obj: "Rule") -> None:
    """
    Add a rule to the registry and turn on auto-execution for scripts.

    Args:
        rule_obj: The Rule instance to register
    """
    _rule_registry.append(rule_obj)

    # Two frames up from here: frame 0 is this function, frame 1 its caller
    # (the decorator), frame 2 the code that applied the decorator. Auto-
    # execution is enabled only when that code runs as the main script.
    defining_module = sys._getframe(2).f_globals.get("__name__")
    if defining_module == "__main__":
        _enable_auto_execute()
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Rule:
    """
    A security rule: metadata plus the function that builds its matcher.

    Attributes:
        id: Unique rule identifier (e.g., "sqli-001")
        name: The rule function's ``__name__``
        severity: critical | high | medium | low
        cwe: CWE identifier (e.g., "CWE-89"), or None
        owasp: OWASP category (e.g., "A03:2021"), or None
        description: The rule function's docstring ("" when it has none)
        func: The rule function; calling it returns the matcher/combinator
    """

    def __init__(
        self,
        id: str,
        severity: str,
        func: Callable,
        cwe: Optional[str] = None,
        owasp: Optional[str] = None,
    ):
        self.func = func
        self.name = func.__name__
        self.description = func.__doc__ or ""
        self.id = id
        self.severity = severity
        self.cwe = cwe
        self.owasp = owasp

    def execute(self) -> dict:
        """
        Call the rule function and serialize rule plus matcher to JSON IR.

        Returns:
            {
                "rule": {
                    "id": "sqli-001",
                    "name": "detect_sql_injection",
                    "severity": "critical",
                    "cwe": "CWE-89",
                    "owasp": "A03:2021",
                    "description": "Detects SQL injection vulnerabilities"
                },
                "matcher": {
                    "type": "call_matcher",
                    "patterns": ["execute"],
                    "wildcard": false
                }
            }
        """
        matcher = self.func()
        metadata = {
            "id": self.id,
            "name": self.name,
            "severity": self.severity,
            "cwe": self.cwe,
            "owasp": self.owasp,
            "description": self.description.strip(),
        }
        return {"rule": metadata, "matcher": serialize_ir(matcher)}
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def rule(
    id: str,
    severity: str,
    cwe: Optional[str] = None,
    owasp: Optional[str] = None,
) -> Callable[[Callable], Rule]:
    """
    Decorator factory that turns a function into a registered Rule.

    The decorated name is bound to the Rule object (not the original
    function), and the rule is added to the module's registry.

    Args:
        id: Unique rule identifier
        severity: critical | high | medium | low
        cwe: Optional CWE identifier
        owasp: Optional OWASP category

    Returns:
        Decorator function

    Example:
        @rule(id="code-injection", severity="critical", cwe="CWE-94")
        def detect_code_injection():
            '''Detects code injection via eval'''
            return calls("eval", "exec")
    """

    def wrap(func: Callable) -> Rule:
        registered = Rule(id, severity, func, cwe=cwe, owasp=owasp)
        # Must be called directly from here: _register_rule inspects the
        # call stack relative to this frame.
        _register_rule(registered)
        return registered

    return wrap
|