codepathfinder 1.2.0__py3-none-manylinux_2_17_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ """
2
+ codepathfinder - Python DSL for static analysis security patterns
3
+
4
+ Examples:
5
+ Basic matchers:
6
+ >>> from codepathfinder import calls, variable
7
+ >>> calls("eval")
8
+ >>> variable("user_input")
9
+
10
+ Rule definition:
11
+ >>> from codepathfinder import rule, calls
12
+ >>> @rule(id="test", severity="high")
13
+ ... def detect_eval():
14
+ ...     return calls("eval")
15
+
16
+ Dataflow analysis:
17
+ >>> from codepathfinder import flows, calls, propagates
18
+ >>> flows(
19
+ ... from_sources=calls("request.GET"),
20
+ ... to_sinks=calls("execute"),
21
+ ... propagates_through=[propagates.assignment()]
22
+ ... )
23
+ """
24
+
25
+ __version__ = "1.2.0"
26
+
27
+ from .matchers import calls, variable
28
+ from .decorators import rule
29
+ from .dataflow import flows
30
+ from .propagation import propagates
31
+ from .presets import PropagationPresets
32
+ from .config import set_default_propagation, set_default_scope
33
+ from .logic import And, Or, Not
34
+
35
+ __all__ = [
36
+ "calls",
37
+ "variable",
38
+ "rule",
39
+ "flows",
40
+ "propagates",
41
+ "PropagationPresets",
42
+ "set_default_propagation",
43
+ "set_default_scope",
44
+ "And",
45
+ "Or",
46
+ "Not",
47
+ "__version__",
48
+ ]
Binary file
@@ -0,0 +1,204 @@
1
+ """CLI wrapper for pathfinder binary.
2
+
3
+ This module provides the entry point for the `pathfinder` command.
4
+ It locates and executes the bundled Go binary, passing through all arguments.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import subprocess
10
+ import platform
11
+ import urllib.request
12
+ import tarfile
13
+ import zipfile
14
+ import tempfile
15
+ from pathlib import Path
16
+
17
+ from codepathfinder import __version__
18
+
19
+
20
def get_binary_name() -> str:
    """Return the file name of the pathfinder executable for this platform.

    Windows (``sys.platform == "win32"``) uses ``pathfinder.exe``; every other
    platform uses plain ``pathfinder``.
    """
    return "pathfinder.exe" if sys.platform == "win32" else "pathfinder"
25
+
26
+
27
def get_binary_path() -> Path:
    """Locate the pathfinder executable, downloading it as a last resort.

    Priority:
        1. Bundled binary in the ``bin`` directory two levels above this file
           (platform wheels).
        2. A ``pathfinder`` executable on PATH (development or manual install).
           The script that is currently running is skipped, because this
           module is itself the ``pathfinder`` entry point; picking it would
           make the wrapper re-execute itself forever.
        3. Download on first use (fallback for source installs).

    Returns:
        Path to the executable binary.

    Raises:
        RuntimeError: If the binary cannot be found or downloaded.
    """
    binary_name = get_binary_name()

    # 1. Bundled binary (primary - shipped inside platform wheels).
    bin_dir = Path(__file__).parent.parent / "bin"
    bundled_binary = bin_dir / binary_name
    if bundled_binary.exists() and os.access(bundled_binary, os.X_OK):
        return bundled_binary

    # 2. PATH lookup (development mode or manual install), never ourselves.
    path_binary = _find_on_path_excluding_self()
    if path_binary:
        return Path(path_binary)

    # 3. Download on first use (source distribution fallback).
    return _download_binary(bin_dir, binary_name)


def _find_on_path_excluding_self():
    """Return the first ``pathfinder`` on PATH that is not the running script, else None."""
    import shutil

    found = shutil.which("pathfinder")
    if not found or not _is_running_script(found):
        # Common case: identical to a plain shutil.which() lookup.
        return found

    # The first hit is this wrapper; look at each PATH entry on its own so a
    # real binary installed further down the PATH can still be used.
    for directory in os.get_exec_path():
        if not directory:
            continue
        candidate = shutil.which("pathfinder", path=directory)
        if candidate and not _is_running_script(candidate):
            return candidate
    return None


def _is_running_script(candidate: str) -> bool:
    """Return True when *candidate* is the same file as ``sys.argv[0]``."""
    # NOTE(review): assumes sys.argv[0] is the launcher path for console-script
    # installs; confirm for Windows launchers that use a separate -script.py.
    try:
        return os.path.samefile(candidate, sys.argv[0])
    except (OSError, ValueError, IndexError):
        # Missing/odd argv[0] (e.g. "-c", "") cannot be this candidate.
        return False
60
+
61
+
62
+ def _is_musl() -> bool:
63
+ """Detect if running on musl libc (Alpine Linux, etc.)."""
64
+ try:
65
+ result = subprocess.run(["ldd", "--version"], capture_output=True, text=True)
66
+ return "musl" in result.stderr.lower() or "musl" in result.stdout.lower()
67
+ except Exception:
68
+ try:
69
+ with open("/etc/os-release") as f:
70
+ content = f.read().lower()
71
+ return "alpine" in content
72
+ except Exception:
73
+ return False
74
+
75
+
76
def _get_platform_string() -> str:
    """Build the ``<os>-<arch>[-musl]`` tag used in release archive names.

    Recognised combinations:
    - Linux glibc: x86_64, aarch64
    - Linux musl: x86_64, aarch64 (Alpine Docker)
    - macOS: arm64 (M1/M2/M3), x86_64 (Intel)
    - Windows: x86_64

    Raises:
        RuntimeError: If the machine architecture or the operating system is
            not one of the supported values.
    """
    releases_line = (
        "Download manually from: https://github.com/shivasurya/code-pathfinder/releases"
    )
    system = platform.system().lower()
    machine = platform.machine().lower()

    # Map the names reported by platform.machine() onto release arch names.
    release_arch = {
        "x86_64": "amd64",
        "amd64": "amd64",
        "aarch64": "arm64",
        "arm64": "arm64",
        "armv8l": "arm64",
    }
    if machine not in release_arch:
        raise RuntimeError(
            "\n".join(
                [
                    f"Unsupported architecture: {machine}",
                    "Supported: x86_64, aarch64/arm64",
                    releases_line,
                ]
            )
        )
    arch = release_arch[machine]

    # The architecture is validated first, so its error wins when both fail.
    if system not in ("linux", "darwin", "windows"):
        raise RuntimeError(
            "\n".join(
                [
                    f"Unsupported operating system: {system}",
                    "Supported: Linux, macOS, Windows",
                    releases_line,
                ]
            )
        )

    tag = f"{system}-{arch}"
    if system == "linux" and _is_musl():
        tag += "-musl"
    return tag
122
+
123
+
124
def _download_binary(bin_dir: Path, binary_name: str) -> Path:
    """Download the pathfinder binary for this platform from GitHub releases.

    The release archive for ``v{__version__}`` is fetched into a temporary
    file, the executable is read out of it and written to
    ``bin_dir / binary_name``, and on non-Windows platforms the result is
    marked executable. The temporary archive is removed whether or not the
    installation succeeds.

    Args:
        bin_dir: Directory to store the binary (created if missing).
        binary_name: Name of the binary file to write.

    Returns:
        Path to the downloaded binary.

    Raises:
        RuntimeError: If the platform is unsupported, the download or
            extraction fails, the archive does not contain the executable, or
            the binary cannot be written to *bin_dir*.
    """
    platform_str = _get_platform_string()
    archive_ext = ".zip" if sys.platform == "win32" else ".tar.gz"

    url = (
        f"https://github.com/shivasurya/code-pathfinder/releases/download/"
        f"v{__version__}/pathfinder-{platform_str}{archive_ext}"
    )

    print(f"Downloading pathfinder binary for {platform_str}...", file=sys.stderr)

    binary_path = bin_dir / binary_name
    archive_path = None
    try:
        bin_dir.mkdir(parents=True, exist_ok=True)

        # Reserve a temp file name, then close it before downloading so the
        # file is never open twice at once.
        with tempfile.NamedTemporaryFile(suffix=archive_ext, delete=False) as tmp:
            archive_path = tmp.name
        urllib.request.urlretrieve(url, archive_path)

        payload = None
        if archive_ext == ".tar.gz":
            with tarfile.open(archive_path, "r:gz") as tar:
                for member in tar.getmembers():
                    is_target = member.name == "pathfinder" or member.name.endswith(
                        "/pathfinder"
                    )
                    # Only regular files are accepted; links, devices and
                    # directories with a matching name are ignored.
                    if is_target and member.isfile():
                        with tar.extractfile(member) as src:
                            payload = src.read()
                        break
        else:
            with zipfile.ZipFile(archive_path, "r") as zip_ref:
                for name in zip_ref.namelist():
                    if name == "pathfinder.exe" or name.endswith("/pathfinder.exe"):
                        with zip_ref.open(name) as src:
                            payload = src.read()
                        break

        if payload is None:
            # Previously this case fell through and failed later with a
            # non-RuntimeError (or returned a path that did not exist).
            raise RuntimeError("archive does not contain the pathfinder executable")

        binary_path.write_bytes(payload)
        if sys.platform != "win32":
            os.chmod(binary_path, 0o755)
    except Exception as e:
        raise RuntimeError(
            f"Failed to download pathfinder binary from {url}: {e}\n"
            f"You can manually download from: "
            f"https://github.com/shivasurya/code-pathfinder/releases"
        ) from e
    finally:
        # Always clean up the downloaded archive, including on failure.
        if archive_path is not None:
            try:
                os.unlink(archive_path)
            except OSError:
                pass

    print(f"Binary installed to: {binary_path}", file=sys.stderr)
    return binary_path
189
+
190
+
191
def main():
    """Entry point: run the pathfinder binary with this process's arguments.

    Exits with status 2 (after printing the reason to stderr) when the binary
    cannot be located; otherwise exits with the binary's own return code.
    """
    try:
        executable = get_binary_path()
    except RuntimeError as err:
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(2)

    # Forward every command-line argument untouched.
    command = [str(executable), *sys.argv[1:]]
    completed = subprocess.run(command)
    sys.exit(completed.returncode)
201
+
202
+
203
+ if __name__ == "__main__":
204
+ main()
@@ -0,0 +1,92 @@
1
+ """
2
+ Global configuration for codepathfinder DSL.
3
+
4
+ Allows setting default propagation, scope, etc.
5
+ """
6
+
7
+ from typing import List, Optional
8
+ from .propagation import PropagationPrimitive
9
+
10
+
11
class PathfinderConfig:
    """Shared settings object for the codepathfinder DSL.

    ``__new__`` stores the first instance on the class and hands the same
    object back on every later construction, so all callers see one config.
    """

    # The single shared instance; created lazily by __new__.
    _instance: Optional["PathfinderConfig"] = None
    # Class-level fallbacks, used until the matching setter assigns a value.
    _default_propagation: List[PropagationPrimitive] = []
    _default_scope: str = "global"

    def __new__(cls):
        existing = cls._instance
        if existing is None:
            existing = super().__new__(cls)
            cls._instance = existing
        return existing

    @property
    def default_propagation(self) -> List[PropagationPrimitive]:
        """Propagation primitives used when none are given explicitly."""
        return self._default_propagation

    @default_propagation.setter
    def default_propagation(self, value: List[PropagationPrimitive]):
        """Replace the default propagation primitives (stored as given)."""
        self._default_propagation = value

    @property
    def default_scope(self) -> str:
        """Scope used when none is given explicitly."""
        return self._default_scope

    @default_scope.setter
    def default_scope(self, value: str):
        """Replace the default scope; only 'local' and 'global' are accepted."""
        if value in ("local", "global"):
            self._default_scope = value
            return
        raise ValueError(f"scope must be 'local' or 'global', got '{value}'")
44
+
45
+
46
+ # Global config instance
47
+ _config = PathfinderConfig()
48
+
49
+
50
def set_default_propagation(primitives: List[PropagationPrimitive]) -> None:
    """
    Set global default propagation primitives.

    The list is stored on the module-level config object as given (it is not
    copied or validated) and is what get_default_propagation() returns
    afterwards. flows() calls that leave propagates_through as None pick up
    this default.

    Args:
        primitives: List of PropagationPrimitive objects

    Example:
        set_default_propagation(PropagationPresets.standard())

        # Now all flows() without propagates_through use standard()
        flows(
            from_sources=calls("request.GET"),
            to_sinks=calls("eval"),
            # propagates_through defaults to standard()
        )
    """
    _config.default_propagation = primitives
70
+
71
+
72
def set_default_scope(scope: str) -> None:
    """
    Set global default scope.

    The value is assigned through the config object's ``default_scope``
    property, which validates it.

    Args:
        scope: "local" or "global"

    Raises:
        ValueError: If scope is neither "local" nor "global".

    Example:
        set_default_scope("local")
    """
    _config.default_scope = scope
83
+
84
+
85
def get_default_propagation() -> List[PropagationPrimitive]:
    """Get global default propagation primitives.

    Returns the list held by the module-level config object itself (not a
    copy); it is an empty list until set_default_propagation() is called.
    """
    return _config.default_propagation
88
+
89
+
90
def get_default_scope() -> str:
    """Get global default scope.

    Returns the scope held by the module-level config object; it is "global"
    until set_default_scope() changes it.
    """
    return _config.default_scope
@@ -0,0 +1,193 @@
1
+ """
2
+ Dataflow matcher for taint analysis.
3
+
4
+ The flows() function is the core of OWASP Top 10 pattern detection.
5
+ It describes how tainted data flows from sources to sinks.
6
+ """
7
+
8
+ from typing import List, Optional, Union
9
+ from .matchers import CallMatcher
10
+ from .propagation import PropagationPrimitive, create_propagation_list
11
+ from .ir import IRType
12
+ from .config import get_default_propagation, get_default_scope
13
+
14
+
15
class DataflowMatcher:
    """
    Describes a taint flow: data that originates at sources and reaches sinks.

    This is the primary matcher for security vulnerabilities like:
    - SQL Injection (A03:2021)
    - Command Injection (A03:2021)
    - SSRF (A10:2021)
    - Path Traversal (A01:2021)
    - Insecure Deserialization (A08:2021)

    Attributes:
        sources: Matchers for taint sources (e.g., user input)
        sinks: Matchers for dangerous sinks (e.g., eval, execute)
        sanitizers: Matchers for sanitizer functions (empty list if none)
        propagates_through: List of propagation primitives
        scope: "local" (same function) or "global" (cross-function)
    """

    def __init__(
        self,
        from_sources: Union[CallMatcher, List[CallMatcher]],
        to_sinks: Union[CallMatcher, List[CallMatcher]],
        sanitized_by: Optional[Union[CallMatcher, List[CallMatcher]]] = None,
        propagates_through: Optional[List[PropagationPrimitive]] = None,
        scope: Optional[str] = None,
    ):
        """
        Args:
            from_sources: Source matcher(s) - where taint originates
            to_sinks: Sink matcher(s) - dangerous functions
            sanitized_by: Optional sanitizer matcher(s)
            propagates_through: List of propagation primitives; None means
                "use the global default from get_default_propagation()"
            scope: "local" (intra-procedural) or "global" (inter-procedural);
                None means "use the global default from get_default_scope()"

        Raises:
            ValueError: If sources or sinks are empty, or scope is invalid.

        Examples:
            # SQL Injection
            flows(
                from_sources=calls("request.GET", "request.POST"),
                to_sinks=calls("execute", "executemany"),
                sanitized_by=calls("quote_sql"),
                propagates_through=[
                    propagates.assignment(),
                    propagates.function_args(),
                ],
                scope="global"
            )
        """
        # Sources: a lone matcher is wrapped in a list; empty input is rejected.
        sources = [from_sources] if isinstance(from_sources, CallMatcher) else from_sources
        if not sources:
            raise ValueError("flows() requires at least one source")
        self.sources = sources

        # Sinks: same normalisation and emptiness check as for sources.
        sinks = [to_sinks] if isinstance(to_sinks, CallMatcher) else to_sinks
        if not sinks:
            raise ValueError("flows() requires at least one sink")
        self.sinks = sinks

        # Sanitizers are optional, so an empty list is acceptable here.
        if sanitized_by is None:
            sanitizers = []
        elif isinstance(sanitized_by, CallMatcher):
            sanitizers = [sanitized_by]
        else:
            sanitizers = sanitized_by
        self.sanitizers = sanitizers

        # Propagation: fall back to the global default when not supplied.
        self.propagates_through = (
            get_default_propagation() if propagates_through is None else propagates_through
        )

        # Scope: fall back to the global default, then validate either way.
        effective_scope = get_default_scope() if scope is None else scope
        if effective_scope not in ("local", "global"):
            raise ValueError(
                f"scope must be 'local' or 'global', got '{effective_scope}'"
            )
        self.scope = effective_scope

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        Returns:
            {
                "type": "dataflow",
                "sources": [
                    {"type": "call_matcher", "patterns": ["request.GET"], ...}
                ],
                "sinks": [
                    {"type": "call_matcher", "patterns": ["execute"], ...}
                ],
                "sanitizers": [
                    {"type": "call_matcher", "patterns": ["quote_sql"], ...}
                ],
                "propagation": [
                    {"type": "assignment", "metadata": {}},
                    {"type": "function_args", "metadata": {}}
                ],
                "scope": "global"
            }
        """
        ir = {"type": IRType.DATAFLOW.value}
        # Keys are added in the same order as the documented layout above.
        ir["sources"] = [matcher.to_ir() for matcher in self.sources]
        ir["sinks"] = [matcher.to_ir() for matcher in self.sinks]
        ir["sanitizers"] = [matcher.to_ir() for matcher in self.sanitizers]
        ir["propagation"] = create_propagation_list(self.propagates_through)
        ir["scope"] = self.scope
        return ir

    def __repr__(self) -> str:
        """Summarise the matcher by counts of sources, sinks and primitives."""
        return (
            f"flows(sources={len(self.sources)}, sinks={len(self.sinks)}, "
            f"propagation={len(self.propagates_through)}, scope='{self.scope}')"
        )
140
+
141
+
142
+ # Public API
143
def flows(
    from_sources: Union[CallMatcher, List[CallMatcher]],
    to_sinks: Union[CallMatcher, List[CallMatcher]],
    sanitized_by: Optional[Union[CallMatcher, List[CallMatcher]]] = None,
    propagates_through: Optional[List[PropagationPrimitive]] = None,
    scope: Optional[str] = None,
) -> DataflowMatcher:
    """
    Create a dataflow matcher for taint analysis.

    This is the PRIMARY matcher for OWASP Top 10 vulnerabilities. All
    arguments are forwarded unchanged to DataflowMatcher.

    Args:
        from_sources: Where taint originates (e.g., user input)
        to_sinks: Dangerous functions that consume tainted data
        sanitized_by: Optional functions that neutralize taint
        propagates_through: HOW taint flows. When omitted (None), the global
            default from get_default_propagation() is used, which is an
            empty list unless set_default_propagation() was called.
        scope: "local" or "global" analysis. When omitted (None), the global
            default from get_default_scope() is used.

    Returns:
        DataflowMatcher instance

    Raises:
        ValueError: If sources or sinks are empty, or scope is not
            "local"/"global".

    Examples:
        >>> from codepathfinder import flows, calls, propagates
        >>>
        >>> # SQL Injection
        >>> flows(
        ...     from_sources=calls("request.GET"),
        ...     to_sinks=calls("execute"),
        ...     propagates_through=[propagates.assignment()]
        ... )
        >>>
        >>> # Command Injection with sanitization
        >>> flows(
        ...     from_sources=calls("request.POST"),
        ...     to_sinks=calls("os.system", "subprocess.call"),
        ...     sanitized_by=calls("shlex.quote"),
        ...     propagates_through=[
        ...         propagates.assignment(),
        ...         propagates.function_args()
        ...     ],
        ...     scope="global"
        ... )
    """
    return DataflowMatcher(
        from_sources=from_sources,
        to_sinks=to_sinks,
        sanitized_by=sanitized_by,
        propagates_through=propagates_through,
        scope=scope,
    )
@@ -0,0 +1,158 @@
1
+ """
2
+ Decorators for pathfinder rule definitions.
3
+
4
+ The @rule decorator marks functions as security patterns.
5
+ """
6
+
7
+ import atexit
8
+ import json
9
+ import sys
10
+ from typing import Callable, Optional, List
11
+ from .ir import serialize_ir
12
+
13
+
14
+ # Global registry for auto-execution
15
+ _rule_registry: List["Rule"] = []
16
+ _auto_execute_enabled = False
17
+
18
+
19
def _enable_auto_execute() -> None:
    """
    Arrange for all registered rules to be printed as JSON at interpreter exit.

    The exit handler is installed at most once; later calls are no-ops. At
    exit the handler executes every rule in the registry and prints the
    resulting list as one JSON document on stdout (nothing is printed when the
    registry is empty).
    """
    global _auto_execute_enabled
    if not _auto_execute_enabled:
        _auto_execute_enabled = True

        def _emit_registered_rules():
            """Dump the JSON IR of every registered rule to stdout."""
            if _rule_registry:
                payload = []
                for registered in _rule_registry:
                    payload.append(registered.execute())
                # stdout is where the Go loader captures the rules from.
                print(json.dumps(payload))

        atexit.register(_emit_registered_rules)
45
+
46
+
47
def _register_rule(rule_obj: "Rule") -> None:
    """
    Add a rule to the global registry and maybe switch on auto-execution.

    Auto-execution is enabled only when the frame two levels up the call
    stack (the code applying the decorator) belongs to the ``__main__`` module,
    i.e. the rule file is being run as a script rather than imported.

    Args:
        rule_obj: The Rule instance to register
    """
    _rule_registry.append(rule_obj)

    # Depth 2 = whoever called our caller; the fixed depth means this function
    # must be invoked directly from the decorator.
    defining_frame = sys._getframe(2)
    defining_module = defining_frame.f_globals.get("__name__")
    if defining_module == "__main__":
        _enable_auto_execute()
61
+
62
+
63
class Rule:
    """
    A security rule: a matcher-producing function plus its metadata.

    Attributes:
        id: Unique rule identifier (e.g., "sqli-001")
        name: Human-readable name (the wrapped function's ``__name__``)
        severity: critical | high | medium | low
        cwe: CWE identifier (e.g., "CWE-89")
        owasp: OWASP category (e.g., "A03:2021")
        description: What this rule detects (the function's docstring, or "")
        func: The wrapped function; calling it yields the matcher/combinator
    """

    def __init__(
        self,
        id: str,
        severity: str,
        func: Callable,
        cwe: Optional[str] = None,
        owasp: Optional[str] = None,
    ):
        self.id = id
        self.name = func.__name__
        self.severity = severity
        self.cwe = cwe
        self.owasp = owasp
        # Functions without a docstring get an empty description.
        docstring = func.__doc__
        self.description = docstring if docstring else ""
        self.func = func

    def execute(self) -> dict:
        """
        Call the rule function and package its matcher with the metadata.

        Returns:
            {
                "rule": {
                    "id": "sqli-001",
                    "name": "detect_sql_injection",
                    "severity": "critical",
                    "cwe": "CWE-89",
                    "owasp": "A03:2021",
                    "description": "Detects SQL injection vulnerabilities"
                },
                "matcher": {
                    "type": "call_matcher",
                    "patterns": ["execute"],
                    "wildcard": false
                }
            }
        """
        produced_matcher = self.func()
        metadata = {
            "id": self.id,
            "name": self.name,
            "severity": self.severity,
            "cwe": self.cwe,
            "owasp": self.owasp,
            # Surrounding whitespace from the docstring is dropped on output.
            "description": self.description.strip(),
        }
        return {"rule": metadata, "matcher": serialize_ir(produced_matcher)}
126
+
127
+
128
def rule(
    id: str,
    severity: str,
    cwe: Optional[str] = None,
    owasp: Optional[str] = None,
) -> Callable[[Callable], Rule]:
    """
    Decorator to mark a function as a security rule.

    The decorated function is wrapped in a Rule, the Rule is added to the
    global registry, and the Rule object (not the original function) is what
    the decorated name refers to afterwards. The arguments are stored as
    given; no validation is performed here.

    Args:
        id: Unique rule identifier
        severity: critical | high | medium | low
        cwe: Optional CWE identifier
        owasp: Optional OWASP category

    Returns:
        Decorator function

    Example:
        @rule(id="code-injection", severity="critical", cwe="CWE-94")
        def detect_code_injection():
            '''Detects code injection via eval'''
            return calls("eval", "exec")
    """

    def decorator(func: Callable) -> Rule:
        rule_obj = Rule(id=id, severity=severity, func=func, cwe=cwe, owasp=owasp)
        # _register_rule looks exactly two frames up the stack to find the
        # module applying this decorator, so it must be called directly from
        # here (no extra wrapper in between).
        _register_rule(rule_obj)
        return rule_obj

    return decorator