codepathfinder 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of codepathfinder might be problematic. Click here for more details.

@@ -0,0 +1,148 @@
1
+ """
2
+ Core matchers for the pathfinder Python DSL.
3
+
4
+ These matchers generate JSON IR for the Go executor.
5
+ """
6
+
7
+ from .ir import IRType
8
+
9
+
10
class CallMatcher:
    """
    Matches function/method calls in the callgraph.

    A matcher holds one or more call-name patterns and serializes them to
    the JSON IR consumed by the Go executor. The IR always carries
    ``"match_mode": "any"``, i.e. a call matches if ANY pattern matches.

    Examples:
        calls("eval")                # Exact match
        calls("eval", "exec")        # Multiple patterns
        calls("request.*")           # Wildcard (any request.* call)
        calls("*.json")              # Wildcard (any *.json call)
    """

    def __init__(self, *patterns: str):
        """
        Args:
            *patterns: Function names to match. Supports wildcards (*).

        Raises:
            ValueError: If no patterns provided, or any pattern is empty
                or not a string
        """
        if not patterns:
            raise ValueError("calls() requires at least one pattern")

        # Check the type before truthiness so that arbitrary objects (whose
        # __bool__ may misbehave) are rejected with the documented ValueError.
        if any(not isinstance(p, str) or not p for p in patterns):
            raise ValueError("All patterns must be non-empty strings")

        # Patterns in the order given by the caller.
        self.patterns = list(patterns)
        # True when at least one pattern contains a "*" wildcard.
        self.wildcard = any("*" in p for p in self.patterns)

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        The returned "patterns" list is a copy, so callers may post-process
        the IR without altering this matcher.

        Returns:
            {
                "type": "call_matcher",
                "patterns": ["eval", "exec"],
                "wildcard": false,
                "match_mode": "any"   # matches if ANY pattern matches
            }
        """
        return {
            "type": IRType.CALL_MATCHER.value,
            "patterns": list(self.patterns),
            "wildcard": self.wildcard,
            "match_mode": "any",
        }

    @staticmethod
    def _quote(pattern: str) -> str:
        # Render as a double-quoted literal; escape backslashes first so the
        # escapes added for quotes are not themselves doubled.
        escaped = pattern.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    def __repr__(self) -> str:
        """Return the DSL expression for this matcher, e.g. calls("eval")."""
        patterns_str = ", ".join(self._quote(p) for p in self.patterns)
        return f"calls({patterns_str})"
60
+
61
+
62
class VariableMatcher:
    """
    Matches variable references by name.

    Holds a single name pattern and serializes it to the JSON IR consumed
    by the Go executor.

    Examples:
        variable("user_input")       # Exact match
        variable("user_*")           # Wildcard prefix
        variable("*_id")             # Wildcard suffix
    """

    def __init__(self, pattern: str):
        """
        Args:
            pattern: Variable name pattern. Supports wildcards (*).

        Raises:
            ValueError: If pattern is empty or not a string
        """
        # Check the type before truthiness so that arbitrary objects (whose
        # __bool__ may misbehave) are rejected with the documented ValueError.
        if not isinstance(pattern, str) or not pattern:
            raise ValueError("variable() requires a non-empty string pattern")

        self.pattern = pattern
        # True when the pattern contains a "*" wildcard.
        self.wildcard = "*" in pattern

    def to_ir(self) -> dict:
        """
        Serialize to JSON IR for Go executor.

        Returns:
            {
                "type": "variable_matcher",
                "pattern": "user_input",
                "wildcard": false
            }
        """
        return {
            "type": IRType.VARIABLE_MATCHER.value,
            "pattern": self.pattern,
            "wildcard": self.wildcard,
        }

    def __repr__(self) -> str:
        """Return the DSL expression for this matcher, e.g. variable("x")."""
        # Escape backslashes first, then quotes, so a pattern containing a
        # double quote still renders as one well-formed quoted literal.
        escaped = self.pattern.replace("\\", "\\\\").replace('"', '\\"')
        return f'variable("{escaped}")'
105
+
106
+
107
+ # Public API
108
def calls(*patterns: str) -> CallMatcher:
    """
    Build a CallMatcher for the given function/method call patterns.

    This is the public DSL entry point; all validation is done by
    CallMatcher itself.

    Args:
        *patterns: One or more function names to match (wildcards allowed)

    Returns:
        A new CallMatcher holding the patterns

    Examples:
        >>> calls("eval")
        calls("eval")

        >>> calls("request.GET", "request.POST")
        calls("request.GET", "request.POST")

        >>> calls("urllib.*")
        calls("urllib.*")
    """
    matcher = CallMatcher(*patterns)
    return matcher
129
+
130
+
131
def variable(pattern: str) -> VariableMatcher:
    """
    Build a VariableMatcher for the given variable-name pattern.

    This is the public DSL entry point; all validation is done by
    VariableMatcher itself.

    Args:
        pattern: Variable name to match (wildcards allowed)

    Returns:
        A new VariableMatcher holding the pattern

    Examples:
        >>> variable("user_input")
        variable("user_input")

        >>> variable("*_id")
        variable("*_id")
    """
    matcher = VariableMatcher(pattern)
    return matcher
@@ -0,0 +1,135 @@
1
+ """
2
+ Propagation presets for common use cases.
3
+
4
+ Presets bundle propagation primitives for convenience.
5
+ """
6
+
7
+ from typing import List
8
+ from .propagation import propagates, PropagationPrimitive
9
+
10
+
11
+ class PropagationPresets:
12
+ """
13
+ Common propagation bundles.
14
+
15
+ Developers can use presets instead of manually listing primitives.
16
+ """
17
+
18
+ @staticmethod
19
+ def minimal() -> List[PropagationPrimitive]:
20
+ """
21
+ Bare minimum propagation (fastest, least false negatives).
22
+
23
+ Covers:
24
+ - Variable assignments
25
+ - Function arguments
26
+
27
+ Coverage: ~40% of real-world flows
28
+ Performance: Fastest (minimal overhead)
29
+ False negatives: Higher (misses return values, strings)
30
+
31
+ Use when:
32
+ - Performance is critical
33
+ - You only care about direct variable flows
34
+
35
+ Example:
36
+ flows(
37
+ from_sources=calls("request.GET"),
38
+ to_sinks=calls("eval"),
39
+ propagates_through=PropagationPresets.minimal(),
40
+ scope="local"
41
+ )
42
+ """
43
+ return [
44
+ propagates.assignment(),
45
+ propagates.function_args(),
46
+ ]
47
+
48
+ @staticmethod
49
+ def standard() -> List[PropagationPrimitive]:
50
+ """
51
+ Recommended default (good balance).
52
+
53
+ Covers:
54
+ - Phase 1: assignment, function_args, function_returns
55
+ - Phase 2: string_concat, string_format
56
+
57
+ Coverage: ~75-80% of real-world flows
58
+ Performance: Good (moderate overhead)
59
+ False negatives: Lower
60
+
61
+ Use when:
62
+ - General-purpose taint analysis
63
+ - OWASP Top 10 detection
64
+ - Good balance of coverage and performance
65
+
66
+ Example:
67
+ flows(
68
+ from_sources=calls("request.*"),
69
+ to_sinks=calls("execute"),
70
+ propagates_through=PropagationPresets.standard(),
71
+ scope="global"
72
+ )
73
+ """
74
+ return [
75
+ propagates.assignment(),
76
+ propagates.function_args(),
77
+ propagates.function_returns(),
78
+ propagates.string_concat(),
79
+ propagates.string_format(),
80
+ ]
81
+
82
+ @staticmethod
83
+ def comprehensive() -> List[PropagationPrimitive]:
84
+ """
85
+ All MVP primitives (Phase 1 + Phase 2).
86
+
87
+ Covers:
88
+ - All standard() primitives
89
+
90
+ Coverage: ~80% of real-world flows
91
+ Performance: Moderate
92
+ False negatives: Low
93
+
94
+ Use when:
95
+ - Maximum coverage within MVP scope
96
+ - Willing to accept moderate performance overhead
97
+
98
+ Example:
99
+ flows(
100
+ from_sources=calls("request.*"),
101
+ to_sinks=calls("eval"),
102
+ propagates_through=PropagationPresets.comprehensive(),
103
+ scope="global"
104
+ )
105
+ """
106
+ return PropagationPresets.standard() # For MVP, comprehensive = standard
107
+
108
+ @staticmethod
109
+ def exhaustive() -> List[PropagationPrimitive]:
110
+ """
111
+ All primitives (Phase 1-6, POST-MVP).
112
+
113
+ NOTE: For MVP, this is same as comprehensive().
114
+ Post-MVP will include collections, control flow, OOP, advanced.
115
+
116
+ Coverage: ~95% of real-world flows (POST-MVP)
117
+ Performance: Slower (comprehensive analysis)
118
+ False negatives: Minimal
119
+
120
+ Use when:
121
+ - Maximum security coverage required
122
+ - Performance is not a concern
123
+ - Production-critical code
124
+
125
+ Example:
126
+ flows(
127
+ from_sources=calls("request.*"),
128
+ to_sinks=calls("execute"),
129
+ propagates_through=PropagationPresets.exhaustive(),
130
+ scope="global"
131
+ )
132
+ """
133
+ # MVP: same as comprehensive
134
+ # POST-MVP: will include Phase 3-6 primitives
135
+ return PropagationPresets.comprehensive()
@@ -0,0 +1,250 @@
1
+ """
2
+ Taint propagation primitives for dataflow analysis.
3
+
4
+ These primitives define HOW taint propagates through code constructs.
5
+ Developers specify which primitives to enable via propagates_through parameter.
6
+ """
7
+
8
+ from typing import Dict, Any, List, Optional
9
+ from enum import Enum
10
+
11
+
12
class PropagationType(Enum):
    """
    Closed set of propagation primitive kinds.

    Each member's value is the string written into the JSON IR "type"
    field of a propagation primitive.

    Phase 1 (MVP):
        ASSIGNMENT, FUNCTION_ARGS, FUNCTION_RETURNS

    Phase 2 (MVP):
        STRING_CONCAT, STRING_FORMAT

    Phase 3-6 (Post-MVP):
        Collections, control flow, OOP, advanced
    """

    # --- Phase 1: bare minimum (MVP) ---
    ASSIGNMENT = "assignment"
    FUNCTION_ARGS = "function_args"
    FUNCTION_RETURNS = "function_returns"

    # --- Phase 2: string operations (MVP) ---
    STRING_CONCAT = "string_concat"
    STRING_FORMAT = "string_format"

    # --- Phase 3: collections (post-MVP) ---
    LIST_APPEND = "list_append"
    LIST_EXTEND = "list_extend"
    DICT_VALUES = "dict_values"
    DICT_UPDATE = "dict_update"
    SET_ADD = "set_add"

    # --- Phase 4: control flow (post-MVP) ---
    IF_CONDITION = "if_condition"
    FOR_ITERATION = "for_iteration"
    WHILE_CONDITION = "while_condition"
    SWITCH_CASE = "switch_case"

    # --- Phase 5: OOP (post-MVP) ---
    ATTRIBUTE_ASSIGNMENT = "attribute_assignment"
    METHOD_CALL = "method_call"
    CONSTRUCTOR = "constructor"

    # --- Phase 6: advanced (post-MVP) ---
    COMPREHENSION = "comprehension"
    LAMBDA_CAPTURE = "lambda_capture"
    YIELD_STMT = "yield_stmt"
57
+
58
+
59
class PropagationPrimitive:
    """
    Base class for propagation primitives.

    Each primitive describes ONE way taint can flow through code and
    serializes to a small JSON IR dictionary via to_ir().
    """

    def __init__(
        self, prim_type: PropagationType, metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Args:
            prim_type: The type of propagation
            metadata: Optional additional configuration; copied (shallowly)
                so later changes to the caller's dict do not affect this
                primitive
        """
        self.type = prim_type
        # Shallow copy: avoids aliasing the caller's dict. None and empty
        # mappings both become a fresh empty dict.
        self.metadata = dict(metadata) if metadata else {}

    def to_ir(self) -> Dict[str, Any]:
        """
        Serialize to JSON IR.

        The returned "metadata" is a shallow copy, so mutating the IR does
        not change this primitive or IR produced by other calls.

        Returns:
            {
                "type": "assignment",
                "metadata": {}
            }
        """
        return {
            "type": self.type.value,
            "metadata": dict(self.metadata),
        }

    def __repr__(self) -> str:
        """Return the DSL expression form, e.g. propagates.assignment()."""
        return f"propagates.{self.type.value}()"
94
+
95
+
96
class propagates:
    """
    Namespace of factories for taint propagation primitives.

    Every factory is a static method returning a new PropagationPrimitive
    tagged with the corresponding PropagationType.

    Usage:
        propagates.assignment()
        propagates.function_args()
        propagates.function_returns()
        propagates.string_concat()
        propagates.string_format()
    """

    # ----- Phase 1: bare minimum (MVP) -----

    @staticmethod
    def assignment() -> PropagationPrimitive:
        """
        Primitive for taint flowing through variable assignment.

        Patterns matched:
            x = tainted                      # Simple assignment
            a = b = tainted                  # Chained assignment
            x, y = tainted, safe             # Tuple unpacking (x is tainted)

        Described by the project as the most common propagation pattern
        (~40% of all flows).

        Examples:
            user_input = request.GET.get("id")   # source
            query = user_input                   # PROPAGATES via assignment
            cursor.execute(query)                # sink

        Returns:
            PropagationPrimitive of type ASSIGNMENT
        """
        primitive = PropagationPrimitive(PropagationType.ASSIGNMENT)
        return primitive

    @staticmethod
    def function_args() -> PropagationPrimitive:
        """
        Primitive for taint flowing through function arguments.

        Patterns matched:
            func(tainted)                    # Positional argument
            func(arg=tainted)                # Keyword argument
            func(*tainted)                   # Args unpacking
            func(**tainted)                  # Kwargs unpacking

        Described by the project as critical for inter-procedural analysis
        (~30% of flows).

        Examples:
            def process_data(data):
                execute(data)                    # sink (data is tainted)

            user_input = request.GET.get("id")   # source
            process_data(user_input)             # PROPAGATES via function_args

        Returns:
            PropagationPrimitive of type FUNCTION_ARGS
        """
        primitive = PropagationPrimitive(PropagationType.FUNCTION_ARGS)
        return primitive

    @staticmethod
    def function_returns() -> PropagationPrimitive:
        """
        Primitive for taint flowing through return values.

        Patterns matched:
            return tainted                   # Direct return
            return tainted if cond else safe # Conditional return
            return [tainted, safe]           # Return list containing tainted

        Described by the project as essential for functions that transform
        tainted data (~20% of flows).

        Examples:
            def get_user_id():
                user_input = request.GET.get("id")   # source
                return user_input                    # PROPAGATES via return

            query = get_user_id()                # query is now tainted
            execute(query)                       # sink

        Returns:
            PropagationPrimitive of type FUNCTION_RETURNS
        """
        primitive = PropagationPrimitive(PropagationType.FUNCTION_RETURNS)
        return primitive

    # ----- Phase 2: string operations (MVP) -----

    @staticmethod
    def string_concat() -> PropagationPrimitive:
        """
        Primitive for taint flowing through string concatenation.

        Patterns matched:
            result = tainted + "suffix"      # Right concat
            result = "prefix" + tainted      # Left concat
            result = tainted + safe + more   # Mixed concat

        Described by the project as critical for SQL/command injection
        where queries are built via concatenation (~10% of flows).

        Examples:
            user_id = request.GET.get("id")  # source
            query = "SELECT * FROM users WHERE id = " + user_id  # PROPAGATES via string_concat
            cursor.execute(query)            # sink

        Returns:
            PropagationPrimitive of type STRING_CONCAT
        """
        primitive = PropagationPrimitive(PropagationType.STRING_CONCAT)
        return primitive

    @staticmethod
    def string_format() -> PropagationPrimitive:
        """
        Primitive for taint flowing through string formatting.

        Patterns matched:
            f"{tainted}"                     # f-string
            "{}".format(tainted)             # str.format()
            "%s" % tainted                   # % formatting
            "{name}".format(name=tainted)    # Named placeholders

        Described by the project as critical for SQL injection where ORM
        methods use format() (~8% of flows).

        Examples:
            user_id = request.GET.get("id")  # source
            query = f"SELECT * FROM users WHERE id = {user_id}"  # PROPAGATES via string_format
            cursor.execute(query)            # sink

        Returns:
            PropagationPrimitive of type STRING_FORMAT
        """
        primitive = PropagationPrimitive(PropagationType.STRING_FORMAT)
        return primitive

    # ----- Phase 3-6: post-MVP -----
    # Will be implemented in post-MVP PRs
228
+
229
+
230
def create_propagation_list(
    primitives: List[PropagationPrimitive],
) -> List[Dict[str, Any]]:
    """
    Serialize propagation primitives to their JSON IR form.

    Calls to_ir() on each primitive, preserving input order.

    Args:
        primitives: List of PropagationPrimitive objects

    Returns:
        List of JSON IR dictionaries, one per primitive

    Example:
        >>> prims = [propagates.assignment(), propagates.function_args()]
        >>> create_propagation_list(prims)
        [
            {"type": "assignment", "metadata": {}},
            {"type": "function_args", "metadata": {}}
        ]
    """
    serialized = [primitive.to_ir() for primitive in primitives]
    return serialized
@@ -0,0 +1,87 @@
1
+ Metadata-Version: 2.4
2
+ Name: codepathfinder
3
+ Version: 1.0.0
4
+ Summary: Python DSL for code-pathfinder security patterns
5
+ Home-page: https://github.com/shivasurya/code-pathfinder
6
+ Author: code-pathfinder contributors
7
+ License: AGPL-3.0
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Security
18
+ Classifier: Topic :: Software Development :: Testing
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
24
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
25
+ Requires-Dist: black>=23.0.0; extra == "dev"
26
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
27
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
28
+ Dynamic: home-page
29
+ Dynamic: license-file
30
+ Dynamic: requires-python
31
+
32
+ # Code-Pathfinder Python DSL
33
+
34
+ Python DSL for defining security patterns in Code Pathfinder - an open-source security suite combining structural code analysis with AI-powered vulnerability detection.
35
+
36
+ **Project Goals:**
37
+ - Real-time IDE integration bringing security insights directly into your editor
38
+ - AI-assisted analysis leveraging LLMs to understand context and identify vulnerabilities
39
+ - Unified workflow coverage from local development to CI/CD pipelines
40
+ - Flexible reporting supporting DefectDojo, GitHub Advanced Security, SARIF, and other platforms
41
+
42
+ **Documentation**: https://codepathfinder.dev/
43
+
44
+ ## Installation
45
+
46
+ ```bash
47
+ pip install codepathfinder
48
+ ```
49
+
50
+ ## Quick Example
51
+
52
+ ```python
53
+ from codepathfinder import rule, flows, calls
54
+ from codepathfinder.presets import PropagationPresets
55
+
56
+ @rule(id="sql-injection", severity="critical", cwe="CWE-89")
57
+ def detect_sql_injection():
58
+ """Detects SQL injection vulnerabilities"""
59
+ return flows(
60
+ from_sources=calls("request.GET", "request.POST"),
61
+ to_sinks=calls("execute", "executemany"),
62
+ sanitized_by=calls("quote_sql"),
63
+ propagates_through=PropagationPresets.standard(),
64
+ scope="global"
65
+ )
66
+ ```
67
+
68
+ ## Features
69
+
70
+ - **Matchers**: `calls()`, `variable()` for pattern matching
71
+ - **Dataflow Analysis**: `flows()` for source-to-sink taint tracking
72
+ - **Propagation**: Explicit propagation primitives (assignment, function args, returns, string concatenation, string formatting)
73
+ - **Logic Operators**: `And()`, `Or()`, `Not()` for complex rules
74
+ - **JSON IR**: Serializes to JSON for Go executor integration
75
+
76
+ ## Documentation
77
+
78
+ For detailed documentation, visit https://codepathfinder.dev/
79
+
80
+ ## Requirements
81
+
82
+ - Python 3.8+
83
+ - No external dependencies (stdlib only!)
84
+
85
+ ## License
86
+
87
+ AGPL-3.0 - GNU Affero General Public License v3
@@ -0,0 +1,14 @@
1
+ codepathfinder/__init__.py,sha256=rcfHf6nq7bOBDRLEb-ZDBlsFpCxrvie6J2IYYwxUKWY,1194
2
+ codepathfinder/config.py,sha256=jx1Q5QnX2zJKKhai6ISwFIWh7h9M4o06bgZpyieGx98,2473
3
+ codepathfinder/dataflow.py,sha256=H2X3uCc4Srl5WzmjmAeICJggUFSZnNhn1WbrWP7g8Cc,6815
4
+ codepathfinder/decorators.py,sha256=pkvHhf2TLHu1-Gjlqwu718yaIPsPZ4JiSSM2EReshg8,2870
5
+ codepathfinder/ir.py,sha256=K0YfGSFZyysDRd8B-o9gnyou5R3EbwApPsK3qSjmDSE,2837
6
+ codepathfinder/logic.py,sha256=cA76-mhE_A7WmWQtZtufZWxMKSrI4Bt7avJRWi20ud4,2418
7
+ codepathfinder/matchers.py,sha256=o3vINaXOnVVMtxSVYHCbtkID3uDY_Hjcfvma547luwc,3787
8
+ codepathfinder/presets.py,sha256=_EU2WNtMY5PfY1iRcoZuiLkzKRddvtdn6H8tSy1dzGw,3914
9
+ codepathfinder/propagation.py,sha256=yz1ODauUD0hnzDjPWfTIdQojWcvkYbwrnvou4C9Fy6U,7695
10
+ codepathfinder-1.0.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
11
+ codepathfinder-1.0.0.dist-info/METADATA,sha256=yoE0QafGRO_HJYvzfnQF_kflon-_Bd4I_7mCQw1jnWY,2936
12
+ codepathfinder-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ codepathfinder-1.0.0.dist-info/top_level.txt,sha256=Ll603QFZoCmFBDISN1VT5QHmodZsgNiPs00voNqpOZ4,15
14
+ codepathfinder-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+