diffsense 2.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapters/__init__.py +0 -0
- adapters/base.py +27 -0
- adapters/github_adapter.py +164 -0
- adapters/gitlab_adapter.py +207 -0
- adapters/local_adapter.py +136 -0
- banner.py +71 -0
- cli.py +606 -0
- config/__init__.py +1 -0
- config/rules.yaml +371 -0
- core/__init__.py +235 -0
- core/ast_detector.py +853 -0
- core/change.py +46 -0
- core/composer.py +93 -0
- core/evaluator.py +15 -0
- core/ignore_manager.py +71 -0
- core/knowledge.py +77 -0
- core/parser.py +181 -0
- core/parser_manager.py +104 -0
- core/quality_manager.py +117 -0
- core/renderer.py +197 -0
- core/rule_base.py +98 -0
- core/rule_runtime.py +103 -0
- core/rules.py +718 -0
- core/run_config.py +85 -0
- core/semantic_diff.py +359 -0
- core/signal_model.py +21 -0
- core/signals_registry.py +62 -0
- diffsense-2.2.12.dist-info/METADATA +18 -0
- diffsense-2.2.12.dist-info/RECORD +58 -0
- diffsense-2.2.12.dist-info/WHEEL +5 -0
- diffsense-2.2.12.dist-info/entry_points.txt +3 -0
- diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
- diffsense-2.2.12.dist-info/top_level.txt +11 -0
- diffsense_mcp/__init__.py +1 -0
- diffsense_mcp/launcher.py +28 -0
- diffsense_mcp/server.py +687 -0
- governance/lifecycle.py +54 -0
- main.py +318 -0
- rules/__init__.py +246 -0
- rules/api_compatibility.py +372 -0
- rules/collection_handling.py +349 -0
- rules/concurrency.py +194 -0
- rules/concurrency_adapter.py +250 -0
- rules/cross_language_adapter.py +444 -0
- rules/exception_handling.py +320 -0
- rules/go_rules.py +401 -0
- rules/null_safety.py +301 -0
- rules/resource_management.py +222 -0
- rules/yaml_adapter.py +195 -0
- run_audit.py +478 -0
- sdk/cpp_adapter.py +238 -0
- sdk/go_adapter.py +199 -0
- sdk/java_adapter.py +199 -0
- sdk/javascript_adapter.py +229 -0
- sdk/language_adapter.py +313 -0
- sdk/python_adapter.py +195 -0
- sdk/rule.py +63 -0
- sdk/signal.py +14 -0
rules/go_rules.py
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Go Language Rules for DiffSense
|
|
3
|
+
|
|
4
|
+
These rules are Go-specific implementations following the same patterns
|
|
5
|
+
as the Java rules but adapted for Go language constructs.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import Dict, Any, List, Optional
|
|
10
|
+
from sdk.rule import BaseRule
|
|
11
|
+
from sdk.signal import Signal
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class GoGoroutineLeakRule(BaseRule):
    """Detect goroutine-leak risk: goroutines started without a visible exit path.

    A goroutine launch on an added line is reported when that line mentions
    no context / done channel and no added line in the diff contains a
    ``select`` block or a ``defer`` statement.
    """

    def __init__(self):
        # re.MULTILINE makes ``^`` anchor at the start of every line, so the
        # pattern works both on the whole diff text and on a single line.
        # Without it, searching the whole diff only ever tested its first line.
        self._goroutine_pattern = re.compile(r'^\+.*\bgo\s+\w+\.?\w*\s*\(', re.MULTILINE)
        self._context_pattern = re.compile(r'(?:context\.Context|ctx|done\s*chan)')
        self._select_pattern = re.compile(r'\bselect\s*{')
        self._defer_pattern = re.compile(r'\bdefer\s+')

    @property
    def id(self) -> str:
        return "resource.goroutine_leak"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Goroutine started without proper exit mechanism (context, channel, or defer)"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding for the first unguarded goroutine launch, else None.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` (diff text) and
                ``'files'`` (list whose first entry is reported).
            signals: Not used by this rule.

        Returns:
            ``{"file": ..., "goroutine": <stripped added line>}`` or ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        # Cheap gate: is a goroutine started on any added line at all?
        if not self._goroutine_pattern.search(raw_diff):
            return None

        added_lines = [line for line in raw_diff.split('\n') if line.startswith('+')]

        # These two checks look at the whole set of added lines, so they do
        # not depend on which goroutine line is being examined.
        has_select = any(self._select_pattern.search(l) for l in added_lines)
        has_defer = any(self._defer_pattern.search(l) for l in added_lines)
        if has_select or has_defer:
            return None

        for line in added_lines:
            if not self._goroutine_pattern.search(line):
                continue
            # A context / done channel on the launching line counts as an exit path.
            if self._context_pattern.search(line):
                continue
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown", "goroutine": line.strip()}

        return None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class GoChannelLeakRule(BaseRule):
    """Detect channel misuse: channels created without ``close`` or with a buffer."""

    def __init__(self):
        # re.MULTILINE is required because the pattern is searched against the
        # whole diff text; ``^`` must anchor at every line, not only the first.
        self._chan_pattern = re.compile(
            r'^\+.*(?:make\s*\(\s*chan|chan\s+\w+\s*=)', re.MULTILINE
        )
        self._close_pattern = re.compile(r'\bclose\s*\(')
        # NOTE(review): matches any buffer size >= 1, although the rationale
        # speaks of a "large" buffer -- confirm whether a threshold is intended.
        self._buffered_chan = re.compile(r'make\s*\(\s*chan\s+\w+\s*,\s*[1-9]\d*\s*\)')

    @property
    def id(self) -> str:
        return "resource.channel_leak"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Channel created without close or with large buffer, may cause goroutine hang"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding when an added line creates a channel unsafely.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.

        Returns:
            ``{"file": ...}`` when no ``close(`` appears anywhere in the diff,
            ``{"file": ..., "buffered_channels": [...]}`` when buffered
            ``make(chan T, n)`` calls are present, otherwise ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if not self._chan_pattern.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        target = files[0] if files else "unknown"

        # No close() call anywhere in the diff.
        if not self._close_pattern.search(raw_diff):
            return {"file": target}

        # Buffered channels are reported even when a close() is present.
        buffered_matches = self._buffered_chan.findall(raw_diff)
        if buffered_matches:
            return {"file": target, "buffered_channels": buffered_matches}

        return None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class GoDeferMisuseRule(BaseRule):
    """Detect ``defer`` misuse: defer inside a loop, or defer with computed arguments."""

    def __init__(self):
        # The previous pattern demanded ``{`` right after ``for``/``range``, so
        # it could only match a bare ``for {`` and missed every loop that has a
        # header (``for i := 0; ...`` or ``for _, x := range xs``).
        # Now: ``for``, any single-line header without braces, the opening
        # brace, then a ``defer`` before the first closing brace.
        self._defer_in_loop = re.compile(r'\bfor\b[^{}\n]*\{[^}]*\bdefer\s+')
        # Not used by evaluate(); MULTILINE keeps ``^`` meaningful should it
        # ever be applied to multi-line text.
        self._defer_pattern = re.compile(r'^\+.*\bdefer\s+', re.MULTILINE)
        self._defer_with_args = re.compile(r'defer\s+\w+\s*\(\s*\w+\s*[+\-*/]')

    @property
    def id(self) -> str:
        return "resource.defer_misuse"

    @property
    def severity(self) -> str:
        return "medium"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Defer used in loop or with evaluated arguments, may cause resource leak"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding for a defer inside a loop or with an arithmetic argument.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.

        Returns:
            ``{"file": ..., "issue": "defer_in_loop"}``,
            ``{"file": ..., "issue": "defer_args_evaluated"}`` or ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")
        files = diff_data.get('files', [])
        target = files[0] if files else "unknown"

        # defer inside a loop body (searched across the whole diff text)
        if self._defer_in_loop.search(raw_diff):
            return {"file": target, "issue": "defer_in_loop"}

        # defer whose first argument is an arithmetic expression: it is
        # evaluated at the defer statement, not when the call finally runs.
        for line in raw_diff.split('\n'):
            if line.startswith('+') and self._defer_with_args.search(line):
                return {"file": target, "issue": "defer_args_evaluated"}

        return None
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class GoUnsafeUsageRule(BaseRule):
    """Detect use of the ``unsafe`` package (pointer casts, Sizeof/Alignof/Offsetof)."""

    def __init__(self):
        # Searched against the whole diff, so ``^`` must anchor at every line
        # (re.MULTILINE); otherwise only the first diff line was ever tested.
        self._unsafe_pattern = re.compile(
            r'^\+.*\bunsafe\.(?:Pointer|Sizeof|Alignof|Offsetof)\s*\(', re.MULTILINE
        )
        # Not referenced by evaluate(); kept for compatibility.
        self._type_assertion = re.compile(r'\(\*\w+\)\(unsafe\.Pointer')

    @property
    def id(self) -> str:
        return "security.unsafe_usage"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "security"

    @property
    def rationale(self) -> str:
        return "Unsafe package usage may cause memory safety issues"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` when an added line calls into ``unsafe``, else None.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if self._unsafe_pattern.search(raw_diff):
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown"}

        return None
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class GoErrorHandlingRule(BaseRule):
    """Detect poor error handling: discarded results and unchecked ``err`` values."""

    def __init__(self):
        self._ignore_error = re.compile(r'_\s*=\s*\w+\s*\([^)]*\)')
        self._error_return = re.compile(r'\w+\s*,\s*err\s*:=')
        self._no_error_check = re.compile(r'if\s+err\s*(?:!=|==)\s*nil')

    @property
    def id(self) -> str:
        return "exception.error_ignored"

    @property
    def severity(self) -> str:
        return "medium"

    @property
    def impact(self) -> str:
        return "maintenance"

    @property
    def rationale(self) -> str:
        return "Error return value ignored, may hide failures"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding for the first ignored or unchecked error on an added line.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.

        Returns:
            ``{"file": ..., "issue": "error_ignored"}`` for ``_ = call(...)``,
            ``{"file": ..., "issue": "error_not_checked"}`` when ``x, err :=``
            is not followed by an ``if err ==/!= nil`` within the same or the
            next four added lines, otherwise ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        added_lines = [line for line in raw_diff.split('\n') if line.startswith('+')]
        for idx, line in enumerate(added_lines):
            # Result explicitly discarded with the blank identifier.
            if self._ignore_error.search(line):
                files = diff_data.get('files', [])
                return {"file": files[0] if files else "unknown", "issue": "error_ignored"}

            # An err value is produced: look for a check in the following lines.
            if self._error_return.search(line):
                # Use the running index instead of list.index(line): index()
                # returns the FIRST occurrence, so a repeated line would be
                # checked against the wrong window (and costs O(n) per hit).
                context = '\n'.join(added_lines[idx:idx + 5])
                if not self._no_error_check.search(context):
                    files = diff_data.get('files', [])
                    return {"file": files[0] if files else "unknown", "issue": "error_not_checked"}

        return None
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class GoNilPointerRule(BaseRule):
    """Detect method calls on added lines that have no nil check nearby."""

    def __init__(self):
        # Applied to one diff line at a time, so ``^`` anchors at the line start.
        self._method_call = re.compile(r'^\+.*\w+\s*\.\s*\w+\s*\(')
        self._nil_check = re.compile(r'(?:if\s+\w+\s*==\s*nil|if\s+\w+\s*!=\s*nil|\w+\s*==\s*nil\s*\?|nil\s*check)')

    @property
    def id(self) -> str:
        return "null.nil_dereference"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Method call on potentially nil pointer without nil check"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Report the first added method call with no nil check within three lines.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.

        Returns:
            ``{"file": ..., "call": <stripped line>}`` or ``None``.
        """
        diff_lines = diff_data.get('raw_diff', "").split('\n')

        for pos, text in enumerate(diff_lines):
            if not text.startswith('+'):
                continue
            if not self._method_call.search(text):
                continue

            # Window: three lines before through three lines after, of any
            # kind (added, removed or context).
            window = diff_lines[max(0, pos - 3):pos + 4]
            if self._nil_check.search('\n'.join(window)):
                continue

            touched = diff_data.get('files', [])
            return {"file": touched[0] if touched else "unknown", "call": text.strip()}

        return None
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
class GoRaceConditionRule(BaseRule):
    """Detect race-condition risk: map/slice/chan variables without synchronization."""

    def __init__(self):
        # Searched against the whole diff, so ``^`` must anchor at every line
        # (re.MULTILINE); otherwise only the first diff line was ever tested.
        self._shared_var = re.compile(
            r'^\+.*(?:var\s+\w+\s+|:=\s*\w+\s*=).*(?:map|slice|chan)', re.MULTILINE
        )
        self._mutex_pattern = re.compile(r'(?:sync\.(?:Mutex|RWMutex)|\.Lock\(\)|\.Unlock\(\))')
        self._atomic_pattern = re.compile(r'atomic\.(?:Load|Store|Add|Swap|CompareAndSwap)')

    @property
    def id(self) -> str:
        return "runtime.race_condition"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Shared variable access without synchronization, potential race condition"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` for an added shared variable with no sync in the diff.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.

        Returns:
            A finding when an added line declares a map/slice/chan variable and
            the diff mentions neither a mutex nor a ``sync/atomic`` call;
            otherwise ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if not self._shared_var.search(raw_diff):
            return None

        # Any mutex or atomic usage anywhere in the diff counts as protection.
        if self._mutex_pattern.search(raw_diff) or self._atomic_pattern.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        return {"file": files[0] if files else "unknown"}
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
class GoHTTPSecurityRule(BaseRule):
    """Detect HTTP security issues (concatenated request URLs, plain ListenAndServe)."""

    def __init__(self):
        # Gate pattern, searched against the whole diff: ``^`` must anchor at
        # every line (re.MULTILINE), otherwise only the first line was tested
        # and the checks below were never reached.
        self._http_pattern = re.compile(
            r'^\+.*http\.(?:Get|Post|HandleFunc|Handle)\s*\(', re.MULTILINE
        )
        self._path_traversal = re.compile(r'http\.(?:Get|Post)\s*\([^)]*\+[^)]*\)')
        self._no_auth = re.compile(r'http\.ListenAndServe\s*\(\s*"[^"]*"', re.IGNORECASE)

    @property
    def id(self) -> str:
        return "security.http_vulnerability"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "security"

    @property
    def rationale(self) -> str:
        return "HTTP handler with potential security issues (path traversal, no auth)"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding when the diff adds risky ``net/http`` usage.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.

        Returns:
            ``{"file": ..., "issue": "path_traversal"}`` when an
            ``http.Get``/``http.Post`` argument list contains ``+``,
            ``{"file": ..., "issue": "no_authentication"}`` when
            ``http.ListenAndServe("...")`` is present, otherwise ``None``.
            Both checks run only if an added line calls
            ``http.Get/Post/HandleFunc/Handle``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if not self._http_pattern.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        target = files[0] if files else "unknown"

        # String concatenation inside the request arguments.
        if self._path_traversal.search(raw_diff):
            return {"file": target, "issue": "path_traversal"}

        # Server started with a literal address.
        if self._no_auth.search(raw_diff):
            return {"file": target, "issue": "no_authentication"}

        return None
|
rules/null_safety.py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, Any, List, Optional
|
|
3
|
+
from sdk.rule import BaseRule
|
|
4
|
+
from sdk.signal import Signal
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class NullReturnIgnoredRule(BaseRule):
    """Flag ``.get(arg)`` calls on added lines that show no null handling.

    Kept deliberately narrow to limit false positives.
    """

    def __init__(self):
        # Only ``.get(...)`` with at least one argument (Map/Collection style).
        self._null_return_methods = [
            r'\.get\s*\([^)]+\)',
        ]
        alternatives = '|'.join(self._null_return_methods)
        self._added_call = re.compile(r'^\+.*(' + alternatives + r')')
        # Tokens accepted as evidence of null handling on the same line.
        self._null_check = re.compile(
            r'(?:if\s*\([^)]*(?:==|!=|isNull|nonNull|Objects\.|Optional)|Optional\.|orElse|orElseGet)',
            re.IGNORECASE
        )

    @property
    def id(self) -> str:
        return "null.return_ignored"

    @property
    def severity(self) -> str:
        # Low: an unchecked .get() is an extremely common pattern.
        return "low"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Map.get() without null check may cause NPE"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Report the first added ``.get(arg)`` line without a null-handling token.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.

        Returns:
            ``{"file": ..., "method": <stripped line>}`` or ``None``.
        """
        diff_text = diff_data.get('raw_diff', "")

        for text in diff_text.split('\n'):
            if not text.startswith('+'):
                continue
            if not self._added_call.search(text):
                continue
            # Same-line null handling (orElse, Optional, comparison, ...) is accepted.
            if self._null_check.search(text):
                continue
            touched = diff_data.get('files', [])
            return {"file": touched[0] if touched else "unknown", "method": text.strip()}

        return None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class OptionalUnwrapRule(BaseRule):
    """Flag argument-less ``.get()`` calls (Optional.get()) lacking an ``orElse`` fallback."""

    def __init__(self):
        # An added line containing ``.get()`` with no arguments.
        self._dangerous_get = re.compile(
            r'^\+.*(?<!\.)\.get\s*\(\s*\)',
            re.MULTILINE
        )
        # Safe unwrapping on the same line: orElse(...) / orElseGet(...)
        self._optional_safe = re.compile(
            r'\.orElse(?:Get)?\s*\(',
            re.MULTILINE
        )

    @property
    def id(self) -> str:
        return "null.optional_unsafe_get"

    @property
    def severity(self) -> str:
        # Low: Optional.get() is legitimate in many situations.
        return "low"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Optional.get() without isPresent() check - ensure null is handled"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` for the first added, unguarded ``.get()`` call.

        Only added lines are inspected, so removed and context lines no longer
        produce findings. The unreachable trailing branch, which referenced an
        attribute that is never defined, and the unused local import were
        removed.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        for line in raw_diff.split('\n'):
            if not line.startswith('+'):
                continue
            if not self._dangerous_get.search(line):
                continue

            # Skip lines that already fall back via orElse/orElseGet.
            if self._optional_safe.search(line):
                continue

            # Skip lines that carry a comment marker.
            if '//' in line or '/*' in line:
                continue

            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown"}

        return None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class AutoboxingNPERule(BaseRule):
    """Detect auto-unboxing of wrapper types that can raise a NullPointerException."""

    def __init__(self):
        self._wrapper_types = ['Integer', 'Long', 'Double', 'Float', 'Boolean', 'Character', 'Short', 'Byte']
        wrappers = '|'.join(self._wrapper_types)
        # Both patterns are searched against the whole diff, so both need
        # re.MULTILINE; previously this one lacked it and only ever tested
        # the first line of the diff.
        # NOTE(review): matches any wrapper-typed assignment whose right-hand
        # side is not ``new <Wrapper>`` -- fairly broad; confirm the intent.
        self._unbox_pattern = re.compile(
            r'^\+.*(?:' + wrappers + r')\s*\w+\s*=\s*(?!new\s+(?:' + wrappers + r'))',
            re.MULTILINE
        )
        # Getter result used directly in arithmetic or compareTo.
        self._method_return_wrapper = re.compile(
            r'^\+.*\.(?:get|getValue|getOrDefault)\s*\([^)]*\)\s*(?:[+\-*/]|compareTo)',
            re.MULTILINE
        )

    @property
    def id(self) -> str:
        return "null.autoboxing_npe"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Wrapper type auto-unboxed to primitive, NPE if null"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` when an added line matches either pattern, else None.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if self._unbox_pattern.search(raw_diff) or self._method_return_wrapper.search(raw_diff):
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown"}

        return None
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class ChainedMethodCallNPERule(BaseRule):
    """Flag chained method calls without null safety, only for DTO/entity-style paths."""

    def __init__(self):
        # Strict chain shape: ``x.a(...).b(`` -- two calls, not attribute access.
        self._chained_call = re.compile(
            r'^\+.*\w+\s*\.\s*\w+\s*\([^)]*\)\s*\.\s*\w+\s*\(',
            re.MULTILINE
        )
        # Constructs accepted as null-safe on the same line.
        self._null_safe = re.compile(
            r'(?:Objects\.(?:requireNonNull|firstNonNull)|Optional\.ofNullable|\.map\s*\(|\?\.)',
            re.IGNORECASE
        )
        # Path fragments that mark a file as high risk.
        self._high_risk_patterns = [
            r'/dto/',
            r'/entity/',
            r'/model/',
            r'/vo/',
            r'/bo/',
            r'/domain/'
        ]

    @property
    def id(self) -> str:
        return "null.chained_call_unsafe"

    @property
    def severity(self) -> str:
        # Low: most chained calls in business code are safe.
        return "low"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Chained method calls in DTO/Entity without null safety"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Report an unsafe chained call when a changed file lives on a high-risk path.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.

        Returns:
            ``{"file": <first high-risk file>, "chain": <first offending line>}``
            or ``None`` when there is no offending line or no high-risk file.
        """
        diff_text = diff_data.get('raw_diff', "")
        changed_files = diff_data.get('files', [])

        # First added line that chains calls without a null-safe construct.
        offending = None
        for text in diff_text.split('\n'):
            if (
                text.startswith('+')
                and self._chained_call.search(text)
                and not self._null_safe.search(text)
            ):
                offending = text
                break

        if offending is None:
            return None

        # The file check does not depend on the line, so one pass is enough.
        for path in changed_files:
            for fragment in self._high_risk_patterns:
                if re.search(fragment, path, re.IGNORECASE):
                    return {"file": path, "chain": offending.strip()}

        return None
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
class ArrayIndexOutOfBoundsRule(BaseRule):
    """Detect array/collection index expressions that may go out of bounds."""

    def __init__(self):
        # Literal index such as ``arr[0]``.
        # NOTE(review): compiled but not consulted by evaluate() -- confirm
        # whether that is intentional.
        self._direct_access = re.compile(
            r'^\+.*\w+\s*\[\s*(?:0|[1-9]\d*)\s*\]',
            re.MULTILINE
        )
        # Index derived from size/length, e.g. ``arr[list.size - 1``.
        self._size_dependent = re.compile(
            r'^\+.*\w+\s*\[\s*\w+\.(?:size|length)\s*[-+]\s*[1-9]',
            re.MULTILINE
        )

    @property
    def id(self) -> str:
        return "null.array_index_unsafe"

    @property
    def severity(self) -> str:
        return "medium"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Direct array access with hardcoded index or size-based calculation, bounds not checked"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` when an added line indexes by ``size``/``length`` +/- n.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Not used by this rule.
        """
        diff_text = diff_data.get('raw_diff', "")

        if not self._size_dependent.search(diff_text):
            return None

        touched = diff_data.get('files', [])
        if touched:
            return {"file": touched[0]}
        return {"file": "unknown"}
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
class StringConcatNPERule(BaseRule):
    """String-concatenation NPE rule -- switched off because it misfires too easily."""

    def __init__(self):
        """Set up nothing: the rule is disabled.

        ``"str" + var`` is an extremely common pattern, and Java renders a
        null operand as the text "null" instead of throwing.
        """

    @property
    def id(self) -> str:
        return "null.string_concat_unsafe"

    @property
    def severity(self) -> str:
        return "low"

    @property
    def impact(self) -> str:
        return "maintenance"

    @property
    def rationale(self) -> str:
        # Worded to reflect that the rule is disabled.
        return "Disabled: Java string concatenation handles null gracefully"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Always return ``None``; the rule never produces a finding."""
        return None
|