diffsense 2.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. adapters/__init__.py +0 -0
  2. adapters/base.py +27 -0
  3. adapters/github_adapter.py +164 -0
  4. adapters/gitlab_adapter.py +207 -0
  5. adapters/local_adapter.py +136 -0
  6. banner.py +71 -0
  7. cli.py +606 -0
  8. config/__init__.py +1 -0
  9. config/rules.yaml +371 -0
  10. core/__init__.py +235 -0
  11. core/ast_detector.py +853 -0
  12. core/change.py +46 -0
  13. core/composer.py +93 -0
  14. core/evaluator.py +15 -0
  15. core/ignore_manager.py +71 -0
  16. core/knowledge.py +77 -0
  17. core/parser.py +181 -0
  18. core/parser_manager.py +104 -0
  19. core/quality_manager.py +117 -0
  20. core/renderer.py +197 -0
  21. core/rule_base.py +98 -0
  22. core/rule_runtime.py +103 -0
  23. core/rules.py +718 -0
  24. core/run_config.py +85 -0
  25. core/semantic_diff.py +359 -0
  26. core/signal_model.py +21 -0
  27. core/signals_registry.py +62 -0
  28. diffsense-2.2.12.dist-info/METADATA +18 -0
  29. diffsense-2.2.12.dist-info/RECORD +58 -0
  30. diffsense-2.2.12.dist-info/WHEEL +5 -0
  31. diffsense-2.2.12.dist-info/entry_points.txt +3 -0
  32. diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
  33. diffsense-2.2.12.dist-info/top_level.txt +11 -0
  34. diffsense_mcp/__init__.py +1 -0
  35. diffsense_mcp/launcher.py +28 -0
  36. diffsense_mcp/server.py +687 -0
  37. governance/lifecycle.py +54 -0
  38. main.py +318 -0
  39. rules/__init__.py +246 -0
  40. rules/api_compatibility.py +372 -0
  41. rules/collection_handling.py +349 -0
  42. rules/concurrency.py +194 -0
  43. rules/concurrency_adapter.py +250 -0
  44. rules/cross_language_adapter.py +444 -0
  45. rules/exception_handling.py +320 -0
  46. rules/go_rules.py +401 -0
  47. rules/null_safety.py +301 -0
  48. rules/resource_management.py +222 -0
  49. rules/yaml_adapter.py +195 -0
  50. run_audit.py +478 -0
  51. sdk/cpp_adapter.py +238 -0
  52. sdk/go_adapter.py +199 -0
  53. sdk/java_adapter.py +199 -0
  54. sdk/javascript_adapter.py +229 -0
  55. sdk/language_adapter.py +313 -0
  56. sdk/python_adapter.py +195 -0
  57. sdk/rule.py +63 -0
  58. sdk/signal.py +14 -0
rules/go_rules.py ADDED
@@ -0,0 +1,401 @@
1
+ """
2
+ Go Language Rules for DiffSense
3
+
4
+ These rules are Go-specific implementations following the same patterns
5
+ as the Java rules but adapted for Go language constructs.
6
+ """
7
+
8
+ import re
9
+ from typing import Dict, Any, List, Optional
10
+ from sdk.rule import BaseRule
11
+ from sdk.signal import Signal
12
+
13
+
14
class GoGoroutineLeakRule(BaseRule):
    """Flag goroutines added in a diff that show no visible exit mechanism.

    An added ``go <call>(`` line is reported unless that line mentions a
    context / done channel, or some added line in the diff contains a
    ``select {`` block or a ``defer`` statement.
    """

    def __init__(self):
        # re.MULTILINE makes ``^`` anchor at the start of every diff line.
        # Without it the pattern could only match when the goroutine was the
        # very first line of the whole diff text.
        self._goroutine_pattern = re.compile(r'^\+.*\bgo\s+\w+\.?\w*\s*\(', re.MULTILINE)
        self._context_pattern = re.compile(r'(?:context\.Context|ctx|done\s*chan)')
        self._select_pattern = re.compile(r'\bselect\s*{')
        self._defer_pattern = re.compile(r'\bdefer\s+')

    @property
    def id(self) -> str:
        return "resource.goroutine_leak"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Goroutine started without proper exit mechanism (context, channel, or defer)"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return details of the first unguarded goroutine, or ``None``.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` (diff text) and
                ``'files'`` (list of file names).
            signals: Unused by this rule.

        Returns:
            ``{"file": ..., "goroutine": ...}`` for the first offending added
            line, otherwise ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        # Cheap whole-diff guard: bail out when no added line starts a goroutine.
        if not self._goroutine_pattern.search(raw_diff):
            return None

        added_lines = [line for line in raw_diff.split('\n') if line.startswith('+')]

        # These two checks look at every added line, so they do not depend on
        # which goroutine line is being inspected; compute them once.
        has_select = any(self._select_pattern.search(added) for added in added_lines)
        has_defer = any(self._defer_pattern.search(added) for added in added_lines)
        if has_select or has_defer:
            return None

        for line in added_lines:
            if not self._goroutine_pattern.search(line):
                continue
            # A context / done channel mentioned on the same line counts as an exit path.
            if self._context_pattern.search(line):
                continue
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown", "goroutine": line.strip()}

        return None
66
+
67
+
68
class GoChannelLeakRule(BaseRule):
    """Flag channel creation in a diff that has no ``close(`` call or uses a buffer.

    The rule triggers only when an added line creates a channel. It then
    reports either the absence of any ``close(`` in the diff or the presence
    of buffered ``make(chan T, N)`` expressions with ``N >= 1``.
    """

    def __init__(self):
        # re.MULTILINE makes ``^`` match at each diff line; otherwise only a
        # channel created on the first line of the diff would be detected.
        self._chan_pattern = re.compile(
            r'^\+.*(?:make\s*\(\s*chan|chan\s+\w+\s*=)', re.MULTILINE
        )
        self._close_pattern = re.compile(r'\bclose\s*\(')
        self._buffered_chan = re.compile(r'make\s*\(\s*chan\s+\w+\s*,\s*[1-9]\d*\s*\)')

    @property
    def id(self) -> str:
        return "resource.channel_leak"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Channel created without close or with large buffer, may cause goroutine hang"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding when an added channel looks unclosed or buffered.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Unused by this rule.

        Returns:
            ``{"file": ...}`` when no ``close(`` appears anywhere in the diff,
            ``{"file": ..., "buffered_channels": [...]}`` when buffered
            channels are found, otherwise ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if not self._chan_pattern.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        first_file = files[0] if files else "unknown"

        # No close() anywhere in the diff text.
        if not self._close_pattern.search(raw_diff):
            return {"file": first_file}

        # close() exists; still report buffered channel constructions.
        buffered_matches = self._buffered_chan.findall(raw_diff)
        if buffered_matches:
            return {"file": first_file, "buffered_channels": buffered_matches}

        return None
116
+
117
+
118
class GoDeferMisuseRule(BaseRule):
    """Flag ``defer`` inside a ``for`` loop or ``defer`` with computed arguments."""

    def __init__(self):
        # The previous pattern required ``{`` to follow ``for``/``range``
        # immediately, so only the bare ``for {`` form could match. Allow an
        # arbitrary single-line loop header (``for i := 0; ...``,
        # ``for _, x := range xs``) before the opening brace.
        # ``[^}]*`` keeps the match inside the text up to the first closing
        # brace, so a defer after a nested block in the loop body is not seen.
        self._defer_in_loop = re.compile(r'\bfor\b[^{}\n]*\{[^}]*\bdefer\s+')
        # Kept for compatibility (not used by evaluate); MULTILINE so that
        # ``^`` anchors per diff line if it is searched against a whole diff.
        self._defer_pattern = re.compile(r'^\+.*\bdefer\s+', re.MULTILINE)
        self._defer_with_args = re.compile(r'defer\s+\w+\s*\(\s*\w+\s*[+\-*/]')

    @property
    def id(self) -> str:
        return "resource.defer_misuse"

    @property
    def severity(self) -> str:
        return "medium"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Defer used in loop or with evaluated arguments, may cause resource leak"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding for defer-in-loop or defer-with-expression-argument.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Unused by this rule.

        Returns:
            ``{"file": ..., "issue": "defer_in_loop"}`` or
            ``{"file": ..., "issue": "defer_args_evaluated"}``, else ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")
        files = diff_data.get('files', [])
        first_file = files[0] if files else "unknown"

        # defer inside a loop body (searched over the whole diff text).
        if self._defer_in_loop.search(raw_diff):
            return {"file": first_file, "issue": "defer_in_loop"}

        # defer whose first argument is an arithmetic expression; only added
        # lines are inspected.
        for line in raw_diff.split('\n'):
            if line.startswith('+') and self._defer_with_args.search(line):
                return {"file": first_file, "issue": "defer_args_evaluated"}

        return None
166
+
167
+
168
class GoUnsafeUsageRule(BaseRule):
    """Flag added lines that call into Go's ``unsafe`` package."""

    def __init__(self):
        # re.MULTILINE is required because the pattern is searched against the
        # whole diff text; without it ``^`` only matches the first diff line.
        self._unsafe_pattern = re.compile(
            r'^\+.*\bunsafe\.(?:Pointer|Sizeof|Alignof|Offsetof)\s*\(', re.MULTILINE
        )
        # Not used by evaluate; retained so the attribute stays available.
        self._type_assertion = re.compile(r'\(\*\w+\)\(unsafe\.Pointer')

    @property
    def id(self) -> str:
        return "security.unsafe_usage"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "security"

    @property
    def rationale(self) -> str:
        return "Unsafe package usage may cause memory safety issues"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` when any added line uses ``unsafe.*(``.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Unused by this rule.

        Returns:
            A one-key finding naming the first file, or ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if not self._unsafe_pattern.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        return {"file": files[0] if files else "unknown"}
207
+
208
+
209
class GoErrorHandlingRule(BaseRule):
    """Flag added Go code that discards or never checks an ``err`` value."""

    # Number of added lines (including the assignment itself) scanned for an
    # ``if err != nil`` / ``if err == nil`` check.
    _LOOKAHEAD_LINES = 5

    def __init__(self):
        self._ignore_error = re.compile(r'_\s*=\s*\w+\s*\([^)]*\)')
        self._error_return = re.compile(r'\w+\s*,\s*err\s*:=')
        self._no_error_check = re.compile(r'if\s+err\s*(?:!=|==)\s*nil')

    @property
    def id(self) -> str:
        return "exception.error_ignored"

    @property
    def severity(self) -> str:
        return "medium"

    @property
    def impact(self) -> str:
        return "maintenance"

    @property
    def rationale(self) -> str:
        return "Error return value ignored, may hide failures"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding for the first ignored or unchecked error.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Unused by this rule.

        Returns:
            ``{"file": ..., "issue": "error_ignored"}`` for a ``_ = call(...)``
            line, ``{"file": ..., "issue": "error_not_checked"}`` when an
            ``x, err :=`` line has no nil check in the following added lines,
            otherwise ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")
        added_lines = [line for line in raw_diff.split('\n') if line.startswith('+')]

        # enumerate() gives the real position of each line. Looking the line
        # up with list.index() would return the first identical line, so a
        # repeated ``x, err := f()`` would be checked against the wrong window.
        for position, line in enumerate(added_lines):
            if self._ignore_error.search(line):
                files = diff_data.get('files', [])
                return {"file": files[0] if files else "unknown", "issue": "error_ignored"}

            if self._error_return.search(line):
                window = '\n'.join(added_lines[position:position + self._LOOKAHEAD_LINES])
                if not self._no_error_check.search(window):
                    files = diff_data.get('files', [])
                    return {"file": files[0] if files else "unknown", "issue": "error_not_checked"}

        return None
260
+
261
+
262
class GoNilPointerRule(BaseRule):
    """Flag added method calls that have no nil check in the nearby diff lines."""

    def __init__(self):
        self._method_call = re.compile(r'^\+.*\w+\s*\.\s*\w+\s*\(')
        self._nil_check = re.compile(r'(?:if\s+\w+\s*==\s*nil|if\s+\w+\s*!=\s*nil|\w+\s*==\s*nil\s*\?|nil\s*check)')

    @property
    def id(self) -> str:
        """Rule identifier."""
        return "null.nil_dereference"

    @property
    def severity(self) -> str:
        """Severity label."""
        return "high"

    @property
    def impact(self) -> str:
        """Impact category."""
        return "runtime"

    @property
    def rationale(self) -> str:
        """Human-readable explanation of the finding."""
        return "Method call on potentially nil pointer without nil check"

    @property
    def language(self) -> str:
        """Language this rule targets."""
        return "go"

    @property
    def rule_type(self) -> str:
        """Rule type label."""
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return the first added method call lacking a nearby nil check.

        The window inspected for a nil check is the three diff lines before
        the call, the call line itself, and the three lines after it (all
        diff lines, not only added ones).

        Returns:
            ``{"file": ..., "call": ...}`` or ``None``.
        """
        diff_lines = diff_data.get('raw_diff', "").split('\n')

        for idx, current in enumerate(diff_lines):
            if not current.startswith('+'):
                continue
            if not self._method_call.search(current):
                continue

            start = max(0, idx - 3)
            surrounding = '\n'.join(diff_lines[start:idx + 4])
            if self._nil_check.search(surrounding):
                continue

            files = diff_data.get('files', [])
            first_file = files[0] if files else "unknown"
            return {"file": first_file, "call": current.strip()}

        return None
306
+
307
+
308
class GoRaceConditionRule(BaseRule):
    """Flag added map/slice/chan declarations when the diff shows no synchronization."""

    def __init__(self):
        # re.MULTILINE so that ``^\+`` anchors on every diff line; the pattern
        # is searched against the complete diff text.
        self._shared_var = re.compile(
            r'^\+.*(?:var\s+\w+\s+|:=\s*\w+\s*=).*(?:map|slice|chan)', re.MULTILINE
        )
        self._mutex_pattern = re.compile(r'(?:sync\.(?:Mutex|RWMutex)|\.Lock\(\)|\.Unlock\(\))')
        self._atomic_pattern = re.compile(r'atomic\.(?:Load|Store|Add|Swap|CompareAndSwap)')

    @property
    def id(self) -> str:
        return "runtime.race_condition"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Shared variable access without synchronization, potential race condition"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` when a shared variable is added without locks/atomics.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Unused by this rule.

        Returns:
            A finding naming the first file, or ``None`` when no matching
            declaration is added or the diff mentions a mutex / atomic call.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if not self._shared_var.search(raw_diff):
            return None

        # Any mutex or atomic usage anywhere in the diff suppresses the finding.
        if self._mutex_pattern.search(raw_diff) or self._atomic_pattern.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        return {"file": files[0] if files else "unknown"}
353
+
354
+
355
class GoHTTPSecurityRule(BaseRule):
    """Flag added ``net/http`` usage with concatenated URLs or a bare ``ListenAndServe``."""

    def __init__(self):
        # re.MULTILINE so the added-line anchor works on any line of the diff,
        # not just the first one.
        self._http_pattern = re.compile(
            r'^\+.*http\.(?:Get|Post|HandleFunc|Handle)\s*\(', re.MULTILINE
        )
        self._path_traversal = re.compile(r'http\.(?:Get|Post)\s*\([^)]*\+[^)]*\)')
        self._no_auth = re.compile(r'http\.ListenAndServe\s*\(\s*"[^"]*"', re.IGNORECASE)

    @property
    def id(self) -> str:
        return "security.http_vulnerability"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "security"

    @property
    def rationale(self) -> str:
        return "HTTP handler with potential security issues (path traversal, no auth)"

    @property
    def language(self) -> str:
        return "go"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return an HTTP-related finding, or ``None``.

        Both checks run only when some added line calls ``http.Get``,
        ``http.Post``, ``http.HandleFunc`` or ``http.Handle``.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Unused by this rule.

        Returns:
            ``{"file": ..., "issue": "path_traversal"}`` when a Get/Post call
            contains ``+`` inside its argument list,
            ``{"file": ..., "issue": "no_authentication"}`` when
            ``http.ListenAndServe("...")`` appears, otherwise ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if not self._http_pattern.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        first_file = files[0] if files else "unknown"

        if self._path_traversal.search(raw_diff):
            return {"file": first_file, "issue": "path_traversal"}

        if self._no_auth.search(raw_diff):
            return {"file": first_file, "issue": "no_authentication"}

        return None
rules/null_safety.py ADDED
@@ -0,0 +1,301 @@
1
+ import re
2
+ from typing import Dict, Any, List, Optional
3
+ from sdk.rule import BaseRule
4
+ from sdk.signal import Signal
5
+
6
+
7
class NullReturnIgnoredRule(BaseRule):
    """Flag added ``.get(arg)`` calls with no null handling on the same line.

    Deliberately narrow to keep false positives down.
    """

    def __init__(self):
        # Only ``.get(...)`` with at least one argument (Map/Collection style lookup).
        self._null_return_methods = [
            r'\.get\s*\([^)]+\)',
        ]
        alternatives = '|'.join(self._null_return_methods)
        self._added_call = re.compile(r'^\+.*(' + alternatives + r')')
        # Same-line constructs treated as null handling.
        self._null_check = re.compile(
            r'(?:if\s*\([^)]*(?:==|!=|isNull|nonNull|Objects\.|Optional)|Optional\.|orElse|orElseGet)',
            re.IGNORECASE
        )

    @property
    def id(self) -> str:
        """Rule identifier."""
        return "null.return_ignored"

    @property
    def severity(self) -> str:
        """Severity label; kept low because unchecked ``.get()`` is very common."""
        return "low"

    @property
    def impact(self) -> str:
        """Impact category."""
        return "runtime"

    @property
    def rationale(self) -> str:
        """Human-readable explanation of the finding."""
        return "Map.get() without null check may cause NPE"

    @property
    def rule_type(self) -> str:
        """Rule type label."""
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return the first added ``.get(arg)`` line lacking same-line null handling.

        Returns:
            ``{"file": ..., "method": ...}`` or ``None``.
        """
        for candidate in diff_data.get('raw_diff', "").split('\n'):
            if not candidate.startswith('+'):
                continue
            if not self._added_call.search(candidate):
                continue
            # Lines that already use orElse / Optional / an explicit comparison are fine.
            if self._null_check.search(candidate):
                continue

            files = diff_data.get('files', [])
            first_file = files[0] if files else "unknown"
            return {"file": first_file, "method": candidate.strip()}

        return None
56
+
57
+
58
class OptionalUnwrapRule(BaseRule):
    """Flag added zero-argument ``.get()`` calls not paired with ``orElse``/``orElseGet``."""

    def __init__(self):
        # Zero-argument .get() on an added diff line.
        self._dangerous_get = re.compile(
            r'^\+.*(?<!\.)\.get\s*\(\s*\)',
            re.MULTILINE
        )
        # Safe unwrapping on the same line: orElse(...) or orElseGet(...).
        self._optional_safe = re.compile(
            r'\.orElse(?:Get)?\s*\(',
            re.MULTILINE
        )

    @property
    def id(self) -> str:
        return "null.optional_unsafe_get"

    @property
    def severity(self) -> str:
        # Kept low: Optional.get() is legitimate in many situations.
        return "low"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Optional.get() without isPresent() check - ensure null is handled"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` for the first unsafe ``.get()`` on an added line.

        Only added lines (``+`` prefix, excluding the ``+++`` file header) are
        inspected, so removed or unchanged code no longer triggers the rule.
        Lines containing ``orElse``/``orElseGet`` or a comment marker
        (``//`` or ``/*``) are skipped.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Unused by this rule.

        Returns:
            A finding naming the first file, or ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        for line in raw_diff.split('\n'):
            if not line.startswith('+') or line.startswith('+++'):
                continue
            if not self._dangerous_get.search(line):
                continue

            # Already unwrapped safely on this line.
            if self._optional_safe.search(line):
                continue

            # Skip lines that carry a comment marker.
            if '//' in line or '/*' in line:
                continue

            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown"}

        return None
122
+
123
+
124
class AutoboxingNPERule(BaseRule):
    """Flag added Java code where a wrapper type may be auto-unboxed while null."""

    def __init__(self):
        self._wrapper_types = ['Integer', 'Long', 'Double', 'Float', 'Boolean', 'Character', 'Short', 'Byte']
        wrappers = '|'.join(self._wrapper_types)
        # Wrapper-typed assignment whose right-hand side is not ``new <Wrapper>``.
        # re.MULTILINE matches the sibling pattern below: both are searched
        # against the whole diff, so ``^\+`` must anchor on every line.
        self._unbox_pattern = re.compile(
            r'^\+.*(?:' + wrappers + r')\s*\w+\s*=\s*(?!new\s+(?:' + wrappers + r'))',
            re.MULTILINE
        )
        # Result of get/getValue/getOrDefault used directly in arithmetic or compareTo.
        self._method_return_wrapper = re.compile(
            r'^\+.*\.(?:get|getValue|getOrDefault)\s*\([^)]*\)\s*(?:[+\-*/]|compareTo)',
            re.MULTILINE
        )

    @property
    def id(self) -> str:
        return "null.autoboxing_npe"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Wrapper type auto-unboxed to primitive, NPE if null"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` when either unboxing pattern matches an added line.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` and ``'files'``.
            signals: Unused by this rule.

        Returns:
            A finding naming the first file, or ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        matched = (
            self._unbox_pattern.search(raw_diff)
            or self._method_return_wrapper.search(raw_diff)
        )
        if not matched:
            return None

        files = diff_data.get('files', [])
        return {"file": files[0] if files else "unknown"}
165
+
166
+
167
class ChainedMethodCallNPERule(BaseRule):
    """Flag chained method calls on added lines, limited to DTO/entity-style paths."""

    def __init__(self):
        # Strict chain shape: ``a.b(...).c(`` - two calls, not plain attribute access.
        self._chained_call = re.compile(
            r'^\+.*\w+\s*\.\s*\w+\s*\([^)]*\)\s*\.\s*\w+\s*\(',
            re.MULTILINE
        )
        # Same-line constructs treated as null-safe.
        self._null_safe = re.compile(
            r'(?:Objects\.(?:requireNonNull|firstNonNull)|Optional\.ofNullable|\.map\s*\(|\?\.)',
            re.IGNORECASE
        )
        # Path fragments that mark a file as high risk.
        self._high_risk_patterns = [
            r'/dto/',
            r'/entity/',
            r'/model/',
            r'/vo/',
            r'/bo/',
            r'/domain/'
        ]

    @property
    def id(self) -> str:
        """Rule identifier."""
        return "null.chained_call_unsafe"

    @property
    def severity(self) -> str:
        """Severity label; kept low because most chained calls are safe."""
        return "low"

    @property
    def impact(self) -> str:
        """Impact category."""
        return "runtime"

    @property
    def rationale(self) -> str:
        """Human-readable explanation of the finding."""
        return "Chained method calls in DTO/Entity without null safety"

    @property
    def rule_type(self) -> str:
        """Rule type label."""
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return the first unsafe chain together with the first high-risk file.

        Returns:
            ``{"file": <matching path>, "chain": <stripped line>}`` or ``None``
            when no added chain is unsafe or no file path is high risk.
        """
        raw_diff = diff_data.get('raw_diff', "")
        files = diff_data.get('files', [])

        for added in (text for text in raw_diff.split('\n') if text.startswith('+')):
            if not self._chained_call.search(added):
                continue
            if self._null_safe.search(added):
                continue

            # Report against the first file whose path matches a high-risk fragment.
            for path in files:
                for fragment in self._high_risk_patterns:
                    if re.search(fragment, path, re.IGNORECASE):
                        return {"file": path, "chain": added.strip()}

        return None
225
+
226
+
227
class ArrayIndexOutOfBoundsRule(BaseRule):
    """Flag added index expressions computed from ``.size`` / ``.length``."""

    def __init__(self):
        # Literal-index access such as ``arr[0]``; compiled but not consulted by evaluate.
        self._direct_access = re.compile(
            r'^\+.*\w+\s*\[\s*(?:0|[1-9]\d*)\s*\]',
            re.MULTILINE
        )
        # Index of the form ``x[y.size - 1]`` / ``x[y.length + 2]``.
        self._size_dependent = re.compile(
            r'^\+.*\w+\s*\[\s*\w+\.(?:size|length)\s*[-+]\s*[1-9]',
            re.MULTILINE
        )

    @property
    def id(self) -> str:
        """Rule identifier."""
        return "null.array_index_unsafe"

    @property
    def severity(self) -> str:
        """Severity label."""
        return "medium"

    @property
    def impact(self) -> str:
        """Impact category."""
        return "runtime"

    @property
    def rationale(self) -> str:
        """Human-readable explanation of the finding."""
        return "Direct array access with hardcoded index or size-based calculation, bounds not checked"

    @property
    def rule_type(self) -> str:
        """Rule type label."""
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` when an added line indexes by size/length arithmetic.

        Only the size-dependent pattern is checked here.
        """
        diff_text = diff_data.get('raw_diff', "")

        if not self._size_dependent.search(diff_text):
            return None

        files = diff_data.get('files', [])
        first_file = files[0] if files else "unknown"
        return {"file": first_file}
268
+
269
+
270
class StringConcatNPERule(BaseRule):
    """String-concatenation NPE rule; switched off because it produced too many false positives."""

    def __init__(self):
        """No state is needed: the rule is disabled.

        ``"str" + var`` is an extremely common pattern, and a null ``var``
        yields the text "null" in Java instead of raising an NPE.
        """

    @property
    def id(self) -> str:
        """Rule identifier."""
        return "null.string_concat_unsafe"

    @property
    def severity(self) -> str:
        """Severity label."""
        return "low"

    @property
    def impact(self) -> str:
        """Impact category."""
        return "maintenance"

    @property
    def rationale(self) -> str:
        """Explanation text, worded to state that the rule is disabled."""
        return "Disabled: Java string concatenation handles null gracefully"

    @property
    def rule_type(self) -> str:
        """Rule type label."""
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Always return ``None``; the rule never reports a finding."""
        return None