diffsense 2.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. adapters/__init__.py +0 -0
  2. adapters/base.py +27 -0
  3. adapters/github_adapter.py +164 -0
  4. adapters/gitlab_adapter.py +207 -0
  5. adapters/local_adapter.py +136 -0
  6. banner.py +71 -0
  7. cli.py +606 -0
  8. config/__init__.py +1 -0
  9. config/rules.yaml +371 -0
  10. core/__init__.py +235 -0
  11. core/ast_detector.py +853 -0
  12. core/change.py +46 -0
  13. core/composer.py +93 -0
  14. core/evaluator.py +15 -0
  15. core/ignore_manager.py +71 -0
  16. core/knowledge.py +77 -0
  17. core/parser.py +181 -0
  18. core/parser_manager.py +104 -0
  19. core/quality_manager.py +117 -0
  20. core/renderer.py +197 -0
  21. core/rule_base.py +98 -0
  22. core/rule_runtime.py +103 -0
  23. core/rules.py +718 -0
  24. core/run_config.py +85 -0
  25. core/semantic_diff.py +359 -0
  26. core/signal_model.py +21 -0
  27. core/signals_registry.py +62 -0
  28. diffsense-2.2.12.dist-info/METADATA +18 -0
  29. diffsense-2.2.12.dist-info/RECORD +58 -0
  30. diffsense-2.2.12.dist-info/WHEEL +5 -0
  31. diffsense-2.2.12.dist-info/entry_points.txt +3 -0
  32. diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
  33. diffsense-2.2.12.dist-info/top_level.txt +11 -0
  34. diffsense_mcp/__init__.py +1 -0
  35. diffsense_mcp/launcher.py +28 -0
  36. diffsense_mcp/server.py +687 -0
  37. governance/lifecycle.py +54 -0
  38. main.py +318 -0
  39. rules/__init__.py +246 -0
  40. rules/api_compatibility.py +372 -0
  41. rules/collection_handling.py +349 -0
  42. rules/concurrency.py +194 -0
  43. rules/concurrency_adapter.py +250 -0
  44. rules/cross_language_adapter.py +444 -0
  45. rules/exception_handling.py +320 -0
  46. rules/go_rules.py +401 -0
  47. rules/null_safety.py +301 -0
  48. rules/resource_management.py +222 -0
  49. rules/yaml_adapter.py +195 -0
  50. run_audit.py +478 -0
  51. sdk/cpp_adapter.py +238 -0
  52. sdk/go_adapter.py +199 -0
  53. sdk/java_adapter.py +199 -0
  54. sdk/javascript_adapter.py +229 -0
  55. sdk/language_adapter.py +313 -0
  56. sdk/python_adapter.py +195 -0
  57. sdk/rule.py +63 -0
  58. sdk/signal.py +14 -0
@@ -0,0 +1,349 @@
1
+ import re
2
+ from typing import Dict, Any, List, Optional
3
+ from sdk.rule import BaseRule
4
+ from sdk.signal import Signal
5
+
6
+
7
class RawTypeUsageRule(BaseRule):
    """Detect collections declared with a raw type (no generic argument).

    An added diff line is reported when it matches ``_raw_type`` (a collection
    type name, an identifier, ``= new``, a collection type name and an empty
    ``<>``) and the text between the first and second ``=`` contains no
    ``<Type`` generic argument.
    """

    def __init__(self):
        self._collection_types = [
            'List', 'Set', 'Map', 'Collection', 'ArrayList', 'HashSet',
            'HashMap', 'TreeSet', 'TreeMap', 'LinkedList', 'LinkedHashMap'
        ]
        # The same alternation is used for the declared and the constructed type.
        type_alternatives = '|'.join(self._collection_types)
        self._raw_type = re.compile(
            r'^\+.*(?:' + type_alternatives
            + r')\s+\w+\s*=\s*new\s+(?:' + type_alternatives
            + r')\s*<\s*>'
        )
        self._with_generic = re.compile(r'<\s*\w+', re.MULTILINE)

    @property
    def id(self) -> str:
        return "collection.raw_type"

    @property
    def severity(self) -> str:
        return "medium"

    @property
    def impact(self) -> str:
        return "maintenance"

    @property
    def rationale(self) -> str:
        return "Collection declared with raw type, loses type safety"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return details of the first offending added line, or ``None``.

        Reads ``diff_data['raw_diff']`` (diff text) and ``diff_data['files']``;
        ``signals`` is not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        for candidate in raw_diff.split('\n'):
            if not candidate.startswith('+'):
                continue
            if not self._raw_type.search(candidate):
                continue

            # Check whether the right-hand side carries a generic argument.
            pieces = candidate.split('=')
            right_side = pieces[1] if len(pieces) > 1 else candidate
            if self._with_generic.search(right_side):
                continue

            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown", "declaration": candidate.strip()}

        return None
55
+
56
+
57
class UnmodifiableCollectionRule(BaseRule):
    """Detect methods returning a mutable collection instead of an immutable view.

    An added diff line is reported when it returns a bare identifier whose
    name starts with list/map/set/collection (optionally prefixed by
    ``this.`` or ``m_``) and the same line does not mention one of the
    unmodifiable/immutable factories in ``_unmodifiable``.
    """

    def __init__(self):
        mutable_return_flags = re.IGNORECASE | re.MULTILINE
        self._return_mutable = re.compile(
            r'^\+.*return\s+(?:this\.|m_)?(?:list|map|set|collection)\w*\s*;',
            mutable_return_flags,
        )
        self._unmodifiable = re.compile(
            r'Collections\.(?:unmodifiable|singleton)|List\.of|Map\.of|Set\.of|Collections\.empty',
            re.IGNORECASE,
        )

    @property
    def id(self) -> str:
        return "collection.mutable_return"

    @property
    def severity(self) -> str:
        return "medium"

    @property
    def impact(self) -> str:
        return "maintenance"

    @property
    def rationale(self) -> str:
        return "Returning mutable collection, caller can modify internal state"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return details of the first offending added line, or ``None``.

        Reads ``diff_data['raw_diff']`` and ``diff_data['files']``; ``signals``
        is not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        for candidate in raw_diff.split('\n'):
            if not candidate.startswith('+'):
                continue
            returns_collection = self._return_mutable.search(candidate)
            if returns_collection and not self._unmodifiable.search(candidate):
                files = diff_data.get('files', [])
                return {"file": files[0] if files else "unknown", "return": candidate.strip()}

        return None
101
+
102
+
103
class ConcurrentModificationRule(BaseRule):
    """Detect the risk of modifying a collection while iterating over it.

    After an added line containing a Java for-each header, subsequent added
    lines are scanned for a ``.remove(`` call that is not of the form
    ``<name>Iterator.remove()``. Scanning stops at the first added line whose
    code starts with ``}`` (a deliberately simplified end-of-loop check that
    does not track nested braces).
    """

    def __init__(self):
        self._foreach_loop = re.compile(
            r'for\s*\(\s*\w+\s+\w+\s*:\s*\w+\s*\)',
            re.MULTILINE
        )
        self._remove_call = re.compile(
            r'\.remove\s*\(',
            re.MULTILINE
        )
        self._iterator_remove = re.compile(
            r'\w+Iterator\.remove\s*\(\)',
            re.MULTILINE
        )

    @property
    def id(self) -> str:
        return "collection.concurrent_modification"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Collection modified during foreach iteration, may throw ConcurrentModificationException"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return the file and for-each position of the first risky removal.

        Reads ``diff_data['raw_diff']`` and ``diff_data['files']``; ``signals``
        is not used by this rule.

        Returns:
            ``{"file": ..., "loop_line": index}`` where ``index`` is the
            0-based position of the for-each line within the split diff text,
            or ``None`` when nothing is found.
        """
        raw_diff = diff_data.get('raw_diff', "")

        in_foreach = False
        foreach_start = -1

        for index, line in enumerate(raw_diff.split('\n')):
            # Only added lines take part in the analysis.
            if not line.startswith('+'):
                continue

            if self._foreach_loop.search(line):
                in_foreach = True
                foreach_start = index
                continue

            if not in_foreach:
                continue

            if self._remove_call.search(line) and not self._iterator_remove.search(line):
                files = diff_data.get('files', [])
                return {"file": files[0] if files else "unknown", "loop_line": foreach_start}

            # Simplified end-of-loop check. The diff '+' marker must be dropped
            # first; otherwise the stripped line always starts with '+' and the
            # loop would never be considered closed.
            if line[1:].strip().startswith('}'):
                in_foreach = False

        return None
160
+
161
+
162
class MapComputeRule(BaseRule):
    """Detect Map operations that could be simplified with a compute-style method.

    The rule fires when the diff text contains both an
    ``if (map.containsKey(...))`` / ``if (!map.containsKey(...))`` test and a
    ``.put(...)`` call anywhere in the diff.
    """

    def __init__(self):
        self._contains_put = re.compile(r'if\s*\(\s*!?map\.containsKey\s*\([^)]+\)\s*\)', re.MULTILINE)
        self._put_inside = re.compile(r'\.put\s*\([^)]+\)', re.MULTILINE)

    @property
    def id(self) -> str:
        return "collection.map_compute_opportunity"

    @property
    def severity(self) -> str:
        return "low"

    @property
    def impact(self) -> str:
        return "maintenance"

    @property
    def rationale(self) -> str:
        return "Map containsKey + put pattern can be replaced with compute/merge"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` when both patterns occur in the diff, else ``None``.

        Reads ``diff_data['raw_diff']`` and ``diff_data['files']``; ``signals``
        is not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if not self._contains_put.search(raw_diff):
            return None
        if not self._put_inside.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        return {"file": files[0] if files else "unknown"}
203
+
204
+
205
class StreamCollectorRule(BaseRule):
    """Detect ``Collectors.toMap`` calls that omit the merge function.

    An added diff line is reported when it contains
    ``.collect(Collectors.toMap(<a>, <b>))`` with exactly two top-level
    arguments. Arguments are counted with awareness of nested parentheses,
    brackets, braces and generic angle brackets, so a three-argument call such
    as ``toMap(k, v, Integer::sum)`` is not reported, and two-argument calls
    whose mappers contain nested calls are.

    Known limitation: string literals are not parsed, so commas or brackets
    inside a string can skew the count; calls that span several diff lines are
    ignored.
    """

    def __init__(self):
        self._to_list = re.compile(
            r'\.collect\s*\(\s*Collectors\.toList\s*\(\s*\)\s*\)',
            re.MULTILINE
        )
        self._to_set = re.compile(
            r'\.collect\s*\(\s*Collectors\.toSet\s*\(\s*\)\s*\)',
            re.MULTILINE
        )
        # Kept for backward compatibility with code that may read the
        # attribute; evaluate() no longer relies on it because it cannot tell
        # two-argument calls from three-argument ones.
        self._to_map_no_merge = re.compile(
            r'\.collect\s*\(\s*Collectors\.toMap\s*\([^,]+,[^)]+\)\s*\)',
            re.MULTILINE
        )
        # Matches up to and including the opening parenthesis of toMap(.
        self._to_map_call = re.compile(r'\.collect\s*\(\s*Collectors\.toMap\s*\(')
        # Matches the parenthesis closing the surrounding collect( call.
        self._closes_collect = re.compile(r'\s*\)')

    @property
    def id(self) -> str:
        return "collection.stream_collector_unsafe"

    @property
    def severity(self) -> str:
        return "medium"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Stream collector without merge function may throw IllegalStateException on duplicate keys"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return details of the first added line with a two-argument toMap, or ``None``.

        Reads ``diff_data['raw_diff']`` and ``diff_data['files']``; ``signals``
        is not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        for line in raw_diff.split('\n'):
            if line.startswith('+') and self._lacks_merge_function(line):
                files = diff_data.get('files', [])
                return {"file": files[0] if files else "unknown", "collector": line.strip()}

        return None

    def _lacks_merge_function(self, line: str) -> bool:
        """Return True if *line* holds a complete two-argument ``collect(toMap(...))``."""
        for call in self._to_map_call.finditer(line):
            arg_count, end = self._scan_arguments(line, call.end())
            if arg_count == 2 and self._closes_collect.match(line, end):
                return True
        return False

    @staticmethod
    def _scan_arguments(text: str, start: int):
        """Count top-level arguments of a call whose ``(`` ends just before *start*.

        Returns ``(count, index_after_closing_paren)``, or ``(None, index)``
        when the argument list is not properly closed within *text*.
        """
        nesting = 0      # (), [] and {} opened inside the argument list
        generics = 0     # <...> opened inside the argument list
        separators = 0   # commas seen at the top level
        has_content = False

        for pos in range(start, len(text)):
            ch = text[pos]
            if ch in '([{':
                nesting += 1
            elif ch in ')]}':
                if nesting == 0:
                    if ch != ')':
                        return None, pos
                    return (separators + 1 if has_content else 0), pos + 1
                nesting -= 1
            elif ch == '<':
                # Heuristic: "<" directly followed by a non-space, non-"="
                # character is treated as opening a generic argument list
                # rather than as a comparison operator.
                following = text[pos + 1:pos + 2]
                if following and not following.isspace() and following != '=':
                    generics += 1
            elif ch == '>':
                # The ">" of a lambda arrow "->" does not close a generic list.
                if generics > 0 and text[pos - 1] != '-':
                    generics -= 1
            elif ch == ',' and nesting == 0 and generics == 0:
                separators += 1

            if not ch.isspace():
                has_content = True

        return None, len(text)
252
+
253
+
254
class ImmutableCollectionRule(BaseRule):
    """Detect use of legacy collection factory methods.

    The rule fires when some added line of the diff calls one of the
    ``Collections.singleton*`` / ``Collections.empty*`` factories listed in
    ``_legacy_factory`` and the diff text contains no
    ``List.of(`` / ``Set.of(`` / ``Map.of(`` / ``Collection.of(`` call at all.
    """

    def __init__(self):
        self._legacy_factory = re.compile(
            r'^\+.*Collections\.(?:singletonList|singletonSet|singletonMap|emptyList|emptySet|emptyMap)\s*\(',
            re.MULTILINE,
        )
        self._modern_factory = re.compile(r'(?:List|Set|Map|Collection)\.of\s*\(', re.MULTILINE)

    @property
    def id(self) -> str:
        return "collection.legacy_factory"

    @property
    def severity(self) -> str:
        return "low"

    @property
    def impact(self) -> str:
        return "maintenance"

    @property
    def rationale(self) -> str:
        return "Using legacy Collections.factory(), prefer List.of()/Set.of()/Map.of() (Java 9+)"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` when only legacy factories appear, else ``None``.

        Reads ``diff_data['raw_diff']`` and ``diff_data['files']``; ``signals``
        is not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        uses_legacy = self._legacy_factory.search(raw_diff)
        if not uses_legacy:
            return None
        if self._modern_factory.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        return {"file": files[0] if files else "unknown"}
296
+
297
+
298
class ListResizeRule(BaseRule):
    """Detect modifying calls on a list obtained from ``Arrays.asList``.

    Added diff lines are scanned in order. Every variable assigned directly
    from ``Arrays.asList`` is remembered; a later added line is reported when
    it calls ``add``, ``remove`` or ``clear`` with one of those variables as
    the receiver.
    """

    def __init__(self):
        self._as_list = re.compile(
            r'Arrays\.asList\s*\([^)]+\)',
            re.MULTILINE
        )
        self._modifying_op = re.compile(
            r'\.(?:add|remove|clear)\s*\(',
            re.MULTILINE
        )
        # Captures the variable on the left of "= Arrays.asList".
        self._aslist_assignment = re.compile(r'(\w+)\s*=\s*Arrays\.asList')
        # Captures the receiver identifier of an add/remove/clear call.
        self._receiver_call = re.compile(r'\b(\w+)\s*\.(?:add|remove|clear)\s*\(')

    @property
    def id(self) -> str:
        return "collection.aslist_modify"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Calling add/remove on Arrays.asList() result throws UnsupportedOperationException"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return details of the first modifying call on an asList variable, or ``None``.

        Reads ``diff_data['raw_diff']`` and ``diff_data['files']``; ``signals``
        is not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        aslist_vars = set()

        for line in raw_diff.split('\n'):
            if not line.startswith('+'):
                continue

            assignment = self._aslist_assignment.search(line)
            if assignment:
                aslist_vars.add(assignment.group(1))
                continue

            if not aslist_vars:
                continue

            # Compare the call receiver exactly: a substring test would report
            # e.g. "data.add(x)" for a tracked variable named "a".
            receivers = self._receiver_call.findall(line)
            if any(name in aslist_vars for name in receivers):
                files = diff_data.get('files', [])
                return {"file": files[0] if files else "unknown", "operation": line.strip()}

        return None
rules/concurrency.py ADDED
@@ -0,0 +1,194 @@
1
+ import re
2
+ from typing import Dict, Any, List, Optional
3
+ from sdk.rule import BaseRule
4
+ from sdk.signal import Signal
5
+
6
class ThreadPoolSemanticChangeRule(BaseRule):
    """Flag diffs that mention a risky thread pool configuration.

    The rule fires when the diff text contains
    ``new ThreadPoolExecutor(0, Integer.MAX_VALUE`` or ``new SynchronousQueue``.
    The whole diff text is searched, not only added lines.
    """

    def __init__(self):
        self._tpe_pattern = re.compile(r'new\s+ThreadPoolExecutor\s*\(\s*0\s*,\s*Integer\.MAX_VALUE')
        self._sync_queue_pattern = re.compile(r'new\s+SynchronousQueue')

    @property
    def id(self) -> str:
        return "runtime.threadpool_semantic_change"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "High risk thread pool configuration detected (unbounded or zero core)"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": ...}`` when either pattern occurs in the diff, else ``None``.

        Reads ``diff_data['raw_diff']`` and ``diff_data['files']``; ``signals``
        is not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        # Check both added lines (in diff) and overall context if provided
        if self._tpe_pattern.search(raw_diff) or self._sync_queue_pattern.search(raw_diff):
            # Try to find the file
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown"}

        return None

    def _find_file_for_line(self, line: str, diff_data: Dict[str, Any]) -> str:
        """Return the first entry of ``diff_data['files']``, or ``"unknown"``.

        ``line`` is currently not used. An empty ``files`` list yields
        ``"unknown"`` rather than raising ``IndexError``.
        """
        files = diff_data.get('files', [])
        return files[0] if files else "unknown"
43
+
44
+
45
class ConcurrencyRegressionRule(BaseRule):
    """Flag a switch from a concurrent/atomic type to its plain counterpart.

    For each (strong, weak) pair the rule looks for a removed diff line
    mentioning the strong type and an added diff line mentioning the weak
    type. For most weak types a negative lookbehind keeps the strong type's
    own name from matching as the weak one.
    """

    def __init__(self):
        pairs = [
            ("ConcurrentHashMap", "HashMap"),
            ("ConcurrentMap", "HashMap"),
            ("CopyOnWriteArrayList", "ArrayList"),
            ("CopyOnWriteArraySet", "HashSet"),
            ("AtomicInteger", "Integer"),
            ("AtomicLong", "Long"),
            ("AtomicBoolean", "Boolean")
        ]
        # Prefix that must NOT directly precede the weak type name; weak types
        # missing from this table (HashSet) get no lookbehind.
        excluded_prefix = {
            "HashMap": "Concurrent",
            "ArrayList": "CopyOnWrite",
            "Integer": "Atomic",
            "Long": "Atomic",
            "Boolean": "Atomic",
        }

        self._regressions = []
        for strong, weak in pairs:
            prefix = excluded_prefix.get(weak)
            lookbehind = '(?<!' + prefix + ')' if prefix else ''
            removed_strong = re.compile(r'^-.*' + re.escape(strong), re.MULTILINE)
            added_weak = re.compile(r'^\+.*' + lookbehind + re.escape(weak), re.MULTILINE)
            self._regressions.append((strong, weak, removed_strong, added_weak))

    @property
    def id(self) -> str:
        return "runtime.concurrency_regression"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Downgrade from concurrent/atomic type to non-thread-safe implementation"

    @property
    def rule_type(self) -> str:
        return "regression"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding for the first detected downgrade, or ``None``.

        A signal with id ``runtime.concurrency.thread_safety_downgrade`` takes
        precedence and yields ``{"file": <signal file>}``. Otherwise the regex
        fallback yields ``{"file": "regression_<strong>_to_<weak>"}``.
        """
        # Prefer signal-based detection when available.
        downgrade = next(
            (sig for sig in signals if sig.id == "runtime.concurrency.thread_safety_downgrade"),
            None,
        )
        if downgrade is not None:
            return {"file": downgrade.file}

        # Fall back to the legacy regex logic.
        raw_diff = diff_data.get('raw_diff', "")
        for strong, weak, removed_strong, added_weak in self._regressions:
            if not removed_strong.search(raw_diff):
                continue
            if added_weak.search(raw_diff):
                return {"file": f"regression_{strong}_to_{weak}"}

        return None
103
+
104
+
105
class ThreadSafetyRemovalRule(BaseRule):
    """Flag diffs that remove more synchronization constructs than they add.

    Three kinds of construct are compared in order: ``synchronized``,
    ``volatile`` and explicit ``.lock(...)`` / ``.unlock(...)`` /
    ``.tryLock(...)`` calls. The first kind with more matching removed lines
    than added lines produces the finding.
    """

    def __init__(self):
        self._removed_sync_re = re.compile(r'^-\s.*synchronized', re.MULTILINE)
        self._added_sync_re = re.compile(r'^\+\s.*synchronized', re.MULTILINE)
        self._removed_vol_re = re.compile(r'^-\s.*volatile', re.MULTILINE)
        self._added_vol_re = re.compile(r'^\+\s.*volatile', re.MULTILINE)
        self._removed_lock_re = re.compile(r'^-\s.*\.(lock|unlock|tryLock)\(.*\)', re.MULTILINE)
        self._added_lock_re = re.compile(r'^\+\s.*\.(lock|unlock|tryLock)\(.*\)', re.MULTILINE)

    @property
    def id(self) -> str:
        return "runtime.thread_safety_removal"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Removal of synchronization (synchronized, volatile, locks) from shared code"

    @property
    def rule_type(self) -> str:
        return "regression"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": <label>}`` for the first net removal, or ``None``.

        The label is ``synchronized_removed``, ``volatile_removed`` or
        ``explicit_lock_removed``. Reads ``diff_data['raw_diff']``; ``signals``
        is not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        checks = (
            (self._removed_sync_re, self._added_sync_re, "synchronized_removed"),
            (self._removed_vol_re, self._added_vol_re, "volatile_removed"),
            (self._removed_lock_re, self._added_lock_re, "explicit_lock_removed"),
        )
        for removed_re, added_re, label in checks:
            removed_total = len(removed_re.findall(raw_diff))
            # Zero removals can never exceed the number of additions.
            if removed_total == 0:
                continue
            if removed_total > len(added_re.findall(raw_diff)):
                return {"file": label}

        return None
158
+
159
+
160
class LatchMisuseRule(BaseRule):
    """Flag diffs that remove more ``.countDown()`` calls than they add."""

    def __init__(self):
        self._removed_count_re = re.compile(r'^-\s.*\.countDown\(\)', re.MULTILINE)
        self._added_count_re = re.compile(r'^\+\s.*\.countDown\(\)', re.MULTILINE)

    @property
    def id(self) -> str:
        return "runtime.latch_misuse"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Removal of CountDownLatch.countDown() - potential deadlock or hang"

    @property
    def rule_type(self) -> str:
        return "regression"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return ``{"file": "latch_countdown_removed"}`` on a net removal, else ``None``.

        Reads ``diff_data['raw_diff']``; ``signals`` is not used by this rule.
        """
        raw_diff = diff_data.get('raw_diff', "")

        removed_total = len(self._removed_count_re.findall(raw_diff))
        # Without any removed call there is nothing to report.
        if removed_total == 0:
            return None

        added_total = len(self._added_count_re.findall(raw_diff))
        if removed_total > added_total:
            return {"file": "latch_countdown_removed"}

        return None