diffsense 2.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapters/__init__.py +0 -0
- adapters/base.py +27 -0
- adapters/github_adapter.py +164 -0
- adapters/gitlab_adapter.py +207 -0
- adapters/local_adapter.py +136 -0
- banner.py +71 -0
- cli.py +606 -0
- config/__init__.py +1 -0
- config/rules.yaml +371 -0
- core/__init__.py +235 -0
- core/ast_detector.py +853 -0
- core/change.py +46 -0
- core/composer.py +93 -0
- core/evaluator.py +15 -0
- core/ignore_manager.py +71 -0
- core/knowledge.py +77 -0
- core/parser.py +181 -0
- core/parser_manager.py +104 -0
- core/quality_manager.py +117 -0
- core/renderer.py +197 -0
- core/rule_base.py +98 -0
- core/rule_runtime.py +103 -0
- core/rules.py +718 -0
- core/run_config.py +85 -0
- core/semantic_diff.py +359 -0
- core/signal_model.py +21 -0
- core/signals_registry.py +62 -0
- diffsense-2.2.12.dist-info/METADATA +18 -0
- diffsense-2.2.12.dist-info/RECORD +58 -0
- diffsense-2.2.12.dist-info/WHEEL +5 -0
- diffsense-2.2.12.dist-info/entry_points.txt +3 -0
- diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
- diffsense-2.2.12.dist-info/top_level.txt +11 -0
- diffsense_mcp/__init__.py +1 -0
- diffsense_mcp/launcher.py +28 -0
- diffsense_mcp/server.py +687 -0
- governance/lifecycle.py +54 -0
- main.py +318 -0
- rules/__init__.py +246 -0
- rules/api_compatibility.py +372 -0
- rules/collection_handling.py +349 -0
- rules/concurrency.py +194 -0
- rules/concurrency_adapter.py +250 -0
- rules/cross_language_adapter.py +444 -0
- rules/exception_handling.py +320 -0
- rules/go_rules.py +401 -0
- rules/null_safety.py +301 -0
- rules/resource_management.py +222 -0
- rules/yaml_adapter.py +195 -0
- run_audit.py +478 -0
- sdk/cpp_adapter.py +238 -0
- sdk/go_adapter.py +199 -0
- sdk/java_adapter.py +199 -0
- sdk/javascript_adapter.py +229 -0
- sdk/language_adapter.py +313 -0
- sdk/python_adapter.py +195 -0
- sdk/rule.py +63 -0
- sdk/signal.py +14 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Concurrency Rules using Language Adapter
|
|
3
|
+
|
|
4
|
+
This module demonstrates how to use LanguageAdapter to write
|
|
5
|
+
language-agnostic concurrency rules that work across Java, Go, and Python.
|
|
6
|
+
|
|
7
|
+
Key advantage: One rule implementation can detect the same semantic issue
|
|
8
|
+
in multiple languages without duplicating logic.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
from typing import Dict, Any, List, Optional, Set
|
|
13
|
+
from sdk.rule import BaseRule
|
|
14
|
+
from sdk.signal import Signal
|
|
15
|
+
from sdk.language_adapter import AdapterFactory, LanguageAdapter
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ConcurrencyRegressionRuleAdapter(BaseRule):
    """
    Concurrency Regression Rule - Adapter Version.

    Detects when code is downgraded from thread-safe to non-thread-safe types.

    Works for:
    - Java: ConcurrentHashMap -> HashMap, AtomicInteger -> Integer
    - Go: sync.Map -> map, sync.Mutex / chan -> (removed)
    - Python: threading.Lock -> (removed)

    ``evaluate`` first honours an upstream
    ``runtime.concurrency.thread_safety_downgrade`` signal and otherwise
    falls back to scanning the raw unified diff line by line.
    """

    # A "weak" entry starting with this character is a human-readable
    # description ("(lock removed)"), not a type name: it means the safe
    # primitive disappeared without any replacement.
    _REMOVAL_MARKER = "("

    # (thread-safe type, downgrade target) pairs per language.
    _DOWNGRADE_PAIRS = {
        "java": [
            ("ConcurrentHashMap", "HashMap"),
            ("ConcurrentMap", "HashMap"),
            ("CopyOnWriteArrayList", "ArrayList"),
            ("CopyOnWriteArraySet", "HashSet"),
            ("AtomicInteger", "Integer"),
            ("AtomicLong", "Long"),
            ("AtomicBoolean", "Boolean"),
        ],
        "go": [
            ("sync.Map", "map"),
            ("sync.Mutex", "(mutex removed)"),
            ("chan", "(channel removed)"),
        ],
        "python": [
            ("threading.Lock", "(lock removed)"),
            ("threading.RLock", "(lock removed)"),
            ("queue.Queue", "(queue removed)"),
        ],
    }

    def __init__(self, language: str = "java"):
        """
        Initialize with specific language adapter.

        Args:
            language: One of 'java', 'go', 'python'. Any other value yields
                a rule with no regex pairs, so only the signal path can fire.
        """
        self._adapter = AdapterFactory.get_adapter(language)
        self._language = language

        # Build regression pairs for the selected language
        self._build_regression_pairs()

    @staticmethod
    def _type_token(name: str) -> str:
        """Return a regex matching ``name`` as a whole identifier.

        The look-arounds stop a short name from matching inside a longer
        one (``HashMap`` inside ``ConcurrentHashMap``).
        """
        return r'(?<!\w)' + re.escape(name) + r'(?!\w)'

    def _build_regression_pairs(self):
        """Build language-specific regression detection patterns.

        Each entry of ``self._regressions`` is
        ``(strong, weak, removed_re, added_re, is_removal)``:

        * ``removed_re`` matches a removed (``-``) line mentioning ``strong``.
        * ``added_re`` matches an added (``+``) line mentioning ``weak``,
          or mentioning ``strong`` again when ``is_removal`` is true.
        """
        self._regressions = []

        for strong, weak in self._DOWNGRADE_PAIRS.get(self._language, []):
            strong_token = self._type_token(strong)
            is_removal = weak.startswith(self._REMOVAL_MARKER)
            added_token = strong_token if is_removal else self._type_token(weak)

            # MULTILINE makes ^ match at every diff line; the negative
            # look-aheads skip the '---' / '+++' file header lines.
            removed_re = re.compile(r'^-(?!--).*' + strong_token, re.MULTILINE)
            added_re = re.compile(r'^\+(?!\+\+).*' + added_token, re.MULTILINE)
            self._regressions.append((strong, weak, removed_re, added_re, is_removal))

    @property
    def id(self) -> str:
        return "runtime.concurrency_regression"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Downgrade from thread-safe to non-thread-safe implementation"

    @property
    def language(self) -> str:
        return self._language

    @property
    def rule_type(self) -> str:
        return "regression"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a match dict when the change downgrades thread safety.

        Args:
            diff_data: Mapping that may carry the unified diff text under
                the ``'raw_diff'`` key.
            signals: Signals produced upstream; each is read for ``id``
                and ``file``.

        Returns:
            ``{"file": ...}`` on a match, otherwise ``None``. On the regex
            path the value is a synthetic ``regression_<strong>_to_<weak>``
            label rather than a file path.
        """
        # First, try signal-based detection
        for sig in signals:
            if sig.id == "runtime.concurrency.thread_safety_downgrade":
                return {"file": sig.file}

        # Fallback: regex-based detection
        raw_diff = diff_data.get('raw_diff') or ""

        for strong, weak, removed_re, added_re, is_removal in self._regressions:
            if not removed_re.search(raw_diff):
                continue

            added_hit = added_re.search(raw_diff) is not None
            # Pure removal: regression unless the safe type is re-added
            # elsewhere in the diff (e.g. the line merely moved).
            # Replacement: regression only if the weak type was added.
            regressed = (not added_hit) if is_removal else added_hit
            if regressed:
                return {"file": f"regression_{strong}_to_{weak}"}

        return None
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class ThreadSafetyRemovalRuleAdapter(BaseRule):
    """
    Thread Safety Removal Rule - Adapter Version.

    Detects removal of synchronization primitives (locks, volatile, etc.)
    by comparing how many removed diff lines match the adapter's patterns
    against how many added lines do.
    """

    def __init__(self, language: str = "java"):
        """Look up the adapter for ``language`` and compile its patterns."""
        self._adapter = AdapterFactory.get_adapter(language)
        self._language = language
        self._compile_patterns()

    @staticmethod
    def _compile_marker_pair(patterns):
        """Build ``(removed_re, added_re)`` from compiled adapter patterns.

        Returns ``(None, None)`` when the adapter supplies no patterns: an
        empty alternation would match every changed line and produce false
        positives.
        """
        sources = [p.pattern for p in (patterns or [])]
        if not sources:
            return None, None

        alternation = '(?:' + '|'.join(sources) + ')'
        # ^ is per-line (MULTILINE); the look-aheads skip '---' / '+++'
        # file headers while still accepting unindented code lines.
        removed_re = re.compile(r'^-(?!--).*' + alternation, re.MULTILINE)
        added_re = re.compile(r'^\+(?!\+\+).*' + alternation, re.MULTILINE)
        return removed_re, added_re

    def _compile_patterns(self):
        """Compile removed/added line patterns using the adapter."""
        self._removed_lock_re, self._added_lock_re = self._compile_marker_pair(
            self._adapter.get_lock_patterns()
        )
        self._removed_unlock_re, self._added_unlock_re = self._compile_marker_pair(
            self._adapter.get_unlock_patterns()
        )
        self._removed_volatile_re, self._added_volatile_re = self._compile_marker_pair(
            self._adapter.get_volatile_patterns()
        )

    @property
    def id(self) -> str:
        return "runtime.thread_safety_removal"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Removal of synchronization primitives may cause race conditions"

    @property
    def language(self) -> str:
        return self._language

    @property
    def rule_type(self) -> str:
        return "regression"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a match dict when more primitives are removed than added.

        Args:
            diff_data: Mapping that may carry the unified diff text under
                the ``'raw_diff'`` key.
            signals: Unused by this rule.

        Returns:
            ``{"file": "lock_removed"}``, ``{"file": "unlock_removed"}`` or
            ``{"file": "volatile_removed"}`` for the first category (in
            that order) with more matching removed lines than added lines;
            ``None`` otherwise.
        """
        raw_diff = diff_data.get('raw_diff') or ""

        checks = (
            ("lock_removed", self._removed_lock_re, self._added_lock_re),
            ("unlock_removed", self._removed_unlock_re, self._added_unlock_re),
            ("volatile_removed", self._removed_volatile_re, self._added_volatile_re),
        )

        for label, removed_re, added_re in checks:
            if removed_re is None:
                # The adapter defines no patterns for this category.
                continue
            removed_count = len(removed_re.findall(raw_diff))
            added_count = len(added_re.findall(raw_diff))
            if removed_count > added_count:
                return {"file": label}

        return None
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# Convenience function to create language-specific rules
|
|
229
|
+
def create_concurrency_rule(rule_class, language: str):
    """Instantiate ``rule_class`` for ``language``.

    ``rule_class`` is called with ``language`` as a keyword argument and
    whatever it returns is handed back unchanged.
    """
    rule = rule_class(language=language)
    return rule
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# Example: Create rules for different languages
|
|
235
|
+
def get_all_language_concurrency_rules():
    """
    Build every concurrency rule for each of 'java', 'go' and 'python'.

    Returns:
        Dict[str, List[BaseRule]]: For each language, a list holding a
        ConcurrencyRegressionRuleAdapter followed by a
        ThreadSafetyRemovalRuleAdapter.
    """
    rule_classes = (ConcurrencyRegressionRuleAdapter, ThreadSafetyRemovalRuleAdapter)
    return {
        lang: [rule_cls(language=lang) for rule_cls in rule_classes]
        for lang in ('java', 'go', 'python')
    }
|
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
"""
|
|
2
|
+
跨语言规则兼容层原型
|
|
3
|
+
|
|
4
|
+
这个模块展示了如何通过兼容层将 Java 规则的逻辑复用到 Go 语言。
|
|
5
|
+
核心思想:提取规则的"语义模式",而非"语法模式"。
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import Dict, Any, List, Optional, Callable, Set
|
|
10
|
+
from sdk.rule import BaseRule
|
|
11
|
+
from sdk.signal import Signal
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# ============================================================================
|
|
15
|
+
# 1. 语言适配层:定义语言特定的模式映射
|
|
16
|
+
# ============================================================================
|
|
17
|
+
|
|
18
|
+
class LanguageAdapter:
    """
    Language adapter: maps generic semantic patterns onto the syntax of a
    specific language.

    Every getter looks ``self.language`` up in a per-topic table and falls
    back to an empty value when the language has no entry.
    """

    # Closeable resource types, keyed by language.
    _CLOSEABLE_TYPES = {
        'java': ['InputStream', 'OutputStream', 'Reader', 'Writer',
                 'Socket', 'Connection', 'Statement', 'ResultSet'],
        'go': ['io.Closer', 'io.ReadCloser', 'io.WriteCloser',
               'os.File', 'net.Conn', 'sql.DB', 'sql.Rows'],
        'python': ['IOBase', 'TextIOBase', 'BinaryIO', 'Socket', 'Connection'],
    }

    # Regex source for "a resource is opened".
    _OPEN_PATTERNS = {
        'java': r'(?:new\s+\w+(?:InputStream|OutputStream|Reader|Writer|Connection)\s*\()',
        'go': r'(?:os\.Open|os\.Create|net\.Dial|sql\.Open|os\.Pipe)\s*\(',
        'python': r'(?:open\s*\(|socket\.socket\(|connect\s*\()',
        'javascript': r'(?:fs\.open\(|fs\.createReadStream\(|new\s+Client\(\)\.connect\()',
        'typescript': r'(?:fs\.open\(|fs\.createReadStream\(|new\s+Client\(\)\.connect\()',
        'cpp': r'(?:new\s+\w+|fopen\s*\(|ifstream\s*\()',
        'c': r'(?:malloc\s*\(|fopen\s*\(|calloc\s*\()',
    }

    # Regex source for "a resource is closed".
    _CLOSE_PATTERNS = {
        'java': r'(?:\.close\(\)|try\s*\([^)]*\.close)',
        'go': r'(?:\.Close\(\)|defer\s+.*\.Close)',
        'python': r'(?:\.close\(\)|with\s+)',
        'javascript': r'(?:\.close\(\)|\.end\(\)|\.destroy\(\))',
        'typescript': r'(?:\.close\(\)|\.end\(\)|\.destroy\(\))',
        'cpp': r'(?:delete\s+|fclose\s*\(|close\s*\()',
        'c': r'(?:free\s*\(|fclose\s*\(|close\s*\()',
    }

    # Regex source for a null/nil/None check.
    _NULL_CHECK_PATTERNS = {
        'java': r'(?:if\s*\([^)]*(?:==|!=)\s*null|Objects\.(?:nonNull|isNull)|Optional)',
        'go': r'(?:if\s+\w+\s*(?:==|!=)\s*nil|if\s+err\s*(?:!=|==)\s*nil)',
        'python': r'(?:if\s+\w+\s+(?:is|is\s+not|==|!=)\s+None)',
        'javascript': r'(?:if\s*\(\s*\w+\s*(?:===|!==|==|!=)\s*(?:null|undefined)|if\s*\(\s*!\w+)',
        'typescript': r'(?:if\s*\(\s*\w+\s*(?:===|!==|==|!=)\s*(?:null|undefined)|if\s*\(\s*!\w+)',
        'cpp': r'(?:if\s*\(\s*\w+\s*(?:==|!=)\s*nullptr|if\s*\(\s*!\w+)',
        'c': r'(?:if\s*\(\s*\w+\s*(?:==|!=)\s*NULL|if\s*\(\s*!\w+)',
    }

    # Regex source for an exception/error handler.
    _CATCH_PATTERNS = {
        'java': r'(?:catch\s*\([^)]+\)\s*{)',
        'go': r'(?:if\s+err\s*(?:!=|==)\s*nil)',
        'python': r'(?:except\s+.*:)',
        'javascript': r'(?:catch\s*\([^)]*\)\s*\{)',
        'typescript': r'(?:catch\s*\([^)]*\)\s*\{)',
        'cpp': r'(?:catch\s*\([^)]*\)\s*\{)',
        'c': r'(?:if\s*\([^)]*error[^)]*\)|if\s*\([^)]*errno)',
    }

    # Regex source for a handler whose body is empty.
    _EMPTY_CATCH_PATTERNS = {
        'java': r'(?:catch\s*\([^)]+\)\s*{\s*})',
        'go': r'(?:if\s+err\s*(?:!=|==)\s*nil\s*{\s*})',
        'python': r'(?:except\s+.*:\s*\n\s*pass)',
        'javascript': r'(?:catch\s*\([^)]*\)\s*\{\s*\})',
        'typescript': r'(?:catch\s*\([^)]*\)\s*\{\s*\})',
        'cpp': r'(?:catch\s*\([^)]*\)\s*\{\s*\})',
        'c': r'(?:if\s*\([^)]*error[^)]*\)\s*\{\s*\})',
    }

    # Null literal keyword per language.
    _NULL_VALUES = {
        'java': 'null',
        'go': 'nil',
        'python': 'None',
        'javascript': 'null',
        'typescript': 'null',
        'cpp': 'nullptr',
        'c': 'NULL',
    }

    # Source file extensions per language.
    _FILE_EXTENSIONS = {
        'java': ['.java'],
        'go': ['.go'],
        'python': ['.py'],
        'javascript': ['.js', '.jsx', '.mjs', '.cjs'],
        'typescript': ['.ts', '.tsx'],
        'cpp': ['.cpp', '.cc', '.cxx', '.h', '.hpp'],
        'c': ['.c', '.h'],
    }

    def __init__(self, language: str):
        self.language = language

    def get_closeable_types(self) -> List[str]:
        """Return the closeable resource types of this language ([] if unknown)."""
        # Copy so callers can mutate the result without touching the table.
        return list(self._CLOSEABLE_TYPES.get(self.language, ()))

    def get_resource_open_pattern(self) -> str:
        """Return the regex source for opening a resource ('' if unknown)."""
        return self._OPEN_PATTERNS.get(self.language, r'')

    def get_resource_close_pattern(self) -> str:
        """Return the regex source for closing a resource ('' if unknown)."""
        return self._CLOSE_PATTERNS.get(self.language, r'')

    def get_null_check_pattern(self) -> str:
        """Return the regex source for a null check ('' if unknown)."""
        return self._NULL_CHECK_PATTERNS.get(self.language, r'')

    def get_exception_catch_pattern(self) -> str:
        """Return the regex source for an exception handler ('' if unknown)."""
        return self._CATCH_PATTERNS.get(self.language, r'')

    def get_empty_catch_pattern(self) -> str:
        """Return the regex source for an empty exception handler ('' if unknown)."""
        return self._EMPTY_CATCH_PATTERNS.get(self.language, r'')

    def get_null_value(self) -> str:
        """Return the null keyword of this language ('null' if unknown)."""
        return self._NULL_VALUES.get(self.language, 'null')

    def get_file_extensions(self) -> List[str]:
        """Return the file extensions of this language ([] if unknown)."""
        # Copy so callers can mutate the result without touching the table.
        return list(self._FILE_EXTENSIONS.get(self.language, ()))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ============================================================================
|
|
130
|
+
# 2. 通用规则模板:使用适配器的语义规则
|
|
131
|
+
# ============================================================================
|
|
132
|
+
|
|
133
|
+
class GenericResourceLeakRule(BaseRule):
    """
    Generic resource-leak detection rule - multi-language via the adapter.

    Semantic pattern:
    1. The change adds code that opens a closeable resource.
    2. No close operation is visible in the code that exists after the change.
    """

    def __init__(self, language: str = '*'):
        """Compile the adapter's open/close patterns for ``language``.

        A language without patterns (including the default ``'*'``) leaves
        the regexes as ``None``, which disables the rule.
        """
        self.adapter = LanguageAdapter(language)
        self._language = language

        # Ask the adapter for the language-specific patterns
        open_pattern = self.adapter.get_resource_open_pattern()
        close_pattern = self.adapter.get_resource_close_pattern()

        self._open_re = re.compile(open_pattern) if open_pattern else None
        self._close_re = re.compile(close_pattern) if close_pattern else None

    @property
    def id(self) -> str:
        # Use prorule prefix to match test expectations for pro rules
        return f"prorule.{self._language}.resource_leak"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Resource opened but not properly closed"

    @property
    def language(self) -> str:
        return self._language

    @property
    def rule_type(self) -> str:
        return "absolute"

    @staticmethod
    def _split_diff(raw_diff: str):
        """Return ``(added_text, surviving_text)`` for a unified diff.

        ``added_text`` holds only added lines; ``surviving_text`` holds
        added plus context lines, i.e. the code present after the change.
        Removed lines and '---' / '+++' file headers are dropped.
        """
        added, surviving = [], []
        for line in raw_diff.split('\n'):
            if line.startswith(('+++', '---')) or line.startswith('-'):
                continue
            if line.startswith('+'):
                added.append(line[1:])
                surviving.append(line[1:])
            else:
                surviving.append(line)
        return '\n'.join(added), '\n'.join(surviving)

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Flag a change that opens a resource without a visible close.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` (unified diff text)
                and ``'files'`` (list whose first entry is reported).
            signals: Unused by this rule.

        Returns:
            ``{"file": ..., "language": ...}`` on a match, else ``None``.
        """
        if not self._open_re:
            return None

        raw_diff = diff_data.get('raw_diff') or ""
        added_text, surviving_text = self._split_diff(raw_diff)

        # Only newly added code can introduce a leak; a removed "open"
        # line must not trigger the rule.
        if not self._open_re.search(added_text):
            return None

        # A close that was deleted by this change does not count.
        if self._close_re and self._close_re.search(surviving_text):
            return None

        files = diff_data.get('files', [])
        return {"file": files[0] if files else "unknown", "language": self._language}
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class GenericNullSafetyRule(BaseRule):
    """
    Generic null-safety rule - multi-language via the adapter.

    Semantic pattern:
    1. An added line calls something that may yield a null value.
    2. No null check appears in the surrounding lines.
    """

    # Number of lines inspected before and after a suspicious call.
    _CONTEXT_LINES = 3

    def __init__(self, language: str = '*'):
        """Compile the call and null-check patterns for ``language``.

        A language without entries (including the default ``'*'``) leaves
        ``_call_re`` as ``None``, which disables the rule.
        """
        self.adapter = LanguageAdapter(language)
        self._language = language

        # Language-specific calls that may return a null value
        null_return_methods = self._get_null_return_methods()
        null_check_pattern = self.adapter.get_null_check_pattern()

        self._call_re = re.compile(r'^\+.*(' + '|'.join(null_return_methods) + r')') if null_return_methods else None
        self._null_check_re = re.compile(null_check_pattern) if null_check_pattern else None

    def _get_null_return_methods(self) -> List[str]:
        """Return regex fragments for calls that may yield null in this language."""
        mapping = {
            'java': [
                r'\.get\s*\(',        # Map.get()
                r'\.findFirst\s*\(',  # Stream.findFirst()
                r'\.readLine\s*\(',   # BufferedReader.readLine()
            ],
            'go': [
                r'\.Read\s*\(',       # io.Reader
                r'\.Next\s*\(',       # sql.Rows.Next()
                r'map\[.*\]',         # map access
            ],
            'python': [
                r'\.get\s*\(',        # dict.get()
                r'\.pop\s*\(',        # dict.pop()
                r'\[.*\]',            # index access
            ],
            'javascript': [
                r'\.get\s*\(',        # Map.get()
                r'\.find\s*\(',       # Array.find()
                r'\.findFirst\s*\(',  # Optional.findFirst()
                r'\[.*\]',            # index access
            ],
            'typescript': [
                r'\.get\s*\(',        # Map.get()
                r'\.find\s*\(',       # Array.find()
                r'\[.*\]',            # index access
            ],
            'cpp': [
                r'->\w+',             # pointer access
                r'\.at\s*\(',         # std::vector::at()
                r'\[\s*\w+\s*\]',     # array index
            ],
            'c': [
                r'->\w+',             # struct pointer access
                r'\[\s*\w+\s*\]',     # array index
            ],
        }
        return mapping.get(self._language, [])

    @property
    def id(self) -> str:
        return f"prorule.{self._language}.null_safety"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Potentially null/nil value used without check"

    @property
    def language(self) -> str:
        return self._language

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Flag the first added nullable call with no nearby null check.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` (unified diff text)
                and ``'files'`` (list whose first entry is reported).
            signals: Unused by this rule.

        Returns:
            ``{"file": ..., "language": ...}`` on a match, else ``None``.
        """
        if not self._call_re:
            return None

        raw_diff = diff_data.get('raw_diff') or ""

        # Drop removed lines (and '---' headers): a null check deleted by
        # this change must not count as protection for the new code.
        surviving = [line for line in raw_diff.split('\n') if not line.startswith('-')]
        span = self._CONTEXT_LINES

        for i, line in enumerate(surviving):
            # '+++ b/path' is a file header, not added code.
            if not line.startswith('+') or line.startswith('+++'):
                continue
            if not self._call_re.search(line):
                continue

            # Look for a null check in the surrounding lines
            context = '\n'.join(surviving[max(0, i - span):i + span + 1])
            if self._null_check_re and self._null_check_re.search(context):
                continue

            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown", "language": self._language}

        return None
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
class GenericExceptionHandlingRule(BaseRule):
    """
    Generic exception-handling rule - multi-language via the adapter.

    Semantic pattern:
    1. An exception/error is caught.
    2. The handler body is empty, so nothing is actually handled.
    """

    def __init__(self, language: str = '*'):
        """Compile the adapter's empty-handler pattern for ``language``.

        A language without a pattern (including the default ``'*'``)
        leaves the regex as ``None``, which disables the rule.
        """
        self.adapter = LanguageAdapter(language)
        self._language = language

        empty_catch_pattern = self.adapter.get_empty_catch_pattern()

        self._empty_catch_re = re.compile(empty_catch_pattern) if empty_catch_pattern else None

    @property
    def id(self) -> str:
        return f"prorule.{self._language}.exception_swallowed"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "maintenance"

    @property
    def rationale(self) -> str:
        return "Exception/error caught but not handled"

    @property
    def language(self) -> str:
        return self._language

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Flag added code that contains an empty exception handler.

        Args:
            diff_data: Mapping read for ``'raw_diff'`` (unified diff text)
                and ``'files'`` (list whose first entry is reported).
            signals: Unused by this rule.

        Returns:
            ``{"file": ..., "language": ...}`` on a match, else ``None``.
        """
        if not self._empty_catch_re:
            return None

        raw_diff = diff_data.get('raw_diff') or ""

        # Strip the leading '+' marker so the pattern sees plain source.
        # With the marker left in, a handler spanning two added lines
        # (e.g. "except X:" followed by "pass") could never match, because
        # '+' is not whitespace. '+++' file headers are skipped.
        added_code = '\n'.join(
            line[1:]
            for line in raw_diff.split('\n')
            if line.startswith('+') and not line.startswith('+++')
        )

        if self._empty_catch_re.search(added_code):
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown", "language": self._language}

        return None
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
# ============================================================================
|
|
356
|
+
# 3. 规则工厂:根据语言自动创建规则实例
|
|
357
|
+
# ============================================================================
|
|
358
|
+
|
|
359
|
+
# 支持的语言列表
|
|
360
|
+
# Languages for which CrossLanguageRuleFactory builds rules; any other
# language passed to create_all_rules_for_language yields an empty list.
SUPPORTED_LANGUAGES = ['java', 'go', 'python', 'javascript', 'typescript', 'cpp', 'c']
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
class CrossLanguageRuleFactory:
    """
    Cross-language rule factory: builds the generic rule instances for a
    target language.
    """

    @staticmethod
    def create_resource_leak_rule(language: str) -> BaseRule:
        """Create the resource-leak detection rule for ``language``."""
        rule = GenericResourceLeakRule(language=language)
        return rule

    @staticmethod
    def create_null_safety_rule(language: str) -> BaseRule:
        """Create the null-safety rule for ``language``."""
        rule = GenericNullSafetyRule(language=language)
        return rule

    @staticmethod
    def create_exception_handling_rule(language: str) -> BaseRule:
        """Create the exception-handling rule for ``language``."""
        rule = GenericExceptionHandlingRule(language=language)
        return rule

    @staticmethod
    def create_all_rules_for_language(language: str) -> List[BaseRule]:
        """Create every applicable rule for ``language``.

        Returns an empty list when ``language`` is not in
        SUPPORTED_LANGUAGES; otherwise the resource-leak, null-safety and
        exception-handling rules, in that order.
        """
        if language in SUPPORTED_LANGUAGES:
            factory = CrossLanguageRuleFactory
            builders = (
                factory.create_resource_leak_rule,
                factory.create_null_safety_rule,
                factory.create_exception_handling_rule,
            )
            return [build(language) for build in builders]
        return []

    @staticmethod
    def create_all_rules_for_all_languages() -> Dict[str, List[BaseRule]]:
        """Create the rules for every language in SUPPORTED_LANGUAGES."""
        return {
            language: CrossLanguageRuleFactory.create_all_rules_for_language(language)
            for language in SUPPORTED_LANGUAGES
        }
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
# ============================================================================
|
|
404
|
+
# 4. 使用示例
|
|
405
|
+
# ============================================================================
|
|
406
|
+
|
|
407
|
+
def example_usage():
    """Example: build cross-language rules and run them on sample diffs.

    Rules are created for Java, Go and Python; only the Java and Go rule
    sets are evaluated, each against a one-line diff that opens a file.
    Every rule that returns a result is printed.
    """
    build_rules = CrossLanguageRuleFactory.create_all_rules_for_language

    # Rule sets per language (the Python set is built but not evaluated)
    java_rules = build_rules('java')
    go_rules = build_rules('go')
    python_rules = build_rules('python')

    # Simulated diff payloads
    java_diff = dict(
        files=['test.java'],
        raw_diff='+ InputStream is = new FileInputStream("test.txt");',
    )
    go_diff = dict(
        files=['test.go'],
        raw_diff='+ file, _ := os.Open("test.txt")',
    )

    # Exercise the Java rules
    for rule in java_rules:
        result = rule.evaluate(java_diff, [])
        if not result:
            continue
        print(f"Java rule matched: {rule.id} in {result['file']}")

    # Exercise the Go rules
    for rule in go_rules:
        result = rule.evaluate(go_diff, [])
        if not result:
            continue
        print(f"Go rule matched: {rule.id} in {result['file']}")
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
# Run the demo only when this module is executed as a script.
if __name__ == '__main__':
    example_usage()
|