diffsense 2.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapters/__init__.py +0 -0
- adapters/base.py +27 -0
- adapters/github_adapter.py +164 -0
- adapters/gitlab_adapter.py +207 -0
- adapters/local_adapter.py +136 -0
- banner.py +71 -0
- cli.py +606 -0
- config/__init__.py +1 -0
- config/rules.yaml +371 -0
- core/__init__.py +235 -0
- core/ast_detector.py +853 -0
- core/change.py +46 -0
- core/composer.py +93 -0
- core/evaluator.py +15 -0
- core/ignore_manager.py +71 -0
- core/knowledge.py +77 -0
- core/parser.py +181 -0
- core/parser_manager.py +104 -0
- core/quality_manager.py +117 -0
- core/renderer.py +197 -0
- core/rule_base.py +98 -0
- core/rule_runtime.py +103 -0
- core/rules.py +718 -0
- core/run_config.py +85 -0
- core/semantic_diff.py +359 -0
- core/signal_model.py +21 -0
- core/signals_registry.py +62 -0
- diffsense-2.2.12.dist-info/METADATA +18 -0
- diffsense-2.2.12.dist-info/RECORD +58 -0
- diffsense-2.2.12.dist-info/WHEEL +5 -0
- diffsense-2.2.12.dist-info/entry_points.txt +3 -0
- diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
- diffsense-2.2.12.dist-info/top_level.txt +11 -0
- diffsense_mcp/__init__.py +1 -0
- diffsense_mcp/launcher.py +28 -0
- diffsense_mcp/server.py +687 -0
- governance/lifecycle.py +54 -0
- main.py +318 -0
- rules/__init__.py +246 -0
- rules/api_compatibility.py +372 -0
- rules/collection_handling.py +349 -0
- rules/concurrency.py +194 -0
- rules/concurrency_adapter.py +250 -0
- rules/cross_language_adapter.py +444 -0
- rules/exception_handling.py +320 -0
- rules/go_rules.py +401 -0
- rules/null_safety.py +301 -0
- rules/resource_management.py +222 -0
- rules/yaml_adapter.py +195 -0
- run_audit.py +478 -0
- sdk/cpp_adapter.py +238 -0
- sdk/go_adapter.py +199 -0
- sdk/java_adapter.py +199 -0
- sdk/javascript_adapter.py +229 -0
- sdk/language_adapter.py +313 -0
- sdk/python_adapter.py +195 -0
- sdk/rule.py +63 -0
- sdk/signal.py +14 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, Any, List, Optional
|
|
3
|
+
from sdk.rule import BaseRule
|
|
4
|
+
from sdk.signal import Signal
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CloseableResourceLeakRule(BaseRule):
    """Detect closeable resources (streams, connections, ...) that are not closed.

    This is a line-based heuristic over the raw diff text: it looks for added
    lines (lines starting with ``+``) that instantiate one of the known
    closeable types, and reports them unless the same diff also adds a
    try-with-resources header naming such a type or a ``.close()`` call.
    """

    def __init__(self):
        self._closeable_types = [
            'InputStream', 'OutputStream', 'Reader', 'Writer',
            'Socket', 'ServerSocket', 'Connection', 'Statement',
            'ResultSet', 'BufferedReader', 'BufferedWriter'
        ]
        type_alternation = '|'.join(self._closeable_types)

        # re.MULTILINE makes '^' anchor at the start of every diff line;
        # without it only the very first line of the diff could ever match.
        # '\w*' (rather than '\w+') lets the bare type names match as well,
        # e.g. "new Socket(" in addition to "new FileInputStream(".
        self._added_pattern = re.compile(
            r'^\+.*\b(new\s+\w*(?:' + type_alternation + r')\s*\()',
            re.MULTILINE,
        )
        self._try_with_resources = re.compile(
            r'^\+.*try\s*\([^)]*(?:' + type_alternation + r')',
            re.MULTILINE,
        )
        self._finally_close = re.compile(r'^\+.*\.close\(\)', re.MULTILINE)

    @property
    def id(self) -> str:
        return "resource.closeable_leak"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Closeable resources opened but not closed in try-with-resources or finally block"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding when a closeable is added with no visible close.

        Args:
            diff_data: Mapping that is read for ``'raw_diff'`` (the diff text)
                and ``'files'`` (list of changed file names).
            signals: Not used by this rule.

        Returns:
            ``{"file": ..., "resources": [...]}`` where ``resources`` holds the
            matched ``new X(`` fragments (at most one per added line), or
            ``None`` when nothing is added or a close/try-with-resources is
            present anywhere in the added lines.
        """
        # A missing or None diff is treated as empty text.
        raw_diff = diff_data.get('raw_diff') or ""

        # Was any closeable resource instantiated on an added line?
        added_resources = self._added_pattern.findall(raw_diff)
        if not added_resources:
            return None

        # Is there a try-with-resources header or a close() call among the
        # added lines?  NOTE: this is diff-wide, not per resource.
        has_try_resources = bool(self._try_with_resources.search(raw_diff))
        has_finally_close = bool(self._finally_close.search(raw_diff))

        # Report only when neither safeguard is present.
        if not has_try_resources and not has_finally_close:
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown", "resources": added_resources}

        return None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class DatabaseConnectionLeakRule(BaseRule):
    """Detect the risk of leaked database connections.

    Line-based heuristic over the raw diff text: an added line that obtains a
    connection (or creates a pooled data source) is reported unless the same
    diff also adds a ``conn.close()`` / ``connection.close()`` call or opens
    the connection inside a try-with-resources header.
    """

    def __init__(self):
        self._conn_patterns = [
            r'DriverManager\.getConnection',
            # getConnection() is an instance method, so the receiver is usually
            # a variable such as "dataSource"; accept both capitalisations.
            r'[dD]ataSource\.getConnection',
            r'new\s+HikariDataSource',
            r'new\s+BasicDataSource'
        ]
        # re.MULTILINE makes '^' anchor at the start of every diff line;
        # without it only the first line of the diff could ever match.
        self._added_conn = re.compile(
            r'^\+.*(' + '|'.join(self._conn_patterns) + r')',
            re.MULTILINE,
        )
        self._conn_close = re.compile(
            r'^\+.*(?:connection|conn)\.close\(\)',
            re.IGNORECASE | re.MULTILINE,
        )
        # A connection declared in a try-with-resources header is closed
        # automatically, so it must not be reported as a leak.
        self._try_with_resources = re.compile(
            r'^\+.*try\s*\([^)]*Connection\b',
            re.MULTILINE,
        )

    @property
    def id(self) -> str:
        return "resource.database_connection_leak"

    @property
    def severity(self) -> str:
        return "critical"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Database connection opened without proper close, may cause connection pool exhaustion"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding when a connection is opened with no visible close.

        Args:
            diff_data: Mapping that is read for ``'raw_diff'`` (the diff text)
                and ``'files'`` (list of changed file names).
            signals: Not used by this rule.

        Returns:
            ``{"file": ...}`` naming the first changed file (or ``"unknown"``),
            or ``None`` when no connection is opened or a close is present.
        """
        # A missing or None diff is treated as empty text.
        raw_diff = diff_data.get('raw_diff') or ""

        if not self._added_conn.search(raw_diff):
            return None

        # NOTE: the close check is diff-wide, not tied to a specific connection.
        if self._conn_close.search(raw_diff) or self._try_with_resources.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        return {"file": files[0] if files else "unknown"}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class StreamWrapperRule(BaseRule):
    """Detect stream wrappers created without an explicit character encoding.

    Line-based heuristic over the raw diff text. ``InputStreamReader`` and
    ``OutputStreamWriter`` are reported when the rest of the same line does not
    mention ``Charset``/``charset`` (which also covers ``StandardCharsets``);
    ``FileReader`` and ``FileWriter`` are always reported.
    """

    def __init__(self):
        # The look-ahead alternatives are grouped so that ".*" applies to all
        # of them.  Ungrouped, "charset" and "StandardCharsets" were only
        # tested directly after the opening parenthesis, so a later argument
        # such as "(in, charset)" was still reported.
        self._unencoded_patterns = [
            r'new\s+InputStreamReader\s*\((?!.*(?:Charset|charset))',
            r'new\s+OutputStreamWriter\s*\((?!.*(?:Charset|charset))',
            r'new\s+FileReader\s*\(',
            r'new\s+FileWriter\s*\('
        ]
        # re.MULTILINE makes '^' anchor at the start of every diff line;
        # without it only the first line of the diff could ever match.
        self._added_stream = re.compile(
            r'^\+.*(' + '|'.join(self._unencoded_patterns) + r')',
            re.MULTILINE,
        )

    @property
    def id(self) -> str:
        return "resource.stream_encoding_missing"

    @property
    def severity(self) -> str:
        return "medium"

    @property
    def impact(self) -> str:
        return "maintenance"

    @property
    def rationale(self) -> str:
        return "Stream reader/writer created without explicit charset, uses platform default encoding"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding listing added stream wrappers that lack a charset.

        Args:
            diff_data: Mapping that is read for ``'raw_diff'`` (the diff text)
                and ``'files'`` (list of changed file names).
            signals: Not used by this rule.

        Returns:
            ``{"file": ..., "patterns": [...]}`` where ``patterns`` holds the
            matched constructor fragments (at most one per added line), or
            ``None`` when nothing matches.
        """
        # A missing or None diff is treated as empty text.
        raw_diff = diff_data.get('raw_diff') or ""

        # The charset look-ahead only sees the remainder of the same line, so a
        # charset argument placed on a following line is not recognised.
        matches = self._added_stream.findall(raw_diff)
        if matches:
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown", "patterns": matches}

        return None
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class IOStreamChainingRule(BaseRule):
    """Detect resource leaks in chained IO stream construction.

    Line-based heuristic over the raw diff text: reports an added line that
    constructs a ``*InputStream`` / ``*OutputStream`` / ``*Reader`` /
    ``*Writer`` whose argument list contains a no-argument ``.get...()`` call,
    e.g. ``new BufferedReader(socket.getInputStream())``.
    """

    def __init__(self):
        # re.MULTILINE makes '^' anchor at the start of every diff line;
        # without it only the first line of the diff could ever match.
        self._chaining_pattern = re.compile(
            r'^\+.*new\s+\w+(?:InputStream|OutputStream|Reader|Writer)\s*\([^)]*\.get\s*\w*\s*\(\s*\)',
            re.MULTILINE,
        )

    @property
    def id(self) -> str:
        return "resource.stream_chaining_leak"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "IO stream created from method call result, intermediate stream may leak"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding when an added line wraps a getter result in a stream.

        Args:
            diff_data: Mapping that is read for ``'raw_diff'`` (the diff text)
                and ``'files'`` (list of changed file names).
            signals: Not used by this rule.

        Returns:
            ``{"file": ...}`` naming the first changed file (or ``"unknown"``),
            or ``None`` when the pattern does not occur.
        """
        # A missing or None diff is treated as empty text.
        raw_diff = diff_data.get('raw_diff') or ""

        if self._chaining_pattern.search(raw_diff):
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown"}

        return None
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class ExecutorServiceShutdownRule(BaseRule):
    """Detect thread pools that are created but never shut down.

    Line-based heuristic over the raw diff text: an added line that creates an
    executor is reported unless the same diff also adds a ``.shutdown()`` or
    ``.shutdownNow()`` call.
    """

    def __init__(self):
        # re.MULTILINE makes '^' anchor at the start of every diff line;
        # without it only the first line of the diff could ever match.
        self._executor_creation = re.compile(
            r'^\+.*(?:Executors\.(newFixedThreadPool|newCachedThreadPool|newSingleThreadExecutor)|new\s+ThreadPoolExecutor)\s*\(',
            re.MULTILINE,
        )
        # shutdownNow() also terminates the pool, so it counts as a shutdown.
        self._executor_shutdown = re.compile(
            r'^\+.*\.shutdown(?:Now)?\s*\(\s*\)',
            re.MULTILINE,
        )

    @property
    def id(self) -> str:
        return "resource.executor_shutdown_missing"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "ExecutorService created without shutdown, threads may not terminate"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Return a finding when an executor is created with no visible shutdown.

        Args:
            diff_data: Mapping that is read for ``'raw_diff'`` (the diff text)
                and ``'files'`` (list of changed file names).
            signals: Not used by this rule.

        Returns:
            ``{"file": ...}`` naming the first changed file (or ``"unknown"``),
            or ``None`` when no executor is created or a shutdown is present.
        """
        # A missing or None diff is treated as empty text.
        raw_diff = diff_data.get('raw_diff') or ""

        if not self._executor_creation.search(raw_diff):
            return None

        # NOTE: the shutdown check is diff-wide, not tied to a specific executor.
        if self._executor_shutdown.search(raw_diff):
            return None

        files = diff_data.get('files', [])
        return {"file": files[0] if files else "unknown"}
|
rules/yaml_adapter.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import fnmatch
|
|
3
|
+
import os
|
|
4
|
+
from typing import Dict, Any, List, Optional
|
|
5
|
+
from core.rule_base import Rule
|
|
6
|
+
|
|
7
|
+
def _match_file_pattern(filename: str, pattern: str) -> bool:
    """Match a file path against a glob pattern, supporting ``**`` recursion.

    Supported forms:

    * ``**`` or ``*``: matches every file.
    * ``**/dir/**``: matches when ``dir`` is a complete directory component
      (or component sequence) somewhere in the path.
    * ``**/rest``: matches ``rest`` at the top level or below any directory,
      e.g. ``**/*.py`` matches both ``setup.py`` and ``pkg/mod.py``.
    * anything else: plain ``fnmatch`` matching, where ``*`` also matches
      path separators.

    Args:
        filename: Path of the file; backslashes are treated as separators.
        pattern: Glob pattern written with forward slashes.

    Returns:
        True if the path matches the pattern.
    """
    if pattern in ('**', '*'):
        return True

    # Compare on forward slashes so the same patterns work for Windows paths.
    path = filename.replace('\\', '/')

    if pattern.startswith('**/'):
        remainder = pattern[3:]

        # "**/**" places no constraint at all.
        if remainder == '**':
            return True

        # "**/auth/**": require "auth" as a whole path component.  Anchoring
        # on the surrounding slashes keeps "oauth/x.py" from matching.
        if remainder.endswith('/**'):
            directory = remainder[:-3]
            return fnmatch.fnmatch('/' + path, '*/' + directory + '/*')

        # "**/name" or "**/*.ext": "**/" stands for zero or more directories,
        # so try the remainder against the whole path and below any directory.
        return (
            fnmatch.fnmatch(path, remainder)
            or fnmatch.fnmatch(path, '*/' + remainder)
        )

    return fnmatch.fnmatch(path, pattern)
|
|
24
|
+
|
|
25
|
+
class YamlRule(Rule):
    """
    Adapter to treat legacy YAML rules as first-class Plugins.
    Supports full rule metadata: category, confidence, tags, enabled, language, scope.

    Every property reads its value from the rule dictionary passed to the
    constructor and falls back to a default when the key is absent.
    """
    def __init__(self, rule_dict: Dict[str, Any]):
        self._rule_dict = rule_dict
        self._file_pattern = self._rule_dict.get('file')
        self._compiled_match = None
        content_regex = self._rule_dict.get('match')
        if content_regex:
            # MULTILINE so '^'/'$' in the rule's regex anchor on each diff line.
            flags = re.MULTILINE
            if self._rule_dict.get('case_insensitive', False):
                flags |= re.IGNORECASE
            try:
                self._compiled_match = re.compile(content_regex, flags)
            except (re.error, TypeError):
                # An invalid (or non-string) pattern disables content matching:
                # evaluate() then never reports a regex-based finding.
                self._compiled_match = None

    @property
    def id(self) -> str:
        return self._rule_dict.get('id', 'unknown')

    @property
    def severity(self) -> str:
        return self._rule_dict.get('severity', 'low')

    @property
    def impact(self) -> str:
        return self._rule_dict.get('impact', 'general')

    @property
    def rationale(self) -> str:
        return self._rule_dict.get('rationale', '')

    @property
    def title(self) -> str:
        return self._rule_dict.get('title', self.id)

    @property
    def category(self) -> str:
        return self._rule_dict.get('category', 'general')

    @property
    def confidence(self) -> float:
        # Non-numeric values fall back to full confidence.
        v = self._rule_dict.get('confidence', 1.0)
        if isinstance(v, (int, float)):
            return float(v)
        return 1.0

    @property
    def tags(self) -> List[str]:
        # Anything other than a list/tuple is treated as "no tags".
        t = self._rule_dict.get('tags', [])
        return list(t) if isinstance(t, (list, tuple)) else []

    @property
    def enabled(self) -> bool:
        # Only a literal True (or a missing key) enables the rule.
        return self._rule_dict.get('enabled', True) is True

    @property
    def language(self) -> str:
        return self._rule_dict.get('language', '*')

    @property
    def scope(self) -> str:
        return self._rule_dict.get('scope', self._rule_dict.get('file', '**'))

    @property
    def package(self) -> Optional[Dict[str, Any]]:
        """CVE rules: package.ecosystem + package.name are used for exact matching against dependency_versions."""
        return self._rule_dict.get('package')

    @property
    def versions(self) -> Optional[Dict[str, Any]]:
        """CVE rules: versions.introduced / versions.fixed define the affected version range."""
        return self._rule_dict.get('versions')

    @property
    def status(self) -> str:
        return str(self._rule_dict.get('status', 'stable')).lower()

    @property
    def is_blocking(self) -> bool:
        # An explicit setting always wins.
        explicit = self._rule_dict.get('is_blocking')
        if explicit is not None:
            return bool(explicit)

        # Absolute critical rules are blocking by default
        if self.rule_type == 'absolute' and self.severity == 'critical':
            return True
        return False

    @property
    def rule_type(self) -> str:
        """
        Determines if the rule is 'regression' or 'absolute'.
        Defaults to 'regression' if action is 'removed' or 'changed'
        (also 'deleted' / 'modified'); otherwise 'absolute'.
        """
        explicit = self._rule_dict.get('rule_type')
        if explicit:
            return str(explicit)

        # "action:" with no value loads as None from YAML; treat it as absent
        # instead of failing on None.lower().
        action = str(self._rule_dict.get('action') or '').lower()
        if action in ['removed', 'deleted', 'changed', 'modified']:
            return 'regression'

        return 'absolute'

    def evaluate(self, diff_data: Dict[str, Any], ast_signals: List[Any]) -> Optional[Dict[str, Any]]:
        """Evaluate the rule against a diff and its AST signals.

        If the rule declares a ``signal``, only signal matching is performed:
        the first signal with that id that also satisfies the rule's optional
        ``action`` and ``file`` constraints produces the finding. Otherwise the
        rule falls back to file-pattern matching plus an optional content regex.

        Args:
            diff_data: Mapping read for ``'files'``, ``'raw_diff'`` and, when
                ``'raw_diff'`` is empty, ``'file_patches'`` (items with a
                ``'patch'`` entry).
            ast_signals: Objects exposing ``id``, ``action`` and ``file``.

        Returns:
            ``{"file": ...}`` on a match, otherwise ``None``.
        """
        # Logic extracted from old RuleEngine._match_rule

        # 0. Check AST Signals (New First-Class Check)
        target_signal = self._rule_dict.get('signal')
        if target_signal:
            rule_action = self._rule_dict.get('action')
            rule_file_pattern = self._rule_dict.get('file')
            for sig in ast_signals or []:
                if sig.id != target_signal:
                    continue

                # Action constraint, when the rule declares one.
                if rule_action and rule_action != sig.action:
                    continue

                # File constraint is checked against the signal's own file.
                if rule_file_pattern and not _match_file_pattern(sig.file, rule_file_pattern):
                    continue

                return {"file": sig.file}

            # If we are looking for a signal but didn't find it, rule fails
            return None

        # Fallback to old regex/file matching logic

        # 1. Check File Pattern
        changed_files = diff_data.get('files', [])
        if self._file_pattern:
            matched_files = [
                f for f in changed_files
                if _match_file_pattern(f, self._file_pattern)
            ]
            if not matched_files:
                return None  # File pattern constraint failed
        else:
            # If no file pattern, consider all files
            matched_files = changed_files

        # 2. Check Content Match (Regex)
        if self._rule_dict.get('match'):
            # The pattern failed to compile, so the rule can never match.
            if not self._compiled_match:
                return None

            # Prefer the raw diff; otherwise stitch the per-file patches
            # together, skipping entries without patch text.
            raw_diff = diff_data.get('raw_diff') or ''.join(
                fp.get('patch') or '' for fp in diff_data.get('file_patches', [])
            )

            # NOTE: the regex runs over the whole diff, not only the patches of
            # the files that satisfied the file pattern.
            if not self._compiled_match.search(raw_diff):
                return None

        # Return the first matched file for reporting purposes
        file_report = matched_files[0] if matched_files else "unknown"
        return {"file": file_report}
|