diffsense 2.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. adapters/__init__.py +0 -0
  2. adapters/base.py +27 -0
  3. adapters/github_adapter.py +164 -0
  4. adapters/gitlab_adapter.py +207 -0
  5. adapters/local_adapter.py +136 -0
  6. banner.py +71 -0
  7. cli.py +606 -0
  8. config/__init__.py +1 -0
  9. config/rules.yaml +371 -0
  10. core/__init__.py +235 -0
  11. core/ast_detector.py +853 -0
  12. core/change.py +46 -0
  13. core/composer.py +93 -0
  14. core/evaluator.py +15 -0
  15. core/ignore_manager.py +71 -0
  16. core/knowledge.py +77 -0
  17. core/parser.py +181 -0
  18. core/parser_manager.py +104 -0
  19. core/quality_manager.py +117 -0
  20. core/renderer.py +197 -0
  21. core/rule_base.py +98 -0
  22. core/rule_runtime.py +103 -0
  23. core/rules.py +718 -0
  24. core/run_config.py +85 -0
  25. core/semantic_diff.py +359 -0
  26. core/signal_model.py +21 -0
  27. core/signals_registry.py +62 -0
  28. diffsense-2.2.12.dist-info/METADATA +18 -0
  29. diffsense-2.2.12.dist-info/RECORD +58 -0
  30. diffsense-2.2.12.dist-info/WHEEL +5 -0
  31. diffsense-2.2.12.dist-info/entry_points.txt +3 -0
  32. diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
  33. diffsense-2.2.12.dist-info/top_level.txt +11 -0
  34. diffsense_mcp/__init__.py +1 -0
  35. diffsense_mcp/launcher.py +28 -0
  36. diffsense_mcp/server.py +687 -0
  37. governance/lifecycle.py +54 -0
  38. main.py +318 -0
  39. rules/__init__.py +246 -0
  40. rules/api_compatibility.py +372 -0
  41. rules/collection_handling.py +349 -0
  42. rules/concurrency.py +194 -0
  43. rules/concurrency_adapter.py +250 -0
  44. rules/cross_language_adapter.py +444 -0
  45. rules/exception_handling.py +320 -0
  46. rules/go_rules.py +401 -0
  47. rules/null_safety.py +301 -0
  48. rules/resource_management.py +222 -0
  49. rules/yaml_adapter.py +195 -0
  50. run_audit.py +478 -0
  51. sdk/cpp_adapter.py +238 -0
  52. sdk/go_adapter.py +199 -0
  53. sdk/java_adapter.py +199 -0
  54. sdk/javascript_adapter.py +229 -0
  55. sdk/language_adapter.py +313 -0
  56. sdk/python_adapter.py +195 -0
  57. sdk/rule.py +63 -0
  58. sdk/signal.py +14 -0
@@ -0,0 +1,222 @@
1
+ import re
2
+ from typing import Dict, Any, List, Optional
3
+ from sdk.rule import BaseRule
4
+ from sdk.signal import Signal
5
+
6
+
7
class CloseableResourceLeakRule(BaseRule):
    """Detect resources (Stream, Connection, etc.) that are not properly closed.

    The rule inspects the added lines (those starting with ``+``) of the raw
    diff text. It reports when a closeable resource is constructed and the
    diff adds neither a try-with-resources header nor a ``.close()`` call.
    """

    def __init__(self):
        self._closeable_types = [
            'InputStream', 'OutputStream', 'Reader', 'Writer',
            'Socket', 'ServerSocket', 'Connection', 'Statement',
            'ResultSet', 'BufferedReader', 'BufferedWriter'
        ]
        type_alternation = '|'.join(self._closeable_types)

        # re.MULTILINE is required: the patterns are anchored with ``^\+`` and
        # are applied to the whole multi-line diff, so without it ``^`` would
        # only match at the very first character of the diff text.
        # ``\w*`` (not ``\w+``) lets bare type names such as ``new Socket(``
        # match as well as prefixed ones such as ``new FileInputStream(``.
        self._added_pattern = re.compile(
            r'^\+.*\b(new\s+\w*(?:' + type_alternation + r')\s*\()',
            re.MULTILINE,
        )
        self._try_with_resources = re.compile(
            r'^\+.*try\s*\([^)]*(?:' + type_alternation + r')',
            re.MULTILINE,
        )
        self._finally_close = re.compile(r'^\+.*\.close\(\)', re.MULTILINE)

    @property
    def id(self) -> str:
        return "resource.closeable_leak"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Closeable resources opened but not closed in try-with-resources or finally block"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Evaluate the rule against a diff.

        Args:
            diff_data: Mapping from which ``'raw_diff'`` (diff text) and
                ``'files'`` (list of changed files) are read.
            signals: Not used by this rule.

        Returns:
            ``{"file": ..., "resources": [...]}`` when a closeable resource is
            added without any added close/try-with-resources, else ``None``.
            ``"file"`` is the first entry of ``diff_data['files']`` or
            ``"unknown"`` when that list is empty.
        """
        raw_diff = diff_data.get('raw_diff', "")

        # Check whether any closeable resource construction was added.
        added_resources = self._added_pattern.findall(raw_diff)
        if not added_resources:
            return None

        # Check for an added try-with-resources header or an added close() call.
        has_try_resources = bool(self._try_with_resources.search(raw_diff))
        has_finally_close = bool(self._finally_close.search(raw_diff))

        # Report only when neither form of cleanup appears in the added lines.
        if not has_try_resources and not has_finally_close:
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown", "resources": added_resources}

        return None
58
+
59
+
60
class DatabaseConnectionLeakRule(BaseRule):
    """Detect database connection leak risk.

    Reports when the added lines of a diff obtain a database connection (or
    create a connection pool) and no added line closes a variable named
    ``connection`` / ``conn`` (case-insensitive).
    """

    def __init__(self):
        self._conn_patterns = [
            r'DriverManager\.getConnection',
            # Accept both ``DataSource.getConnection`` (e.g. ``myDataSource``)
            # and the common lower-camel variable name ``dataSource``.
            r'[Dd]ataSource\.getConnection',
            r'new\s+HikariDataSource',
            r'new\s+BasicDataSource'
        ]
        # re.MULTILINE makes ``^\+`` match at the start of every diff line
        # rather than only at the start of the whole diff text.
        self._added_conn = re.compile(
            r'^\+.*(' + '|'.join(self._conn_patterns) + r')',
            re.MULTILINE,
        )
        self._conn_close = re.compile(
            r'^\+.*(?:connection|conn)\.close\(\)',
            re.IGNORECASE | re.MULTILINE,
        )

    @property
    def id(self) -> str:
        return "resource.database_connection_leak"

    @property
    def severity(self) -> str:
        return "critical"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "Database connection opened without proper close, may cause connection pool exhaustion"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Evaluate the rule against a diff.

        Args:
            diff_data: Mapping from which ``'raw_diff'`` and ``'files'`` are read.
            signals: Not used by this rule.

        Returns:
            ``{"file": ...}`` (first changed file, or ``"unknown"``) when a
            connection is acquired without an added close call, else ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if self._added_conn.search(raw_diff):
            if not self._conn_close.search(raw_diff):
                files = diff_data.get('files', [])
                return {"file": files[0] if files else "unknown"}

        return None
102
+
103
+
104
class StreamWrapperRule(BaseRule):
    """Detect stream wrappers created without an explicit encoding.

    Flags added lines that construct ``InputStreamReader`` /
    ``OutputStreamWriter`` with no ``Charset``/``charset`` mentioned later on
    the same line, and any added ``FileReader`` / ``FileWriter`` construction.
    """

    def __init__(self):
        # The lookahead alternatives are grouped so that ``.*`` applies to all
        # of them; ungrouped, only ``Charset`` was searched across the line and
        # a lowercase ``charset`` argument was rejected only when it came first.
        # ``[Cc]harset`` also covers ``StandardCharsets``.
        # NOTE(review): a charset passed as a string literal (e.g. "UTF-8")
        # is still flagged by this pattern - confirm whether that is intended.
        self._unencoded_patterns = [
            r'new\s+InputStreamReader\s*\(\s*(?!.*[Cc]harset)',
            r'new\s+OutputStreamWriter\s*\(\s*(?!.*[Cc]harset)',
            r'new\s+FileReader\s*\(',
            r'new\s+FileWriter\s*\('
        ]
        # re.MULTILINE makes ``^\+`` match at the start of every diff line
        # rather than only at the start of the whole diff text.
        self._added_stream = re.compile(
            r'^\+.*(' + '|'.join(self._unencoded_patterns) + r')',
            re.MULTILINE,
        )

    @property
    def id(self) -> str:
        return "resource.stream_encoding_missing"

    @property
    def severity(self) -> str:
        return "medium"

    @property
    def impact(self) -> str:
        return "maintenance"

    @property
    def rationale(self) -> str:
        return "Stream reader/writer created without explicit charset, uses platform default encoding"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Evaluate the rule against a diff.

        Args:
            diff_data: Mapping from which ``'raw_diff'`` and ``'files'`` are read.
            signals: Not used by this rule.

        Returns:
            ``{"file": ..., "patterns": [...]}`` where ``patterns`` holds the
            matched constructor fragments, or ``None`` when nothing matched.
        """
        raw_diff = diff_data.get('raw_diff', "")

        matches = self._added_stream.findall(raw_diff)
        if matches:
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown", "patterns": matches}

        return None
145
+
146
+
147
class IOStreamChainingRule(BaseRule):
    """Detect resource leaks in chained IO stream construction.

    Flags added lines where a stream/reader/writer is constructed directly
    from the result of a no-argument ``.get...()`` call, for example
    ``new InputStreamReader(socket.getInputStream())``.
    """

    def __init__(self):
        # re.MULTILINE makes ``^\+`` match at the start of every diff line
        # rather than only at the start of the whole diff text.
        self._chaining_pattern = re.compile(
            r'^\+.*new\s+\w+(?:InputStream|OutputStream|Reader|Writer)\s*\([^)]*\.get\s*\w*\s*\(\s*\)',
            re.MULTILINE,
        )

    @property
    def id(self) -> str:
        return "resource.stream_chaining_leak"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "IO stream created from method call result, intermediate stream may leak"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Evaluate the rule against a diff.

        Args:
            diff_data: Mapping from which ``'raw_diff'`` and ``'files'`` are read.
            signals: Not used by this rule.

        Returns:
            ``{"file": ...}`` (first changed file, or ``"unknown"``) when a
            chained stream construction is added, else ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if self._chaining_pattern.search(raw_diff):
            files = diff_data.get('files', [])
            return {"file": files[0] if files else "unknown"}

        return None
183
+
184
+
185
class ExecutorServiceShutdownRule(BaseRule):
    """Detect thread pools that are not properly shut down.

    Reports when the added lines of a diff create an executor and no added
    line calls ``.shutdown()`` or ``.shutdownNow()``.
    """

    def __init__(self):
        # re.MULTILINE makes ``^\+`` match at the start of every diff line
        # rather than only at the start of the whole diff text.
        self._executor_creation = re.compile(
            r'^\+.*(?:Executors\.(newFixedThreadPool|newCachedThreadPool|newSingleThreadExecutor)|new\s+ThreadPoolExecutor)\s*\(',
            re.MULTILINE,
        )
        # ``shutdownNow()`` also terminates the executor, so it counts as a
        # shutdown call alongside ``shutdown()``.
        self._executor_shutdown = re.compile(
            r'^\+.*\.shutdown(?:Now)?\s*\(\s*\)',
            re.MULTILINE,
        )

    @property
    def id(self) -> str:
        return "resource.executor_shutdown_missing"

    @property
    def severity(self) -> str:
        return "high"

    @property
    def impact(self) -> str:
        return "runtime"

    @property
    def rationale(self) -> str:
        return "ExecutorService created without shutdown, threads may not terminate"

    @property
    def rule_type(self) -> str:
        return "absolute"

    def evaluate(self, diff_data: Dict[str, Any], signals: List[Signal]) -> Optional[Dict[str, Any]]:
        """Evaluate the rule against a diff.

        Args:
            diff_data: Mapping from which ``'raw_diff'`` and ``'files'`` are read.
            signals: Not used by this rule.

        Returns:
            ``{"file": ...}`` (first changed file, or ``"unknown"``) when an
            executor is created without an added shutdown call, else ``None``.
        """
        raw_diff = diff_data.get('raw_diff', "")

        if self._executor_creation.search(raw_diff):
            if not self._executor_shutdown.search(raw_diff):
                files = diff_data.get('files', [])
                return {"file": files[0] if files else "unknown"}

        return None
rules/yaml_adapter.py ADDED
@@ -0,0 +1,195 @@
1
+ import re
2
+ import fnmatch
3
+ import os
4
+ from typing import Dict, Any, List, Optional
5
+ from core.rule_base import Rule
6
+
7
+ def _match_file_pattern(filename: str, pattern: str) -> bool:
8
+ """Match file against pattern, supporting ** for recursive matching."""
9
+ if pattern == '**' or pattern == '*':
10
+ return True
11
+
12
+ # Handle **/*.py style patterns (ends with extension)
13
+ if pattern.startswith('**/') and '*' not in pattern[3:]:
14
+ # Convert **/*.py to *.py
15
+ pattern = pattern[3:]
16
+
17
+ # Handle **/auth/** style patterns (match directory in path)
18
+ if pattern.startswith('**/') and pattern.endswith('/**'):
19
+ # Extract the directory name and check if it's in the path
20
+ dir_name = pattern[3:-2] # Remove **/ and /**
21
+ return dir_name in filename
22
+
23
+ return fnmatch.fnmatch(filename, pattern)
24
+
25
class YamlRule(Rule):
    """
    Adapter to treat legacy YAML rules as first-class Plugins.
    Supports full rule metadata: category, confidence, tags, enabled, language, scope.

    Every property reads its value from the rule dictionary given to the
    constructor and falls back to a default when the key is absent.
    """

    def __init__(self, rule_dict: Dict[str, Any]):
        """Store the rule definition and pre-compile its content regex.

        Args:
            rule_dict: Parsed YAML rule. The ``'file'`` and ``'match'`` keys
                are read here; ``'case_insensitive'`` adds ``re.IGNORECASE``.
        """
        self._rule_dict = rule_dict
        self._file_pattern = self._rule_dict.get('file')
        self._compiled_match = None
        content_regex = self._rule_dict.get('match')
        if content_regex:
            flags = re.MULTILINE
            if self._rule_dict.get('case_insensitive', False):
                flags |= re.IGNORECASE
            try:
                self._compiled_match = re.compile(content_regex, flags)
            except (re.error, TypeError):
                # An invalid or non-string pattern disables content matching
                # for this rule (evaluate() then returns None) instead of
                # aborting rule loading.
                self._compiled_match = None

    @property
    def id(self) -> str:
        return self._rule_dict.get('id', 'unknown')

    @property
    def severity(self) -> str:
        return self._rule_dict.get('severity', 'low')

    @property
    def impact(self) -> str:
        return self._rule_dict.get('impact', 'general')

    @property
    def rationale(self) -> str:
        return self._rule_dict.get('rationale', '')

    @property
    def title(self) -> str:
        return self._rule_dict.get('title', self.id)

    @property
    def category(self) -> str:
        return self._rule_dict.get('category', 'general')

    @property
    def confidence(self) -> float:
        """Return the numeric ``confidence`` value, or 1.0 when absent or non-numeric."""
        v = self._rule_dict.get('confidence', 1.0)
        if isinstance(v, (int, float)):
            return float(v)
        return 1.0

    @property
    def tags(self) -> List[str]:
        """Return ``tags`` as a list; anything that is not a list/tuple yields ``[]``."""
        t = self._rule_dict.get('tags', [])
        return list(t) if isinstance(t, (list, tuple)) else []

    @property
    def enabled(self) -> bool:
        # Only the literal boolean True (or an absent key) enables the rule.
        return self._rule_dict.get('enabled', True) is True

    @property
    def language(self) -> str:
        return self._rule_dict.get('language', '*')

    @property
    def scope(self) -> str:
        return self._rule_dict.get('scope', self._rule_dict.get('file', '**'))

    @property
    def package(self) -> Optional[Dict[str, Any]]:
        """CVE rules: package.ecosystem + package.name are used for exact matching against dependency_versions."""
        return self._rule_dict.get('package')

    @property
    def versions(self) -> Optional[Dict[str, Any]]:
        """CVE rules: versions.introduced / versions.fixed define the affected version range."""
        return self._rule_dict.get('versions')

    @property
    def status(self) -> str:
        return str(self._rule_dict.get('status', 'stable')).lower()

    @property
    def is_blocking(self) -> bool:
        """Return the explicit ``is_blocking`` flag, else True only for absolute critical rules."""
        explicit = self._rule_dict.get('is_blocking')
        if explicit is not None:
            return bool(explicit)

        # Absolute critical rules are blocking by default
        if self.rule_type == 'absolute' and self.severity == 'critical':
            return True
        return False

    @property
    def rule_type(self) -> str:
        """
        Determines if the rule is 'regression' or 'absolute'.
        Defaults to 'regression' if action is 'removed' or 'changed'.
        """
        explicit = self._rule_dict.get('rule_type')
        if explicit:
            return str(explicit)

        # ``or ''`` guards against an explicit ``action: null`` in the YAML,
        # which would otherwise raise AttributeError on ``.lower()``.
        action = str(self._rule_dict.get('action') or '').lower()
        if action in ['removed', 'deleted', 'changed', 'modified']:
            return 'regression'

        return 'absolute'

    def evaluate(self, diff_data: Dict[str, Any], ast_signals: List[Any]) -> Optional[Dict[str, Any]]:
        """Evaluate the rule against a diff and its AST signals.

        When the rule names a ``signal``, only the signals are consulted: the
        first one with that id that also satisfies the optional ``action`` and
        ``file`` constraints is reported. Otherwise the rule falls back to
        file-pattern plus regex matching on the diff text.

        Args:
            diff_data: Mapping from which ``'files'``, ``'raw_diff'`` and
                ``'file_patches'`` (items with a ``'patch'`` entry) are read.
            ast_signals: Objects whose ``id``, ``action`` and ``file``
                attributes are read.

        Returns:
            ``{"file": ...}`` for a match, otherwise ``None``.
        """
        # Logic extracted from old RuleEngine._match_rule

        # 0. Check AST Signals (New First-Class Check)
        target_signal = self._rule_dict.get('signal')
        if target_signal:
            rule_action = self._rule_dict.get('action')
            rule_file_pattern = self._rule_dict.get('file')
            for sig in ast_signals or []:
                if sig.id != target_signal:
                    continue
                # Check action constraint if present in rule
                if rule_action and rule_action != sig.action:
                    continue  # Action mismatch
                # Check the file constraint against the signal's own file
                if rule_file_pattern:
                    if rule_file_pattern != "**" and not _match_file_pattern(sig.file, rule_file_pattern):
                        continue
                return {"file": sig.file}

            # If we are looking for a signal but didn't find it, rule fails
            return None

        # Fallback to old regex/file matching logic

        # 1. Check File Pattern
        changed_files = diff_data.get('files') or []
        if self._file_pattern:
            matched_files = [
                f for f in changed_files
                if _match_file_pattern(f, self._file_pattern)
            ]
            if not matched_files:
                return None  # File pattern constraint failed
        else:
            # If no file pattern, consider all files
            matched_files = changed_files

        # 2. Check Content Match (Regex)
        if self._rule_dict.get('match'):
            # A pattern that failed to compile can never match.
            if not self._compiled_match:
                return None

            # Get raw diff from the raw_diff field, or rebuild it from file_patches
            raw_diff = diff_data.get('raw_diff') or ''
            if not raw_diff:
                # join() avoids repeated string concatenation; ``or ''``
                # tolerates entries whose patch is missing or None.
                raw_diff = ''.join(
                    fp.get('patch') or ''
                    for fp in (diff_data.get('file_patches') or [])
                )

            if not self._compiled_match.search(raw_diff):
                return None

        # Return the first matched file for reporting purposes
        file_report = matched_files[0] if matched_files else "unknown"
        return {"file": file_report}