diffsense 2.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapters/__init__.py +0 -0
- adapters/base.py +27 -0
- adapters/github_adapter.py +164 -0
- adapters/gitlab_adapter.py +207 -0
- adapters/local_adapter.py +136 -0
- banner.py +71 -0
- cli.py +606 -0
- config/__init__.py +1 -0
- config/rules.yaml +371 -0
- core/__init__.py +235 -0
- core/ast_detector.py +853 -0
- core/change.py +46 -0
- core/composer.py +93 -0
- core/evaluator.py +15 -0
- core/ignore_manager.py +71 -0
- core/knowledge.py +77 -0
- core/parser.py +181 -0
- core/parser_manager.py +104 -0
- core/quality_manager.py +117 -0
- core/renderer.py +197 -0
- core/rule_base.py +98 -0
- core/rule_runtime.py +103 -0
- core/rules.py +718 -0
- core/run_config.py +85 -0
- core/semantic_diff.py +359 -0
- core/signal_model.py +21 -0
- core/signals_registry.py +62 -0
- diffsense-2.2.12.dist-info/METADATA +18 -0
- diffsense-2.2.12.dist-info/RECORD +58 -0
- diffsense-2.2.12.dist-info/WHEEL +5 -0
- diffsense-2.2.12.dist-info/entry_points.txt +3 -0
- diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
- diffsense-2.2.12.dist-info/top_level.txt +11 -0
- diffsense_mcp/__init__.py +1 -0
- diffsense_mcp/launcher.py +28 -0
- diffsense_mcp/server.py +687 -0
- governance/lifecycle.py +54 -0
- main.py +318 -0
- rules/__init__.py +246 -0
- rules/api_compatibility.py +372 -0
- rules/collection_handling.py +349 -0
- rules/concurrency.py +194 -0
- rules/concurrency_adapter.py +250 -0
- rules/cross_language_adapter.py +444 -0
- rules/exception_handling.py +320 -0
- rules/go_rules.py +401 -0
- rules/null_safety.py +301 -0
- rules/resource_management.py +222 -0
- rules/yaml_adapter.py +195 -0
- run_audit.py +478 -0
- sdk/cpp_adapter.py +238 -0
- sdk/go_adapter.py +199 -0
- sdk/java_adapter.py +199 -0
- sdk/javascript_adapter.py +229 -0
- sdk/language_adapter.py +313 -0
- sdk/python_adapter.py +195 -0
- sdk/rule.py +63 -0
- sdk/signal.py +14 -0
core/rules.py
ADDED
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import yaml
|
|
3
|
+
import fnmatch
|
|
4
|
+
import time
|
|
5
|
+
from typing import Dict, List, Any, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
def _version_segments(v: str) -> Tuple[int, ...]:
|
|
8
|
+
"""将版本字符串转为可比较的整数元组,便于区间判断。"""
|
|
9
|
+
v = (v or "").strip()
|
|
10
|
+
if not v:
|
|
11
|
+
return (0,)
|
|
12
|
+
parts = []
|
|
13
|
+
for s in v.replace("-", ".").split("."):
|
|
14
|
+
s = "".join(c for c in s if c.isdigit())
|
|
15
|
+
parts.append(int(s) if s else 0)
|
|
16
|
+
return tuple(parts) if parts else (0,)
|
|
17
|
+
|
|
18
|
+
def _version_in_cve_range(current: str, introduced: List[str], fixed: List[str]) -> bool:
    """Report whether ``current`` falls inside a CVE's affected version range.

    Convention: ``introduced`` lists first-affected versions (``>=`` means
    possibly affected) and ``fixed`` lists first-fixed versions (``<`` means
    still affected). The affected range is
    ``[min(introduced), min(fixed))``. A missing bound leaves that side of
    the range open, and with neither bound the version counts as affected.

    Args:
        current: Version string of the dependency in use.
        introduced: First-affected version strings; empty entries are ignored.
        fixed: First-fixed version strings; empty entries are ignored.

    Returns:
        ``True`` when ``current`` is inside the affected range.
    """
    if not (introduced or fixed):
        return True

    current_key = _version_segments(current)
    lower_bounds = [_version_segments(ver) for ver in (introduced or []) if ver]
    upper_bounds = [_version_segments(ver) for ver in (fixed or []) if ver]

    # Older than the earliest affected release.
    if lower_bounds and current_key < min(lower_bounds):
        return False

    # Inside the range unless the earliest fix has already been reached.
    reached_fix = bool(upper_bounds) and current_key >= min(upper_bounds)
    return not reached_fix
|
|
34
|
+
from core.rule_base import Rule
|
|
35
|
+
from core.quality_manager import RuleQualityManager
|
|
36
|
+
from core.parser_manager import ParserManager
|
|
37
|
+
|
|
38
|
+
try:
|
|
39
|
+
from importlib.metadata import entry_points
|
|
40
|
+
except ImportError:
|
|
41
|
+
entry_points = None # type: ignore
|
|
42
|
+
from core.ignore_manager import IgnoreManager
|
|
43
|
+
try:
|
|
44
|
+
from ..governance.lifecycle import LifecycleManager
|
|
45
|
+
except ImportError:
|
|
46
|
+
from governance.lifecycle import LifecycleManager
|
|
47
|
+
from rules.concurrency import (
|
|
48
|
+
ThreadPoolSemanticChangeRule,
|
|
49
|
+
ConcurrencyRegressionRule,
|
|
50
|
+
ThreadSafetyRemovalRule,
|
|
51
|
+
LatchMisuseRule
|
|
52
|
+
)
|
|
53
|
+
from rules.yaml_adapter import YamlRule
|
|
54
|
+
|
|
55
|
+
# 导入新增的规则模块(向后兼容:如果模块不存在不会报错)
|
|
56
|
+
try:
|
|
57
|
+
from rules.resource_management import (
|
|
58
|
+
CloseableResourceLeakRule,
|
|
59
|
+
DatabaseConnectionLeakRule,
|
|
60
|
+
StreamWrapperRule,
|
|
61
|
+
IOStreamChainingRule,
|
|
62
|
+
ExecutorServiceShutdownRule,
|
|
63
|
+
)
|
|
64
|
+
RESOURCE_RULES_AVAILABLE = True
|
|
65
|
+
except ImportError:
|
|
66
|
+
RESOURCE_RULES_AVAILABLE = False
|
|
67
|
+
|
|
68
|
+
try:
|
|
69
|
+
from rules.exception_handling import (
|
|
70
|
+
SwallowedExceptionRule,
|
|
71
|
+
GenericExceptionRule,
|
|
72
|
+
ThrowRuntimeExceptionRule,
|
|
73
|
+
ThrowsClauseRemovedRule,
|
|
74
|
+
FinallyBlockMissingRule,
|
|
75
|
+
ExceptionLoggingRule,
|
|
76
|
+
)
|
|
77
|
+
EXCEPTION_RULES_AVAILABLE = True
|
|
78
|
+
except ImportError:
|
|
79
|
+
EXCEPTION_RULES_AVAILABLE = False
|
|
80
|
+
|
|
81
|
+
try:
|
|
82
|
+
from rules.null_safety import (
|
|
83
|
+
NullReturnIgnoredRule,
|
|
84
|
+
OptionalUnwrapRule,
|
|
85
|
+
AutoboxingNPERule,
|
|
86
|
+
ChainedMethodCallNPERule,
|
|
87
|
+
ArrayIndexOutOfBoundsRule,
|
|
88
|
+
StringConcatNPERule,
|
|
89
|
+
)
|
|
90
|
+
NULL_SAFETY_RULES_AVAILABLE = True
|
|
91
|
+
except ImportError:
|
|
92
|
+
NULL_SAFETY_RULES_AVAILABLE = False
|
|
93
|
+
|
|
94
|
+
try:
|
|
95
|
+
from rules.collection_handling import (
|
|
96
|
+
RawTypeUsageRule,
|
|
97
|
+
UnmodifiableCollectionRule,
|
|
98
|
+
ConcurrentModificationRule,
|
|
99
|
+
MapComputeRule,
|
|
100
|
+
StreamCollectorRule,
|
|
101
|
+
ImmutableCollectionRule,
|
|
102
|
+
ListResizeRule,
|
|
103
|
+
)
|
|
104
|
+
COLLECTION_RULES_AVAILABLE = True
|
|
105
|
+
except ImportError:
|
|
106
|
+
COLLECTION_RULES_AVAILABLE = False
|
|
107
|
+
|
|
108
|
+
try:
|
|
109
|
+
from rules.api_compatibility import (
|
|
110
|
+
PublicMethodRemovedRule,
|
|
111
|
+
MethodSignatureChangedRule,
|
|
112
|
+
FieldRemovedRule,
|
|
113
|
+
ConstructorRemovedRule,
|
|
114
|
+
InterfaceChangedRule,
|
|
115
|
+
AnnotationRemovedRule,
|
|
116
|
+
DeprecatedApiAddedRule,
|
|
117
|
+
SerialVersionUIDChangedRule,
|
|
118
|
+
)
|
|
119
|
+
API_RULES_AVAILABLE = True
|
|
120
|
+
except ImportError:
|
|
121
|
+
API_RULES_AVAILABLE = False
|
|
122
|
+
|
|
123
|
+
# Go 规则已迁移到 YAML 配置
|
|
124
|
+
GO_RULES_AVAILABLE = False
|
|
125
|
+
|
|
126
|
+
# Python/C++/JavaScript 规则已迁移到 YAML 配置
|
|
127
|
+
# 参见 diffsense/config/rules/ 目录
|
|
128
|
+
|
|
129
|
+
PYTHON_RULES_AVAILABLE = False
|
|
130
|
+
CPP_RULES_AVAILABLE = False
|
|
131
|
+
JAVASCRIPT_RULES_AVAILABLE = False
|
|
132
|
+
|
|
133
|
+
# Cross-language rules (Python, JavaScript, C++)
|
|
134
|
+
try:
|
|
135
|
+
from rules.cross_language_adapter import (
|
|
136
|
+
CrossLanguageRuleFactory,
|
|
137
|
+
)
|
|
138
|
+
CROSS_LANGUAGE_RULES_AVAILABLE = True
|
|
139
|
+
except ImportError:
|
|
140
|
+
CROSS_LANGUAGE_RULES_AVAILABLE = False
|
|
141
|
+
|
|
142
|
+
class RuleEngine:
|
|
143
|
+
def __init__(self, rules_path: Optional[str] = None, profile: Optional[str] = None, config: Optional[Dict[str, Any]] = None, pro_rules_path: Optional[str] = None):
    """Create the engine and load rules from every configured source.

    Args:
        rules_path: YAML rule file or directory; skipped when missing.
        profile: "lightweight", "standard" or "strict"; drives tier
            selection for PRO rules and the final severity filter.
        config: Engine configuration mapping. The same dict object is kept
            (not copied), and config keys found in YAML rule files are
            merged into it when absent.
        pro_rules_path: Root of the PRO rules tree; used only if it exists.
    """
    self.rules: List[Rule] = []
    # rule id -> {calls, hits, ignores, time_ns, errors}
    self.metrics: Dict[str, Dict[str, Any]] = {}
    self.ignore_manager = IgnoreManager()
    self.profile = profile
    self.config = config or {}
    self.lifecycle = LifecycleManager(self.config)
    self.quality_manager = self._init_quality_manager()

    experimental = self.config.get("experimental", {})
    self.experimental_enabled = bool(experimental.get("enabled", False))
    self.experimental_report_only = bool(experimental.get("report_only", True))

    # Step 1: built-in rules implemented as Python classes.
    self._register_builtins()

    # Step 2: YAML rules from the main rules path.
    self._load_yaml_rules(rules_path)

    # Step 3: PRO rules, with tier-based loading for the Java CVE rules.
    if pro_rules_path and os.path.exists(pro_rules_path):
        self._load_pro_rules_with_tiers(pro_rules_path)

    # Step 4: rules contributed by installed packages (entry point group
    # "diffsense.rules"), then extra rulesets named in config/environment.
    self._load_entry_point_rules()
    self._load_rulesets_from_config()

    # Step 5: keep only the severities the profile asks for.
    self._apply_profile_filter(profile)
|
|
172
|
+
|
|
173
|
+
def _register_builtins(self):
    """Instantiate and register the rules implemented as Python classes.

    The four concurrency rules are always registered. Every other rule
    family is registered only when its module imported successfully (see the
    module-level ``*_RULES_AVAILABLE`` flags). Python/C++/JavaScript/Go
    specific rules live in YAML configuration and are not registered here.
    """
    # Original concurrency rules (always available).
    rule_classes = [
        ThreadPoolSemanticChangeRule,
        ConcurrencyRegressionRule,
        ThreadSafetyRemovalRule,
        LatchMisuseRule,
    ]

    # Optional families: class names are only referenced behind their flag,
    # because they are undefined when the corresponding import failed.
    if RESOURCE_RULES_AVAILABLE:
        rule_classes += [
            CloseableResourceLeakRule,
            DatabaseConnectionLeakRule,
            StreamWrapperRule,
            IOStreamChainingRule,
            ExecutorServiceShutdownRule,
        ]

    if EXCEPTION_RULES_AVAILABLE:
        rule_classes += [
            SwallowedExceptionRule,
            GenericExceptionRule,
            ThrowRuntimeExceptionRule,
            ThrowsClauseRemovedRule,
            FinallyBlockMissingRule,
            ExceptionLoggingRule,
        ]

    if NULL_SAFETY_RULES_AVAILABLE:
        rule_classes += [
            NullReturnIgnoredRule,
            OptionalUnwrapRule,
            AutoboxingNPERule,
            ChainedMethodCallNPERule,
            ArrayIndexOutOfBoundsRule,
            StringConcatNPERule,
        ]

    if COLLECTION_RULES_AVAILABLE:
        rule_classes += [
            RawTypeUsageRule,
            UnmodifiableCollectionRule,
            ConcurrentModificationRule,
            MapComputeRule,
            StreamCollectorRule,
            ImmutableCollectionRule,
            ListResizeRule,
        ]

    if API_RULES_AVAILABLE:
        rule_classes += [
            PublicMethodRemovedRule,
            MethodSignatureChangedRule,
            FieldRemovedRule,
            ConstructorRemovedRule,
            InterfaceChangedRule,
            AnnotationRemovedRule,
            DeprecatedApiAddedRule,
            SerialVersionUIDChangedRule,
        ]

    for rule_cls in rule_classes:
        self.rules.append(rule_cls())

    # Cross-language rules are produced by a factory, one batch per language.
    if CROSS_LANGUAGE_RULES_AVAILABLE:
        for language in ('python', 'javascript', 'cpp', 'c'):
            for produced in CrossLanguageRuleFactory.create_all_rules_for_language(language):
                self.rules.append(produced)
|
|
236
|
+
|
|
237
|
+
def _load_yaml_rules(self, path: Optional[str], skip_single_rule_subdirs: bool = False):
|
|
238
|
+
"""
|
|
239
|
+
Loads YAML rules from a single file or a directory of .yaml files.
|
|
240
|
+
If path is a directory, loads all .yaml files in that directory recursively.
|
|
241
|
+
Each file must have top-level 'rules: [...]'. Load order is deterministic (sorted by name).
|
|
242
|
+
When skip_single_rule_subdirs is True (e.g. for pro-rules), skips subdirs java/go/python (bulk single-rule);
|
|
243
|
+
subdir cve/ is still walked so cve/java and cve/JavaScript single-rule YAMLs can be loaded and recognized by language.
|
|
244
|
+
"""
|
|
245
|
+
if not path or not os.path.exists(path):
|
|
246
|
+
return
|
|
247
|
+
|
|
248
|
+
if os.path.isdir(path):
|
|
249
|
+
# 仅在 pro-rules 根目录跳过 java/go/python(大批量单文件);不跳过 cve/java、cve/JavaScript
|
|
250
|
+
skip_dirs = {'java', 'go', 'python'} if skip_single_rule_subdirs else set()
|
|
251
|
+
for root, dirs, files in os.walk(path):
|
|
252
|
+
if skip_dirs and os.path.normpath(root) == os.path.normpath(path):
|
|
253
|
+
dirs[:] = [d for d in dirs if d not in skip_dirs]
|
|
254
|
+
for name in sorted(f for f in files if f.endswith('.yaml')):
|
|
255
|
+
file_path = os.path.join(root, name)
|
|
256
|
+
self._load_yaml_file(file_path)
|
|
257
|
+
else:
|
|
258
|
+
self._load_yaml_file(path)
|
|
259
|
+
|
|
260
|
+
def _single_rule_to_engine_format(self, data: dict) -> Optional[dict]:
|
|
261
|
+
"""将按语言单条规则 schema (id, language, severity, description, category, ...) 转为引擎 YamlRule 所需格式.
|
|
262
|
+
支持 id / rule_name(如 pro-rules/cve/java、cve/Go 单文件)."""
|
|
263
|
+
if not data:
|
|
264
|
+
return None
|
|
265
|
+
rule_id = data.get('id') or data.get('rule_name')
|
|
266
|
+
if not rule_id:
|
|
267
|
+
return None
|
|
268
|
+
out = {
|
|
269
|
+
'id': str(rule_id),
|
|
270
|
+
'language': data.get('language', '*'),
|
|
271
|
+
'severity': (data.get('severity') or 'high').lower(),
|
|
272
|
+
'rationale': data.get('rationale') or data.get('description') or '',
|
|
273
|
+
'file': data.get('file', '**'),
|
|
274
|
+
'action': data.get('action', 'report'),
|
|
275
|
+
'signal': data.get('signal') or 'security.vulnerability',
|
|
276
|
+
'impact': data.get('impact') or data.get('category') or 'security',
|
|
277
|
+
}
|
|
278
|
+
if data.get('package') is not None:
|
|
279
|
+
out['package'] = data['package']
|
|
280
|
+
if data.get('versions') is not None:
|
|
281
|
+
out['versions'] = data['versions']
|
|
282
|
+
return out
|
|
283
|
+
|
|
284
|
+
def _load_yaml_file(self, path: str):
    """Load the rules contained in one YAML file.

    Three layouts are recognised:

    * a top-level ``rules: [...]`` list, each entry becoming a YamlRule;
    * a single-rule mapping (``id``/``rule_name``, ``language``,
      ``severity``, ...), as used under pro-rules ``cve/java``;
    * a mapping with exactly one key that is the rule id and whose value is
      the rule body (e.g. ``prorule.go_2021_0265_go: {description: ...}``).

    A top-level ``config`` mapping is merged into ``self.config`` for keys
    that are not already set there.

    Loading is best-effort: a missing file, invalid YAML, or a document
    whose top level is not a mapping is skipped without raising.

    Args:
        path: Path of the YAML file to read (UTF-8).
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f) or {}
    except (FileNotFoundError, yaml.YAMLError):
        # Deliberate best-effort: one unreadable rule file must not prevent
        # the remaining rule sources from loading.
        return

    # A document whose root is a list or scalar has no rule layout we know;
    # calling .get() on it would raise and abort engine construction.
    if not isinstance(data, dict):
        return

    # Merge file-level config (e.g. skip_paths) without overriding values
    # that are already present in the engine config.
    yaml_config = data.get('config')
    if isinstance(yaml_config, dict):
        for key, value in yaml_config.items():
            if key not in self.config:
                self.config[key] = value

    raw_rules = data.get('rules', [])
    if isinstance(raw_rules, list) and raw_rules:
        for r in raw_rules:
            self.rules.append(YamlRule(r))
        return

    # "Single key is the rule id" layout (e.g. cve/Go/*.yaml).
    if len(data) == 1:
        key = next(iter(data))
        val = data[key]
        # YAML keys may be ints/bools; only strings can carry the prefix.
        has_pro_prefix = isinstance(key, str) and key.startswith('prorule.')
        if isinstance(val, dict) and (has_pro_prefix or 'language' in val or 'description' in val):
            data = dict(val)
            data['id'] = key

    # Single-rule schema (e.g. pro-rules/cve/java/*.yaml).
    one = self._single_rule_to_engine_format(data)
    if one:
        self.rules.append(YamlRule(one))
|
|
318
|
+
|
|
319
|
+
def _load_entry_point_rules(self):
    """Load rules published by installed packages via entry points.

    Every entry point in the group ``diffsense.rules`` is loaded and called
    with no arguments. A returned list contributes its enabled ``Rule``
    instances; a returned non-empty string is treated as a YAML file or
    directory path. Any exception raised by one plugin is swallowed so a
    broken package cannot take the engine down.
    """
    if entry_points is None:
        return

    try:
        discovered = entry_points(group="diffsense.rules")
    except TypeError:
        # Python < 3.10: entry_points() accepts no keyword arguments and
        # returns a mapping of group name -> entry points.
        discovered = entry_points().get("diffsense.rules", [])

    for plugin in discovered:
        try:
            provider = plugin.load()
            produced = provider()
            if isinstance(produced, list):
                for candidate in produced:
                    if not isinstance(candidate, Rule):
                        continue
                    if getattr(candidate, 'enabled', True):
                        self.rules.append(candidate)
            elif isinstance(produced, str) and produced:
                self._load_yaml_rules(produced)
        except Exception:
            # Broken plugin: ignore it and move on to the next one.
            continue
|
|
344
|
+
|
|
345
|
+
def _init_quality_manager(self) -> RuleQualityManager:
    """Build the RuleQualityManager from the ``rule_quality`` config section.

    The metrics file path comes from the ``DIFFSENSE_RULE_METRICS``
    environment variable, falling back to ``rule_metrics.json`` in the
    current working directory. Thresholds that cannot be converted to the
    expected numeric type fall back to their defaults (0.5, 0.3, 30).
    """
    def _coerce(raw, caster, fallback):
        # Convert a config value, reverting to the default on any failure.
        try:
            return caster(raw)
        except Exception:
            return fallback

    settings = self.config.get("rule_quality", {})
    metrics_path = os.environ.get("DIFFSENSE_RULE_METRICS") or os.path.join(os.getcwd(), "rule_metrics.json")

    auto_tune = bool(settings.get("auto_tune", False))
    degrade = _coerce(settings.get("degrade_threshold", 0.5), float, 0.5)
    disable = _coerce(settings.get("disable_threshold", 0.3), float, 0.3)
    min_samples = _coerce(settings.get("min_samples", 30), int, 30)
    return RuleQualityManager(metrics_path, auto_tune, degrade, disable, min_samples)
|
|
366
|
+
|
|
367
|
+
def _load_rulesets_from_config(self) -> None:
|
|
368
|
+
rulesets = []
|
|
369
|
+
cfg_sets = self.config.get("rulesets")
|
|
370
|
+
if isinstance(cfg_sets, list):
|
|
371
|
+
rulesets.extend([s for s in cfg_sets if isinstance(s, str)])
|
|
372
|
+
env_sets = os.environ.get("DIFFSENSE_RULESETS")
|
|
373
|
+
if env_sets:
|
|
374
|
+
for s in env_sets.split(","):
|
|
375
|
+
s = s.strip()
|
|
376
|
+
if s:
|
|
377
|
+
rulesets.append(s)
|
|
378
|
+
for path in rulesets:
|
|
379
|
+
if os.path.exists(path):
|
|
380
|
+
self._load_yaml_rules(path)
|
|
381
|
+
|
|
382
|
+
def _load_pro_rules_with_tiers(self, pro_rules_path: str):
|
|
383
|
+
"""
|
|
384
|
+
Load PRO rules with tier-based filtering for Java CVE rules.
|
|
385
|
+
Supports profile-based tier selection:
|
|
386
|
+
- lightweight: Load only tier1_critical
|
|
387
|
+
- standard: Load tier1_critical + tier2_high
|
|
388
|
+
- strict: Load all tiers
|
|
389
|
+
|
|
390
|
+
For other pro-rules (non-tiered), loads normally.
|
|
391
|
+
"""
|
|
392
|
+
if not os.path.exists(pro_rules_path):
|
|
393
|
+
return
|
|
394
|
+
|
|
395
|
+
# Check if this is the java CVE directory with tier subdirs
|
|
396
|
+
java_tier_base = os.path.join(pro_rules_path, "cve", "java")
|
|
397
|
+
if os.path.isdir(java_tier_base):
|
|
398
|
+
# Load tier directories based on profile
|
|
399
|
+
tiers_to_load = self._get_tiers_for_profile()
|
|
400
|
+
for tier_dir in tiers_to_load:
|
|
401
|
+
tier_path = os.path.join(java_tier_base, tier_dir)
|
|
402
|
+
if os.path.isdir(tier_path):
|
|
403
|
+
# Load tier rules, skip further subdirs
|
|
404
|
+
self._load_yaml_rules(tier_path, skip_single_rule_subdirs=False)
|
|
405
|
+
|
|
406
|
+
# Load non-tiered files in java root (if any)
|
|
407
|
+
for f in sorted(os.listdir(java_tier_base)):
|
|
408
|
+
if f.endswith('.yaml') and not f.startswith('tier'):
|
|
409
|
+
self._load_yaml_file(os.path.join(java_tier_base, f))
|
|
410
|
+
else:
|
|
411
|
+
# Not a tiered directory, load normally
|
|
412
|
+
self._load_yaml_rules(pro_rules_path, skip_single_rule_subdirs=True)
|
|
413
|
+
|
|
414
|
+
def _get_tiers_for_profile(self) -> List[str]:
|
|
415
|
+
"""
|
|
416
|
+
Get list of tier directories to load based on profile.
|
|
417
|
+
Returns tier directory names.
|
|
418
|
+
"""
|
|
419
|
+
if self.profile == "lightweight":
|
|
420
|
+
return ["tier1_critical"]
|
|
421
|
+
elif self.profile == "standard":
|
|
422
|
+
return ["tier1_critical", "tier2_high"]
|
|
423
|
+
else: # strict or None
|
|
424
|
+
return ["tier1_critical", "tier2_high", "tier3_medium", "tier4_low"]
|
|
425
|
+
|
|
426
|
+
def _apply_profile_filter(self, profile: Optional[str]):
|
|
427
|
+
"""
|
|
428
|
+
Apply profile-based filtering to loaded rules.
|
|
429
|
+
- lightweight: Only severity=critical
|
|
430
|
+
- standard: severity in (critical, high)
|
|
431
|
+
- strict: All rules
|
|
432
|
+
"""
|
|
433
|
+
if not profile or profile == "strict":
|
|
434
|
+
# No filtering, keep all rules
|
|
435
|
+
return
|
|
436
|
+
|
|
437
|
+
filtered_rules = []
|
|
438
|
+
for rule in self.rules:
|
|
439
|
+
if not getattr(rule, 'enabled', True):
|
|
440
|
+
continue
|
|
441
|
+
|
|
442
|
+
severity = getattr(rule, 'severity', '').lower()
|
|
443
|
+
|
|
444
|
+
if profile == "lightweight":
|
|
445
|
+
# Only critical rules
|
|
446
|
+
if severity == "critical":
|
|
447
|
+
filtered_rules.append(rule)
|
|
448
|
+
elif profile == "standard":
|
|
449
|
+
# Critical + high rules
|
|
450
|
+
if severity in ("critical", "high"):
|
|
451
|
+
filtered_rules.append(rule)
|
|
452
|
+
else:
|
|
453
|
+
# Unknown profile, keep the rule
|
|
454
|
+
filtered_rules.append(rule)
|
|
455
|
+
|
|
456
|
+
self.rules = filtered_rules
|
|
457
|
+
|
|
458
|
+
def persist_rule_quality(self) -> None:
|
|
459
|
+
self._update_quality_report()
|
|
460
|
+
self.quality_manager.persist()
|
|
461
|
+
|
|
462
|
+
def get_rule_quality_metrics(self) -> Dict[str, Any]:
|
|
463
|
+
return self.quality_manager.get_metrics()
|
|
464
|
+
|
|
465
|
+
def get_quality_warnings(self) -> List[Dict[str, Any]]:
|
|
466
|
+
return self.quality_manager.warnings()
|
|
467
|
+
|
|
468
|
+
def get_rule_stats(self, limit: int = 10) -> Dict[str, Any]:
|
|
469
|
+
metrics = self.metrics
|
|
470
|
+
quality = self.get_rule_quality_metrics()
|
|
471
|
+
rows = []
|
|
472
|
+
for rule_id, m in metrics.items():
|
|
473
|
+
calls = int(m.get("calls", 0))
|
|
474
|
+
hits = int(m.get("hits", 0))
|
|
475
|
+
ignores = int(m.get("ignores", 0))
|
|
476
|
+
errors = int(m.get("errors", 0))
|
|
477
|
+
time_ns = int(m.get("time_ns", 0))
|
|
478
|
+
avg_time_ms = (time_ns / 1_000_000 / calls) if calls else 0.0
|
|
479
|
+
fp_rate = (ignores / hits) if hits else 0.0
|
|
480
|
+
q = quality.get(rule_id, {})
|
|
481
|
+
precision = q.get("precision") if isinstance(q, dict) else None
|
|
482
|
+
rows.append({
|
|
483
|
+
"rule_id": rule_id,
|
|
484
|
+
"calls": calls,
|
|
485
|
+
"hits": hits,
|
|
486
|
+
"ignores": ignores,
|
|
487
|
+
"errors": errors,
|
|
488
|
+
"time_ms": time_ns / 1_000_000,
|
|
489
|
+
"avg_time_ms": avg_time_ms,
|
|
490
|
+
"fp_rate": fp_rate,
|
|
491
|
+
"precision": precision
|
|
492
|
+
})
|
|
493
|
+
top_slow = sorted(rows, key=lambda r: r["time_ms"], reverse=True)[:limit]
|
|
494
|
+
top_noisy = sorted(rows, key=lambda r: r["fp_rate"], reverse=True)[:limit]
|
|
495
|
+
top_triggered = sorted(rows, key=lambda r: r["hits"], reverse=True)[:limit]
|
|
496
|
+
total_rules = len(self.rules)
|
|
497
|
+
executed_count = len(metrics)
|
|
498
|
+
return {
|
|
499
|
+
"total_rules": total_rules,
|
|
500
|
+
"executed_count": executed_count,
|
|
501
|
+
"top_slow": top_slow,
|
|
502
|
+
"top_noisy": top_noisy,
|
|
503
|
+
"top_triggered": top_triggered
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
def evaluate(self, diff_data: Dict[str, Any], ast_signals: List[Any] = None) -> List[Dict[str, Any]]:
|
|
507
|
+
"""
|
|
508
|
+
Evaluates all registered rules against the diff.
|
|
509
|
+
"""
|
|
510
|
+
triggered_rules = []
|
|
511
|
+
ast_signals = ast_signals or []
|
|
512
|
+
|
|
513
|
+
# Incremental Scheduling: Extract unique file extensions and paths from diff_data
|
|
514
|
+
changed_files = diff_data.get("files", [])
|
|
515
|
+
new_files = diff_data.get("new_files", [])
|
|
516
|
+
stats = diff_data.get("stats", {"add": 0, "del": 0})
|
|
517
|
+
|
|
518
|
+
# Global skip_paths filtering: remove non-code files (docs/logs/config noise) before rule matching.
|
|
519
|
+
# NOTE: We filter per-file, not per-diff. A single skipped file should not suppress all rules.
|
|
520
|
+
skip_paths = self.config.get("skip_paths", [])
|
|
521
|
+
effective_changed_files = []
|
|
522
|
+
for file_path in changed_files:
|
|
523
|
+
if any(fnmatch.fnmatch(file_path, pattern) for pattern in skip_paths):
|
|
524
|
+
continue
|
|
525
|
+
effective_changed_files.append(file_path)
|
|
526
|
+
|
|
527
|
+
if not effective_changed_files:
|
|
528
|
+
return triggered_rules
|
|
529
|
+
|
|
530
|
+
# Adaptive Scheduling: If this is a "pure new project/file" diff, skip regression rules
|
|
531
|
+
# Logic: If deletions are very low compared to additions, it's likely new code.
|
|
532
|
+
total_changes = stats["add"] + stats["del"]
|
|
533
|
+
is_mostly_new = False
|
|
534
|
+
if total_changes > 10: # Only apply heuristic for non-trivial diffs
|
|
535
|
+
if stats["del"] / total_changes < 0.1: # Less than 10% deletions
|
|
536
|
+
is_mostly_new = True
|
|
537
|
+
|
|
538
|
+
# Another heuristic: If > 80% of files are new
|
|
539
|
+
if len(effective_changed_files) > 0 and (len(new_files) / len(effective_changed_files)) > 0.8:
|
|
540
|
+
is_mostly_new = True
|
|
541
|
+
|
|
542
|
+
for rule in self.rules:
|
|
543
|
+
if not getattr(rule, 'enabled', True):
|
|
544
|
+
continue
|
|
545
|
+
status = getattr(rule, "status", "stable")
|
|
546
|
+
if status == "disabled":
|
|
547
|
+
continue
|
|
548
|
+
if status == "experimental" and not self.experimental_enabled:
|
|
549
|
+
continue
|
|
550
|
+
if not self.lifecycle.should_run(rule):
|
|
551
|
+
continue
|
|
552
|
+
|
|
553
|
+
# Adaptive Filter: Skip regression rules if the diff is mostly new files
|
|
554
|
+
rule_type = getattr(rule, 'rule_type', 'absolute')
|
|
555
|
+
if is_mostly_new and rule_type == 'regression':
|
|
556
|
+
# Skip regression rules for new projects/files as they are meaningless
|
|
557
|
+
continue
|
|
558
|
+
|
|
559
|
+
# Incremental Filtering: Only run rule if it matches at least one changed file
|
|
560
|
+
rule_lang = getattr(rule, 'language', '*')
|
|
561
|
+
rule_scope = getattr(rule, 'scope', '**')
|
|
562
|
+
|
|
563
|
+
# Map language to file extensions
|
|
564
|
+
lang_extensions = {
|
|
565
|
+
'java': ['.java'],
|
|
566
|
+
'go': ['.go'],
|
|
567
|
+
'python': ['.py'],
|
|
568
|
+
'javascript': ['.js', '.jsx', '.mjs', '.cjs'],
|
|
569
|
+
'typescript': ['.ts', '.tsx'],
|
|
570
|
+
'cpp': ['.cpp', '.cc', '.cxx', '.h', '.hpp', '.c++'],
|
|
571
|
+
'c': ['.c', '.h'],
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
should_run = False
|
|
575
|
+
if rule_lang == '*' and rule_scope == '**':
|
|
576
|
+
should_run = True
|
|
577
|
+
else:
|
|
578
|
+
for file_path in effective_changed_files:
|
|
579
|
+
# Get extensions for this language
|
|
580
|
+
extensions = lang_extensions.get(rule_lang, [f".{rule_lang}"])
|
|
581
|
+
|
|
582
|
+
# Check if file matches any extension
|
|
583
|
+
lang_match = False
|
|
584
|
+
for ext in extensions:
|
|
585
|
+
if file_path.endswith(ext):
|
|
586
|
+
lang_match = True
|
|
587
|
+
break
|
|
588
|
+
|
|
589
|
+
if rule_lang != '*' and not lang_match:
|
|
590
|
+
continue
|
|
591
|
+
# Simple scope check (basic substring for now, could be improved to glob)
|
|
592
|
+
if rule_scope != '**' and not fnmatch.fnmatch(file_path, rule_scope):
|
|
593
|
+
continue
|
|
594
|
+
should_run = True
|
|
595
|
+
break
|
|
596
|
+
|
|
597
|
+
if not should_run:
|
|
598
|
+
continue
|
|
599
|
+
|
|
600
|
+
# CVE 版本精确匹配:若规则带 package + versions 且用户配置了 dependency_versions,仅当配置版本在受影响区间内才执行
|
|
601
|
+
rule_package = getattr(rule, 'package', None)
|
|
602
|
+
rule_versions = getattr(rule, 'versions', None)
|
|
603
|
+
if rule_package and rule_versions and isinstance(rule_package, dict):
|
|
604
|
+
dep_versions = self.config.get("dependency_versions") or {}
|
|
605
|
+
eco = (rule_package.get("ecosystem") or "").strip().lower()
|
|
606
|
+
pkg_name = (rule_package.get("name") or "").strip()
|
|
607
|
+
if eco and pkg_name:
|
|
608
|
+
eco_map = dep_versions.get(eco)
|
|
609
|
+
if isinstance(eco_map, dict):
|
|
610
|
+
current_ver = eco_map.get(pkg_name)
|
|
611
|
+
if current_ver is None:
|
|
612
|
+
continue # 未配置该包版本,不执行此 CVE 规则(需用户配置以精确匹配)
|
|
613
|
+
intro = rule_versions.get("introduced") or []
|
|
614
|
+
fixed = rule_versions.get("fixed") or []
|
|
615
|
+
if not _version_in_cve_range(str(current_ver), intro if isinstance(intro, list) else [intro], fixed if isinstance(fixed, list) else [fixed] if fixed else []):
|
|
616
|
+
continue # 配置版本不在受影响区间,跳过
|
|
617
|
+
|
|
618
|
+
rule_id = rule.id
|
|
619
|
+
quality_status, precision, _ = self.quality_manager.status(rule_id)
|
|
620
|
+
if self.quality_manager.auto_tune and quality_status == "disabled":
|
|
621
|
+
continue
|
|
622
|
+
degrade_severity = self.quality_manager.auto_tune and quality_status == "degraded"
|
|
623
|
+
if rule_id not in self.metrics:
|
|
624
|
+
self.metrics[rule_id] = {"calls": 0, "hits": 0, "ignores": 0, "time_ns": 0, "errors": 0}
|
|
625
|
+
|
|
626
|
+
self.metrics[rule_id]["calls"] += 1
|
|
627
|
+
|
|
628
|
+
start_time = time.time_ns()
|
|
629
|
+
match_details = None
|
|
630
|
+
|
|
631
|
+
try:
|
|
632
|
+
match_details = rule.evaluate(diff_data, ast_signals)
|
|
633
|
+
if match_details:
|
|
634
|
+
matched_file = match_details.get('file', 'unknown')
|
|
635
|
+
if self.ignore_manager.is_ignored(rule_id, matched_file):
|
|
636
|
+
self.metrics[rule_id]["hits"] += 1
|
|
637
|
+
self.metrics[rule_id]["ignores"] += 1
|
|
638
|
+
self.quality_manager.record_false_positive(rule_id)
|
|
639
|
+
match_details = None
|
|
640
|
+
except Exception:
|
|
641
|
+
self.metrics[rule_id]["errors"] += 1
|
|
642
|
+
finally:
|
|
643
|
+
duration = time.time_ns() - start_time
|
|
644
|
+
self.metrics[rule_id]["time_ns"] += duration
|
|
645
|
+
|
|
646
|
+
if match_details:
|
|
647
|
+
self.metrics[rule_id]["hits"] += 1
|
|
648
|
+
quality_entry = self.quality_manager.record_hit(rule_id)
|
|
649
|
+
severity = self.lifecycle.adjust_severity(rule, rule.severity)
|
|
650
|
+
if degrade_severity:
|
|
651
|
+
severity = self._downgrade_severity(severity)
|
|
652
|
+
triggered = {
|
|
653
|
+
"id": rule.id,
|
|
654
|
+
"title": getattr(rule, 'title', rule.id), # Fallback to id if title not available
|
|
655
|
+
"severity": severity,
|
|
656
|
+
"impact": rule.impact,
|
|
657
|
+
"rationale": rule.rationale,
|
|
658
|
+
"matched_file": match_details.get('file', 'unknown'),
|
|
659
|
+
"precision": quality_entry.get("precision", precision),
|
|
660
|
+
"quality_status": quality_status,
|
|
661
|
+
"is_blocking": getattr(rule, 'is_blocking', False)
|
|
662
|
+
}
|
|
663
|
+
if status == "experimental" and self.experimental_report_only:
|
|
664
|
+
triggered["experimental"] = True
|
|
665
|
+
triggered_rules.append(triggered)
|
|
666
|
+
|
|
667
|
+
return triggered_rules
|
|
668
|
+
|
|
669
|
+
def get_metrics(self) -> Dict[str, Any]:
|
|
670
|
+
"""Returns the collected performance metrics (calls, hits, ignores, time_ns, errors)."""
|
|
671
|
+
return self.metrics
|
|
672
|
+
|
|
673
|
+
@staticmethod
|
|
674
|
+
def _downgrade_severity(severity: str) -> str:
|
|
675
|
+
order = ["critical", "high", "medium", "low"]
|
|
676
|
+
try:
|
|
677
|
+
idx = order.index(str(severity).lower())
|
|
678
|
+
except ValueError:
|
|
679
|
+
return severity
|
|
680
|
+
return order[min(idx + 1, len(order) - 1)]
|
|
681
|
+
|
|
682
|
+
def _rule_confidences(self) -> Dict[str, float]:
|
|
683
|
+
result = {}
|
|
684
|
+
for rule in self.rules:
|
|
685
|
+
try:
|
|
686
|
+
result[rule.id] = float(getattr(rule, "confidence", 1.0))
|
|
687
|
+
except Exception:
|
|
688
|
+
result[rule.id] = 1.0
|
|
689
|
+
return result
|
|
690
|
+
|
|
691
|
+
def _update_quality_report(self) -> None:
|
|
692
|
+
metrics = self.metrics
|
|
693
|
+
confidences = self._rule_confidences()
|
|
694
|
+
self.quality_manager.update_report(metrics, confidences)
|
|
695
|
+
|
|
696
|
+
@staticmethod
|
|
697
|
+
def quality_report_from_metrics(metrics: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
698
|
+
"""
|
|
699
|
+
Builds rule quality report from metrics. Each row: rule_id, hits, accepts, ignores, fp_rate.
|
|
700
|
+
fp_rate = ignores/hits when hits > 0; used to flag noisy rules.
|
|
701
|
+
Skips non-rule keys (e.g. cache, rule_stats) when _metrics from replay is passed.
|
|
702
|
+
"""
|
|
703
|
+
rows = []
|
|
704
|
+
for rule_id, m in metrics.items():
|
|
705
|
+
if rule_id in ("cache", "rule_stats") or not isinstance(m, dict):
|
|
706
|
+
continue
|
|
707
|
+
hits = m.get("hits", 0)
|
|
708
|
+
ignores = m.get("ignores", 0)
|
|
709
|
+
accepts = max(0, hits - ignores)
|
|
710
|
+
fp_rate = (ignores / hits) if hits else 0.0
|
|
711
|
+
rows.append({
|
|
712
|
+
"rule_id": rule_id,
|
|
713
|
+
"hits": hits,
|
|
714
|
+
"accepts": accepts,
|
|
715
|
+
"ignores": ignores,
|
|
716
|
+
"fp_rate": fp_rate,
|
|
717
|
+
})
|
|
718
|
+
return sorted(rows, key=lambda r: (-r["hits"], r["rule_id"]))
|