diffsense 2.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. adapters/__init__.py +0 -0
  2. adapters/base.py +27 -0
  3. adapters/github_adapter.py +164 -0
  4. adapters/gitlab_adapter.py +207 -0
  5. adapters/local_adapter.py +136 -0
  6. banner.py +71 -0
  7. cli.py +606 -0
  8. config/__init__.py +1 -0
  9. config/rules.yaml +371 -0
  10. core/__init__.py +235 -0
  11. core/ast_detector.py +853 -0
  12. core/change.py +46 -0
  13. core/composer.py +93 -0
  14. core/evaluator.py +15 -0
  15. core/ignore_manager.py +71 -0
  16. core/knowledge.py +77 -0
  17. core/parser.py +181 -0
  18. core/parser_manager.py +104 -0
  19. core/quality_manager.py +117 -0
  20. core/renderer.py +197 -0
  21. core/rule_base.py +98 -0
  22. core/rule_runtime.py +103 -0
  23. core/rules.py +718 -0
  24. core/run_config.py +85 -0
  25. core/semantic_diff.py +359 -0
  26. core/signal_model.py +21 -0
  27. core/signals_registry.py +62 -0
  28. diffsense-2.2.12.dist-info/METADATA +18 -0
  29. diffsense-2.2.12.dist-info/RECORD +58 -0
  30. diffsense-2.2.12.dist-info/WHEEL +5 -0
  31. diffsense-2.2.12.dist-info/entry_points.txt +3 -0
  32. diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
  33. diffsense-2.2.12.dist-info/top_level.txt +11 -0
  34. diffsense_mcp/__init__.py +1 -0
  35. diffsense_mcp/launcher.py +28 -0
  36. diffsense_mcp/server.py +687 -0
  37. governance/lifecycle.py +54 -0
  38. main.py +318 -0
  39. rules/__init__.py +246 -0
  40. rules/api_compatibility.py +372 -0
  41. rules/collection_handling.py +349 -0
  42. rules/concurrency.py +194 -0
  43. rules/concurrency_adapter.py +250 -0
  44. rules/cross_language_adapter.py +444 -0
  45. rules/exception_handling.py +320 -0
  46. rules/go_rules.py +401 -0
  47. rules/null_safety.py +301 -0
  48. rules/resource_management.py +222 -0
  49. rules/yaml_adapter.py +195 -0
  50. run_audit.py +478 -0
  51. sdk/cpp_adapter.py +238 -0
  52. sdk/go_adapter.py +199 -0
  53. sdk/java_adapter.py +199 -0
  54. sdk/javascript_adapter.py +229 -0
  55. sdk/language_adapter.py +313 -0
  56. sdk/python_adapter.py +195 -0
  57. sdk/rule.py +63 -0
  58. sdk/signal.py +14 -0
core/rules.py ADDED
@@ -0,0 +1,718 @@
1
+ import os
2
+ import yaml
3
+ import fnmatch
4
+ import time
5
+ from typing import Dict, List, Any, Optional, Tuple
6
+
7
def _version_segments(v: str) -> Tuple[int, ...]:
    """Convert a version string into a tuple of integers that can be compared.

    The string is split on "." and "-".  Each segment contributes the first
    contiguous run of decimal digits it contains, or 0 when it has none, so
    "1.2.3rc1" becomes (1, 2, 3) instead of fusing the release and
    pre-release digits into 31.  Empty or None input yields (0,).
    """
    v = (v or "").strip()
    if not v:
        return (0,)
    parts = []
    for segment in v.replace("-", ".").split("."):
        digits = []
        for ch in segment:
            # isdecimal() (not isdigit()) so every collected char is accepted by int().
            if ch.isdecimal():
                digits.append(ch)
            elif digits:
                # Stop at the end of the first digit run; later digits belong
                # to a suffix such as "rc1" and must not be concatenated.
                break
        parts.append(int("".join(digits)) if digits else 0)
    return tuple(parts)
17
+
18
def _version_in_cve_range(current: str, introduced: List[str], fixed: List[str]) -> bool:
    """Return True when ``current`` lies inside the CVE's affected version range.

    Convention: ``introduced`` lists the first affected version(s) and
    ``fixed`` the first patched version(s).  The affected interval is
    [min(introduced), min(fixed)); a missing bound leaves that side open.
    When neither bound is supplied the version is treated as affected.

    All versions are zero-padded to a common length before comparison so that
    "1.2" and "1.2.0" compare equal rather than being ordered by tuple length.
    """
    if not introduced and not fixed:
        return True
    cur = _version_segments(current)
    intro_vers = [_version_segments(x) for x in (introduced or []) if x]
    fix_vers = [_version_segments(x) for x in (fixed or []) if x]

    width = max(len(seg) for seg in [cur] + intro_vers + fix_vers)

    def _pad(seg):
        # Right-pad with zeros: (1, 2) -> (1, 2, 0) when width is 3.
        return seg + (0,) * (width - len(seg))

    cur = _pad(cur)
    if intro_vers and cur < min(_pad(seg) for seg in intro_vers):
        return False
    if fix_vers and cur >= min(_pad(seg) for seg in fix_vers):
        return False
    return True
34
+ from core.rule_base import Rule
35
+ from core.quality_manager import RuleQualityManager
36
+ from core.parser_manager import ParserManager
37
+
38
+ try:
39
+ from importlib.metadata import entry_points
40
+ except ImportError:
41
+ entry_points = None # type: ignore
42
+ from core.ignore_manager import IgnoreManager
43
+ try:
44
+ from ..governance.lifecycle import LifecycleManager
45
+ except ImportError:
46
+ from governance.lifecycle import LifecycleManager
47
+ from rules.concurrency import (
48
+ ThreadPoolSemanticChangeRule,
49
+ ConcurrencyRegressionRule,
50
+ ThreadSafetyRemovalRule,
51
+ LatchMisuseRule
52
+ )
53
+ from rules.yaml_adapter import YamlRule
54
+
55
+ # 导入新增的规则模块(向后兼容:如果模块不存在不会报错)
56
+ try:
57
+ from rules.resource_management import (
58
+ CloseableResourceLeakRule,
59
+ DatabaseConnectionLeakRule,
60
+ StreamWrapperRule,
61
+ IOStreamChainingRule,
62
+ ExecutorServiceShutdownRule,
63
+ )
64
+ RESOURCE_RULES_AVAILABLE = True
65
+ except ImportError:
66
+ RESOURCE_RULES_AVAILABLE = False
67
+
68
+ try:
69
+ from rules.exception_handling import (
70
+ SwallowedExceptionRule,
71
+ GenericExceptionRule,
72
+ ThrowRuntimeExceptionRule,
73
+ ThrowsClauseRemovedRule,
74
+ FinallyBlockMissingRule,
75
+ ExceptionLoggingRule,
76
+ )
77
+ EXCEPTION_RULES_AVAILABLE = True
78
+ except ImportError:
79
+ EXCEPTION_RULES_AVAILABLE = False
80
+
81
+ try:
82
+ from rules.null_safety import (
83
+ NullReturnIgnoredRule,
84
+ OptionalUnwrapRule,
85
+ AutoboxingNPERule,
86
+ ChainedMethodCallNPERule,
87
+ ArrayIndexOutOfBoundsRule,
88
+ StringConcatNPERule,
89
+ )
90
+ NULL_SAFETY_RULES_AVAILABLE = True
91
+ except ImportError:
92
+ NULL_SAFETY_RULES_AVAILABLE = False
93
+
94
+ try:
95
+ from rules.collection_handling import (
96
+ RawTypeUsageRule,
97
+ UnmodifiableCollectionRule,
98
+ ConcurrentModificationRule,
99
+ MapComputeRule,
100
+ StreamCollectorRule,
101
+ ImmutableCollectionRule,
102
+ ListResizeRule,
103
+ )
104
+ COLLECTION_RULES_AVAILABLE = True
105
+ except ImportError:
106
+ COLLECTION_RULES_AVAILABLE = False
107
+
108
+ try:
109
+ from rules.api_compatibility import (
110
+ PublicMethodRemovedRule,
111
+ MethodSignatureChangedRule,
112
+ FieldRemovedRule,
113
+ ConstructorRemovedRule,
114
+ InterfaceChangedRule,
115
+ AnnotationRemovedRule,
116
+ DeprecatedApiAddedRule,
117
+ SerialVersionUIDChangedRule,
118
+ )
119
+ API_RULES_AVAILABLE = True
120
+ except ImportError:
121
+ API_RULES_AVAILABLE = False
122
+
123
+ # Go 规则已迁移到 YAML 配置
124
+ GO_RULES_AVAILABLE = False
125
+
126
+ # Python/C++/JavaScript 规则已迁移到 YAML 配置
127
+ # 参见 diffsense/config/rules/ 目录
128
+
129
+ PYTHON_RULES_AVAILABLE = False
130
+ CPP_RULES_AVAILABLE = False
131
+ JAVASCRIPT_RULES_AVAILABLE = False
132
+
133
+ # Cross-language rules (Python, JavaScript, C++)
134
+ try:
135
+ from rules.cross_language_adapter import (
136
+ CrossLanguageRuleFactory,
137
+ )
138
+ CROSS_LANGUAGE_RULES_AVAILABLE = True
139
+ except ImportError:
140
+ CROSS_LANGUAGE_RULES_AVAILABLE = False
141
+
142
+ class RuleEngine:
143
def __init__(self, rules_path: Optional[str] = None, profile: Optional[str] = None, config: Optional[Dict[str, Any]] = None, pro_rules_path: Optional[str] = None):
    """Create the engine and fill ``self.rules`` from every configured rule source.

    Sources are processed in a fixed order: built-in Python rules, YAML rules
    at ``rules_path``, PRO rules at ``pro_rules_path`` (only when that path
    exists), entry-point plugins, rulesets named in config or environment,
    and finally the profile filter is applied to whatever was loaded.
    """
    self.rules: List[Rule] = []
    # Per-rule counters keyed by rule id: calls, hits, ignores, time_ns, errors.
    self.metrics: Dict[str, Dict[str, Any]] = {}
    self.ignore_manager = IgnoreManager()
    self.profile = profile
    self.config = config or {}
    self.lifecycle = LifecycleManager(self.config)
    self.quality_manager = self._init_quality_manager()

    experimental = self.config.get("experimental", {})
    self.experimental_enabled = bool(experimental.get("enabled", False))
    self.experimental_report_only = bool(experimental.get("report_only", True))

    # 1. Built-in rules implemented as Python classes.
    self._register_builtins()

    # 2. YAML rules (single file or directory).
    self._load_yaml_rules(rules_path)

    # 3. PRO rules, with tier-based selection for the Java CVE rules.
    if pro_rules_path and os.path.exists(pro_rules_path):
        self._load_pro_rules_with_tiers(pro_rules_path)

    # 4. Rules contributed by installed packages (entry point group
    #    "diffsense.rules") and by explicitly listed rulesets.
    self._load_entry_point_rules()
    self._load_rulesets_from_config()

    # 5. Profile filter: lightweight = critical only, standard = critical + high,
    #    strict = everything.
    self._apply_profile_filter(profile)
172
+
173
def _register_builtins(self):
    """Instantiate and register the rules that are implemented as Python classes.

    The four concurrency rules are imported unconditionally at module level.
    Every other family is registered only when its ``*_RULES_AVAILABLE`` flag
    (set by the guarded imports at the top of the module) is true.
    Registration order follows the order of the lists below.
    """
    # Original concurrency rules (always present).
    rule_classes = [
        ThreadPoolSemanticChangeRule,
        ConcurrencyRegressionRule,
        ThreadSafetyRemovalRule,
        LatchMisuseRule,
    ]

    if RESOURCE_RULES_AVAILABLE:
        rule_classes += [
            CloseableResourceLeakRule,
            DatabaseConnectionLeakRule,
            StreamWrapperRule,
            IOStreamChainingRule,
            ExecutorServiceShutdownRule,
        ]

    if EXCEPTION_RULES_AVAILABLE:
        rule_classes += [
            SwallowedExceptionRule,
            GenericExceptionRule,
            ThrowRuntimeExceptionRule,
            ThrowsClauseRemovedRule,
            FinallyBlockMissingRule,
            ExceptionLoggingRule,
        ]

    if NULL_SAFETY_RULES_AVAILABLE:
        rule_classes += [
            NullReturnIgnoredRule,
            OptionalUnwrapRule,
            AutoboxingNPERule,
            ChainedMethodCallNPERule,
            ArrayIndexOutOfBoundsRule,
            StringConcatNPERule,
        ]

    if COLLECTION_RULES_AVAILABLE:
        rule_classes += [
            RawTypeUsageRule,
            UnmodifiableCollectionRule,
            ConcurrentModificationRule,
            MapComputeRule,
            StreamCollectorRule,
            ImmutableCollectionRule,
            ListResizeRule,
        ]

    if API_RULES_AVAILABLE:
        rule_classes += [
            PublicMethodRemovedRule,
            MethodSignatureChangedRule,
            FieldRemovedRule,
            ConstructorRemovedRule,
            InterfaceChangedRule,
            AnnotationRemovedRule,
            DeprecatedApiAddedRule,
            SerialVersionUIDChangedRule,
        ]

    # Instantiate one at a time so construction and registration stay interleaved.
    for rule_cls in rule_classes:
        self.rules.append(rule_cls())

    # Python/C++/JavaScript/Go language-specific rules now live in YAML
    # configuration; only the cross-language factory rules are created here.
    if CROSS_LANGUAGE_RULES_AVAILABLE:
        for language in ['python', 'javascript', 'cpp', 'c']:
            for rule in CrossLanguageRuleFactory.create_all_rules_for_language(language):
                self.rules.append(rule)
236
+
237
def _load_yaml_rules(self, path: Optional[str], skip_single_rule_subdirs: bool = False):
    """Load YAML rules from a single file or, recursively, from a directory.

    Args:
        path: File or directory to load. ``None``, an empty string or a
            non-existent path is ignored.
        skip_single_rule_subdirs: When True (used for pro-rules), the
            sub-directories ``java``, ``go`` and ``python`` located directly
            under ``path`` are not descended into.  Directories with the same
            names deeper in the tree (for example ``cve/java``) are still walked.

    Only files ending in ``.yaml`` are loaded.  Load order is deterministic:
    both the sub-directories and the files of every directory are visited in
    sorted name order.
    """
    if not path or not os.path.exists(path):
        return

    if not os.path.isdir(path):
        self._load_yaml_file(path)
        return

    skip_dirs = {'java', 'go', 'python'} if skip_single_rule_subdirs else set()
    top = os.path.normpath(path)
    for root, dirs, files in os.walk(path):
        if skip_dirs and os.path.normpath(root) == top:
            dirs[:] = [d for d in dirs if d not in skip_dirs]
        # os.walk yields directories in os.listdir order, which is
        # filesystem-dependent; sorting in place fixes the traversal order.
        dirs.sort()
        for name in sorted(f for f in files if f.endswith('.yaml')):
            self._load_yaml_file(os.path.join(root, name))
259
+
260
def _single_rule_to_engine_format(self, data: dict) -> Optional[dict]:
    """Convert a per-language single-rule document into the dict YamlRule expects.

    The rule id is taken from ``id`` or, failing that, ``rule_name`` (used by
    single-file CVE rules).  Missing fields receive defaults: language ``*``,
    severity ``high``, file ``**``, action ``report``, signal
    ``security.vulnerability`` and impact ``security``.  ``rationale`` falls
    back to ``description`` and ``impact`` falls back to ``category``.
    ``package`` and ``versions`` are copied through only when present.

    Returns:
        The converted dict, or None when ``data`` is empty, is not a mapping,
        or carries no rule id.
    """
    if not data or not isinstance(data, dict):
        return None
    rule_id = data.get('id') or data.get('rule_name')
    if not rule_id:
        return None
    out = {
        'id': str(rule_id),
        'language': data.get('language', '*'),
        # str() guards against YAML scalars that are not strings (e.g. a number).
        'severity': str(data.get('severity') or 'high').lower(),
        'rationale': data.get('rationale') or data.get('description') or '',
        'file': data.get('file', '**'),
        'action': data.get('action', 'report'),
        'signal': data.get('signal') or 'security.vulnerability',
        'impact': data.get('impact') or data.get('category') or 'security',
    }
    for optional_key in ('package', 'versions'):
        if data.get(optional_key) is not None:
            out[optional_key] = data[optional_key]
    return out
283
+
284
def _load_yaml_file(self, path: str):
    """Load the rules contained in one YAML file.

    Three document layouts are recognised:
      1. a mapping with a non-empty top-level ``rules: [...]`` list; every
         item is wrapped in a YamlRule;
      2. a mapping with exactly one key whose value is a mapping, where the
         key is the rule id (accepted when the key starts with ``prorule.``
         or the value contains ``language`` or ``description``);
      3. a flat single-rule mapping (``id`` or ``rule_name`` plus metadata).

    A top-level ``config`` mapping is merged into ``self.config`` without
    overriding keys that are already set.  A missing file, a YAML syntax
    error, or a document whose top level is not a mapping is skipped silently.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f) or {}

        # safe_load may return a list or a scalar; such a file holds no rules
        # in any supported layout, so skip it instead of failing on .get().
        if not isinstance(data, dict):
            return

        # Merge global config from the YAML file (e.g. skip_paths).
        yaml_config = data.get('config', {})
        if isinstance(yaml_config, dict):
            for key, value in yaml_config.items():
                self.config.setdefault(key, value)

        raw_rules = data.get('rules', [])
        if isinstance(raw_rules, list) and raw_rules:
            for r in raw_rules:
                self.rules.append(YamlRule(r))
            return

        # Layout 2: the single key is the rule id.
        if len(data) == 1:
            key = next(iter(data))
            val = data[key]
            if isinstance(val, dict) and (
                (isinstance(key, str) and key.startswith('prorule.'))
                or 'language' in val
                or 'description' in val
            ):
                data = dict(val)
                data['id'] = key

        # Layout 3 (and layout 2 after unwrapping): single-rule schema.
        one = self._single_rule_to_engine_format(data)
        if one:
            self.rules.append(YamlRule(one))
    except FileNotFoundError:
        pass
    except yaml.YAMLError:
        pass
318
+
319
def _load_entry_point_rules(self):
    """Load rules published by installed packages under the ``diffsense.rules`` entry point group.

    Each entry point is loaded and called with no arguments.  A returned list
    contributes its items that are ``Rule`` instances and are not disabled; a
    returned non-empty string is treated as a YAML file or directory path.
    Any exception raised by one plugin is swallowed so that a single broken
    package cannot stop the engine from starting.
    """
    if entry_points is None:
        return
    try:
        plugins = entry_points(group="diffsense.rules")
    except TypeError:
        # Python < 3.10: entry_points() takes no keyword argument.
        plugins = entry_points().get("diffsense.rules", [])

    for plugin in plugins:
        try:
            factory = plugin.load()
            produced = factory()
            if isinstance(produced, list):
                for candidate in produced:
                    if not isinstance(candidate, Rule):
                        continue
                    if getattr(candidate, 'enabled', True):
                        self.rules.append(candidate)
            elif isinstance(produced, str) and produced:
                self._load_yaml_rules(produced)
        except Exception:
            pass  # skip broken plugin
344
+
345
def _init_quality_manager(self) -> RuleQualityManager:
    """Build the RuleQualityManager from the ``rule_quality`` config section.

    The metrics file path comes from the ``DIFFSENSE_RULE_METRICS``
    environment variable, falling back to ``rule_metrics.json`` in the current
    working directory.  Thresholds that cannot be converted to a number fall
    back to their defaults (degrade 0.5, disable 0.3, min_samples 30).
    """
    def _coerce(caster, raw, fallback):
        # Convert a config value, reverting to the default on any failure.
        try:
            return caster(raw)
        except Exception:
            return fallback

    settings = self.config.get("rule_quality", {})
    metrics_path = os.environ.get("DIFFSENSE_RULE_METRICS") or os.path.join(os.getcwd(), "rule_metrics.json")
    raw_auto_tune = settings.get("auto_tune", False)
    degrade = _coerce(float, settings.get("degrade_threshold", 0.5), 0.5)
    disable = _coerce(float, settings.get("disable_threshold", 0.3), 0.3)
    min_samples = _coerce(int, settings.get("min_samples", 30), 30)
    return RuleQualityManager(metrics_path, bool(raw_auto_tune), degrade, disable, min_samples)
366
+
367
def _load_rulesets_from_config(self) -> None:
    """Load additional rulesets named in config and in the environment.

    Paths are collected from the ``rulesets`` config list (string entries
    only) followed by the comma-separated ``DIFFSENSE_RULESETS`` environment
    variable (blank entries dropped).  Each path that exists is passed to
    ``_load_yaml_rules``; the others are ignored.
    """
    configured = self.config.get("rulesets")
    if isinstance(configured, list):
        candidates = [entry for entry in configured if isinstance(entry, str)]
    else:
        candidates = []

    from_env = os.environ.get("DIFFSENSE_RULESETS")
    if from_env:
        stripped = (piece.strip() for piece in from_env.split(","))
        candidates.extend(piece for piece in stripped if piece)

    for ruleset_path in candidates:
        if os.path.exists(ruleset_path):
            self._load_yaml_rules(ruleset_path)
381
+
382
def _load_pro_rules_with_tiers(self, pro_rules_path: str):
    """Load PRO rules, selecting Java CVE tiers according to the active profile.

    When ``<pro_rules_path>/cve/java`` is a directory, the tier
    sub-directories returned by ``_get_tiers_for_profile`` are loaded
    (recursively, via ``_load_yaml_rules``), followed by the ``.yaml`` files
    that sit directly in ``cve/java`` and whose names do not start with
    ``tier``.  Otherwise the whole ``pro_rules_path`` tree is loaded with the
    root-level ``java``/``go``/``python`` sub-directories skipped.

    NOTE(review): in the tiered case nothing outside ``cve/java`` is loaded.
    Confirm whether the remaining pro-rules are meant to be loaded as well.
    """
    if not os.path.exists(pro_rules_path):
        return

    java_cve_dir = os.path.join(pro_rules_path, "cve", "java")
    if not os.path.isdir(java_cve_dir):
        # No tier layout present: load the tree the ordinary way.
        self._load_yaml_rules(pro_rules_path, skip_single_rule_subdirs=True)
        return

    # Tier directories selected by the profile; absent tiers are ignored.
    for tier_name in self._get_tiers_for_profile():
        tier_dir = os.path.join(java_cve_dir, tier_name)
        if os.path.isdir(tier_dir):
            self._load_yaml_rules(tier_dir, skip_single_rule_subdirs=False)

    # Loose rule files placed directly in cve/java.
    for entry in sorted(os.listdir(java_cve_dir)):
        if entry.endswith('.yaml') and not entry.startswith('tier'):
            self._load_yaml_file(os.path.join(java_cve_dir, entry))
413
+
414
def _get_tiers_for_profile(self) -> List[str]:
    """Return the tier directory names to load for ``self.profile``.

    ``lightweight`` selects the first tier, ``standard`` the first two, and
    any other value (including ``strict`` and None) selects all four.
    """
    all_tiers = ["tier1_critical", "tier2_high", "tier3_medium", "tier4_low"]
    profile = self.profile
    depth = 1 if profile == "lightweight" else 2 if profile == "standard" else len(all_tiers)
    return all_tiers[:depth]
425
+
426
def _apply_profile_filter(self, profile: Optional[str]):
    """Reduce ``self.rules`` to the rules permitted by ``profile``.

    - no profile or ``strict``: the rule list is left untouched;
    - ``lightweight``: enabled rules with severity ``critical``;
    - ``standard``: enabled rules with severity ``critical`` or ``high``;
    - any other profile: every enabled rule.

    Severity is compared case-insensitively.
    """
    if not profile or profile == "strict":
        return

    if profile == "lightweight":
        wanted = ("critical",)
    elif profile == "standard":
        wanted = ("critical", "high")
    else:
        wanted = None  # unknown profile: severity does not filter anything

    kept = []
    for rule in self.rules:
        if not getattr(rule, 'enabled', True):
            continue
        level = getattr(rule, 'severity', '').lower()
        if wanted is None or level in wanted:
            kept.append(rule)
    self.rules = kept
457
+
458
def persist_rule_quality(self) -> None:
    """Refresh the quality report from the current metrics, then have the quality manager persist it."""
    self._update_quality_report()
    manager = self.quality_manager
    manager.persist()
461
+
462
def get_rule_quality_metrics(self) -> Dict[str, Any]:
    """Return whatever the quality manager's ``get_metrics()`` reports."""
    manager = self.quality_manager
    return manager.get_metrics()
464
+
465
def get_quality_warnings(self) -> List[Dict[str, Any]]:
    """Return the warnings produced by the quality manager's ``warnings()``."""
    manager = self.quality_manager
    return manager.warnings()
467
+
468
def get_rule_stats(self, limit: int = 10) -> Dict[str, Any]:
    """Summarise per-rule execution metrics.

    Args:
        limit: Maximum number of rows in each "top" list.

    Returns:
        A dict with ``total_rules`` (rules currently registered),
        ``executed_count`` (rules that have a metrics entry) and three
        rankings in descending order: ``top_slow`` by total time,
        ``top_noisy`` by ignores/hits ratio and ``top_triggered`` by hits.
    """
    metrics = self.metrics
    quality = self.get_rule_quality_metrics()

    def _row(rule_id, m):
        calls = int(m.get("calls", 0))
        hits = int(m.get("hits", 0))
        ignores = int(m.get("ignores", 0))
        errors = int(m.get("errors", 0))
        time_ns = int(m.get("time_ns", 0))
        q = quality.get(rule_id, {})
        return {
            "rule_id": rule_id,
            "calls": calls,
            "hits": hits,
            "ignores": ignores,
            "errors": errors,
            "time_ms": time_ns / 1_000_000,
            # Guard the divisions: a rule may have no calls or no hits yet.
            "avg_time_ms": (time_ns / 1_000_000 / calls) if calls else 0.0,
            "fp_rate": (ignores / hits) if hits else 0.0,
            "precision": q.get("precision") if isinstance(q, dict) else None,
        }

    rows = [_row(rule_id, m) for rule_id, m in metrics.items()]

    def _top(field):
        # sorted() is stable, so ties keep the metrics insertion order.
        return sorted(rows, key=lambda r: r[field], reverse=True)[:limit]

    return {
        "total_rules": len(self.rules),
        "executed_count": len(metrics),
        "top_slow": _top("time_ms"),
        "top_noisy": _top("fp_rate"),
        "top_triggered": _top("hits"),
    }
505
+
506
def evaluate(self, diff_data: Dict[str, Any], ast_signals: Optional[List[Any]] = None) -> List[Dict[str, Any]]:
    """Evaluate every registered rule against the diff and return the triggered ones.

    Args:
        diff_data: Parsed diff. The keys read here are ``files`` (changed
            paths), ``new_files`` (added paths) and ``stats`` (``add`` and
            ``del`` line counts); the whole dict is also passed to each rule.
        ast_signals: Optional signals forwarded to each rule; None is
            treated as an empty list.

    Returns:
        One dict per triggered rule with the keys ``id``, ``title``,
        ``severity``, ``impact``, ``rationale``, ``matched_file``,
        ``precision``, ``quality_status`` and ``is_blocking`` (plus
        ``experimental`` for experimental rules in report-only mode).

    A rule is skipped when it is disabled, experimental while experimental
    rules are off, rejected by the lifecycle manager, a regression rule on a
    mostly-new diff, not applicable to any changed file, gated out by its CVE
    version range, or disabled by quality auto-tuning.  Per-rule counters are
    accumulated in ``self.metrics``.  An exception raised while evaluating a
    rule is counted in ``errors`` and that rule produces no finding.
    """
    triggered_rules = []
    ast_signals = ast_signals or []

    changed_files = diff_data.get("files", [])
    new_files = diff_data.get("new_files", [])
    stats = diff_data.get("stats", {"add": 0, "del": 0})

    # Global skip_paths filtering: drop non-code files (docs/logs/config noise)
    # before rule matching.  Filtering is per file, so one skipped file does
    # not suppress the rules for the remaining files.
    skip_paths = self.config.get("skip_paths", [])
    effective_changed_files = [
        file_path
        for file_path in changed_files
        if not any(fnmatch.fnmatch(file_path, pattern) for pattern in skip_paths)
    ]
    if not effective_changed_files:
        return triggered_rules

    # Adaptive scheduling: a diff with almost no deletions, or made up almost
    # entirely of new files, is treated as new code.
    added = stats.get("add", 0)
    deleted = stats.get("del", 0)
    total_changes = added + deleted
    # Only apply the deletion-ratio heuristic to non-trivial diffs (> 10 lines).
    is_mostly_new = total_changes > 10 and deleted / total_changes < 0.1
    if len(new_files) / len(effective_changed_files) > 0.8:
        is_mostly_new = True

    # Language name -> file extensions; built once instead of once per rule.
    lang_extensions = {
        'java': ['.java'],
        'go': ['.go'],
        'python': ['.py'],
        'javascript': ['.js', '.jsx', '.mjs', '.cjs'],
        'typescript': ['.ts', '.tsx'],
        'cpp': ['.cpp', '.cc', '.cxx', '.h', '.hpp', '.c++'],
        'c': ['.c', '.h'],
    }

    for rule in self.rules:
        if not getattr(rule, 'enabled', True):
            continue
        status = getattr(rule, "status", "stable")
        if status == "disabled":
            continue
        if status == "experimental" and not self.experimental_enabled:
            continue
        if not self.lifecycle.should_run(rule):
            continue

        # Regression rules are meaningless when there is no previous code.
        rule_type = getattr(rule, 'rule_type', 'absolute')
        if is_mostly_new and rule_type == 'regression':
            continue

        # Incremental filtering: run the rule only if at least one changed
        # file matches both its language (by extension) and its scope (glob).
        rule_lang = getattr(rule, 'language', '*')
        rule_scope = getattr(rule, 'scope', '**')
        if rule_lang != '*' or rule_scope != '**':
            # Unknown languages fall back to ".<language>" as the extension.
            extensions = tuple(lang_extensions.get(rule_lang, [f".{rule_lang}"]))
            applies = any(
                (rule_lang == '*' or file_path.endswith(extensions))
                and (rule_scope == '**' or fnmatch.fnmatch(file_path, rule_scope))
                for file_path in effective_changed_files
            )
            if not applies:
                continue

        # CVE version gating: when the rule names a package and a version
        # range, and the config lists dependency versions for that ecosystem,
        # run the rule only if the configured version is inside the range.
        rule_package = getattr(rule, 'package', None)
        rule_versions = getattr(rule, 'versions', None)
        if (
            rule_package
            and rule_versions
            and isinstance(rule_package, dict)
            and isinstance(rule_versions, dict)
        ):
            dep_versions = self.config.get("dependency_versions") or {}
            eco = (rule_package.get("ecosystem") or "").strip().lower()
            pkg_name = (rule_package.get("name") or "").strip()
            if eco and pkg_name:
                eco_map = dep_versions.get(eco)
                if isinstance(eco_map, dict):
                    current_ver = eco_map.get(pkg_name)
                    if current_ver is None:
                        # Package version not configured: the CVE rule cannot
                        # be matched precisely, so it is not executed.
                        continue
                    intro = rule_versions.get("introduced") or []
                    fixed = rule_versions.get("fixed") or []
                    intro_list = intro if isinstance(intro, list) else [intro]
                    fixed_list = fixed if isinstance(fixed, list) else [fixed]
                    if not _version_in_cve_range(str(current_ver), intro_list, fixed_list):
                        continue  # configured version is outside the affected range

        rule_id = rule.id
        quality_status, precision, _ = self.quality_manager.status(rule_id)
        if self.quality_manager.auto_tune and quality_status == "disabled":
            continue
        degrade_severity = self.quality_manager.auto_tune and quality_status == "degraded"

        rule_metrics = self.metrics.setdefault(
            rule_id, {"calls": 0, "hits": 0, "ignores": 0, "time_ns": 0, "errors": 0}
        )
        rule_metrics["calls"] += 1

        # perf_counter_ns is monotonic, so durations cannot go negative when
        # the system clock is adjusted.
        start_time = time.perf_counter_ns()
        match_details = None
        matched_file = 'unknown'
        try:
            match_details = rule.evaluate(diff_data, ast_signals)
            if match_details:
                matched_file = match_details.get('file', 'unknown')
                if self.ignore_manager.is_ignored(rule_id, matched_file):
                    # An ignored match is recorded as a hit and a false positive.
                    rule_metrics["hits"] += 1
                    rule_metrics["ignores"] += 1
                    self.quality_manager.record_false_positive(rule_id)
                    match_details = None
        except Exception:
            rule_metrics["errors"] += 1
            # Discard any partial result so an errored rule is neither counted
            # as a hit nor dereferenced below.
            match_details = None
        finally:
            rule_metrics["time_ns"] += time.perf_counter_ns() - start_time

        if not match_details:
            continue

        rule_metrics["hits"] += 1
        quality_entry = self.quality_manager.record_hit(rule_id)
        severity = self.lifecycle.adjust_severity(rule, rule.severity)
        if degrade_severity:
            severity = self._downgrade_severity(severity)
        triggered = {
            "id": rule.id,
            "title": getattr(rule, 'title', rule.id),  # fall back to the id when no title is set
            "severity": severity,
            "impact": rule.impact,
            "rationale": rule.rationale,
            "matched_file": matched_file,
            "precision": quality_entry.get("precision", precision),
            "quality_status": quality_status,
            "is_blocking": getattr(rule, 'is_blocking', False)
        }
        if status == "experimental" and self.experimental_report_only:
            triggered["experimental"] = True
        triggered_rules.append(triggered)

    return triggered_rules
668
+
669
def get_metrics(self) -> Dict[str, Any]:
    """Return the live per-rule metrics dict (calls, hits, ignores, time_ns, errors per rule id)."""
    collected = self.metrics
    return collected
672
+
673
@staticmethod
def _downgrade_severity(severity: str) -> str:
    """Return the severity one step below ``severity``.

    The scale is critical > high > medium > low, and ``low`` stays ``low``.
    Lookup is case-insensitive; a value outside the scale is returned unchanged.
    """
    next_level = {
        "critical": "high",
        "high": "medium",
        "medium": "low",
        "low": "low",
    }
    return next_level.get(str(severity).lower(), severity)
681
+
682
def _rule_confidences(self) -> Dict[str, float]:
    """Map each registered rule id to its ``confidence`` as a float.

    A rule without the attribute, or with a value that cannot be converted
    to float, gets 1.0.
    """
    confidences = {}
    for rule in self.rules:
        try:
            value = float(getattr(rule, "confidence", 1.0))
        except Exception:
            value = 1.0
        confidences[rule.id] = value
    return confidences
690
+
691
def _update_quality_report(self) -> None:
    """Pass the current metrics and per-rule confidences to the quality manager's ``update_report``."""
    current_metrics = self.metrics
    rule_confidences = self._rule_confidences()
    self.quality_manager.update_report(current_metrics, rule_confidences)
695
+
696
@staticmethod
def quality_report_from_metrics(metrics: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Build a rule quality report from a metrics mapping.

    Each row holds ``rule_id``, ``hits``, ``accepts`` (hits minus ignores,
    never negative), ``ignores`` and ``fp_rate`` (ignores / hits, or 0.0 when
    there are no hits).  Entries keyed ``cache`` or ``rule_stats`` and entries
    whose value is not a dict are skipped.  Rows are ordered by hits
    descending, then by rule id ascending.
    """
    def _row(rule_id, entry):
        hits = entry.get("hits", 0)
        ignores = entry.get("ignores", 0)
        return {
            "rule_id": rule_id,
            "hits": hits,
            "accepts": max(0, hits - ignores),
            "ignores": ignores,
            "fp_rate": (ignores / hits) if hits else 0.0,
        }

    report = [
        _row(rule_id, entry)
        for rule_id, entry in metrics.items()
        if rule_id not in ("cache", "rule_stats") and isinstance(entry, dict)
    ]
    report.sort(key=lambda row: (-row["hits"], row["rule_id"]))
    return report