diffsense 2.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. adapters/__init__.py +0 -0
  2. adapters/base.py +27 -0
  3. adapters/github_adapter.py +164 -0
  4. adapters/gitlab_adapter.py +207 -0
  5. adapters/local_adapter.py +136 -0
  6. banner.py +71 -0
  7. cli.py +606 -0
  8. config/__init__.py +1 -0
  9. config/rules.yaml +371 -0
  10. core/__init__.py +235 -0
  11. core/ast_detector.py +853 -0
  12. core/change.py +46 -0
  13. core/composer.py +93 -0
  14. core/evaluator.py +15 -0
  15. core/ignore_manager.py +71 -0
  16. core/knowledge.py +77 -0
  17. core/parser.py +181 -0
  18. core/parser_manager.py +104 -0
  19. core/quality_manager.py +117 -0
  20. core/renderer.py +197 -0
  21. core/rule_base.py +98 -0
  22. core/rule_runtime.py +103 -0
  23. core/rules.py +718 -0
  24. core/run_config.py +85 -0
  25. core/semantic_diff.py +359 -0
  26. core/signal_model.py +21 -0
  27. core/signals_registry.py +62 -0
  28. diffsense-2.2.12.dist-info/METADATA +18 -0
  29. diffsense-2.2.12.dist-info/RECORD +58 -0
  30. diffsense-2.2.12.dist-info/WHEEL +5 -0
  31. diffsense-2.2.12.dist-info/entry_points.txt +3 -0
  32. diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
  33. diffsense-2.2.12.dist-info/top_level.txt +11 -0
  34. diffsense_mcp/__init__.py +1 -0
  35. diffsense_mcp/launcher.py +28 -0
  36. diffsense_mcp/server.py +687 -0
  37. governance/lifecycle.py +54 -0
  38. main.py +318 -0
  39. rules/__init__.py +246 -0
  40. rules/api_compatibility.py +372 -0
  41. rules/collection_handling.py +349 -0
  42. rules/concurrency.py +194 -0
  43. rules/concurrency_adapter.py +250 -0
  44. rules/cross_language_adapter.py +444 -0
  45. rules/exception_handling.py +320 -0
  46. rules/go_rules.py +401 -0
  47. rules/null_safety.py +301 -0
  48. rules/resource_management.py +222 -0
  49. rules/yaml_adapter.py +195 -0
  50. run_audit.py +478 -0
  51. sdk/cpp_adapter.py +238 -0
  52. sdk/go_adapter.py +199 -0
  53. sdk/java_adapter.py +199 -0
  54. sdk/javascript_adapter.py +229 -0
  55. sdk/language_adapter.py +313 -0
  56. sdk/python_adapter.py +195 -0
  57. sdk/rule.py +63 -0
  58. sdk/signal.py +14 -0
config/rules.yaml ADDED
@@ -0,0 +1,371 @@
1
+ # 全局配置
2
+ config:
3
+ # 跳过非业务代码文件(支持通配符)
4
+ skip_paths:
5
+ - "**/test/**"
6
+ - "**/tests/**"
7
+ - "**/*Test*.java"
8
+ - "**/spec/**"
9
+ - "**/__tests__/**"
10
+ - "**/*_test.py"
11
+ - "**/test_*.py"
12
+ - "**/docs/**"
13
+ - "**/*.md"
14
+ - "**/*.rst"
15
+ - "**/*.txt"
16
+ - "**/*.log"
17
+ - "**/CHANGELOG*"
18
+ - "**/.github/**"
19
+ - "**/.gitlab/**"
20
+ - "**/.vscode/**"
21
+
22
+ rules:
23
+ - id: runtime.concurrency_risk
24
+ # Use AST Signal instead of regex match
25
+ signal: "runtime.concurrency.synchronized"
26
+ # match: "(?i)(thread|async|lock|synchronized)" <-- Removed regex
27
+ action: "added"
28
+ file: "**/core/**"
29
+ impact: runtime
30
+ severity: high
31
+ rationale: "Concurrency changes (synchronized) in core modules pose high stability risk"
32
+
33
+ - id: runtime.concurrency_synchronized_removed_critical
34
+ signal: "runtime.concurrency.synchronized"
35
+ action: "removed"
36
+ file: "**"
37
+ impact: runtime
38
+ severity: critical
39
+ rationale: "CRITICAL: Removal of synchronized block/keyword detected! This is a major stability risk."
40
+
41
+ - id: runtime.concurrency_lock_risk
42
+ signal: "runtime.concurrency.lock"
43
+ action: "added"
44
+ file: "**/core/**"
45
+ impact: runtime
46
+ severity: high
47
+ rationale: "Explicit lock usage in core modules"
48
+
49
+ - id: runtime.concurrency_lock_removed_critical
50
+ signal: "runtime.concurrency.lock"
51
+ action: "removed"
52
+ file: "**"
53
+ impact: runtime
54
+ severity: critical
55
+ rationale: "CRITICAL: Removal of lock/concurrency protection detected! This may lead to race conditions."
56
+
57
+ - id: runtime.concurrency_volatile_risk
58
+ signal: "runtime.concurrency.volatile"
59
+ action: "added"
60
+ file: "**/core/**"
61
+ impact: runtime
62
+ severity: high
63
+ rationale: "Volatile field usage in core modules"
64
+
65
+ - id: runtime.concurrency_volatile_removed_risk
66
+ signal: "runtime.concurrency.volatile"
67
+ action: "removed"
68
+ file: "**"
69
+ impact: runtime
70
+ severity: critical
71
+ rationale: "CRITICAL: Removal of volatile keyword detected! This may cause visibility issues across threads."
72
+
73
+ - id: runtime.concurrency_map_removed_risk
74
+ signal: "runtime.concurrency.concurrent_map"
75
+ action: "removed"
76
+ file: "**"
77
+ impact: runtime
78
+ severity: high
79
+ rationale: "Removal of ConcurrentHashMap detected. Ensure it is not replaced by a non-thread-safe collection."
80
+
81
+ - id: runtime.concurrency.thread_safety_downgrade
82
+ signal: "runtime.concurrency.thread_safety_downgrade"
83
+ action: "downgrade"
84
+ file: "**"
85
+ impact: runtime
86
+ severity: critical
87
+ rationale: "CRITICAL: Detected downgrade from ThreadSafe type to Non-ThreadSafe type (e.g., ConcurrentHashMap -> HashMap). High risk of race conditions!"
88
+
89
+ - id: runtime.concurrency.static_unsafe_collection
90
+ signal: "runtime.concurrency.static_unsafe_collection"
91
+ action: "added"
92
+ file: "**"
93
+ impact: runtime
94
+ severity: high
95
+ rationale: "Usage of static non-thread-safe collection detected. This is a common cause of race conditions in multi-threaded environments."
96
+
97
+ - id: runtime.performance.sleep_added
98
+ signal: "runtime.performance.sleep_added"
99
+ action: "added"
100
+ file: "**"
101
+ impact: performance
102
+ severity: medium
103
+ rationale: "Thread.sleep() introduced. This may indicate poor concurrency handling or debug code left in production."
104
+
105
+ - id: runtime.concurrency.executors_factory_risk
106
+ signal: "runtime.concurrency.executors_factory_risk"
107
+ action: "added"
108
+ file: "**"
109
+ impact: runtime
110
+ severity: critical
111
+ rationale: "CRITICAL: Use of Executors.newFixedThreadPool/newCachedThreadPool detected. These methods have unbounded queues or thread creation, which can lead to OOM in high-load scenarios. Use ThreadPoolExecutor manually."
112
+
113
+ - id: runtime.concurrency.future_get_without_timeout
114
+ signal: "runtime.concurrency.future_get_without_timeout"
115
+ action: "added"
116
+ file: "**"
117
+ impact: runtime
118
+ severity: critical
119
+ rationale: "CRITICAL: Future.get() called without timeout. This can cause thread blocking and cascading failures (Avalanche Effect). Always use get(timeout, unit)."
120
+
121
+ - id: runtime.concurrency.threadpool_creation
122
+ signal: "runtime.concurrency.threadpool_creation"
123
+ action: "added"
124
+ file: "**"
125
+ impact: runtime
126
+ severity: high
127
+ rationale: "ThreadPoolExecutor creation detected. Ensure corePoolSize, maxPoolSize, and queueCapacity are configured correctly to avoid OOM or thread explosion."
128
+
129
+ - id: runtime.service_layer
130
+ # 优化:只在 service 层修改了方法签名或新增业务逻辑时触发
131
+ # 仅文件路径匹配不再触发,必须有实际的方法签名变化
132
+ signal: "api.method_signature_changed"
133
+ file: "**/service/**"
134
+ impact: runtime
135
+ severity: medium
136
+ rationale: "Service layer method signature changes may affect API contract"
137
+
138
+ - id: data.destructive_schema
139
+ match: "ALTER TABLE|DROP TABLE"
140
+ file: "**/migrations/**"
141
+ impact: data
142
+ severity: high
143
+ rationale: "Destructive schema changes require careful review"
144
+
145
+ - id: data.query_change
146
+ # 优化:只在 SQL 查询语句本身变化时触发,避免 DTO/Entity 修改误报
147
+ match: "^\\s*(SELECT|INSERT|UPDATE|DELETE)\\s+"
148
+ action: "changed"
149
+ impact: data
150
+ severity: medium
151
+ rationale: "SQL query modifications in SQL files or MyBatis mapper files"
152
+
153
+ - id: architecture.dependency_change
154
+ file: "**/package.json"
155
+ impact: architecture
156
+ severity: high
157
+ rationale: "Dependency changes affect build and security"
158
+
159
+ - id: architecture.dependency_change_python
160
+ file: "**/requirements.txt"
161
+ impact: architecture
162
+ severity: high
163
+ rationale: "Dependency changes affect build and security"
164
+
165
+ # --- Semantic Regression Rules (Added by Request) ---
166
+
167
+ - id: runtime.input_normalization_removed
168
+ signal: "runtime.input_normalization_removed"
169
+ action: "removed"
170
+ file: "**"
171
+ impact: runtime
172
+ severity: high
173
+ rationale: "CRITICAL: Removal of input normalization/validation call (encode/decode/validate/check). High risk of data integrity or security issues."
174
+
175
+ - id: data.pagination_semantic_change
176
+ # 优化:只在分页参数的实际语义变化时触发(如 limit 变为 limit+1)
177
+ signal: "data.pagination_semantic_change"
178
+ action: "changed"
179
+ file: "**"
180
+ impact: data
181
+ severity: high
182
+ rationale: "Changes to pagination logic (pageNo/pageSize/start/limit) detected in diff. Verify if pagination semantics changed."
183
+
184
+ - id: runtime.collection_mutation_inside_loop
185
+ signal: "runtime.collection_mutation_inside_loop"
186
+ action: "added"
187
+ file: "**"
188
+ impact: runtime
189
+ severity: high
190
+ rationale: "Collection modification (remove) detected inside a loop. This often leads to ConcurrentModificationException or undefined behavior."
191
+
192
+ - id: security.behavior_change_auth
193
+ file: "**/auth/**"
194
+ impact: security
195
+ severity: high
196
+ rationale: "Changes in authentication modules detected. Requires careful security review."
197
+
198
+ - id: security.behavior_change_security
199
+ file: "**/security/**"
200
+ impact: security
201
+ severity: high
202
+ rationale: "Changes in security modules detected. Requires careful security review."
203
+
204
+ - id: runtime.validation_removed
205
+ # Overlaps with runtime.input_normalization_removed, whose signal already covers removed 'validate/check' calls.
205
+ # Validation was requested as a separate category, so it also gets its own rule here.
206
+ # TODO: switch this rule to a dedicated signal if a distinct one becomes available.
207
+ # Until then, removed 'validate/check' calls are still reported via input_normalization_removed.
208
+ # This rule is path-based: it flags any change to files under a 'validation' directory.
210
+ file: "**/validation/**"
211
+ match: ".*" # Match any change
212
+ impact: runtime
213
+ severity: high
214
+ rationale: "Changes in validation logic modules."
215
+
216
+ # --- New Semantic Signal Rules (P0/P1/P2) ---
217
+
218
+ - id: runtime.concurrency.lock_removed
219
+ signal: "runtime.concurrency.lock_removed"
220
+ action: "removed"
221
+ file: "**"
222
+ impact: runtime
223
+ severity: critical
224
+ rationale: "CRITICAL: Removal of Lock/Synchronized detected! This is a P0 stability risk (TOP1 accident cause)."
225
+
226
+ - id: runtime.concurrency.volatile_removed
227
+ signal: "runtime.concurrency.volatile_removed"
228
+ action: "removed"
229
+ file: "**"
230
+ impact: runtime
231
+ severity: critical
232
+ rationale: "CRITICAL: Removal of volatile keyword detected! This causes visibility issues."
233
+
234
+ - id: runtime.concurrency.final_removed
235
+ signal: "runtime.concurrency.final_removed"
236
+ action: "removed"
237
+ file: "**"
238
+ impact: runtime
239
+ severity: critical
240
+ rationale: "CRITICAL: Removal of final modifier detected! Immutable object might become mutable (thread-safety risk)."
241
+
242
+ - id: runtime.concurrency.atomic_to_non_atomic_write
243
+ signal: "runtime.concurrency.atomic_to_non_atomic_write"
244
+ action: "removed"
245
+ file: "**"
246
+ impact: runtime
247
+ severity: critical
248
+ rationale: "CRITICAL: Atomic write operation removed/replaced. This is a semantic downgrade."
249
+
250
+ - id: runtime.concurrency.threadpool_param_change
251
+ signal: "runtime.concurrency.threadpool_param_change"
252
+ action: "changed"
253
+ file: "**"
254
+ impact: runtime
255
+ severity: high
256
+ rationale: "ThreadPoolExecutor parameters changed. Verify corePoolSize/maxQueue/timeout logic."
257
+
258
+ - id: runtime.concurrency.threadpool_unbounded_queue
259
+ signal: "runtime.concurrency.threadpool_unbounded_queue"
260
+ action: "added"
261
+ file: "**"
262
+ impact: runtime
263
+ severity: critical
264
+ rationale: "CRITICAL: Unbounded queue (LinkedBlockingQueue without capacity) detected in ThreadPool. Risk of OOM."
265
+
266
+ - id: runtime.concurrency.busy_wait_added
267
+ signal: "runtime.concurrency.busy_wait_added"
268
+ action: "added"
269
+ file: "**"
270
+ impact: performance
271
+ severity: critical
272
+ rationale: "CRITICAL: Busy wait loop (while(true)) detected. High CPU usage risk."
273
+
274
+ - id: runtime.resource.try_with_resource_removed
275
+ signal: "runtime.resource.try_with_resource_removed"
276
+ action: "removed"
277
+ file: "**"
278
+ impact: runtime
279
+ severity: high
280
+ rationale: "Try-with-resources block removed. Verify resource closing logic to prevent leaks."
281
+
282
+ - id: runtime.resource.cache_eviction_removed
283
+ signal: "runtime.resource.cache_eviction_removed"
284
+ action: "removed"
285
+ file: "**"
286
+ impact: runtime
287
+ severity: high
288
+ rationale: "Cache eviction/TTL logic removed. Risk of memory leak/cache explosion."
289
+
290
+ - id: runtime.network.timeout_removed
291
+ signal: "runtime.network.timeout_removed"
292
+ action: "removed"
293
+ file: "**"
294
+ impact: runtime
295
+ severity: high
296
+ rationale: "Timeout setting removed. Risk of thread hanging/cascading failure."
297
+
298
+ - id: runtime.data.null_check_removed
299
+ signal: "runtime.data.null_check_removed"
300
+ action: "removed"
301
+ file: "**"
302
+ impact: runtime
303
+ severity: medium
304
+ rationale: "Null check removed. Potential NPE risk."
305
+
306
+ - id: runtime.data.equals_to_reference_compare
307
+ signal: "runtime.data.equals_to_reference_compare"
308
+ action: "changed"
309
+ file: "**"
310
+ impact: runtime
311
+ severity: high
312
+ rationale: "Semantic change: equals() replaced by == reference comparison."
313
+
314
+ # ==============================================
315
+ # Security Rules (Ported from SonarQube Core)
316
+ # These detect risks via Diff only - no full AST required
317
+ # ==============================================
318
+
319
+ # P0: Hardcoded Secrets
320
+ - id: security.hardcoded_secret
321
+ signal: "security.hardcoded_secret"
322
+ action: "added"
323
+ file: "**"
324
+ impact: security
325
+ severity: critical
326
+ rationale: "CRITICAL: Hardcoded password, secret, or API key detected. This is a major security risk - secrets should be in environment variables or secure vaults."
327
+ tags: ["security", "secret", "credentials"]
328
+ is_blocking: true
329
+
330
+ # P0: SQL Injection
331
+ - id: security.sql_injection
332
+ signal: "security.sql_injection"
333
+ action: "added"
334
+ file: "**"
335
+ impact: security
336
+ severity: critical
337
+ rationale: "CRITICAL: SQL string concatenation detected. High risk of SQL injection. Use parameterized queries (PreparedStatement) or ORM instead."
338
+ tags: ["security", "sql", "injection"]
339
+ is_blocking: true
340
+ scan_mode: incremental
341
+
342
+ # P1: Weak Cryptography
343
+ - id: security.weak_crypto
344
+ signal: "security.weak_crypto"
345
+ action: "added"
346
+ file: "**"
347
+ impact: security
348
+ severity: high
349
+ rationale: "Weak cryptographic algorithm detected (DES, MD5, SHA1, RC4). These are cryptographically broken and should not be used."
350
+ tags: ["security", "crypto", "encryption"]
351
+
352
+ # P1: Command Injection
353
+ - id: security.command_injection
354
+ signal: "security.command_injection"
355
+ action: "added"
356
+ file: "**"
357
+ impact: security
358
+ severity: critical
359
+ rationale: "CRITICAL: Command execution (Runtime.exec, ProcessBuilder) detected. Ensure input is sanitized to prevent command injection."
360
+ tags: ["security", "command", "injection"]
361
+ is_blocking: true
362
+
363
+ # P2: Secret Removed (Regression)
364
+ - id: security.hardcoded_secret_removed
365
+ signal: "security.hardcoded_secret_removed"
366
+ action: "removed"
367
+ file: "**"
368
+ impact: security
369
+ severity: medium
370
+ rationale: "Hardcoded secret was removed. Verify this is intentional and not a security regression."
371
+ tags: ["security", "secret"]
core/__init__.py ADDED
@@ -0,0 +1,235 @@
1
+ # Core version for cache invalidation
2
+ # Increment this whenever the parser logic, AST detection logic, or data structures change.
3
+ CACHE_VERSION = "v2.2.0-rev1"
4
+
5
+ import os
6
+ import json
7
+ import time
8
+ from typing import Dict, Any, List, Optional, Tuple
9
+
10
+
11
def get_cache_max_age_seconds() -> int:
    """Return the cache TTL in seconds; 0 means the cache never expires.

    The value is read from the ``DIFFSENSE_CACHE_MAX_AGE_DAYS`` environment
    variable and interpreted as a (possibly fractional) number of days.

    Returns:
        The TTL in whole seconds, clamped to be non-negative. 0 is returned
        when the variable is unset, empty, not numeric, or not finite.
    """
    raw_days = os.environ.get("DIFFSENSE_CACHE_MAX_AGE_DAYS", "")
    if not raw_days:
        return 0
    try:
        return max(0, int(float(raw_days) * 86400))
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text or NaN.
        # OverflowError: "inf" / "1e999" are accepted by float() but cannot be
        # converted by int(); treat them like any other unusable value.
        return 0
20
+
21
+
22
def analyze_diff(
    diff_content: str,
    rules_path: str = "config",
    profile: Optional[str] = None,
    quality_config: Optional[Dict[str, Any]] = None,
    pro_rules_path: Optional[str] = None,
    experimental: bool = False,
    experimental_report_only: bool = True,
    baseline_file: Optional[str] = None,
    since_baseline: bool = False,
) -> Dict[str, Any]:
    """Core analysis entry point: take diff text, return a structured audit result.

    Args:
        diff_content: Git unified diff content.
        rules_path: Path to the rule configuration file or directory.
        profile: Rule profile (strict, lightweight, or None).
        quality_config: Rule-quality configuration; a built-in default is
            used when None.
        pro_rules_path: Path to the advanced rules; when None it is looked up
            through ``run_config.get_pro_rules_path`` (best effort).
        experimental: Whether experimental rules are enabled.
        experimental_report_only: Whether experimental rules only report.
        baseline_file: Path to the baseline file.
        since_baseline: Whether to report only findings not already present
            in the baseline.

    Returns:
        The dictionary produced by ``DecisionComposer.compose`` (documented
        upstream as carrying fields such as review_level and details),
        extended here with ``_metrics``, ``_rule_quality``,
        ``_quality_warnings`` and ``_performance``.
    """
    from .parser import DiffParser
    from .ast_detector import ASTDetector
    from .rules import RuleEngine
    from .evaluator import ImpactEvaluator
    from .composer import DecisionComposer

    # Stage 1: parse the raw diff text.
    parser = DiffParser()
    parsed_diff = parser.parse(diff_content)

    # Stage 2: derive AST signals from the parsed diff.
    detector = ASTDetector()
    signals = detector.detect_signals(parsed_diff)

    # Stage 3: assemble the engine configuration.
    if quality_config is None:
        quality_config = {
            "auto_tune": False,
            "disable_threshold": 0.3,
            "degrade_threshold": 0.5,
            "min_samples": 30,
        }

    engine_config: Dict[str, Any] = {
        "rule_quality": quality_config,
        "experimental": {
            "enabled": experimental,
            "report_only": experimental_report_only,
        },
    }

    # Best effort: pick up dependency_versions from the run config of the
    # current working directory; any failure leaves the config untouched.
    try:
        from .run_config import get_run_config
        run_settings = get_run_config(os.getcwd())
        if run_settings.get("dependency_versions"):
            engine_config["dependency_versions"] = run_settings["dependency_versions"]
    except Exception:
        pass

    # Best effort: resolve the pro rules location when the caller gave none.
    if pro_rules_path is None:
        try:
            from .run_config import get_pro_rules_path
            pro_rules_path = get_pro_rules_path(os.getcwd())
        except Exception:
            pass

    engine = RuleEngine(
        rules_path,
        profile=profile,
        config=engine_config,
        pro_rules_path=pro_rules_path,
    )

    # Stage 4: evaluate which rules the change triggers.
    findings = ImpactEvaluator(engine).evaluate(parsed_diff, signals)

    # Stage 5: optionally drop findings already recorded in the baseline.
    if baseline_file and since_baseline:
        known_keys = _baseline_set(_load_baseline(baseline_file))
        findings = [
            finding for finding in findings
            if _baseline_key(finding) not in known_keys
        ]

    # Stage 6: compose the final decision.
    result = DecisionComposer().compose(findings, parsed_diff.get('files', []))

    # Stage 7: attach engine and cache metrics.
    metrics = dict(engine.get_metrics())
    metrics['cache'] = {
        "diff": parser.metrics,
        "ast": detector.metrics,
    }
    metrics['rule_stats'] = engine.get_rule_stats()
    result['_metrics'] = metrics
    result['_rule_quality'] = engine.get_rule_quality_metrics()
    result['_quality_warnings'] = engine.get_quality_warnings()

    # Stage 8: attach derived performance percentages.
    cache_hit_rate = _calc_cache_hit_rate(parser.metrics, detector.metrics)
    rules_executed = _calc_rules_executed_pct(engine.get_rule_stats())
    result["_performance"] = {
        "cache_hit_rate_pct": cache_hit_rate,
        "rules_executed_pct": rules_executed,
    }

    return result
133
+
134
+
135
def _calc_cache_hit_rate(diff_metrics: Dict, ast_metrics: Dict) -> float:
    """Return the combined cache hit rate of both caches as a percentage.

    Each metrics mapping is read through its "hits" and "misses" keys (both
    default to 0). The result is rounded to two decimals; 0.0 is returned
    when no lookups were recorded at all.
    """
    hit_count = 0
    lookup_count = 0
    for metrics in (diff_metrics, ast_metrics):
        hits = metrics.get("hits", 0)
        hit_count += hits
        lookup_count += hits + metrics.get("misses", 0)

    if lookup_count == 0:
        return 0.0
    return round(hit_count / lookup_count * 100, 2)
143
+
144
+
145
def _calc_rules_executed_pct(rule_stats: Dict) -> float:
    """Return the share of rules that were executed, as a percentage.

    Reads "total_rules" and "executed_count" from ``rule_stats`` (both
    default to 0). The result is rounded to two decimals; 0.0 is returned
    when the total is zero.
    """
    rule_count = rule_stats.get("total_rules", 0)
    ran_count = rule_stats.get("executed_count", 0)
    if rule_count == 0:
        return 0.0
    ratio = ran_count / rule_count
    return round(ratio * 100, 2)
151
+
152
+
153
def _baseline_key(rule: Dict[str, Any]) -> str:
    """Build the "<id>::<matched_file>" key used to compare a finding with the baseline."""
    rule_id = rule.get('id', '')
    matched_file = rule.get('matched_file', '')
    return "{}::{}".format(rule_id, matched_file)
155
+
156
+
157
def _load_baseline(path: str) -> Dict[str, Any]:
    """Load a baseline JSON file, falling back to an empty baseline.

    Args:
        path: Location of the baseline file.

    Returns:
        The parsed JSON object when it is a dict whose "items" value is a
        list; otherwise ``{"items": []}`` (missing file, unreadable file,
        invalid JSON, or unexpected structure).
    """
    if os.path.exists(path):
        try:
            with open(path, "r", encoding="utf-8") as handle:
                loaded = json.load(handle)
        except Exception:
            # Deliberately tolerant: a broken baseline behaves like no baseline.
            return {"items": []}
        if isinstance(loaded, dict) and isinstance(loaded.get("items"), list):
            return loaded
    return {"items": []}
168
+
169
+
170
def _baseline_set(data: Dict[str, Any]) -> set:
    """Return the set of "<rule_id>::<file>" keys recorded in a baseline.

    Args:
        data: Baseline mapping; its "items" value is expected to be a list
            of dicts carrying "rule_id" and "file".

    Returns:
        One key per dict entry, with missing fields rendered as empty
        strings. Entries that are not dicts are skipped, and a missing or
        null "items" value yields an empty set, so a hand-edited baseline
        with a malformed entry does not abort the analysis.
    """
    items = data.get("items") or []
    return {
        f"{entry.get('rule_id', '')}::{entry.get('file', '')}"
        for entry in items
        if isinstance(entry, dict)
    }
173
+
174
+
175
def build_inline_comments(triggered_rules: List[Dict[str, Any]], diff_data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build inline review comments (intended for the AI-agent scenario).

    Args:
        triggered_rules: The list of triggered rules.
        diff_data: The parsed diff data; its "file_patches" entries are read
            through their "file" and "patch" keys.

    Returns:
        One comment dict per rule with the keys path, position, line, body
        and rule_id. When no patch text is found for the rule's matched
        file, the first patch entry that has a file name is used instead;
        with no patch text at all, position and line both default to 1.
    """
    # Kept from the original; nothing in this function uses it, and a
    # function-scope import is not visible to other functions.
    import re

    file_patches = diff_data.get("file_patches", [])
    patch_by_file = {}
    for entry in file_patches:
        patch_by_file[entry.get("file")] = entry.get("patch", "")

    comments = []
    for rule in triggered_rules:
        target = rule.get("matched_file", "")
        patch = patch_by_file.get(target, "")

        if not patch and file_patches:
            # Fall back to the first patch entry that names a file.
            fallback = next((e for e in file_patches if e.get("file")), None)
            if fallback is not None:
                target = fallback.get("file")
                patch = fallback.get("patch", "")

        if patch:
            position, line = _first_added_position(patch)
        else:
            position, line = 1, 1

        severity = rule.get('severity', '').upper()
        rule_id = rule.get('id', '')
        rationale = rule.get('rationale', '')
        comments.append({
            "path": target,
            "position": position,
            "line": line,
            "body": f"{severity} {rule_id}: {rationale}",
            "rule_id": rule.get("id", ""),
        })
    return comments
211
+
212
+
213
def _first_added_position(patch_text: str) -> Tuple[int, int]:
    """Locate the first added line of a unified-diff patch.

    Args:
        patch_text: Patch text for a single file.

    Returns:
        A ``(position, new_line)`` tuple. ``position`` is the 1-based index,
        within ``patch_text``, of the first line starting with "+" (file
        header lines starting with "+++" are ignored). ``new_line`` is the
        new-file line number tracked from the most recent "@@" hunk header,
        or 1 when no header was seen. If the patch has no added line, the
        index of the last hunk header (or 1) is returned together with the
        running new-file line counter (or 1).
    """
    # Imported here because the module's top-level imports do not include
    # `re`; without this the first "@@" line raised NameError.
    import re

    position = 1
    new_line = None
    for index, line in enumerate(patch_text.splitlines(), start=1):
        if line.startswith("@@"):
            # The hunk header carries the starting new-file line as "+<n>".
            header = re.search(r"\+(\d+)", line)
            if header:
                new_line = int(header.group(1))
            position = index
            continue
        if line.startswith("+") and not line.startswith("+++"):
            return index, (1 if new_line is None else new_line)
        if line.startswith("-") and not line.startswith("---"):
            # Removed lines do not exist in the new file.
            continue
        if new_line is not None:
            new_line += 1
    return position, new_line or 1