diffsense 2.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. adapters/__init__.py +0 -0
  2. adapters/base.py +27 -0
  3. adapters/github_adapter.py +164 -0
  4. adapters/gitlab_adapter.py +207 -0
  5. adapters/local_adapter.py +136 -0
  6. banner.py +71 -0
  7. cli.py +606 -0
  8. config/__init__.py +1 -0
  9. config/rules.yaml +371 -0
  10. core/__init__.py +235 -0
  11. core/ast_detector.py +853 -0
  12. core/change.py +46 -0
  13. core/composer.py +93 -0
  14. core/evaluator.py +15 -0
  15. core/ignore_manager.py +71 -0
  16. core/knowledge.py +77 -0
  17. core/parser.py +181 -0
  18. core/parser_manager.py +104 -0
  19. core/quality_manager.py +117 -0
  20. core/renderer.py +197 -0
  21. core/rule_base.py +98 -0
  22. core/rule_runtime.py +103 -0
  23. core/rules.py +718 -0
  24. core/run_config.py +85 -0
  25. core/semantic_diff.py +359 -0
  26. core/signal_model.py +21 -0
  27. core/signals_registry.py +62 -0
  28. diffsense-2.2.12.dist-info/METADATA +18 -0
  29. diffsense-2.2.12.dist-info/RECORD +58 -0
  30. diffsense-2.2.12.dist-info/WHEEL +5 -0
  31. diffsense-2.2.12.dist-info/entry_points.txt +3 -0
  32. diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
  33. diffsense-2.2.12.dist-info/top_level.txt +11 -0
  34. diffsense_mcp/__init__.py +1 -0
  35. diffsense_mcp/launcher.py +28 -0
  36. diffsense_mcp/server.py +687 -0
  37. governance/lifecycle.py +54 -0
  38. main.py +318 -0
  39. rules/__init__.py +246 -0
  40. rules/api_compatibility.py +372 -0
  41. rules/collection_handling.py +349 -0
  42. rules/concurrency.py +194 -0
  43. rules/concurrency_adapter.py +250 -0
  44. rules/cross_language_adapter.py +444 -0
  45. rules/exception_handling.py +320 -0
  46. rules/go_rules.py +401 -0
  47. rules/null_safety.py +301 -0
  48. rules/resource_management.py +222 -0
  49. rules/yaml_adapter.py +195 -0
  50. run_audit.py +478 -0
  51. sdk/cpp_adapter.py +238 -0
  52. sdk/go_adapter.py +199 -0
  53. sdk/java_adapter.py +199 -0
  54. sdk/javascript_adapter.py +229 -0
  55. sdk/language_adapter.py +313 -0
  56. sdk/python_adapter.py +195 -0
  57. sdk/rule.py +63 -0
  58. sdk/signal.py +14 -0
core/ast_detector.py ADDED
@@ -0,0 +1,853 @@
1
+ import re
2
+ import os
3
+ import time
4
+ import hashlib
5
+ import pickle
6
+ import javalang
7
+ from javalang.tokenizer import BasicType, Identifier
8
+ from javalang.tree import SynchronizedStatement, MethodInvocation, FieldDeclaration, MethodDeclaration, LocalVariableDeclaration, VariableDeclarator, ForStatement, WhileStatement, DoStatement, ClassCreator, ReferenceType, BasicType as TreeBasicType, Assignment, TryResource, TryStatement, IfStatement, BinaryOperation, Literal
9
+ from typing import List, Set, Dict, Any, Tuple, Optional
10
+ from . import CACHE_VERSION
11
+ from . import get_cache_max_age_seconds
12
+ from .signal_model import Signal
13
+ from .change import Change, ChangeKind
14
+ from .knowledge import is_thread_safe, is_lock_type
15
+
16
+ class ASTDetector:
17
+ def __init__(self):
18
+ self.pagination_vars = {"pageNo", "pageSize", "start", "limit", "offset"}
19
+ self.critical_calls = {"encode", "decode", "validate", "check", "normalize", "sanitize"}
20
+ self.risky_executors = {"newFixedThreadPool", "newCachedThreadPool", "newSingleThreadExecutor"}
21
+ self.cache_dir = self._resolve_cache_dir()
22
+ self.metrics = {"hits": 0, "misses": 0, "saved_ms": 0}
23
+
24
+ # === Security Detection Patterns ===
25
+ # Hardcoded secrets patterns (regex-based detection in token analysis)
26
+ self.secret_patterns = {
27
+ "password", "passwd", "pwd", "secret", "token", "api_key", "apikey",
28
+ "access_key", "accesskey", "private_key", "privatekey", "credential"
29
+ }
30
+
31
+ # Dangerous method calls
32
+ self.sql_concat_methods = {"concat", "append", "+"}
33
+ self.dangerous_methods = {
34
+ "execute", "exec", "query", "executeQuery", "executeUpdate",
35
+ "createStatement", "prepareStatement"
36
+ }
37
+
38
+ # Insecure crypto algorithms
39
+ self.weak_crypto = {
40
+ "DES", "RC4", "MD5", "SHA1", "MessageDigest",
41
+ "setAlgorithm" # Common method pattern
42
+ }
43
+
44
+ # Command injection risks
45
+ self.command_methods = {
46
+ "exec", "runtime", "processbuilder", "ProcessBuilder",
47
+ "getRuntime", "system"
48
+ }
49
+
50
def _resolve_cache_dir(self) -> str:
    """Return the directory used for pickled AST cache entries.

    The base directory is the DIFFSENSE_CACHE_DIR environment variable when
    it is set to a non-empty value, otherwise ``~/.diffsense/cache``. The
    result is ``<base>/<CACHE_VERSION>/ast``.
    """
    root = os.environ.get("DIFFSENSE_CACHE_DIR") or os.path.join(
        os.path.expanduser("~"), ".diffsense", "cache"
    )
    return os.path.join(root, CACHE_VERSION, "ast")
55
+
56
+ def _ast_cache_key(self, wrapper_type: str, wrapper_text: str) -> str:
57
+ hasher = hashlib.sha1()
58
+ hasher.update(wrapper_type.encode("utf-8", errors="ignore"))
59
+ hasher.update(wrapper_text.encode("utf-8", errors="ignore"))
60
+ return hasher.hexdigest()
61
+
62
+ def _cache_path(self, cache_key: str) -> str:
63
+ return os.path.join(self.cache_dir, f"{cache_key}.pkl")
64
+
65
+ def _load_cached_tree(self, cache_key: str) -> Optional[Dict[str, Any]]:
66
+ path = self._cache_path(cache_key)
67
+ if not os.path.exists(path):
68
+ return None
69
+ max_age = get_cache_max_age_seconds()
70
+ if max_age > 0:
71
+ try:
72
+ mtime = os.path.getmtime(path)
73
+ if (time.time() - mtime) > max_age:
74
+ try:
75
+ os.remove(path)
76
+ except OSError:
77
+ pass
78
+ return None
79
+ except OSError:
80
+ return None
81
+ try:
82
+ with open(path, "rb") as f:
83
+ data = pickle.load(f)
84
+ if isinstance(data, dict) and "ok" in data:
85
+ return data
86
+ except Exception:
87
+ return None
88
+ return None
89
+
90
+ def _save_cached_tree(self, cache_key: str, tree: Any, ok: bool) -> None:
91
+ os.makedirs(self.cache_dir, exist_ok=True)
92
+ path = self._cache_path(cache_key)
93
+ tmp_path = f"{path}.{os.getpid()}.tmp"
94
+ try:
95
+ with open(tmp_path, "wb") as f:
96
+ pickle.dump({"ok": ok, "tree": tree}, f)
97
+ # Atomic rename (replace existing if any)
98
+ os.replace(tmp_path, path)
99
+ except Exception:
100
+ if os.path.exists(tmp_path):
101
+ os.remove(tmp_path)
102
+ pass
103
+
104
+ def _parse_with_cache(self, wrapper_type: str, wrapper_text: str) -> Optional[Any]:
105
+ import time
106
+ start_time = time.time()
107
+
108
+ cache_key = self._ast_cache_key(wrapper_type, wrapper_text)
109
+ cached = self._load_cached_tree(cache_key)
110
+ if cached is not None:
111
+ self.metrics["hits"] += 1
112
+ if cached.get("ok") is False:
113
+ return None
114
+ return cached.get("tree")
115
+
116
+ self.metrics["misses"] += 1
117
+ try:
118
+ tree = javalang.parse.parse(wrapper_text)
119
+ self._save_cached_tree(cache_key, tree, ok=True)
120
+
121
+ duration_ms = (time.time() - start_time) * 1000
122
+ self.metrics["saved_ms"] += duration_ms
123
+
124
+ return tree
125
+ except Exception:
126
+ self._save_cached_tree(cache_key, None, ok=False)
127
+ return None
128
+
129
def detect_changes(self, diff_data: Dict[str, Any]) -> List[Change]:
    """Return the de-duplicated semantic changes found in *diff_data*.

    *diff_data* is read for a 'file_patches' list of ``{'file', 'patch'}``
    dicts; when that is missing or empty and 'raw_diff' is present, a single
    pseudo entry named 'unknown' is used instead. Entries whose file
    extension is not in the supported set are skipped. Every remaining
    patch is analysed in "deep" mode.

    Two changes are duplicates when kind, file, symbol, before, after,
    line_no and meta all match; the first occurrence is kept.
    """

    def _freeze(value):
        """Return a hashable stand-in for *value*, used only in dedup keys."""
        if isinstance(value, dict):
            return tuple(
                sorted(
                    ((repr(k), _freeze(v)) for k, v in value.items()),
                    key=lambda pair: pair[0],
                )
            )
        if isinstance(value, (list, tuple)):
            return tuple(_freeze(item) for item in value)
        if isinstance(value, (set, frozenset)):
            return tuple(sorted((_freeze(item) for item in value), key=repr))
        try:
            hash(value)
        except TypeError:
            return repr(value)
        return value

    # Supported languages: Java, Python, C/C++, JavaScript/TypeScript.
    supported_extensions = {
        '.java', '.py', '.cpp', '.cc', '.cxx', '.c', '.h', '.hpp',
        '.js', '.jsx', '.ts', '.tsx',
    }
    # Always use deep analysis to avoid security blind spots.
    analysis_mode = "deep"

    file_patches = diff_data.get('file_patches', [])
    # Fallback if the parser did not provide per-file patches.
    # NOTE(review): 'unknown' has no extension, so this pseudo entry is
    # skipped by the extension filter below - confirm whether raw diffs
    # are meant to be analysed.
    if not file_patches and 'raw_diff' in diff_data:
        file_patches = [{'file': 'unknown', 'patch': diff_data['raw_diff']}]

    changes = []
    for entry in file_patches:
        filename = entry.get('file', 'unknown')
        patch_content = entry.get('patch', '')
        ext = os.path.splitext(filename)[1].lower() if '.' in filename else ''

        if ext not in supported_extensions:
            print(f"DEBUG: Skipping unsupported file: {filename}")
            continue

        # NOTE(review): the message says "Java" for every supported language.
        print(f"DEBUG: Analyzing Java file: {filename}")
        changes.extend(
            self._detect_changes_in_patch(filename, patch_content, mode=analysis_mode)
        )

    # Deduplicate while preserving first-seen order. Meta values are frozen
    # recursively so that dict / set / nested-list values cannot make the
    # key unhashable.
    unique_changes = []
    seen = set()
    for ch in changes:
        meta_key = tuple(
            (k, _freeze(v))
            for k, v in sorted(ch.meta.items(), key=lambda pair: str(pair[0]))
        )
        key = (ch.kind, ch.file, ch.symbol, ch.before, ch.after, ch.line_no, meta_key)
        if key not in seen:
            seen.add(key)
            unique_changes.append(ch)

    return unique_changes
179
+
180
def detect_signals(self, diff_data: Dict[str, Any]) -> List[Signal]:
    """Legacy adapter: convert detected changes into Signal objects.

    A change with symbol "LargeRefactor" becomes a "meta.large_refactor"
    signal. Any other change is emitted only when it maps to a signal id
    and that id (or 'all') is not listed in the change's 'ignores' meta.
    """
    signals = []
    for change in self.detect_changes(diff_data):
        # Tier 3 signal: reported against the pseudo file "meta".
        if change.symbol == "LargeRefactor":
            signals.append(Signal(
                id="meta.large_refactor",
                file="meta",
                confidence=1.0,
                action="detected",
                meta=change.meta
            ))
            continue

        signal_id = self._map_change_to_signal_id(change)
        if not signal_id:
            continue

        # Inline ignores suppress the signal.
        suppressed = change.meta.get('ignores', [])
        if signal_id in suppressed or 'all' in suppressed:
            continue

        action = self._map_kind_to_action(change.kind)
        signals.append(Signal(
            id=signal_id,
            file=change.file,
            confidence=1.0,
            action=action,
            meta=change.meta,
            line=change.line_no
        ))
    return signals
220
+
221
def _map_change_to_signal_id(self, change: Change) -> Optional[str]:
    """Translate a semantic change into a signal id, or None when nothing matches.

    The checks run top to bottom and the first match wins, so the order of
    the branches is significant.
    """
    symbol, kind, meta = change.symbol, change.kind, change.meta

    # --- Concurrency ---
    if kind == ChangeKind.TYPE_CHANGED and meta.get('downgrade'):
        return "runtime.concurrency.thread_safety_downgrade"

    if kind == ChangeKind.FIELD_ADDED and meta.get('static_unsafe'):
        return "runtime.concurrency.static_unsafe_collection"

    if symbol == "lock":
        return (
            "runtime.concurrency.lock_removed"
            if kind == ChangeKind.CALL_REMOVED
            else "runtime.concurrency.lock"
        )

    if symbol == "synchronized":
        return (
            "runtime.concurrency.lock_removed"
            if kind == ChangeKind.MODIFIER_REMOVED
            else "runtime.concurrency.synchronized"
        )

    if symbol == "volatile":
        return (
            "runtime.concurrency.volatile_removed"
            if kind == ChangeKind.MODIFIER_REMOVED
            else "runtime.concurrency.volatile"
        )

    # Only the removal of "final" maps to a signal; other kinds fall through.
    if symbol == "final" and kind == ChangeKind.MODIFIER_REMOVED:
        return "runtime.concurrency.final_removed"

    if symbol == "atomic_set" and kind == ChangeKind.CALL_REMOVED:
        return "runtime.concurrency.atomic_to_non_atomic_write"

    if symbol == "ThreadPoolExecutor":
        if meta.get('param_change'):
            return "runtime.concurrency.threadpool_param_change"
        if kind == ChangeKind.OBJECT_CREATION and meta.get('args_count'):
            return "runtime.concurrency.threadpool_creation"

    if symbol == "LinkedBlockingQueue" and meta.get('unbounded'):
        return "runtime.concurrency.threadpool_unbounded_queue"

    if symbol == "sleep" and kind == ChangeKind.CALL_ADDED:
        return "runtime.performance.sleep_added"

    if symbol == "while_true" and kind == ChangeKind.CALL_ADDED:
        return "runtime.concurrency.busy_wait_added"

    # --- P1 Resource ---
    if symbol == "try_with_resources" and kind == ChangeKind.CALL_REMOVED:
        return "runtime.resource.try_with_resource_removed"

    if meta.get('cache_eviction'):
        return "runtime.resource.cache_eviction_removed"

    if meta.get('timeout_removed'):
        return "runtime.network.timeout_removed"

    # --- P2 Data ---
    if symbol == "null_check" and meta.get('action') == "removed":
        return "runtime.data.null_check_removed"

    if symbol == "equals_to_ref":
        return "runtime.data.equals_to_reference_compare"

    # --- Security ---
    if symbol == "hardcoded_secret":
        if kind == ChangeKind.LITERAL_ADDED:
            return "security.hardcoded_secret"
        if kind == ChangeKind.LITERAL_REMOVED:
            return "security.hardcoded_secret_removed"

    if symbol == "sql_concat" and meta.get("risk") == "sql_injection":
        return "security.sql_injection"

    if symbol == "weak_crypto":
        return "security.weak_crypto"

    if symbol == "command_execution":
        return "security.command_injection"

    # --- Kind-driven fallbacks ---
    if kind == ChangeKind.CALL_ADDED:
        # An added "sleep" call has already returned above.
        if symbol == "remove" and meta.get("in_loop"):
            return "runtime.collection_mutation_inside_loop"
        if symbol in ("newFixedThreadPool", "newCachedThreadPool"):
            return "runtime.concurrency.executors_factory_risk"
        if symbol == "get" and meta.get("blocking_get"):
            return "runtime.concurrency.future_get_without_timeout"

    if kind == ChangeKind.OBJECT_CREATION and symbol == "ThreadPoolExecutor":
        return "runtime.concurrency.threadpool_creation"

    if kind == ChangeKind.CALL_REMOVED and symbol in self.critical_calls:
        return "runtime.input_normalization_removed"

    if symbol == "ConcurrentHashMap":
        return "runtime.concurrency.concurrent_map"

    if symbol in self.pagination_vars:
        return "data.pagination_semantic_change"

    return None
335
+
336
def _map_kind_to_action(self, kind: ChangeKind) -> str:
    """Map a ChangeKind to the action string carried by a Signal.

    Returns "added" for the *_ADDED kinds and OBJECT_CREATION, "removed" for
    the *_REMOVED kinds, "downgrade" for TYPE_CHANGED, and "changed" for
    everything else (including UNKNOWN).
    """
    added_kinds = (
        ChangeKind.CALL_ADDED,
        ChangeKind.FIELD_ADDED,
        ChangeKind.MODIFIER_ADDED,
        ChangeKind.OBJECT_CREATION,
        ChangeKind.LITERAL_ADDED,
    )
    removed_kinds = (
        ChangeKind.CALL_REMOVED,
        ChangeKind.FIELD_REMOVED,
        ChangeKind.MODIFIER_REMOVED,
        ChangeKind.LITERAL_REMOVED,
    )
    if kind in added_kinds:
        return "added"
    if kind in removed_kinds:
        return "removed"
    if kind == ChangeKind.TYPE_CHANGED:
        # TYPE_CHANGED is currently only produced for thread-safety downgrades.
        return "downgrade"
    # The former `kind == UNKNOWN and "action" in kind.name` branch could
    # never be true and returned the same value as this fallback.
    return "changed"
346
+
347
+ def _detect_changes_in_patch(self, filename: str, patch_content: str, mode: str = "deep") -> List[Change]:
348
+ changes = []
349
+
350
+ added_lines = []
351
+ removed_lines = []
352
+
353
+ for line in patch_content.splitlines():
354
+ if line.startswith('+') and not line.startswith('+++'):
355
+ added_lines.append(line[1:].strip())
356
+ elif line.startswith('-') and not line.startswith('---'):
357
+ removed_lines.append(line[1:].strip())
358
+
359
+ # Analyze Removed
360
+ removed_vars = {}
361
+ removed_calls = set()
362
+ removed_modifiers = set()
363
+
364
+ if removed_lines:
365
+ self._analyze_snippet_for_changes(removed_lines, filename, is_added=False,
366
+ var_map=removed_vars, call_set=removed_calls, mod_set=removed_modifiers, changes=changes, mode=mode)
367
+
368
+ # Analyze Added
369
+ added_vars = {}
370
+ added_calls = set()
371
+ added_modifiers = set()
372
+
373
+ if added_lines:
374
+ self._analyze_snippet_for_changes(added_lines, filename, is_added=True,
375
+ var_map=added_vars, call_set=added_calls, mod_set=added_modifiers, changes=changes, mode=mode)
376
+
377
+ # Cross-Analyze: Type Downgrade (Only in Deep Mode or if we have enough info)
378
+ # Tokenizer might not give us full type info, so this is best effort in light mode
379
+ for var_name, old_type in removed_vars.items():
380
+ if var_name in added_vars:
381
+ new_type = added_vars[var_name]
382
+ if is_thread_safe(old_type) and not is_thread_safe(new_type):
383
+ changes.append(Change(
384
+ kind=ChangeKind.TYPE_CHANGED,
385
+ file=filename,
386
+ symbol=var_name,
387
+ before=old_type,
388
+ after=new_type,
389
+ meta={"downgrade": True, "from": old_type, "to": new_type, "var": var_name}
390
+ ))
391
+
392
+ # Cross-Analyze: ThreadPoolExecutor Param Change
393
+ tpe_removed = any(c.symbol == "ThreadPoolExecutor" and c.meta.get("action") == "removed" for c in changes)
394
+ tpe_added = any(c.symbol == "ThreadPoolExecutor" and c.meta.get("action") == "added" for c in changes)
395
+
396
+ if tpe_removed and tpe_added:
397
+ changes.append(Change(kind=ChangeKind.UNKNOWN, file=filename, symbol="ThreadPoolExecutor", meta={"param_change": True}, line_no=None))
398
+
399
+ # Cross-Analyze: equals -> ==
400
+ equals_removed = any(c.symbol == "equals" and c.kind == ChangeKind.CALL_REMOVED for c in changes)
401
+ eq_added = any(c.symbol == "==" and c.kind == ChangeKind.CALL_ADDED for c in changes)
402
+ if equals_removed and eq_added:
403
+ changes.append(Change(kind=ChangeKind.UNKNOWN, file=filename, symbol="equals_to_ref", meta={"semantic": True}, line_no=None))
404
+
405
+ return changes
406
+
407
def _analyze_snippet_for_changes(self, lines: List[str], filename: str, is_added: bool,
                                 var_map: Dict, call_set: Set, mod_set: Set,
                                 changes: List[Change], mode: str = "deep"):
    """Analyse one side of a patch (all added or all removed lines).

    Detected changes are appended to *changes*; declared variable types are
    recorded in *var_map* by the tree / token-fallback analysis. *call_set*
    and *mod_set* are accepted for interface compatibility and are not used.

    Steps:
      1. Collect ``// diffsense-ignore: <rule>`` markers (they apply to their
         own line and to the following line).
      2. Tokenise the snippet with javalang; if that fails nothing is reported.
      3. Run the token-level checks (modifiers, lock/sleep sequences,
         critical calls, security patterns).
      4. Unless *mode* is "light": parse the snippet wrapped in a dummy class,
         then wrapped in a dummy method, and fall back to token-based
         variable extraction if both fail.
      5. Attach the collected ignores to the changes added by this call.

    Line numbers stored on the changes are 1-based and relative to the snippet.
    """
    start_change_idx = len(changes)

    # 1. Scan for ignores: line index (0-based) -> set of rule ids.
    ignores_map = {}
    ignore_pattern = re.compile(r"//\s*diffsense-ignore:\s*([\w\.]+)")
    for i, line in enumerate(lines):
        match = ignore_pattern.search(line)
        if not match:
            continue
        rule_id = match.group(1)
        ignores_map.setdefault(i, set()).add(rule_id)
        # Comments are often placed above the line they refer to.
        if i + 1 < len(lines):
            ignores_map.setdefault(i + 1, set()).add(rule_id)

    code_snippet = "\n".join(lines)

    # 2. Tokenise. `except Exception` (not a bare except) so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        tokens = list(javalang.tokenizer.tokenize(code_snippet))
    except Exception:
        return

    call_kind = ChangeKind.CALL_ADDED if is_added else ChangeKind.CALL_REMOVED
    modifier_kind = ChangeKind.MODIFIER_ADDED if is_added else ChangeKind.MODIFIER_REMOVED

    # 3a. Single-token checks.
    for token in tokens:
        token_val = token.value
        line_no = token.position.line  # 1-based, relative to the snippet

        if token_val == "synchronized":
            changes.append(Change(kind=modifier_kind, file=filename, symbol="synchronized", line_no=line_no))

        if token_val == "volatile":
            changes.append(Change(kind=modifier_kind, file=filename, symbol="volatile", line_no=line_no))

        if token_val == "ConcurrentHashMap" and not is_added:
            changes.append(Change(kind=ChangeKind.UNKNOWN, file=filename, symbol="ConcurrentHashMap", meta={"action": "removed"}, line_no=line_no))

        if token_val in self.pagination_vars:
            changes.append(Change(kind=ChangeKind.UNKNOWN, file=filename, symbol=token_val, meta={"action": "changed"}, line_no=line_no))

    # 3b. Three-token sequences: `.lock(` and `Thread.sleep`.
    for i in range(len(tokens) - 2):
        first, second, third = tokens[i], tokens[i + 1], tokens[i + 2]
        if first.value == "." and second.value == "lock" and third.value == "(":
            changes.append(Change(kind=call_kind, file=filename, symbol="lock", line_no=second.position.line))

        if first.value == "Thread" and second.value == "." and third.value == "sleep":
            changes.append(Change(kind=call_kind, file=filename, symbol="sleep", line_no=third.position.line))

    # 3c. Critical calls: `<name>(`.
    for i in range(len(tokens) - 1):
        if tokens[i].value in self.critical_calls and tokens[i + 1].value == "(":
            changes.append(Change(kind=call_kind, file=filename, symbol=tokens[i].value, line_no=tokens[i].position.line))

    # === Security Detection ===
    # 1. Hardcoded secrets: a token whose text (quotes stripped) contains a
    #    sensitive keyword and an '=' or ':' separator.
    for token in tokens:
        if not (hasattr(token, 'value') and isinstance(token.value, str)):
            continue
        token_val = token.value.strip('"\'`')
        lowered = token_val.lower()
        if not any(secret in lowered for secret in self.secret_patterns):
            continue
        if len(token_val) > 3 and ("=" in token_val or ":" in token_val):
            changes.append(Change(
                kind=ChangeKind.LITERAL_ADDED if is_added else ChangeKind.LITERAL_REMOVED,
                file=filename,
                symbol="hardcoded_secret",
                meta={"type": "secret", "value_hint": token_val[:20]},
                line_no=token.position.line
            ))

    # 2. SQL string concatenation, e.g. "SELECT ... " + var. The SQL context
    #    decision is delegated to _get_sql_context.
    for i in range(len(tokens) - 1):
        if tokens[i].value in self.sql_concat_methods and self._get_sql_context(tokens, i):
            changes.append(Change(
                kind=call_kind,
                file=filename,
                symbol="sql_concat",
                meta={"risk": "sql_injection"},
                line_no=tokens[i].position.line
            ))

    # 3. Insecure crypto usage.
    for token in tokens:
        if hasattr(token, 'value') and token.value in self.weak_crypto:
            changes.append(Change(
                kind=call_kind,
                file=filename,
                symbol="weak_crypto",
                meta={"algorithm": token.value, "risk": "weak_encryption"},
                line_no=token.position.line
            ))

    # 4. Command injection (Runtime.exec, ProcessBuilder): `<name>(`.
    for i in range(len(tokens) - 1):
        if tokens[i].value in self.command_methods and tokens[i + 1].value == "(":
            changes.append(Change(
                kind=call_kind,
                file=filename,
                symbol="command_execution",
                meta={"risk": "command_injection"},
                line_no=tokens[i].position.line
            ))
    # === End Security Detection ===

    # Stop here if mode is 'light'.
    if mode == "light":
        self._apply_ignores(changes, start_change_idx, ignores_map)
        return

    # 4. Structural analysis. Both wrappers place the snippet on wrapper
    # line 2 (exactly one header line before it), so the same offset of 1
    # converts tree positions to snippet-relative lines. The method wrapper
    # previously used 2, which shifted every line by one and turned line 1
    # into 0, making _apply_ignores skip it.
    snippet_offset = 1
    wrappers = (
        ("class", f"class Dummy {{\n{code_snippet}\n}}"),
        ("method", f"class Dummy {{ void dummy() {{\n{code_snippet}\n}} }}"),
    )
    parsed = False
    for wrapper_type, wrapper_text in wrappers:
        tree = self._parse_with_cache(wrapper_type, wrapper_text)
        if tree is not None:
            self._analyze_tree_changes(tree, filename, is_added, var_map, changes, snippet_offset)
            parsed = True
            break

    # Fallback: extract variables from tokens if parsing failed.
    if not parsed:
        self._analyze_tokens_fallback(tokens, var_map, changes, filename, is_added)

    # 5. Apply ignores to everything this call appended.
    self._apply_ignores(changes, start_change_idx, ignores_map)
566
+
567
+ def _analyze_tokens_fallback(self, tokens, var_map, changes, filename, is_added):
568
+ i = 0
569
+ modifiers = set()
570
+
571
+ while i < len(tokens) - 1:
572
+ token = tokens[i]
573
+
574
+ # 1. Collect Modifiers
575
+ if token.value in ['private', 'public', 'protected', 'static', 'final', 'volatile', 'transient']:
576
+ modifiers.add(token.value)
577
+ i += 1
578
+ continue
579
+
580
+ # 2. Check for Type
581
+ is_type = isinstance(token, (Identifier, BasicType))
582
+
583
+ if not is_type:
584
+ modifiers = set()
585
+ i += 1
586
+ continue
587
+
588
+ current_type_name = token.value
589
+
590
+ # Check for Generics
591
+ idx = i + 1
592
+ if idx < len(tokens) and tokens[idx].value == '<':
593
+ depth = 1
594
+ idx += 1
595
+ while idx < len(tokens) and depth > 0:
596
+ if tokens[idx].value == '<': depth += 1
597
+ elif tokens[idx].value == '>': depth -= 1
598
+ idx += 1
599
+ if depth > 0: # Unbalanced
600
+ i += 1
601
+ continue
602
+
603
+ # 3. Variable Name
604
+ if idx < len(tokens) and isinstance(tokens[idx], Identifier):
605
+ var_name = tokens[idx].value
606
+ # Check what follows (should be = or ; or ,)
607
+ idx2 = idx + 1
608
+ if idx2 < len(tokens) and tokens[idx2].value in ['=', ';', ',']:
609
+ var_map[var_name] = current_type_name
610
+
611
+ # Detect Signals
612
+ line_no = token.position.line
613
+
614
+ # static_unsafe_collection
615
+ if is_added and 'static' in modifiers:
616
+ risky_static_types = {"HashMap", "ArrayList", "HashSet", "TreeMap", "LinkedList"}
617
+ if current_type_name in risky_static_types:
618
+ changes.append(Change(kind=ChangeKind.FIELD_ADDED, file=filename, symbol=var_name, meta={"static_unsafe": True}, line_no=line_no))
619
+
620
+ # final (if looks like field)
621
+ is_field = any(m in modifiers for m in ['private', 'public', 'protected', 'static'])
622
+ if is_field and 'final' in modifiers:
623
+ kind = ChangeKind.MODIFIER_ADDED if is_added else ChangeKind.MODIFIER_REMOVED
624
+ changes.append(Change(kind=kind, file=filename, symbol="final", line_no=line_no))
625
+
626
+ i = idx2
627
+ modifiers = set()
628
+ continue
629
+
630
+ modifiers = set()
631
+ i += 1
632
+
633
+ def _apply_ignores(self, changes: List[Change], start_idx: int, ignores_map: Dict[int, Set[str]]):
634
+ for i in range(start_idx, len(changes)):
635
+ ch = changes[i]
636
+ if ch.line_no:
637
+ # line_no is 1-based, ignores_map is 0-based
638
+ idx = ch.line_no - 1
639
+ if idx in ignores_map:
640
+ ch.meta['ignores'] = list(ignores_map[idx])
641
+
642
+ def _analyze_tree_changes(self, tree, filename: str, is_added: bool, var_map: Dict, changes: List[Change], offset: int = 0):
643
+ for path, node in tree:
644
+ line_no = (node.position.line - offset) if node.position else None
645
+
646
+ # Context
647
+ self._update_context(node, var_map)
648
+
649
+ # Detectors
650
+ self._detect_concurrency_signals(node, filename, is_added, var_map, changes, line_no)
651
+ self._detect_resource_signals(node, filename, is_added, var_map, changes, line_no, path)
652
+ self._detect_data_signals(node, filename, is_added, var_map, changes, line_no, path)
653
+ self._detect_general_signals(node, filename, is_added, var_map, changes, line_no, path)
654
+
655
def _update_context(self, node, var_map: Dict):
    """Record ``name -> type name`` in *var_map* for field and local variable declarations.

    Nodes of any other kind, and declarations without a type, are ignored.
    """
    if not isinstance(node, (FieldDeclaration, LocalVariableDeclaration)):
        return
    if not node.type:
        return
    var_map.update((declarator.name, node.type.name) for declarator in node.declarators)
664
+
665
def _detect_concurrency_signals(self, node, filename: str, is_added: bool, var_map: Dict, changes: List[Change], line_no: int):
    """Append concurrency-related changes detected on a single AST node.

    Covers synchronized blocks/methods, volatile/final field modifiers,
    static non-thread-safe collections (added side only), lock / acquire /
    await / sleep calls, removed ``set`` calls on Atomic* variables,
    ThreadPoolExecutor construction, unbounded LinkedBlockingQueue
    construction and ``while (true)`` loops.

    *line_no* is stored on every change and may be None when the node has
    no position.
    """
    call_kind = ChangeKind.CALL_ADDED if is_added else ChangeKind.CALL_REMOVED
    modifier_kind = ChangeKind.MODIFIER_ADDED if is_added else ChangeKind.MODIFIER_REMOVED

    def emit(kind, symbol, **extra):
        """Append one change for this node."""
        changes.append(Change(kind=kind, file=filename, symbol=symbol, line_no=line_no, **extra))

    # 1. synchronized block / synchronized method
    if isinstance(node, SynchronizedStatement):
        emit(modifier_kind, "synchronized")

    if isinstance(node, MethodDeclaration) and 'synchronized' in node.modifiers:
        emit(modifier_kind, "synchronized")

    if isinstance(node, FieldDeclaration):
        if 'volatile' in node.modifiers:
            emit(modifier_kind, "volatile")

        if 'final' in node.modifiers:
            emit(modifier_kind, "final")

        # 7. static_unsafe_collection
        if is_added and 'static' in node.modifiers and node.type:
            type_name = node.type.name if hasattr(node.type, 'name') else str(node.type)
            if not is_thread_safe(type_name):
                risky_static_types = {"HashMap", "ArrayList", "HashSet", "TreeMap", "LinkedList"}
                if type_name.split('<')[0] in risky_static_types:
                    emit(ChangeKind.FIELD_ADDED, node.declarators[0].name, meta={"static_unsafe": True})

    if isinstance(node, MethodInvocation):
        call_name = node.member
        qualifier = node.qualifier

        # lock.lock(), semaphore.acquire(), latch.await()
        if call_name == "lock" and (not qualifier or "lock" in qualifier.lower()):
            emit(call_kind, "lock")

        # 10. sleep is handled together with the other plain call names.
        if call_name in ("acquire", "await", "sleep"):
            emit(call_kind, call_name)

        # 6. atomic_to_non_atomic_write: a removed `x.set(...)` where x is
        # known to be declared with an Atomic* type.
        if not is_added and call_name == "set" and qualifier and qualifier in var_map:
            if var_map[qualifier].startswith("Atomic"):
                emit(ChangeKind.CALL_REMOVED, "atomic_set", meta={"var": qualifier})

    # 8. threadpool_param_change & 9. threadpool_unbounded_queue
    if isinstance(node, ClassCreator):
        type_name = node.type.name
        if type_name == "ThreadPoolExecutor":
            # NOTE(review): "param_change" is set on every construction, so
            # the signal mapper reports each one as a parameter change -
            # confirm this is intended.
            emit(
                ChangeKind.OBJECT_CREATION,
                "ThreadPoolExecutor",
                meta={
                    "args_count": len(node.arguments),
                    "param_change": True,
                    "action": "added" if is_added else "removed",
                },
            )

        if type_name == "LinkedBlockingQueue":
            unbounded = not node.arguments
            if not unbounded and len(node.arguments) == 1:
                capacity = node.arguments[0]
                # The string check is kept for compatibility; the structural
                # check matches a javalang member reference whose qualifier
                # is "Integer" and member is "MAX_VALUE", which str() of the
                # node does not spell out as "Integer.MAX_VALUE".
                unbounded = "Integer.MAX_VALUE" in str(capacity) or (
                    getattr(capacity, "qualifier", None) == "Integer"
                    and getattr(capacity, "member", None) == "MAX_VALUE"
                )
            if unbounded:
                emit(ChangeKind.OBJECT_CREATION, "LinkedBlockingQueue", meta={"unbounded": True})

    # 10. while(true)
    if isinstance(node, WhileStatement):
        condition = node.condition
        if hasattr(condition, 'value') and condition.value == "true":
            emit(call_kind, "while_true")
756
+
757
def _detect_resource_signals(self, node, filename: str, is_added: bool, var_map: Dict, changes: List[Change], line_no: int, path: Any):
    """Record resource-handling signals for a single AST node.

    Matching signals are appended to ``changes`` in place; nothing is
    returned. Every signal emitted here is a removal, so a node from the
    added side (``is_added`` true) never produces a change.

    Args:
        node: AST node under inspection.
        filename: File the node belongs to; also used as a cache/redis hint.
        is_added: True when the node comes from added code, False when removed.
        var_map: Not used by this detector.
        changes: Output list that receives the detected ``Change`` objects.
        line_no: Line number stored on each emitted change.
        path: Not used by this detector.
    """
    # 12. try_with_resource_removed
    if isinstance(node, TryStatement) and node.resources and not is_added:
        changes.append(Change(kind=ChangeKind.CALL_REMOVED, file=filename, symbol="try_with_resources", line_no=line_no))

    if not isinstance(node, MethodInvocation):
        return

    method = node.member
    owner = str(node.qualifier).lower() if node.qualifier else ""

    # Both remaining signals only apply to removed calls.
    if is_added:
        return

    # 13. cache_eviction_removed
    if method in ("expire", "setExpire", "setTTL", "evict", "clear"):
        lowered_file = filename.lower()
        if "cache" in lowered_file or "redis" in lowered_file or "map" in owner:
            changes.append(Change(kind=ChangeKind.CALL_REMOVED, file=filename, symbol=method, meta={"cache_eviction": True}, line_no=line_no))

    # 15. timeout_removed
    # "setTimeout" is already matched by the case-insensitive substring test.
    if "timeout" in method.lower():
        changes.append(Change(kind=ChangeKind.CALL_REMOVED, file=filename, symbol=method, meta={"timeout_removed": True}, line_no=line_no))
776
+
777
def _detect_data_signals(self, node, filename: str, is_added: bool, var_map: Dict, changes: List[Change], line_no: int, path: Any):
    """Record data-comparison signals for a single AST node.

    Matching signals are appended to ``changes`` in place; nothing is
    returned. Three patterns are recognised:

    * a removed ``equals`` call,
    * an added ``==`` binary operation,
    * a removed ``if`` whose condition compares something with ``null``
      using ``==`` or ``!=``.

    Args:
        node: AST node under inspection.
        filename: File the node belongs to.
        is_added: True when the node comes from added code, False when removed.
        var_map: Not used by this detector.
        changes: Output list that receives the detected ``Change`` objects.
        line_no: Line number stored on each emitted change.
        path: Not used by this detector.
    """
    # 18. equals_to_reference_compare
    if isinstance(node, MethodInvocation):
        if node.member == "equals" and not is_added:
            changes.append(Change(kind=ChangeKind.CALL_REMOVED, file=filename, symbol="equals", line_no=line_no))

    if isinstance(node, BinaryOperation):
        if node.operator == "==" and is_added:
            changes.append(Change(kind=ChangeKind.CALL_ADDED, file=filename, symbol="==", line_no=line_no))

    # 19. null_check_removed
    if isinstance(node, IfStatement) and not is_added:
        cond = node.condition
        # A guard can be written as `x == null` or `x != null`; both forms
        # are null checks, so removing either one must be reported.
        if isinstance(cond, BinaryOperation) and cond.operator in ("==", "!="):
            has_null = any(
                isinstance(operand, Literal) and operand.value == "null"
                for operand in (cond.operandr, cond.operandl)
            )

            if has_null:
                changes.append(Change(kind=ChangeKind.UNKNOWN, file=filename, symbol="null_check", meta={"action": "removed"}, line_no=line_no))
797
+
798
def _detect_general_signals(self, node, filename: str, is_added: bool, var_map: Dict, changes: List[Change], line_no: int, path: Any):
    """Record general method-call signals for a single AST node.

    Only method invocations are inspected. Matching signals are appended
    to ``changes`` in place; nothing is returned.

    Args:
        node: AST node under inspection.
        filename: File the node belongs to.
        is_added: True when the node comes from added code, False when removed.
        var_map: Not used by this detector.
        changes: Output list that receives the detected ``Change`` objects.
        line_no: Line number stored on each emitted change.
        path: Parent chain of ``node``, forwarded to ``_is_inside_loop``.
    """
    # Original logic for critical calls etc.
    if not isinstance(node, MethodInvocation):
        return

    method = node.member
    owner = node.qualifier

    if is_added:
        kind = ChangeKind.CALL_ADDED
    else:
        kind = ChangeKind.CALL_REMOVED

    # Dubbo P0: Executors factory methods
    if owner == "Executors" and method in ("newFixedThreadPool", "newCachedThreadPool"):
        changes.append(Change(kind=kind, file=filename, symbol=method, meta={"risk": "threadpool_factory"}, line_no=line_no))

    # Dubbo P0: Future.get() without timeout
    # Any zero-argument call named "get" is flagged, whatever its receiver.
    if method == "get" and not node.arguments:
        changes.append(Change(kind=kind, file=filename, symbol="get", meta={"blocking_get": True}, line_no=line_no))

    # Critical calls (input/validation): reported only when removed.
    if method in self.critical_calls and not is_added:
        changes.append(Change(kind=kind, file=filename, symbol=method, line_no=line_no))

    # Collection mutation in loop: reported only when added.
    if method == "remove" and is_added and self._is_inside_loop(path):
        changes.append(Change(kind=kind, file=filename, symbol="remove", meta={"in_loop": True}, line_no=line_no))
822
+
823
+
824
+ def _get_sql_context(self, tokens, pos: int) -> Optional[str]:
825
+ """
826
+ Check if the concatenation is in SQL context.
827
+ Looks for SQL keywords nearby in the token stream.
828
+ """
829
+ # Look back for SQL keywords
830
+ look_back = 20
831
+ start = max(0, pos - look_back)
832
+ nearby_tokens = [t.value for t in tokens[start:pos]]
833
+
834
+ sql_keywords = {
835
+ "SELECT", "INSERT", "UPDATE", "DELETE", "FROM", "WHERE", "JOIN",
836
+ "TABLE", "CREATE", "DROP", "ALTER", "query", "sql"
837
+ }
838
+
839
+ for token_val in nearby_tokens:
840
+ if token_val.upper() in sql_keywords or token_val.lower() in sql_keywords:
841
+ return "sql_statement"
842
+
843
+ return None
844
+
845
+ def _is_inside_loop(self, path: Tuple) -> bool:
846
+ """
847
+ Check if the current node (at the end of path) is inside a loop structure.
848
+ path is a list/tuple of parent nodes.
849
+ """
850
+ for node in reversed(path):
851
+ if isinstance(node, (ForStatement, WhileStatement, DoStatement)):
852
+ return True
853
+ return False