diffsense 2.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapters/__init__.py +0 -0
- adapters/base.py +27 -0
- adapters/github_adapter.py +164 -0
- adapters/gitlab_adapter.py +207 -0
- adapters/local_adapter.py +136 -0
- banner.py +71 -0
- cli.py +606 -0
- config/__init__.py +1 -0
- config/rules.yaml +371 -0
- core/__init__.py +235 -0
- core/ast_detector.py +853 -0
- core/change.py +46 -0
- core/composer.py +93 -0
- core/evaluator.py +15 -0
- core/ignore_manager.py +71 -0
- core/knowledge.py +77 -0
- core/parser.py +181 -0
- core/parser_manager.py +104 -0
- core/quality_manager.py +117 -0
- core/renderer.py +197 -0
- core/rule_base.py +98 -0
- core/rule_runtime.py +103 -0
- core/rules.py +718 -0
- core/run_config.py +85 -0
- core/semantic_diff.py +359 -0
- core/signal_model.py +21 -0
- core/signals_registry.py +62 -0
- diffsense-2.2.12.dist-info/METADATA +18 -0
- diffsense-2.2.12.dist-info/RECORD +58 -0
- diffsense-2.2.12.dist-info/WHEEL +5 -0
- diffsense-2.2.12.dist-info/entry_points.txt +3 -0
- diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
- diffsense-2.2.12.dist-info/top_level.txt +11 -0
- diffsense_mcp/__init__.py +1 -0
- diffsense_mcp/launcher.py +28 -0
- diffsense_mcp/server.py +687 -0
- governance/lifecycle.py +54 -0
- main.py +318 -0
- rules/__init__.py +246 -0
- rules/api_compatibility.py +372 -0
- rules/collection_handling.py +349 -0
- rules/concurrency.py +194 -0
- rules/concurrency_adapter.py +250 -0
- rules/cross_language_adapter.py +444 -0
- rules/exception_handling.py +320 -0
- rules/go_rules.py +401 -0
- rules/null_safety.py +301 -0
- rules/resource_management.py +222 -0
- rules/yaml_adapter.py +195 -0
- run_audit.py +478 -0
- sdk/cpp_adapter.py +238 -0
- sdk/go_adapter.py +199 -0
- sdk/java_adapter.py +199 -0
- sdk/javascript_adapter.py +229 -0
- sdk/language_adapter.py +313 -0
- sdk/python_adapter.py +195 -0
- sdk/rule.py +63 -0
- sdk/signal.py +14 -0
core/ast_detector.py
ADDED
|
@@ -0,0 +1,853 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
import hashlib
|
|
5
|
+
import pickle
|
|
6
|
+
import javalang
|
|
7
|
+
from javalang.tokenizer import BasicType, Identifier
|
|
8
|
+
from javalang.tree import SynchronizedStatement, MethodInvocation, FieldDeclaration, MethodDeclaration, LocalVariableDeclaration, VariableDeclarator, ForStatement, WhileStatement, DoStatement, ClassCreator, ReferenceType, BasicType as TreeBasicType, Assignment, TryResource, TryStatement, IfStatement, BinaryOperation, Literal
|
|
9
|
+
from typing import List, Set, Dict, Any, Tuple, Optional
|
|
10
|
+
from . import CACHE_VERSION
|
|
11
|
+
from . import get_cache_max_age_seconds
|
|
12
|
+
from .signal_model import Signal
|
|
13
|
+
from .change import Change, ChangeKind
|
|
14
|
+
from .knowledge import is_thread_safe, is_lock_type
|
|
15
|
+
|
|
16
|
+
class ASTDetector:
|
|
17
|
+
def __init__(self):
    """Initialise the detector's keyword vocabularies, cache location and metrics."""
    # Identifier names whose appearance in a patch is reported as a
    # pagination-related change.
    self.pagination_vars = {
        "pageNo",
        "pageSize",
        "start",
        "limit",
        "offset",
    }
    # Method names that are reported when followed by "(" in a patch.
    self.critical_calls = {
        "encode",
        "decode",
        "validate",
        "check",
        "normalize",
        "sanitize",
    }
    self.risky_executors = {
        "newFixedThreadPool",
        "newCachedThreadPool",
        "newSingleThreadExecutor",
    }

    # On-disk AST cache directory plus hit/miss counters.
    self.cache_dir = self._resolve_cache_dir()
    self.metrics = {"hits": 0, "misses": 0, "saved_ms": 0}

    # === Security Detection Patterns ===
    # Substrings (matched case-insensitively against token text) that hint
    # at a hardcoded secret.
    self.secret_patterns = {
        "password", "passwd", "pwd",
        "secret", "token",
        "api_key", "apikey",
        "access_key", "accesskey",
        "private_key", "privatekey",
        "credential",
    }

    # Tokens treated as string concatenation when looking for SQL building.
    self.sql_concat_methods = {"concat", "append", "+"}
    self.dangerous_methods = {
        "execute", "exec", "query",
        "executeQuery", "executeUpdate",
        "createStatement", "prepareStatement",
    }

    # Token values reported as weak / insecure crypto usage.
    self.weak_crypto = {
        "DES", "RC4", "MD5", "SHA1", "MessageDigest",
        "setAlgorithm",  # Common method pattern
    }

    # Call names reported as potential command execution.
    self.command_methods = {
        "exec", "runtime", "processbuilder", "ProcessBuilder",
        "getRuntime", "system",
    }
|
|
49
|
+
|
|
50
|
+
def _resolve_cache_dir(self) -> str:
    """Return the directory used for cached ASTs.

    The root is taken from the ``DIFFSENSE_CACHE_DIR`` environment variable
    when it is set to a non-empty value; otherwise ``~/.diffsense/cache`` is
    used. The result is ``<root>/<CACHE_VERSION>/ast``.
    """
    root = os.environ.get("DIFFSENSE_CACHE_DIR") or os.path.join(
        os.path.expanduser("~"), ".diffsense", "cache"
    )
    return os.path.join(root, CACHE_VERSION, "ast")
|
|
55
|
+
|
|
56
|
+
def _ast_cache_key(self, wrapper_type: str, wrapper_text: str) -> str:
|
|
57
|
+
hasher = hashlib.sha1()
|
|
58
|
+
hasher.update(wrapper_type.encode("utf-8", errors="ignore"))
|
|
59
|
+
hasher.update(wrapper_text.encode("utf-8", errors="ignore"))
|
|
60
|
+
return hasher.hexdigest()
|
|
61
|
+
|
|
62
|
+
def _cache_path(self, cache_key: str) -> str:
|
|
63
|
+
return os.path.join(self.cache_dir, f"{cache_key}.pkl")
|
|
64
|
+
|
|
65
|
+
def _load_cached_tree(self, cache_key: str) -> Optional[Dict[str, Any]]:
|
|
66
|
+
path = self._cache_path(cache_key)
|
|
67
|
+
if not os.path.exists(path):
|
|
68
|
+
return None
|
|
69
|
+
max_age = get_cache_max_age_seconds()
|
|
70
|
+
if max_age > 0:
|
|
71
|
+
try:
|
|
72
|
+
mtime = os.path.getmtime(path)
|
|
73
|
+
if (time.time() - mtime) > max_age:
|
|
74
|
+
try:
|
|
75
|
+
os.remove(path)
|
|
76
|
+
except OSError:
|
|
77
|
+
pass
|
|
78
|
+
return None
|
|
79
|
+
except OSError:
|
|
80
|
+
return None
|
|
81
|
+
try:
|
|
82
|
+
with open(path, "rb") as f:
|
|
83
|
+
data = pickle.load(f)
|
|
84
|
+
if isinstance(data, dict) and "ok" in data:
|
|
85
|
+
return data
|
|
86
|
+
except Exception:
|
|
87
|
+
return None
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
def _save_cached_tree(self, cache_key: str, tree: Any, ok: bool) -> None:
|
|
91
|
+
os.makedirs(self.cache_dir, exist_ok=True)
|
|
92
|
+
path = self._cache_path(cache_key)
|
|
93
|
+
tmp_path = f"{path}.{os.getpid()}.tmp"
|
|
94
|
+
try:
|
|
95
|
+
with open(tmp_path, "wb") as f:
|
|
96
|
+
pickle.dump({"ok": ok, "tree": tree}, f)
|
|
97
|
+
# Atomic rename (replace existing if any)
|
|
98
|
+
os.replace(tmp_path, path)
|
|
99
|
+
except Exception:
|
|
100
|
+
if os.path.exists(tmp_path):
|
|
101
|
+
os.remove(tmp_path)
|
|
102
|
+
pass
|
|
103
|
+
|
|
104
|
+
def _parse_with_cache(self, wrapper_type: str, wrapper_text: str) -> Optional[Any]:
    """Parse ``wrapper_text`` with javalang, consulting the on-disk cache first.

    Returns the parsed tree, or ``None`` when the text does not parse (a
    cached negative result is honoured too). ``self.metrics`` is updated:
    ``hits``/``misses`` count cache lookups and ``saved_ms`` accumulates the
    time spent on successful uncached parses.

    Parsing and cache writing are kept separate so that a failure to write
    the cache can neither discard a successfully parsed tree nor escape to
    the caller.
    """
    start_time = time.time()

    cache_key = self._ast_cache_key(wrapper_type, wrapper_text)
    cached = self._load_cached_tree(cache_key)
    if cached is not None:
        self.metrics["hits"] += 1
        if cached.get("ok") is False:
            return None
        return cached.get("tree")

    self.metrics["misses"] += 1
    try:
        tree = javalang.parse.parse(wrapper_text)
        parsed_ok = True
    except Exception:
        # Any parser/lexer failure is recorded as a negative cache entry.
        tree = None
        parsed_ok = False

    try:
        self._save_cached_tree(cache_key, tree, ok=parsed_ok)
    except OSError:
        # The cache is only an optimisation; an unwritable cache must not
        # affect the parse result.
        pass

    if not parsed_ok:
        return None

    duration_ms = (time.time() - start_time) * 1000
    self.metrics["saved_ms"] += duration_ms
    return tree
|
|
128
|
+
|
|
129
|
+
def detect_changes(self, diff_data: Dict[str, Any]) -> List[Change]:
    """
    New Entry Point: Returns semantic changes instead of raw signals.

    ``diff_data`` is expected to carry ``'file_patches'``, a list of dicts
    with ``'file'`` and ``'patch'`` keys. When that list is empty or missing
    and ``'raw_diff'`` is present, the raw diff is analysed as a single patch
    for a file named ``'unknown'`` (which has no extension and is therefore
    skipped by the extension filter below).

    Files whose extension is not in the supported set are skipped. The
    changes found in the remaining files are concatenated and de-duplicated,
    preserving first-seen order.
    """
    def _freeze(value):
        # Turn meta values into hashable equivalents so they can take part
        # in the de-duplication key, including nested containers.
        if isinstance(value, dict):
            return tuple((k, _freeze(v)) for k, v in sorted(value.items()))
        if isinstance(value, (list, tuple)):
            return tuple(_freeze(item) for item in value)
        if isinstance(value, (set, frozenset)):
            return frozenset(_freeze(item) for item in value)
        return value

    changes = []
    file_patches = diff_data.get('file_patches', [])

    # Fallback if parser isn't upgraded
    if not file_patches and 'raw_diff' in diff_data:
        file_patches = [{'file': 'unknown', 'patch': diff_data['raw_diff']}]

    # Always use deep analysis to avoid "Security Blind Spots" (Architecture Principle Violation)
    analysis_mode = "deep"

    # Supported languages: Java, Python, C++, JavaScript
    supported_extensions = {'.java', '.py', '.cpp', '.cc', '.cxx', '.c', '.h', '.hpp', '.js', '.jsx', '.ts', '.tsx'}

    for entry in file_patches:
        filename = entry.get('file', 'unknown')
        patch_content = entry.get('patch', '')

        # splitext already yields '' for names without a dot.
        ext = os.path.splitext(filename)[1].lower()
        if ext not in supported_extensions:
            print(f"DEBUG: Skipping unsupported file: {filename}")
            continue

        # The analysed file may be any supported language, not only Java.
        print(f"DEBUG: Analyzing file: {filename}")
        changes.extend(self._detect_changes_in_patch(filename, patch_content, mode=analysis_mode))

    # Deduplicate changes
    unique_changes = []
    seen = set()
    for ch in changes:
        meta_key = tuple((k, _freeze(v)) for k, v in sorted(ch.meta.items()))
        key = (ch.kind, ch.file, ch.symbol, ch.before, ch.after, ch.line_no, meta_key)
        if key not in seen:
            seen.add(key)
            unique_changes.append(ch)

    return unique_changes
|
|
179
|
+
|
|
180
|
+
def detect_signals(self, diff_data: Dict[str, Any]) -> List[Signal]:
    """
    Legacy Adapter: Converts Changes -> Signals for backward compatibility with RuleEngine.

    Every change from ``detect_changes`` is mapped to a signal id; changes
    without a mapping, or whose ``meta['ignores']`` lists the id (or
    ``'all'``), produce no signal.
    """
    emitted = []

    for change in self.detect_changes(diff_data):
        # Tier 3 marker: reported as a file-independent meta signal.
        if change.symbol == "LargeRefactor":
            emitted.append(Signal(
                id="meta.large_refactor",
                file="meta",
                confidence=1.0,
                action="detected",
                meta=change.meta
            ))
            continue

        signal_id = self._map_change_to_signal_id(change)
        if not signal_id:
            continue

        # Inline ignore comments suppress the signal.
        suppressed = change.meta.get('ignores', [])
        if signal_id in suppressed or 'all' in suppressed:
            continue

        emitted.append(Signal(
            id=signal_id,
            file=change.file,
            confidence=1.0,
            action=self._map_kind_to_action(change.kind),
            meta=change.meta,
            line=change.line_no
        ))

    return emitted
|
|
220
|
+
|
|
221
|
+
def _map_change_to_signal_id(self, change: Change) -> Optional[str]:
    """Map a Change to a signal id string, or return None if no rule matches.

    The checks run top to bottom and the first match wins, so the order of
    the sections below is significant. Rules look at ``change.kind``,
    ``change.symbol`` and flags stored in ``change.meta``.
    NOTE(review): assumes ``change.meta`` is always a dict — confirm against
    the Change definition.
    """
    # Mapping logic (Change -> Signal ID)
    if change.kind == ChangeKind.TYPE_CHANGED:
        if change.meta.get('downgrade'):
            return "runtime.concurrency.thread_safety_downgrade"

    if change.kind == ChangeKind.FIELD_ADDED:
        if change.meta.get('static_unsafe'):
            return "runtime.concurrency.static_unsafe_collection"

    # Lock-like symbols: removal gets a dedicated id, anything else the generic one.
    if change.symbol == "lock":
        if change.kind == ChangeKind.CALL_REMOVED:
            return "runtime.concurrency.lock_removed"
        return "runtime.concurrency.lock"

    if change.symbol == "synchronized":
        if change.kind == ChangeKind.MODIFIER_REMOVED:
            return "runtime.concurrency.lock_removed"
        return "runtime.concurrency.synchronized"

    if change.symbol == "volatile":
        if change.kind == ChangeKind.MODIFIER_REMOVED:
            return "runtime.concurrency.volatile_removed"
        return "runtime.concurrency.volatile"

    # Only the removal of 'final' is mapped; other kinds fall through.
    if change.symbol == "final":
        if change.kind == ChangeKind.MODIFIER_REMOVED:
            return "runtime.concurrency.final_removed"

    if change.symbol == "atomic_set" and change.kind == ChangeKind.CALL_REMOVED:
        return "runtime.concurrency.atomic_to_non_atomic_write"

    if change.symbol == "ThreadPoolExecutor":
        if change.meta.get('param_change'):
            return "runtime.concurrency.threadpool_param_change"
        # Creations without a truthy 'args_count' fall through to the
        # OBJECT_CREATION check near the end, which returns the same id.
        if change.kind == ChangeKind.OBJECT_CREATION and change.meta.get('args_count'):
            return "runtime.concurrency.threadpool_creation"

    if change.symbol == "LinkedBlockingQueue":
        if change.meta.get('unbounded'):
            return "runtime.concurrency.threadpool_unbounded_queue"

    if change.symbol == "sleep":
        if change.kind == ChangeKind.CALL_ADDED:
            return "runtime.performance.sleep_added"

    if change.symbol == "while_true":
        if change.kind == ChangeKind.CALL_ADDED:
            return "runtime.concurrency.busy_wait_added"

    # P1 Resource
    if change.symbol == "try_with_resources" and change.kind == ChangeKind.CALL_REMOVED:
        return "runtime.resource.try_with_resource_removed"

    # These two rules depend only on meta flags, regardless of symbol or kind.
    if change.meta.get('cache_eviction'):
        return "runtime.resource.cache_eviction_removed"

    if change.meta.get('timeout_removed'):
        return "runtime.network.timeout_removed"

    # P2 Data
    if change.symbol == "null_check" and change.meta.get('action') == "removed":
        return "runtime.data.null_check_removed"

    if change.symbol == "equals_to_ref":
        return "runtime.data.equals_to_reference_compare"

    # === Security Signals ===
    # Hardcoded secrets
    if change.symbol == "hardcoded_secret":
        if change.kind == ChangeKind.LITERAL_ADDED:
            return "security.hardcoded_secret"
        if change.kind == ChangeKind.LITERAL_REMOVED:
            return "security.hardcoded_secret_removed"

    # SQL injection risk
    if change.symbol == "sql_concat":
        if change.meta.get("risk") == "sql_injection":
            return "security.sql_injection"

    # Weak encryption
    if change.symbol == "weak_crypto":
        return "security.weak_crypto"

    # Command injection
    if change.symbol == "command_execution":
        return "security.command_injection"
    # === End Security Signals ===

    if change.kind == ChangeKind.CALL_ADDED:
        # This 'sleep' case cannot be reached: the identical condition
        # already returned in the earlier 'sleep' rule above.
        if change.symbol == "sleep":
            return "runtime.performance.sleep_added"
        if change.symbol == "remove" and change.meta.get("in_loop"):
            return "runtime.collection_mutation_inside_loop"
        if change.symbol == "newFixedThreadPool" or change.symbol == "newCachedThreadPool":
            return "runtime.concurrency.executors_factory_risk"
        if change.symbol == "get" and change.meta.get("blocking_get"):
            return "runtime.concurrency.future_get_without_timeout"

    if change.kind == ChangeKind.OBJECT_CREATION:
        if change.symbol == "ThreadPoolExecutor":
            return "runtime.concurrency.threadpool_creation"

    if change.kind == ChangeKind.CALL_REMOVED:
        if change.symbol in self.critical_calls:
            return "runtime.input_normalization_removed"

    if change.symbol == "ConcurrentHashMap":
        return "runtime.concurrency.concurrent_map"

    if change.symbol in self.pagination_vars:
        return "data.pagination_semantic_change"

    # No rule matched.
    return None
|
|
335
|
+
|
|
336
|
+
def _map_kind_to_action(self, kind: ChangeKind) -> str:
    """Translate a ChangeKind into the action string used on signals.

    Returns ``"added"`` for the *_ADDED kinds and OBJECT_CREATION,
    ``"removed"`` for the *_REMOVED kinds, ``"downgrade"`` for TYPE_CHANGED
    and ``"changed"`` for everything else (including UNKNOWN).
    """
    if kind in (
        ChangeKind.CALL_ADDED,
        ChangeKind.FIELD_ADDED,
        ChangeKind.MODIFIER_ADDED,
        ChangeKind.OBJECT_CREATION,
        ChangeKind.LITERAL_ADDED,
    ):
        return "added"
    if kind in (
        ChangeKind.CALL_REMOVED,
        ChangeKind.FIELD_REMOVED,
        ChangeKind.MODIFIER_REMOVED,
        ChangeKind.LITERAL_REMOVED,
    ):
        return "removed"
    if kind == ChangeKind.TYPE_CHANGED:
        return "downgrade"  # Specific mapping for now
    # The former special case for UNKNOWN returned the same value as this
    # fall-through, so it was dead code and has been removed.
    return "changed"
|
|
346
|
+
|
|
347
|
+
def _detect_changes_in_patch(self, filename: str, patch_content: str, mode: str = "deep") -> List[Change]:
    """Collect the Changes found in a single file's patch text.

    Lines starting with ``+``/``-`` (but not ``+++``/``---``) are gathered as
    added/removed snippets with the marker removed and whitespace stripped.
    The removed snippet is analysed first, then the added one; afterwards
    three cross-checks compare both sides (thread-safety type downgrade,
    ThreadPoolExecutor re-creation, ``equals`` replaced by ``==``).
    """
    changes = []

    added_lines, removed_lines = [], []
    for raw in patch_content.splitlines():
        if raw.startswith('+'):
            if not raw.startswith('+++'):
                added_lines.append(raw[1:].strip())
        elif raw.startswith('-') and not raw.startswith('---'):
            removed_lines.append(raw[1:].strip())

    # Removed side
    removed_vars, removed_calls, removed_modifiers = {}, set(), set()
    if removed_lines:
        self._analyze_snippet_for_changes(
            removed_lines, filename, is_added=False,
            var_map=removed_vars, call_set=removed_calls, mod_set=removed_modifiers,
            changes=changes, mode=mode)

    # Added side
    added_vars, added_calls, added_modifiers = {}, set(), set()
    if added_lines:
        self._analyze_snippet_for_changes(
            added_lines, filename, is_added=True,
            var_map=added_vars, call_set=added_calls, mod_set=added_modifiers,
            changes=changes, mode=mode)

    # Cross-check 1: a variable declared on both sides whose type went from
    # thread-safe to not thread-safe.
    for var_name, old_type in removed_vars.items():
        if var_name not in added_vars:
            continue
        new_type = added_vars[var_name]
        if is_thread_safe(old_type) and not is_thread_safe(new_type):
            changes.append(Change(
                kind=ChangeKind.TYPE_CHANGED,
                file=filename,
                symbol=var_name,
                before=old_type,
                after=new_type,
                meta={"downgrade": True, "from": old_type, "to": new_type, "var": var_name}
            ))

    def _seen(predicate):
        return any(predicate(c) for c in changes)

    # Cross-check 2: ThreadPoolExecutor present on both sides.
    tpe_removed = _seen(lambda c: c.symbol == "ThreadPoolExecutor" and c.meta.get("action") == "removed")
    tpe_added = _seen(lambda c: c.symbol == "ThreadPoolExecutor" and c.meta.get("action") == "added")
    if tpe_removed and tpe_added:
        changes.append(Change(kind=ChangeKind.UNKNOWN, file=filename, symbol="ThreadPoolExecutor", meta={"param_change": True}, line_no=None))

    # Cross-check 3: equals() call removed while a '==' was added.
    equals_removed = _seen(lambda c: c.symbol == "equals" and c.kind == ChangeKind.CALL_REMOVED)
    eq_added = _seen(lambda c: c.symbol == "==" and c.kind == ChangeKind.CALL_ADDED)
    if equals_removed and eq_added:
        changes.append(Change(kind=ChangeKind.UNKNOWN, file=filename, symbol="equals_to_ref", meta={"semantic": True}, line_no=None))

    return changes
|
|
406
|
+
|
|
407
|
+
def _analyze_snippet_for_changes(self, lines: List[str], filename: str, is_added: bool,
                                 var_map: Dict, call_set: Set, mod_set: Set, changes: List[Change], mode: str = "deep"):
    """Analyse one side of a patch and append the Changes found to ``changes``.

    Steps:
      1. Collect ``// diffsense-ignore: <rule>`` comments; each applies to
         its own line and to the following line.
      2. Tokenize the snippet with javalang and run token-level checks
         (modifiers, lock/sleep call sequences, critical calls, security
         patterns). If tokenization fails the method returns without
         recording anything.
      3. Unless ``mode == "light"``, parse the snippet wrapped as a class
         body, then as a method body, and run the tree detectors; if neither
         parses, fall back to token-based declaration scanning.
      4. Attach the collected ignores to the Changes added by this call.

    Args:
        lines: snippet lines (patch markers already removed).
        filename: file the snippet belongs to.
        is_added: True for added lines, False for removed lines.
        var_map: output mapping of variable name -> type name.
        call_set, mod_set: accepted for interface compatibility; not used here.
        changes: output list, extended in place.
        mode: "deep" (default) or "light".
    """
    start_change_idx = len(changes)

    modifier_kind = ChangeKind.MODIFIER_ADDED if is_added else ChangeKind.MODIFIER_REMOVED
    call_kind = ChangeKind.CALL_ADDED if is_added else ChangeKind.CALL_REMOVED
    literal_kind = ChangeKind.LITERAL_ADDED if is_added else ChangeKind.LITERAL_REMOVED

    # 1. Scan for Ignores
    ignores_map = {}  # line_idx (0-based) -> set(rule_ids)
    ignore_pattern = re.compile(r"//\s*diffsense-ignore:\s*([\w\.]+)")

    for i, line in enumerate(lines):
        match = ignore_pattern.search(line)
        if match:
            rule_id = match.group(1)
            # Apply to current line
            ignores_map.setdefault(i, set()).add(rule_id)
            # Apply to next line (often comments are above)
            if i + 1 < len(lines):
                ignores_map.setdefault(i + 1, set()).add(rule_id)

    code_snippet = "\n".join(lines)

    # 2. Tokenizer
    try:
        tokens = list(javalang.tokenizer.tokenize(code_snippet))
    except Exception:
        # Untokenizable snippet: nothing can be analysed. A bare ``except``
        # would also have swallowed KeyboardInterrupt/SystemExit.
        return

    # Raw Token Checks (Legacy/Simple)
    for token in tokens:
        token_val = token.value
        line_no = token.position.line  # 1-based relative to snippet

        if token_val == "synchronized":
            changes.append(Change(kind=modifier_kind, file=filename, symbol="synchronized", line_no=line_no))

        if token_val == "volatile":
            changes.append(Change(kind=modifier_kind, file=filename, symbol="volatile", line_no=line_no))

        if token_val == "ConcurrentHashMap":
            if not is_added:
                changes.append(Change(kind=ChangeKind.UNKNOWN, file=filename, symbol="ConcurrentHashMap", meta={"action": "removed"}, line_no=line_no))

        if token_val in self.pagination_vars:
            changes.append(Change(kind=ChangeKind.UNKNOWN, file=filename, symbol=token_val, meta={"action": "changed"}, line_no=line_no))

    # Check for sequences: ".lock(" and "Thread.sleep"
    for i in range(len(tokens) - 2):
        if (tokens[i].value == "." and
                tokens[i+1].value == "lock" and
                tokens[i+2].value == "("):
            changes.append(Change(kind=call_kind, file=filename, symbol="lock", line_no=tokens[i+1].position.line))

        if (tokens[i].value == "Thread" and
                tokens[i+1].value == "." and
                tokens[i+2].value == "sleep"):
            changes.append(Change(kind=call_kind, file=filename, symbol="sleep", line_no=tokens[i+2].position.line))

    # Critical Calls
    for i in range(len(tokens) - 1):
        if tokens[i].value in self.critical_calls and tokens[i+1].value == "(":
            changes.append(Change(kind=call_kind, file=filename, symbol=tokens[i].value, line_no=tokens[i].position.line))

    # === Security Detection ===
    # 1. Hardcoded secrets (token text containing a sensitive keyword plus '=' or ':')
    for token in tokens:
        if hasattr(token, 'value') and isinstance(token.value, str):
            token_val = token.value.strip('"\'`')
            if any(secret in token_val.lower() for secret in self.secret_patterns):
                if len(token_val) > 3 and ("=" in token_val or ":" in token_val):
                    changes.append(Change(
                        kind=literal_kind,
                        file=filename,
                        symbol="hardcoded_secret",
                        meta={"type": "secret", "value_hint": token_val[:20]},
                        line_no=token.position.line
                    ))

    # 2. SQL string concatenation patterns
    for i in range(len(tokens) - 1):
        token_val = tokens[i].value
        # String concatenation in SQL context: "SELECT ... " + var
        if token_val in self.sql_concat_methods:
            # Check context - is this in an SQL statement?
            context = self._get_sql_context(tokens, i)
            if context:
                changes.append(Change(
                    kind=call_kind,
                    file=filename,
                    symbol="sql_concat",
                    meta={"risk": "sql_injection"},
                    line_no=tokens[i].position.line
                ))

    # 3. Insecure crypto usage
    for token in tokens:
        if hasattr(token, 'value'):
            token_val = token.value
            if token_val in self.weak_crypto:
                changes.append(Change(
                    kind=call_kind,
                    file=filename,
                    symbol="weak_crypto",
                    meta={"algorithm": token_val, "risk": "weak_encryption"},
                    line_no=token.position.line
                ))

    # 4. Command injection (Runtime.exec, ProcessBuilder)
    for i in range(len(tokens) - 1):
        token_val = tokens[i].value
        if token_val in self.command_methods and tokens[i+1].value == "(":
            changes.append(Change(
                kind=call_kind,
                file=filename,
                symbol="command_execution",
                meta={"risk": "command_injection"},
                line_no=tokens[i].position.line
            ))
    # === End Security Detection ===

    # Stop here if mode is 'light'
    if mode == "light":
        self._apply_ignores(changes, start_change_idx, ignores_map)
        return

    # Both wrappers put exactly ONE line in front of the snippet, so the
    # wrapper-to-snippet line offset is 1 in both cases. (The method wrapper
    # previously used 2, which shifted reported lines by one and turned
    # snippet line 1 into line 0.)
    wrapper_offset = 1

    tree = self._parse_with_cache("class", f"class Dummy {{\n{code_snippet}\n}}")
    if tree is None:
        # Statement-level snippets only parse inside a method body.
        tree = self._parse_with_cache("method", f"class Dummy {{ void dummy() {{\n{code_snippet}\n}} }}")

    if tree is not None:
        self._analyze_tree_changes(tree, filename, is_added, var_map, changes, wrapper_offset)
    else:
        # Fallback: Extract vars from tokens if parsing failed
        self._analyze_tokens_fallback(tokens, var_map, changes, filename, is_added)

    # Apply Ignores
    self._apply_ignores(changes, start_change_idx, ignores_map)
|
|
566
|
+
|
|
567
|
+
def _analyze_tokens_fallback(self, tokens, var_map, changes, filename, is_added):
    """Scan a token list for variable declarations when AST parsing failed.

    Walks ``tokens`` looking for the shape
    ``[modifiers] Type[<generics>] name (= | ; | ,)``. For each match the
    variable's type name is recorded in ``var_map`` and, depending on the
    collected modifiers, Changes are appended to ``changes``:
      * FIELD_ADDED with ``static_unsafe`` for added static fields of a
        risky collection type;
      * MODIFIER_ADDED/REMOVED for ``final`` on something that looks like a
        field (has an access modifier or ``static``).
    The last token is never examined as a type (loop bound is ``len - 1``).
    """
    i = 0
    # Modifiers seen since the last non-modifier token.
    modifiers = set()

    while i < len(tokens) - 1:
        token = tokens[i]

        # 1. Collect Modifiers
        if token.value in ['private', 'public', 'protected', 'static', 'final', 'volatile', 'transient']:
            modifiers.add(token.value)
            i += 1
            continue

        # 2. Check for Type
        is_type = isinstance(token, (Identifier, BasicType))

        if not is_type:
            # Anything else breaks a potential declaration; start over.
            modifiers = set()
            i += 1
            continue

        current_type_name = token.value

        # Check for Generics: skip a balanced <...> section after the type.
        idx = i + 1
        if idx < len(tokens) and tokens[idx].value == '<':
            depth = 1
            idx += 1
            while idx < len(tokens) and depth > 0:
                if tokens[idx].value == '<': depth += 1
                elif tokens[idx].value == '>': depth -= 1
                idx += 1
            if depth > 0: # Unbalanced
                # Note: modifiers are intentionally left untouched here, as in
                # the original flow.
                i += 1
                continue

        # 3. Variable Name
        if idx < len(tokens) and isinstance(tokens[idx], Identifier):
            var_name = tokens[idx].value
            # Check what follows (should be = or ; or ,)
            idx2 = idx + 1
            if idx2 < len(tokens) and tokens[idx2].value in ['=', ';', ',']:
                var_map[var_name] = current_type_name

                # Detect Signals
                # Line of the type token (1-based within the snippet).
                line_no = token.position.line

                # static_unsafe_collection
                if is_added and 'static' in modifiers:
                    risky_static_types = {"HashMap", "ArrayList", "HashSet", "TreeMap", "LinkedList"}
                    if current_type_name in risky_static_types:
                        changes.append(Change(kind=ChangeKind.FIELD_ADDED, file=filename, symbol=var_name, meta={"static_unsafe": True}, line_no=line_no))

                # final (if looks like field)
                is_field = any(m in modifiers for m in ['private', 'public', 'protected', 'static'])
                if is_field and 'final' in modifiers:
                    kind = ChangeKind.MODIFIER_ADDED if is_added else ChangeKind.MODIFIER_REMOVED
                    changes.append(Change(kind=kind, file=filename, symbol="final", line_no=line_no))

                # Resume scanning at the token that terminated the declarator.
                i = idx2
                modifiers = set()
                continue

        # Not a declaration: drop collected modifiers and advance one token.
        modifiers = set()
        i += 1
|
|
632
|
+
|
|
633
|
+
def _apply_ignores(self, changes: List[Change], start_idx: int, ignores_map: Dict[int, Set[str]]):
    """Attach inline-ignore rule ids to the changes from ``start_idx`` onwards.

    ``ignores_map`` is keyed by 0-based snippet line index while
    ``Change.line_no`` is 1-based; changes without a truthy ``line_no`` are
    left untouched. Matching changes get ``meta['ignores']`` set to a list of
    the rule ids for their line.
    """
    for position in range(start_idx, len(changes)):
        change = changes[position]
        if not change.line_no:
            continue
        rule_ids = ignores_map.get(change.line_no - 1)
        if rule_ids is not None:
            change.meta['ignores'] = list(rule_ids)
|
|
641
|
+
|
|
642
|
+
def _analyze_tree_changes(self, tree, filename: str, is_added: bool, var_map: Dict, changes: List[Change], offset: int = 0):
    """Walk every ``(path, node)`` pair of ``tree`` and run all detectors on it.

    ``offset`` is subtracted from each node's line so that reported line
    numbers are relative to the snippet rather than to the wrapper source;
    nodes without a position get ``line_no=None``.
    """
    for path, node in tree:
        if node.position:
            line_no = node.position.line - offset
        else:
            line_no = None

        # Record declared variable types before the detectors run.
        self._update_context(node, var_map)

        # Detectors (the concurrency detector does not take the path).
        self._detect_concurrency_signals(node, filename, is_added, var_map, changes, line_no)
        for detector in (
            self._detect_resource_signals,
            self._detect_data_signals,
            self._detect_general_signals,
        ):
            detector(node, filename, is_added, var_map, changes, line_no, path)
|
|
654
|
+
|
|
655
|
+
def _update_context(self, node, var_map: Dict):
    """Record ``name -> type name`` in ``var_map`` for field and local declarations.

    Nodes of any other kind, and declarations without a type, are ignored.
    """
    if not isinstance(node, (FieldDeclaration, LocalVariableDeclaration)):
        return
    if not node.type:
        return
    for declarator in node.declarators:
        var_map[declarator.name] = node.type.name
|
|
664
|
+
|
|
665
|
+
def _detect_concurrency_signals(self, node, filename: str, is_added: bool, var_map: Dict, changes: List[Change], line_no: int):
    """Append concurrency-related changes found on a single AST node.

    Signals recorded (all appended to ``changes``):
      * ``synchronized`` statements and ``synchronized`` methods,
      * ``volatile`` / ``final`` field modifiers,
      * added static fields whose type is one of a fixed set of collection
        types and is not reported thread-safe by ``is_thread_safe``,
      * ``lock`` / ``acquire`` / ``await`` / ``sleep`` calls,
      * removed ``set`` calls on variables whose recorded type starts with
        ``Atomic``,
      * ``ThreadPoolExecutor`` creation and ``LinkedBlockingQueue`` creation
        with no argument or with an ``Integer.MAX_VALUE`` argument,
      * ``while`` loops whose condition has the value ``"true"``.

    Args:
        node: AST node currently being visited.
        filename: File name stored on every recorded change.
        is_added: True selects the ``*_ADDED`` kinds, False the ``*_REMOVED`` kinds.
        var_map: Variable-name to type-name mapping used for the atomic check.
        changes: List the detected changes are appended to.
        line_no: Line number stored on every recorded change.
    """
    def record(kind, symbol, meta=None):
        # ``meta`` is only forwarded when given so Change keeps its own default.
        if meta is None:
            changes.append(Change(kind=kind, file=filename, symbol=symbol, line_no=line_no))
        else:
            changes.append(Change(kind=kind, file=filename, symbol=symbol, meta=meta, line_no=line_no))

    modifier_kind = ChangeKind.MODIFIER_ADDED if is_added else ChangeKind.MODIFIER_REMOVED
    call_kind = ChangeKind.CALL_ADDED if is_added else ChangeKind.CALL_REMOVED

    # 1. lock_removed / synchronized / volatile / final
    if isinstance(node, SynchronizedStatement):
        record(modifier_kind, "synchronized")

    if isinstance(node, MethodDeclaration) and 'synchronized' in node.modifiers:
        record(modifier_kind, "synchronized")

    if isinstance(node, FieldDeclaration):
        for keyword in ("volatile", "final"):
            if keyword in node.modifiers:
                record(modifier_kind, keyword)

        # 7. static_unsafe_collection
        if is_added and 'static' in node.modifiers and node.type:
            declared = node.type
            # Basic type check: fall back to the string form when there is no name.
            type_name = declared.name if hasattr(declared, 'name') else str(declared)
            if not is_thread_safe(type_name):
                # Strip any generic suffix before comparing.
                base_type = type_name.split('<')[0]
                if base_type in {"HashMap", "ArrayList", "HashSet", "TreeMap", "LinkedList"}:
                    record(ChangeKind.FIELD_ADDED, node.declarators[0].name, {"static_unsafe": True})

    if isinstance(node, MethodInvocation):
        call_name = node.member
        qualifier = node.qualifier

        if call_name == "lock":
            # lock.lock(): accepted when unqualified or when the qualifier mentions "lock".
            if not qualifier or "lock" in qualifier.lower():
                record(call_kind, "lock")
        elif call_name in ("acquire", "await", "sleep"):
            # semaphore.acquire(), latch.await(), and 10. sleep
            record(call_kind, call_name)
        elif call_name == "set" and not is_added:
            # 6. atomic_to_non_atomic_write (Call Removed: atomic.set)
            if qualifier and qualifier in var_map and var_map[qualifier].startswith("Atomic"):
                record(ChangeKind.CALL_REMOVED, "atomic_set", {"var": qualifier})

    # 8. threadpool_param_change & 9. threadpool_unbounded_queue
    if isinstance(node, ClassCreator):
        created_type = node.type.name
        if created_type == "ThreadPoolExecutor":
            record(
                ChangeKind.OBJECT_CREATION,
                "ThreadPoolExecutor",
                {
                    "args_count": len(node.arguments),
                    "param_change": True,
                    "action": "added" if is_added else "removed",
                },
            )
        elif created_type == "LinkedBlockingQueue":
            ctor_args = node.arguments
            unbounded = not ctor_args or (
                len(ctor_args) == 1 and "Integer.MAX_VALUE" in str(ctor_args[0])
            )
            if unbounded:
                record(ChangeKind.OBJECT_CREATION, "LinkedBlockingQueue", {"unbounded": True})

    # 10. while(true)
    if isinstance(node, WhileStatement):
        condition = node.condition
        if hasattr(condition, 'value') and condition.value == "true":
            record(call_kind, "while_true")
|
|
756
|
+
|
|
757
|
+
def _detect_resource_signals(self, node, filename: str, is_added: bool, var_map: Dict, changes: List[Change], line_no: int, path: Any):
|
|
758
|
+
# 12. try_with_resource_removed
|
|
759
|
+
if isinstance(node, TryStatement):
|
|
760
|
+
if node.resources:
|
|
761
|
+
if not is_added: # Removed
|
|
762
|
+
changes.append(Change(kind=ChangeKind.CALL_REMOVED, file=filename, symbol="try_with_resources", line_no=line_no))
|
|
763
|
+
|
|
764
|
+
if isinstance(node, MethodInvocation):
|
|
765
|
+
call_name = node.member
|
|
766
|
+
qualifier = str(node.qualifier).lower() if node.qualifier else ""
|
|
767
|
+
|
|
768
|
+
# 13. cache_eviction_removed
|
|
769
|
+
if not is_added and call_name in ["expire", "setExpire", "setTTL", "evict", "clear"]:
|
|
770
|
+
if "cache" in filename.lower() or "redis" in filename.lower() or "map" in qualifier:
|
|
771
|
+
changes.append(Change(kind=ChangeKind.CALL_REMOVED, file=filename, symbol=call_name, meta={"cache_eviction": True}, line_no=line_no))
|
|
772
|
+
|
|
773
|
+
# 15. timeout_removed
|
|
774
|
+
if not is_added and ("timeout" in call_name.lower() or call_name == "setTimeout"):
|
|
775
|
+
changes.append(Change(kind=ChangeKind.CALL_REMOVED, file=filename, symbol=call_name, meta={"timeout_removed": True}, line_no=line_no))
|
|
776
|
+
|
|
777
|
+
def _detect_data_signals(self, node, filename: str, is_added: bool, var_map: Dict, changes: List[Change], line_no: int, path: Any):
    """Append data-comparison changes found on a single AST node.

    Signals recorded:
      * a removed ``equals`` call (``CALL_REMOVED``, symbol ``equals``),
      * an added ``==`` binary operation (``CALL_ADDED``, symbol ``==``),
      * a removed ``if`` whose condition is an ``==`` comparison with a
        ``null`` literal on either side (``UNKNOWN``, symbol ``null_check``).

    Args:
        node: AST node currently being visited.
        filename: File name stored on every recorded change.
        is_added: Whether the node comes from the added side.
        var_map: Unused by this detector; kept for a uniform detector signature.
        changes: List the detected changes are appended to.
        line_no: Line number stored on every recorded change.
        path: Unused by this detector; kept for a uniform detector signature.
    """
    # 18. equals_to_reference_compare
    if isinstance(node, MethodInvocation) and node.member == "equals" and not is_added:
        changes.append(Change(kind=ChangeKind.CALL_REMOVED, file=filename, symbol="equals", line_no=line_no))

    if isinstance(node, BinaryOperation) and node.operator == "==" and is_added:
        changes.append(Change(kind=ChangeKind.CALL_ADDED, file=filename, symbol="==", line_no=line_no))

    # 19. null_check_removed
    if is_added or not isinstance(node, IfStatement):
        return

    cond = node.condition
    if not (isinstance(cond, BinaryOperation) and cond.operator == "=="):
        # NOTE(review): only `== null` conditions are recognised; `!= null`
        # guards fall through here. Confirm whether that is intended.
        return

    compares_with_null = any(
        isinstance(operand, Literal) and operand.value == "null"
        for operand in (cond.operandr, cond.operandl)
    )
    if compares_with_null:
        changes.append(Change(kind=ChangeKind.UNKNOWN, file=filename, symbol="null_check", meta={"action": "removed"}, line_no=line_no))
|
|
797
|
+
|
|
798
|
+
def _detect_general_signals(self, node, filename: str, is_added: bool, var_map: Dict, changes: List[Change], line_no: int, path: Any):
    """Append general method-call changes found on a single AST node.

    Only method invocations are inspected. Signals recorded, in this order:
      * ``Executors.newFixedThreadPool`` / ``Executors.newCachedThreadPool``
        (``meta["risk"] == "threadpool_factory"``),
      * a ``get`` call without arguments (``meta["blocking_get"]``),
      * a removed call whose name is in ``self.critical_calls``,
      * an added ``remove`` call located inside a loop (``meta["in_loop"]``).

    Args:
        node: AST node currently being visited.
        filename: File name stored on every recorded change.
        is_added: True selects ``CALL_ADDED``, False ``CALL_REMOVED``.
        var_map: Unused by this detector; kept for a uniform detector signature.
        changes: List the detected changes are appended to.
        line_no: Line number stored on every recorded change.
        path: Parent chain of ``node``, handed to ``_is_inside_loop``.
    """
    # Original logic for critical calls etc.
    if not isinstance(node, MethodInvocation):
        return

    call_name = node.member
    kind = ChangeKind.CALL_ADDED if is_added else ChangeKind.CALL_REMOVED

    def record(symbol, meta=None):
        # ``meta`` is only forwarded when given so Change keeps its own default.
        if meta is None:
            changes.append(Change(kind=kind, file=filename, symbol=symbol, line_no=line_no))
        else:
            changes.append(Change(kind=kind, file=filename, symbol=symbol, meta=meta, line_no=line_no))

    # Dubbo P0: Executors factory methods
    if node.qualifier == "Executors" and call_name in ("newFixedThreadPool", "newCachedThreadPool"):
        record(call_name, {"risk": "threadpool_factory"})

    # Dubbo P0: Future.get() without timeout
    if call_name == "get" and not node.arguments:
        record("get", {"blocking_get": True})

    # Critical calls (input/validation)
    if call_name in self.critical_calls and not is_added:
        record(call_name)

    # Collection mutation in loop
    if call_name == "remove" and is_added and self._is_inside_loop(path):
        record("remove", {"in_loop": True})
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
def _get_sql_context(self, tokens, pos: int) -> Optional[str]:
|
|
825
|
+
"""
|
|
826
|
+
Check if the concatenation is in SQL context.
|
|
827
|
+
Looks for SQL keywords nearby in the token stream.
|
|
828
|
+
"""
|
|
829
|
+
# Look back for SQL keywords
|
|
830
|
+
look_back = 20
|
|
831
|
+
start = max(0, pos - look_back)
|
|
832
|
+
nearby_tokens = [t.value for t in tokens[start:pos]]
|
|
833
|
+
|
|
834
|
+
sql_keywords = {
|
|
835
|
+
"SELECT", "INSERT", "UPDATE", "DELETE", "FROM", "WHERE", "JOIN",
|
|
836
|
+
"TABLE", "CREATE", "DROP", "ALTER", "query", "sql"
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
for token_val in nearby_tokens:
|
|
840
|
+
if token_val.upper() in sql_keywords or token_val.lower() in sql_keywords:
|
|
841
|
+
return "sql_statement"
|
|
842
|
+
|
|
843
|
+
return None
|
|
844
|
+
|
|
845
|
+
def _is_inside_loop(self, path: Tuple) -> bool:
|
|
846
|
+
"""
|
|
847
|
+
Check if the current node (at the end of path) is inside a loop structure.
|
|
848
|
+
path is a list/tuple of parent nodes.
|
|
849
|
+
"""
|
|
850
|
+
for node in reversed(path):
|
|
851
|
+
if isinstance(node, (ForStatement, WhileStatement, DoStatement)):
|
|
852
|
+
return True
|
|
853
|
+
return False
|