diffsense 2.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapters/__init__.py +0 -0
- adapters/base.py +27 -0
- adapters/github_adapter.py +164 -0
- adapters/gitlab_adapter.py +207 -0
- adapters/local_adapter.py +136 -0
- banner.py +71 -0
- cli.py +606 -0
- config/__init__.py +1 -0
- config/rules.yaml +371 -0
- core/__init__.py +235 -0
- core/ast_detector.py +853 -0
- core/change.py +46 -0
- core/composer.py +93 -0
- core/evaluator.py +15 -0
- core/ignore_manager.py +71 -0
- core/knowledge.py +77 -0
- core/parser.py +181 -0
- core/parser_manager.py +104 -0
- core/quality_manager.py +117 -0
- core/renderer.py +197 -0
- core/rule_base.py +98 -0
- core/rule_runtime.py +103 -0
- core/rules.py +718 -0
- core/run_config.py +85 -0
- core/semantic_diff.py +359 -0
- core/signal_model.py +21 -0
- core/signals_registry.py +62 -0
- diffsense-2.2.12.dist-info/METADATA +18 -0
- diffsense-2.2.12.dist-info/RECORD +58 -0
- diffsense-2.2.12.dist-info/WHEEL +5 -0
- diffsense-2.2.12.dist-info/entry_points.txt +3 -0
- diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
- diffsense-2.2.12.dist-info/top_level.txt +11 -0
- diffsense_mcp/__init__.py +1 -0
- diffsense_mcp/launcher.py +28 -0
- diffsense_mcp/server.py +687 -0
- governance/lifecycle.py +54 -0
- main.py +318 -0
- rules/__init__.py +246 -0
- rules/api_compatibility.py +372 -0
- rules/collection_handling.py +349 -0
- rules/concurrency.py +194 -0
- rules/concurrency_adapter.py +250 -0
- rules/cross_language_adapter.py +444 -0
- rules/exception_handling.py +320 -0
- rules/go_rules.py +401 -0
- rules/null_safety.py +301 -0
- rules/resource_management.py +222 -0
- rules/yaml_adapter.py +195 -0
- run_audit.py +478 -0
- sdk/cpp_adapter.py +238 -0
- sdk/go_adapter.py +199 -0
- sdk/java_adapter.py +199 -0
- sdk/javascript_adapter.py +229 -0
- sdk/language_adapter.py +313 -0
- sdk/python_adapter.py +195 -0
- sdk/rule.py +63 -0
- sdk/signal.py +14 -0
core/change.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import Any, Dict, Optional
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
class ChangeKind(Enum):
    """Closed set of change categories a detected ``Change`` can carry.

    Each member's value is identical to its name, so ``kind.value`` can be
    used directly as a stable string in serialised output.
    """

    TYPE_CHANGED = "TYPE_CHANGED"
    MODIFIER_REMOVED = "MODIFIER_REMOVED"
    MODIFIER_ADDED = "MODIFIER_ADDED"
    CALL_REMOVED = "CALL_REMOVED"
    CALL_ADDED = "CALL_ADDED"
    FIELD_REMOVED = "FIELD_REMOVED"
    FIELD_ADDED = "FIELD_ADDED"
    ANNOTATION_REMOVED = "ANNOTATION_REMOVED"
    ANNOTATION_ADDED = "ANNOTATION_ADDED"
    OBJECT_CREATION = "OBJECT_CREATION"
    # Security-related changes
    LITERAL_ADDED = "LITERAL_ADDED"  # Hardcoded secrets/passwords
    LITERAL_REMOVED = "LITERAL_REMOVED"
    # Fallback/Generic
    UNKNOWN = "UNKNOWN"


@dataclass
class Change:
    """A single change record: what kind of change, in which file, on which symbol."""

    kind: ChangeKind
    file: str
    symbol: str  # The identifier (variable name, method name, etc.)

    # Values before/after the change (any type; plain strings are acceptable).
    before: Optional[Any] = None
    after: Optional[Any] = None

    # Location info
    line_no: Optional[int] = None

    # Extra context; a fresh dict per instance (never shared between instances).
    meta: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of this change.

        ``kind`` is emitted as its string value. ``line_no`` is included so
        the location survives serialisation (it is ``None`` when unknown).
        ``meta`` is returned by reference, not copied.
        """
        return {
            "kind": self.kind.value,
            "file": self.file,
            "symbol": self.symbol,
            "before": self.before,
            "after": self.after,
            "line_no": self.line_no,
            "meta": self.meta,
        }
|
core/composer.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from typing import Dict, Any, List
|
|
2
|
+
|
|
3
|
+
class DecisionComposer:
    """Folds the list of triggered rules into a single review decision."""

    def compose(self, triggered_rules: List[Dict[str, Any]], diff_files: List[str] = None) -> Dict[str, Any]:
        """
        Synthesizes triggered rules into a final decision adhering to the Parser Contract.

        Args:
            triggered_rules: Rule dicts. Keys read here: ``id``, ``impact``,
                ``severity``, ``rationale``, ``matched_file``, ``precision``,
                ``quality_status``, ``experimental`` and ``is_blocking``.
                Missing keys fall back to defaults.
            diff_files: Files touched by the diff; ``None`` is treated as ``[]``.

        Returns:
            A dict with ``review_level``, ``reasons`` (rule ids in input order),
            ``files``, ``impacts`` (highest severity seen per impact dimension),
            ``details`` (one entry per rule) and ``meta``.
        """
        diff_files = diff_files or []
        reasons = []
        details = []
        impacts_map = {}
        # Numeric rank of the severity currently stored per impact dimension.
        # Kept separately so "dimension not seen yet" is distinguishable from
        # "dimension seen with rank 0 (low)".
        dim_scores = {}

        severity_map = {
            "critical": 3,
            "high": 2,
            "medium": 1,
            "low": 0
        }

        max_score = 0
        has_blocking_rule = False

        for rule in triggered_rules:
            rule_id = rule.get('id', 'unknown')
            impact_dim = rule.get('impact', 'general')
            severity = rule.get('severity', 'low')
            precision = rule.get('precision')
            quality_status = rule.get('quality_status')
            is_experimental = bool(rule.get("experimental"))

            if rule.get('is_blocking', False):
                has_blocking_rule = True

            reasons.append(rule_id)

            detail = {
                "rule_id": rule_id,
                "severity": severity,
                "file": rule.get('matched_file', ''),
                "rationale": rule.get('rationale', ''),
                "impact": impact_dim
            }
            # Optional fields are only emitted when the rule supplies them.
            if precision is not None:
                detail["precision"] = precision
            if quality_status is not None:
                detail["quality_status"] = quality_status
            if is_experimental:
                detail["experimental"] = True
            details.append(detail)

            # Experimental rules are reported in details but never influence
            # the per-dimension impact or the overall score.
            if is_experimental:
                continue

            # Rank case-insensitively so "CRITICAL" and "critical" are equal;
            # unrecognised labels rank as 0.
            new_score = severity_map.get(str(severity).lower(), 0)

            # Record the dimension the first time it is seen (even at rank 0),
            # then only replace it with a strictly higher severity.
            if impact_dim not in dim_scores or new_score > dim_scores[impact_dim]:
                dim_scores[impact_dim] = new_score
                impacts_map[impact_dim] = severity

            if new_score > max_score:
                max_score = new_score

        # Determine Review Level
        # Only CRITICAL severity (or an explicitly blocking rule) escalates to
        # a critical review; HIGH and below are reported without blocking CI.
        if not triggered_rules:
            review_level = "normal"
        elif has_blocking_rule or max_score >= 3:
            review_level = "critical"
        else:
            review_level = "low"  # flagged as low risk: report only, do not block

        # Construct Final JSON Contract
        # NOTE: "elevated" is never produced by this method today; the mapping
        # is kept so the action table stays complete.
        action_by_level = {
            "critical": "block_pr",
            "elevated": "manual_review",
        }
        suggested_action = action_by_level.get(review_level, "auto_merge")

        return {
            "review_level": review_level,
            "reasons": reasons,
            "files": diff_files,
            "impacts": impacts_map,
            "details": details,
            "meta": {
                "confidence": 1.0,  # Placeholder as requested
                "suggested_action": suggested_action
            }
        }
|
core/evaluator.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from typing import Dict, Any, List
|
|
2
|
+
from .rules import RuleEngine
|
|
3
|
+
|
|
4
|
+
class ImpactEvaluator:
    """Facade that hands impact evaluation over to a rule engine."""

    def __init__(self, rule_engine: RuleEngine):
        # All evaluation work is delegated to this engine.
        self.rule_engine = rule_engine

    def evaluate(self, diff_data: Dict[str, Any], ast_signals: List[Any] = None) -> List[Dict[str, Any]]:
        """
        Forward the diff and its AST signals to the rule engine.

        A missing or empty ``ast_signals`` is replaced by a new empty list
        before the call. The engine's return value is passed back unchanged
        (annotated as the list of triggered rule objects).
        """
        signals = ast_signals if ast_signals else []
        return self.rule_engine.evaluate(diff_data, signals)
|
core/ignore_manager.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import yaml
|
|
3
|
+
import fnmatch
|
|
4
|
+
from typing import List, Dict, Any, Optional
|
|
5
|
+
|
|
6
|
+
class IgnoreManager:
    """
    Manages repository-level ignore configurations.

    ``self.ignores`` is a list of ``{"rule": <pattern>, "files": [<pattern>, ...]}``
    entries; both kinds of pattern are matched with ``fnmatch``.
    """

    def __init__(self, repo_root: str = "."):
        self.repo_root = repo_root
        self.ignores = []  # List of {rule_pattern, file_patterns}
        self._load_config()

    def _load_config(self):
        """Load ignore entries from the first usable config file under ``repo_root``.

        Loading is best-effort: a file that cannot be read or parsed is
        reported via ``print`` and the next candidate is tried.
        """
        # Prefer diffsense-ignore.yaml (roadmap standard), then legacy names. Only one is loaded.
        config_files = ["diffsense-ignore.yaml", ".diffsense.yaml", ".diffsenseignore"]

        for fname in config_files:
            path = os.path.join(self.repo_root, fname)
            if not os.path.exists(path):
                continue
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    data = yaml.safe_load(f)
                # Only a mapping with an 'ignore' key counts as a usable config;
                # anything else falls through to the next candidate file.
                if isinstance(data, dict) and 'ignore' in data:
                    self._parse_ignores(data['ignore'])
                    print(f"Loaded ignore config from {fname}")
                    break
            except Exception as e:
                # Deliberately broad: a broken ignore file must not stop the run.
                print(f"Error loading ignore config {fname}: {e}")

    def _parse_ignores(self, ignore_list: List[Dict[str, Any]]):
        """Append the well-formed entries of ``ignore_list`` to ``self.ignores``.

        Each entry is a mapping with ``rule`` (or ``id``) and optional
        ``files`` (a string or a list). An empty/``None`` list is a no-op.
        Entries that are not mappings are skipped with a warning, so one
        malformed item does not discard the rest of the configuration.
        """
        if not ignore_list:
            return

        for item in ignore_list:
            if not isinstance(item, dict):
                print(f"Skipping malformed ignore entry: {item!r}")
                continue

            rule = item.get('rule') or item.get('id')
            if not rule:
                continue

            files = item.get('files') or []
            if isinstance(files, str):
                files = [files]

            # fnmatch only accepts strings; coerce scalars such as numeric ids
            # so a later is_ignored() call cannot raise on them.
            self.ignores.append({
                "rule": str(rule),
                "files": [str(pattern) for pattern in files]
            })

    def is_ignored(self, rule_id: str, file_path: str) -> bool:
        """
        Checks if a rule is ignored for a specific file.

        Returns True when some entry's rule pattern matches ``rule_id`` and
        that entry either has no file patterns (applies to all files) or has
        a file pattern matching ``file_path``. ``file_path`` is matched as
        given; no path normalisation is applied here.
        """
        for ignore in self.ignores:
            if not fnmatch.fnmatch(rule_id, ignore["rule"]):
                continue

            file_patterns = ignore["files"]
            # If no file patterns, it applies globally (to all files)
            if not file_patterns:
                return True

            if any(fnmatch.fnmatch(file_path, fp) for fp in file_patterns):
                return True

        return False
|
core/knowledge.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from typing import Set
|
|
2
|
+
|
|
3
|
+
# Layer 1: Knowledge Base / TypeTags
|
|
4
|
+
|
|
5
|
+
# P0: Thread Safety Types
|
|
6
|
+
# Simple (unqualified) type names treated as thread-safe by is_thread_safe().
THREAD_SAFE_TYPES: Set[str] = {
    # Concurrent maps / atomics / adders
    "ConcurrentHashMap", "AtomicInteger", "AtomicLong", "AtomicBoolean",
    "LongAdder", "DoubleAdder",
    # Blocking and transfer queues
    "BlockingQueue", "ArrayBlockingQueue", "LinkedBlockingQueue",
    "PriorityBlockingQueue", "DelayQueue", "SynchronousQueue",
    "LinkedTransferQueue", "LinkedBlockingDeque",
    # Copy-on-write and other concurrent collections
    "CopyOnWriteArrayList", "CopyOnWriteArraySet",
    "ConcurrentLinkedQueue", "ConcurrentLinkedDeque",
    "ConcurrentSkipListMap", "ConcurrentSkipListSet",
    # Locks and synchronizers
    "ReentrantLock", "ReentrantReadWriteLock", "StampedLock",
    "Semaphore", "CountDownLatch", "CyclicBarrier", "Exchanger", "Phaser",
    # Legacy but thread-safe
    "StringBuffer", "Hashtable", "Vector",
}

# Type names with locking semantics, consulted by is_lock_type().
# Note: "Lock" and "ReadWriteLock" are listed here but not in THREAD_SAFE_TYPES.
LOCK_TYPES: Set[str] = {
    "Lock", "ReentrantLock",
    "ReadWriteLock", "ReentrantReadWriteLock",
    "StampedLock",
}

# Resource type names (need closing).
RESOURCE_TYPES: Set[str] = {
    "InputStream", "OutputStream", "Reader", "Writer",
    "Connection", "Statement", "ResultSet",
    "Socket", "ServerSocket", "Channel", "Selector",
    "FileLock",
}
|
|
64
|
+
|
|
65
|
+
def is_thread_safe(type_name: str) -> bool:
    """Return True when ``type_name`` is listed in THREAD_SAFE_TYPES.

    Only the simple name is compared: fully qualified names and inheritance
    are not resolved. An empty or ``None`` name is never thread-safe.
    """
    if type_name:
        # Drop any generic argument list: ConcurrentHashMap<K,V> -> ConcurrentHashMap
        raw_name, _, _ = type_name.partition('<')
        return raw_name.strip() in THREAD_SAFE_TYPES
    return False
|
|
74
|
+
|
|
75
|
+
def is_lock_type(type_name: str) -> bool:
    """Return True when ``type_name`` is listed in LOCK_TYPES.

    Generic arguments are ignored (``Foo<T>`` is compared as ``Foo``).
    An empty or ``None`` name yields False instead of raising, matching the
    guard in ``is_thread_safe``.
    """
    if not type_name:
        return False
    base_name = type_name.split('<')[0].strip()
    return base_name in LOCK_TYPES
|
core/parser.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
import hashlib
|
|
6
|
+
from typing import List, Dict, Any, Optional
|
|
7
|
+
from . import CACHE_VERSION
|
|
8
|
+
from . import get_cache_max_age_seconds
|
|
9
|
+
|
|
10
|
+
class DiffParser:
    """Parses unified diff text into a structured dict, with a best-effort disk cache.

    Parse results are cached as JSON files named after the SHA-1 of the diff
    text. Cache failures never fail a parse.
    """

    # Two-way unified hunk header "@@ -a[,b] +c[,d] @@"; the groups capture the
    # optional old/new line counts (an omitted count means 1).
    _HUNK_HEADER = re.compile(r"^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@")

    _CONFIG_SUFFIXES = ('.json', '.yaml', '.yml')
    _LOGIC_SUFFIXES = ('.ts', '.py', '.go', '.java')
    _DOC_SUFFIXES = ('.md', '.txt')

    def __init__(self, cache_dir: Optional[str] = None):
        self.cache_dir = cache_dir or self._resolve_cache_dir()
        self.metrics = {"hits": 0, "misses": 0, "saved_ms": 0}

    def _resolve_cache_dir(self) -> str:
        """Return ``<base>/<CACHE_VERSION>/diff``; base is $DIFFSENSE_CACHE_DIR or ~/.diffsense/cache."""
        base_dir = os.environ.get("DIFFSENSE_CACHE_DIR")
        if not base_dir:
            base_dir = os.path.join(os.path.expanduser("~"), ".diffsense", "cache")
        return os.path.join(base_dir, CACHE_VERSION, "diff")

    def _cache_key(self, diff_content: str) -> str:
        """Return the SHA-1 hex digest of the diff text (used as a cache key only)."""
        return hashlib.sha1(diff_content.encode("utf-8", errors="ignore")).hexdigest()

    def _cache_path(self, cache_key: str) -> str:
        return os.path.join(self.cache_dir, f"{cache_key}.json")

    def _load_cache(self, cache_key: str) -> Optional[Dict[str, Any]]:
        """Return the cached result for ``cache_key``, or None when absent, expired or unreadable."""
        path = self._cache_path(cache_key)
        if not os.path.exists(path):
            return None

        max_age = get_cache_max_age_seconds()
        if max_age > 0:
            try:
                if (time.time() - os.path.getmtime(path)) > max_age:
                    # Expired entry: remove it if possible and report a miss.
                    try:
                        os.remove(path)
                    except OSError:
                        pass
                    return None
            except OSError:
                return None

        try:
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception:
            # A corrupt or unreadable entry is treated as a miss.
            return None

    def _save_cache(self, cache_key: str, data: Dict[str, Any]) -> None:
        """Write ``data`` to the cache; any failure is swallowed (cache is optional)."""
        path = self._cache_path(cache_key)
        tmp_path = f"{path}.{os.getpid()}.tmp"
        try:
            # Directory creation is inside the try so that an unwritable cache
            # location cannot make parse() raise.
            os.makedirs(self.cache_dir, exist_ok=True)
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(data, f)
            # Atomic rename (replace existing if any)
            os.replace(tmp_path, path)
        except Exception:
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    @staticmethod
    def _flush_patch(file_patches, new_files, current_file, patch_lines, is_new_file) -> None:
        """Record the accumulated patch for ``current_file``, if there is one."""
        if current_file and patch_lines:
            file_patches.append({
                "file": current_file,
                "patch": "\n".join(patch_lines),
                "is_new": is_new_file
            })
            if is_new_file:
                new_files.append(current_file)

    @staticmethod
    def _starts_file_header(lines: List[str], idx: int) -> bool:
        """True when lines[idx:idx+3] look like '--- ', '+++ ', '@@' (a file header)."""
        return (
            idx + 2 < len(lines)
            and lines[idx].startswith("--- ")
            and lines[idx + 1].startswith("+++ ")
            and lines[idx + 2].startswith("@@")
        )

    @classmethod
    def _classify_change_types(cls, files: List[str]) -> List[str]:
        """Map file suffixes to 'config' / 'logic' / 'doc' / 'other'; sorted for stable output."""
        kinds = set()
        for name in files:
            if name.endswith(cls._CONFIG_SUFFIXES):
                kinds.add("config")
            elif name.endswith(cls._LOGIC_SUFFIXES):
                kinds.add("logic")
            elif name.endswith(cls._DOC_SUFFIXES):
                kinds.add("doc")
            else:
                kinds.add("other")
        return sorted(kinds)

    def parse(self, diff_content: str) -> Dict[str, Any]:
        """
        Parses a unified diff string and returns a structured object.

        Returns a dict with:
            files:        paths seen in ``---``/``+++`` headers, in first-seen order
            new_files:    paths whose ``---`` side is ``/dev/null``
            file_patches: ``{"file", "patch", "is_new"}`` per ``diff --git`` section
            stats:        ``{"add": n, "del": n}`` counts of added/removed lines
            change_types: sorted subset of config / logic / doc / other
            raw_diff:     the input text

        Input that looks like JSON yields an empty result (with a warning).
        Results are served from / stored to the on-disk cache when possible.
        """
        start_time = time.time()
        stats = {"add": 0, "del": 0}

        cache_key = self._cache_key(diff_content)
        cached = self._load_cache(cache_key)
        if cached:
            self.metrics["hits"] += 1
            return cached

        self.metrics["misses"] += 1

        # Check if content looks like JSON
        if diff_content.strip().startswith(('{', '[')):
            print("Warning: Diff content looks like JSON. Parser expects Unified Diff format.")
            result = {"files": [], "new_files": [], "file_patches": [], "stats": stats, "change_types": [], "raw_diff": diff_content}
            self._save_cache(cache_key, result)
            return result

        files = []
        new_files = []
        file_patches = []

        lines = diff_content.splitlines()
        current_file = None
        current_patch_lines = []
        is_new_file = False
        # Lines still expected on the old/new side of the current hunk. While
        # either is positive the line is hunk content, never a header.
        old_left = 0
        new_left = 0

        for idx, line in enumerate(lines):
            if line.startswith("diff --git"):
                # New file section: save the previous patch and reset all state.
                self._flush_patch(file_patches, new_files, current_file, current_patch_lines, is_new_file)
                current_file = None
                current_patch_lines = []
                is_new_file = False
                old_left = new_left = 0
            elif (old_left > 0 or new_left > 0) and self._starts_file_header(lines, idx):
                # The hunk header overstated its length (or the diff is not
                # git-style); an unmistakable file header ends the hunk.
                old_left = new_left = 0

            if old_left > 0 or new_left > 0:
                # Hunk content. Classify by the first character only, so a
                # removed "-- x" line ("--- x") or an added "++ x" line
                # ("+++ x") is counted instead of being read as a header.
                if line.startswith('+'):
                    new_left -= 1
                    stats["add"] += 1
                elif line.startswith('-'):
                    old_left -= 1
                    stats["del"] += 1
                elif not line.startswith('\\'):
                    # Context line; "\ No newline at end of file" counts for neither side.
                    old_left -= 1
                    new_left -= 1
                current_patch_lines.append(line)
                continue

            # Capture filename from --- or +++
            if line.startswith("--- "):
                path = line[4:].strip()
                if path == "/dev/null":
                    is_new_file = True
                else:
                    if path.startswith("a/"):
                        path = path[2:]
                    if current_file is None:
                        current_file = path

            if line.startswith("+++ "):
                path = line[4:].strip()
                if path != "/dev/null":
                    if path.startswith("b/"):
                        path = path[2:]
                    current_file = path

            if current_file and current_file not in files:
                files.append(current_file)

            # Accumulate patch lines
            # We include headers in the patch content for context
            current_patch_lines.append(line)

            hunk = self._HUNK_HEADER.match(line)
            if hunk:
                old_left = 1 if hunk.group(1) is None else int(hunk.group(1))
                new_left = 1 if hunk.group(2) is None else int(hunk.group(2))
            # Fallback counting for lines outside a recognised hunk (e.g.
            # combined "@@@" diffs whose header is not tracked).
            elif line.startswith('+') and not line.startswith('+++'):
                stats["add"] += 1
            elif line.startswith('-') and not line.startswith('---'):
                stats["del"] += 1

        # Add the last patch
        self._flush_patch(file_patches, new_files, current_file, current_patch_lines, is_new_file)

        result = {
            "files": files,
            "new_files": new_files,
            "file_patches": file_patches,
            "stats": stats,
            "change_types": self._classify_change_types(files),
            "raw_diff": diff_content
        }

        # Track how much time this parse took to estimate future savings
        duration_ms = (time.time() - start_time) * 1000
        self.metrics["saved_ms"] = duration_ms  # Proxy for saved time on hit

        self._save_cache(cache_key, result)
        return result
|
core/parser_manager.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Multi-language Parser Manager for DiffSense
|
|
3
|
+
Manages language-specific parsers and provides unified AST signal extraction interface.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import importlib
|
|
8
|
+
from typing import Dict, List, Any, Optional, Set
|
|
9
|
+
from core.rule_base import Rule
|
|
10
|
+
|
|
11
|
+
class ParserManager:
    """
    Registry of per-language parsers with a single entry point for signal extraction.

    Parser modules are discovered on disk at construction time and imported
    dynamically; modules that fail to import are skipped.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        self.config = config or {}
        self.parsers: Dict[str, Any] = {}
        self.supported_languages: Set[str] = set()
        self._load_parsers()

    def _load_parsers(self):
        """Import every module found in the sibling ``parsers`` directory and register its parser."""
        here = os.path.dirname(__file__)
        parser_dir = os.path.join(here, '..', 'parsers')
        if not os.path.exists(parser_dir):
            return

        for entry in os.listdir(parser_dir):
            # Only plain .py modules qualify; dunder files are skipped.
            if not entry.endswith('.py') or entry.startswith('__'):
                continue

            module_name = entry[:-3]  # strip ".py"
            try:
                module = importlib.import_module(f'diffsense.parsers.{module_name}')

                # A module takes part only if it exposes get_parser(config).
                if hasattr(module, 'get_parser'):
                    instance = module.get_parser(self.config)
                    # The parser may name its language; default to the module name.
                    lang = getattr(instance, 'language', module_name)

                    self.parsers[lang] = instance
                    self.supported_languages.add(lang)

            except (ImportError, AttributeError):
                # Skip modules that don't conform to parser interface
                continue

    def extract_signals(self, file_path: str, file_content: str, language: Optional[str] = None) -> List[Any]:
        """
        Run the parser registered for the file's language and return its signals.

        Args:
            file_path: Path to the source file
            file_content: Content of the source file
            language: Optional explicit language; detected from the extension when None

        Returns:
            The parser's signals, or an empty list when no parser is registered
            for the language or the parser raises.
        """
        lang = self._detect_language(file_path) if language is None else language

        if lang not in self.parsers:
            # Unsupported language: nothing to extract.
            return []

        try:
            return self.parsers[lang].extract_signals(file_path, file_content)
        except Exception:
            # Parser failures are swallowed so one bad file cannot crash the run.
            return []

    def _detect_language(self, file_path: str) -> str:
        """Map the lower-cased file extension to a language name ('unknown' if unmapped)."""
        suffix = os.path.splitext(file_path.lower())[1]
        by_extension = {
            '.java': 'java',
            '.go': 'go',
            '.py': 'python',
            '.js': 'javascript',
            '.ts': 'typescript',
            '.cpp': 'cpp',
            '.c': 'c',
            '.cs': 'csharp',
            '.rb': 'ruby',
            '.php': 'php',
            '.scala': 'scala',
            '.kt': 'kotlin',
        }
        return by_extension.get(suffix, 'unknown')

    def get_supported_languages(self) -> Set[str]:
        """Return a copy of the registered language names."""
        return set(self.supported_languages)

    def is_language_supported(self, language: str) -> bool:
        """Tell whether a parser is registered for ``language``."""
        return language in self.supported_languages
|