devguard-core 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ """DevGuard core package."""
2
+
3
+ from .config import ScanOptions
4
+ from .models import Finding, Rule, ScanResult
5
+ from .scanner import scan_path
6
+
7
+ __all__ = ["Rule", "Finding", "ScanResult", "ScanOptions", "scan_path"]
@@ -0,0 +1,150 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from pathlib import Path
5
+
6
+ from .models import Finding
7
+ from .rules import BUILTIN_RULES
8
+
9
+ SQL_PREFIXES = ("SELECT", "INSERT", "UPDATE", "DELETE")
10
+ SECRET_NAMES = {"api_key", "apikey", "secret", "token", "password", "access_token"}
11
+ UNSAFE_DESER_CALLS = {"pickle.loads", "yaml.load", "jsonpickle.decode"}
12
+
13
+
14
def run_python_ast_checks(file_path: Path, text: str) -> tuple[list[Finding], bool]:
    """Run the AST-based Python detectors over *text*.

    Returns (findings, parsed): *parsed* is False when the source does not
    parse as Python, in which case callers fall back to regex line checks.
    """
    try:
        module = ast.parse(text)
    except SyntaxError:
        return [], False

    collected: list[Finding] = []
    tainted: set[str] = set()
    # NOTE(review): ast.walk is breadth-first, so taint-recording assignments
    # are generally seen before the calls that use them, but this is not a
    # strict source-order guarantee — confirm if ordering ever matters.
    for node in ast.walk(module):
        detections = (
            _detect_sql_injection(node, file_path, tainted),
            _detect_unsafe_deser(node, file_path),
            _detect_hardcoded_secrets(node, file_path),
        )
        collected.extend(item for item in detections if item is not None)

    return collected, True
37
+
38
+
39
def _detect_sql_injection(node: ast.AST, file_path: Path, tainted_sql_vars: set[str]) -> Finding | None:
    """Flag execute()/query() calls fed by SQL string concatenation.

    Side effect: simple `var = "SELECT ..." + ...` assignments are recorded
    in *tainted_sql_vars* so later executions of that variable are flagged.
    """
    if isinstance(node, ast.Assign):
        targets = node.targets
        if len(targets) == 1 and isinstance(targets[0], ast.Name) and _is_sql_concat(node.value):
            tainted_sql_vars.add(targets[0].id)

    if not isinstance(node, ast.Call):
        return None
    if _dotted_name(node.func) not in {"execute", "query", "cursor.execute", "cursor.query"}:
        return None
    if not node.args:
        return None

    arg = node.args[0]
    lineno = getattr(node, "lineno", 1)

    # Case 1: executing a previously tainted variable.
    if isinstance(arg, ast.Name) and arg.id in tainted_sql_vars:
        return _make_finding(
            "DG001",
            file_path,
            lineno,
            "Potential SQL injection pattern found in query execution.",
            0.9,
        )

    # Case 2: concatenation built directly inside the call.
    if _is_sql_concat(arg):
        return _make_finding(
            "DG001",
            file_path,
            lineno,
            "Potential SQL injection pattern found in query construction.",
            0.91,
        )

    return None
76
+
77
+
78
def _detect_unsafe_deser(node: ast.AST, file_path: Path) -> Finding | None:
    """Flag calls whose dotted name is a known unsafe deserializer."""
    if isinstance(node, ast.Call) and _dotted_name(node.func) in UNSAFE_DESER_CALLS:
        return _make_finding(
            "DG002",
            file_path,
            getattr(node, "lineno", 1),
            "Potential unsafe deserialization call detected.",
            0.92,
        )
    return None
93
+
94
+
95
def _detect_hardcoded_secrets(node: ast.AST, file_path: Path) -> Finding | None:
    """Flag `secret_name = "literal"` assignments with long string values."""
    if not isinstance(node, ast.Assign) or len(node.targets) != 1:
        return None
    target = node.targets[0]
    if not isinstance(target, ast.Name) or target.id.lower() not in SECRET_NAMES:
        return None

    value = node.value
    # Only string literals of 8+ chars look like real credentials.
    looks_like_secret = (
        isinstance(value, ast.Constant)
        and isinstance(value.value, str)
        and len(value.value) >= 8
    )
    if looks_like_secret:
        return _make_finding(
            "DG003",
            file_path,
            getattr(node, "lineno", 1),
            "Potential hardcoded secret detected.",
            0.94,
        )
    return None
116
+
117
+
118
def _is_sql_concat(node: ast.AST) -> bool:
    """True when *node* is `"<SQL> ..." + <expr>` with a string-literal lhs."""
    if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
        lhs = node.left
        if isinstance(lhs, ast.Constant) and isinstance(lhs.value, str):
            return lhs.value.strip().upper().startswith(SQL_PREFIXES)
    return False
128
+
129
+
130
+ def _dotted_name(node: ast.AST) -> str:
131
+ if isinstance(node, ast.Name):
132
+ return node.id
133
+ if isinstance(node, ast.Attribute):
134
+ left = _dotted_name(node.value)
135
+ return f"{left}.{node.attr}" if left else node.attr
136
+ return ""
137
+
138
+
139
def _make_finding(rule_id: str, file_path: Path, line: int, message: str, confidence: float) -> Finding:
    """Build a Finding for *rule_id*, filling rule metadata from the registry."""
    rule = BUILTIN_RULES[rule_id]
    details = dict(
        rule_id=rule.id,
        severity=rule.severity,
        file_path=str(file_path),
        line=line,
        message=message,
        recommendation=rule.fix,
        language="python",  # AST checks only ever run on Python sources
        confidence=confidence,
    )
    return Finding(**details)
@@ -0,0 +1,237 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ from .ast_checks import run_python_ast_checks
7
+ from .models import Finding
8
+ from .rules import BUILTIN_RULES
9
+
10
+ SQL_INJECTION_PATTERN = re.compile(r"(?:execute|query)\s*\([^\n]*[\"'][^\"']*[\"']\s*\+", re.IGNORECASE)
11
+ SQL_ASSIGN_CONCAT_PATTERN = re.compile(
12
+ r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*[\"']\s*(SELECT|INSERT|UPDATE|DELETE)\b[^\"']*[\"']\s*\+",
13
+ re.IGNORECASE,
14
+ )
15
+ EXECUTE_VAR_PATTERN = re.compile(r"(?:execute|query)\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)", re.IGNORECASE)
16
+ UNSAFE_DESER_PATTERN = re.compile(
17
+ r"pickle\.loads\(|yaml\.load\(|ObjectInputStream\(|BinaryFormatter|jsonpickle\.decode\(",
18
+ re.IGNORECASE,
19
+ )
20
+ HARDCODED_SECRET_PATTERN = re.compile(
21
+ r"(?i)(api[_-]?key|secret|token|password)\s*[:=]\s*[\"'][A-Za-z0-9_\-\./+=]{8,}[\"']"
22
+ )
23
+ LOOP_HEADER_PATTERN = re.compile(r"^\s*(for|while)\b")
24
+ EXPENSIVE_IN_LOOP_PATTERN = re.compile(r"(re\.compile\(|new\s+Regex\(|json\.loads\(|datetime\.strptime\()")
25
+ ASYNC_DEF_PATTERN = re.compile(r"^\s*async\s+def\b")
26
+ NETWORK_CALL_PATTERN = re.compile(r"\b(requests\.|httpx\.|aiohttp\.|fetch\(|axios\.)")
27
+ TRY_PATTERN = re.compile(r"^\s*try\s*:")
28
+ IGNORE_INLINE_PATTERN = re.compile(r"devguard-ignore\s*:\s*(.+)", re.IGNORECASE)
29
+ IGNORE_NEXT_LINE_PATTERN = re.compile(r"devguard-ignore-next-line\s*:\s*(.+)", re.IGNORECASE)
30
+
31
+
32
# Extension -> language label; also consumed by scanner.iter_source_files.
SUPPORTED_EXTENSIONS = {
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".java": "java",
    ".go": "go",
    ".rs": "rust",
}


def detect_language(path: Path) -> str:
    """Map a file extension (case-insensitive) to its language label."""
    suffix = path.suffix.lower()
    return SUPPORTED_EXTENSIONS.get(suffix, "unknown")
44
+
45
+
46
def _make_finding(
    rule_id: str,
    file_path: Path,
    line: int,
    language: str,
    message: str,
    confidence: float,
) -> Finding:
    """Build a Finding for *rule_id* at the given location and language."""
    rule = BUILTIN_RULES[rule_id]
    details = dict(
        rule_id=rule.id,
        severity=rule.severity,
        file_path=str(file_path),
        line=line,
        message=message,
        recommendation=rule.fix,
        language=language,
        confidence=confidence,
    )
    return Finding(**details)
65
+
66
+
67
def run_builtin_checks(file_path: Path, text: str) -> list[Finding]:
    """Run all built-in detectors on one file and return filtered findings.

    Python sources are checked with the AST detectors when they parse; the
    regex line checks then run only as a fallback (unparsable Python) or for
    the other supported languages.  Loop/network heuristics always run.
    Results are de-duplicated and devguard-ignore suppressions applied last.
    """
    language = detect_language(file_path)
    findings: list[Finding] = []
    lines = text.splitlines()

    ast_parsed = False
    if language == "python":
        ast_findings, ast_parsed = run_python_ast_checks(file_path, text)
        findings.extend(ast_findings)

    # FIX: this condition is loop-invariant — the original re-tested it (and
    # iterated every line for nothing) when the AST pass had already run.
    if not (language == "python" and ast_parsed):
        tainted_sql_vars: set[str] = set()
        for idx, line in enumerate(lines, start=1):
            # Track `var = "SELECT ..." + ...` so later execute(var) is flagged.
            assign_match = SQL_ASSIGN_CONCAT_PATTERN.search(line)
            if assign_match:
                tainted_sql_vars.add(assign_match.group(1))

            if SQL_INJECTION_PATTERN.search(line):
                findings.append(
                    _make_finding(
                        "DG001",
                        file_path,
                        idx,
                        language,
                        "Potential SQL injection pattern found in query construction.",
                        0.88,
                    )
                )

            exec_match = EXECUTE_VAR_PATTERN.search(line)
            if exec_match and exec_match.group(1) in tainted_sql_vars:
                findings.append(
                    _make_finding(
                        "DG001",
                        file_path,
                        idx,
                        language,
                        "Potential SQL injection pattern found in query execution.",
                        0.84,
                    )
                )

            if UNSAFE_DESER_PATTERN.search(line):
                findings.append(
                    _make_finding(
                        "DG002",
                        file_path,
                        idx,
                        language,
                        "Potential unsafe deserialization call detected.",
                        0.87,
                    )
                )

            if HARDCODED_SECRET_PATTERN.search(line):
                findings.append(
                    _make_finding(
                        "DG003",
                        file_path,
                        idx,
                        language,
                        "Potential hardcoded secret detected.",
                        0.91,
                    )
                )

    findings.extend(_detect_expensive_allocations_in_loops(file_path, language, lines))
    findings.extend(_detect_network_calls_without_local_try(file_path, language, lines))
    return _apply_suppressions(_dedupe_findings(findings), lines)
138
+
139
+
140
+ def _dedupe_findings(findings: list[Finding]) -> list[Finding]:
141
+ deduped: list[Finding] = []
142
+ seen: set[tuple[str, str, int, str]] = set()
143
+ for finding in findings:
144
+ key = (finding.rule_id, finding.file_path, finding.line, finding.message)
145
+ if key in seen:
146
+ continue
147
+ seen.add(key)
148
+ deduped.append(finding)
149
+ return deduped
150
+
151
+
152
def _detect_expensive_allocations_in_loops(file_path: Path, language: str, lines: list[str]) -> list[Finding]:
    """Flag expensive constructions within a 6-line window of a loop header.

    The window covers the `for`/`while` header line itself plus the next
    five lines; only the first hit per loop header is reported.
    """
    findings: list[Finding] = []
    for idx, line in enumerate(lines, start=1):
        if not LOOP_HEADER_PATTERN.search(line):
            continue
        # BUG FIX: the previous `range(idx, min(len(lines), idx + 6))`
        # stopped one line short when the window was clipped at end-of-file,
        # so a match on the final line of the file was never reported.
        last = min(len(lines), idx + 5)
        for look_ahead in range(idx, last + 1):
            if EXPENSIVE_IN_LOOP_PATTERN.search(lines[look_ahead - 1]):
                findings.append(
                    _make_finding(
                        "DG004",
                        file_path,
                        look_ahead,
                        language,
                        "Potential repeated expensive allocation inside loop.",
                        0.72,
                    )
                )
                break
    return findings
171
+
172
+
173
def _detect_network_calls_without_local_try(file_path: Path, language: str, lines: list[str]) -> list[Finding]:
    """Flag the first network call inside an `async def` block lacking a try.

    The "block" is approximated as the header line plus the following 20
    lines; if any line in that window opens a `try:`, the window is skipped.
    Only the first offending call per window is reported.
    """
    findings: list[Finding] = []
    for idx, line in enumerate(lines, start=1):
        if not ASYNC_DEF_PATTERN.search(line):
            continue
        window_end = min(len(lines), idx + 20)
        window = lines[idx - 1:window_end]
        if any(TRY_PATTERN.search(entry) for entry in window):
            continue
        for offset, window_line in enumerate(window):
            if NETWORK_CALL_PATTERN.search(window_line):
                findings.append(
                    _make_finding(
                        "DG005",
                        file_path,
                        idx + offset,
                        language,
                        "Async/network call found without local try/except handling.",
                        0.68,
                    )
                )
                break
    return findings
194
+
195
+
196
def _apply_suppressions(findings: list[Finding], lines: list[str]) -> list[Finding]:
    """Remove findings muted by devguard-ignore comments in *lines*."""
    line_suppressions, file_suppressions = _collect_suppressions(lines)
    suppress_everything = "all" in file_suppressions

    def _is_muted(finding: Finding) -> bool:
        if suppress_everything or finding.rule_id in file_suppressions:
            return True
        on_line = line_suppressions.get(finding.line, set())
        return "all" in on_line or finding.rule_id in on_line

    return [finding for finding in findings if not _is_muted(finding)]
207
+
208
+
209
def _collect_suppressions(lines: list[str]) -> tuple[dict[int, set[str]], set[str]]:
    """Gather per-line and file-wide rule suppressions from comments.

    `devguard-ignore-next-line: <rules>` mutes the following line;
    `devguard-ignore: <rules>` mutes its own line, and the special token
    `file` promotes the suppression to the whole file.
    """
    per_line: dict[int, set[str]] = {}
    whole_file: set[str] = set()

    for number, content in enumerate(lines, start=1):
        next_line_match = IGNORE_NEXT_LINE_PATTERN.search(content)
        if next_line_match:
            per_line.setdefault(number + 1, set()).update(
                _parse_rule_list(next_line_match.group(1))
            )

        inline_match = IGNORE_INLINE_PATTERN.search(content)
        if inline_match:
            rules = _parse_rule_list(inline_match.group(1))
            per_line.setdefault(number, set()).update(rules)
            if "file" in rules:
                whole_file.add("all")

    return per_line, whole_file
227
+
228
+
229
+ def _parse_rule_list(raw: str) -> set[str]:
230
+ # Accept comma or whitespace delimited rule IDs.
231
+ items = [token.strip().upper() for token in re.split(r"[,\s]+", raw.strip()) if token.strip()]
232
+ normalized = set(items)
233
+ if "ALL" in normalized:
234
+ return {"all"}
235
+ if "FILE" in normalized:
236
+ return {"file"}
237
+ return normalized
devguard_core/cli.py ADDED
@@ -0,0 +1,241 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from .config import ScanOptions
8
+ from .formatters import to_json, to_sarif
9
+ from .models import Finding, ScanResult
10
+ from .scanner import scan_path, scan_targets
11
+
12
+ SEVERITY_RANK = {"low": 1, "medium": 2, "high": 3}
13
+
14
+
15
def build_parser() -> argparse.ArgumentParser:
    """Build the devguard-core CLI parser: one required `scan` subcommand."""
    parser = argparse.ArgumentParser(description="DevGuard core scanner")
    subcommands = parser.add_subparsers(dest="command", required=True)

    scan_cmd = subcommands.add_parser("scan", help="Scan a file or directory")
    add = scan_cmd.add_argument
    add("target", help="Target file or directory")
    add("--file-list", help="Path to newline-delimited file list to scan")
    add("--format", choices=["json", "sarif"], default="json")
    add("--output", help="Output file path (optional)")
    add("--config", help="Path to .devguard.json config file")
    add("--exclude-dir", action="append", help="Directory name to exclude")
    add("--max-file-size-kb", type=int, help="Skip files larger than this size")
    add("--workers", type=int, help="Number of worker threads")
    add("--min-severity", choices=["low", "medium", "high"], help="Minimum severity to report")
    add("--min-confidence", type=float, help="Minimum confidence to report (0.0-1.0)")
    add("--include-rule", action="append", help="Only include matching rule ID (repeatable)")
    add("--exclude-rule", action="append", help="Exclude matching rule ID (repeatable)")
    add("--baseline-in", help="JSON file with known finding fingerprints to suppress")
    add("--baseline-out", help="Write current finding fingerprints to this JSON file")
    return parser
35
+
36
+
37
def main() -> int:
    """CLI entry point.

    Exit codes: 0 = scan ran cleanly with no findings, 1 = findings were
    reported (so the command can gate CI), 2 = no subcommand handled.
    parser.error() raises SystemExit(2) for invalid input.
    """
    parser = build_parser()
    args = parser.parse_args()

    if args.command == "scan":
        target = Path(args.target).resolve()
        if not target.exists():
            parser.error(f"Target does not exist: {target}")

        try:
            config = _load_config(args.config, target)
        except ValueError as exc:
            parser.error(str(exc))

        # Resolve each setting with precedence: CLI flag > config file > default.
        max_file_size_kb = _pick(args.max_file_size_kb, config, "max_file_size_kb", 512)
        workers = _pick(args.workers, config, "workers", ScanOptions().workers)
        min_severity = _pick(args.min_severity, config, "min_severity", "low")
        min_confidence = _pick(args.min_confidence, config, "min_confidence", 0.0)
        exclude_dirs = _pick_list(args.exclude_dir, config, "exclude_dir", [])
        include_rules = _normalized_rule_list(_pick_list(args.include_rule, config, "include_rule", []))
        exclude_rules = _normalized_rule_list(_pick_list(args.exclude_rule, config, "exclude_rule", []))
        baseline_in = _pick(args.baseline_in, config, "baseline_in", None)

        # Validate merged settings (config values bypass argparse's checks).
        if max_file_size_kb <= 0:
            parser.error("--max-file-size-kb must be > 0")
        if workers <= 0:
            parser.error("--workers must be > 0")
        if not (0.0 <= min_confidence <= 1.0):
            parser.error("--min-confidence must be between 0.0 and 1.0")
        overlap = include_rules.intersection(exclude_rules)
        if overlap:
            parser.error(f"Rule IDs cannot be both included and excluded: {sorted(overlap)}")

        options = ScanOptions(
            excluded_dirs=ScanOptions().merged_exclusions(exclude_dirs),
            max_file_size_bytes=max_file_size_kb * 1024,
            workers=workers,
        )
        try:
            scan_list = _load_file_list(args.file_list)
        except ValueError as exc:
            parser.error(str(exc))
        # An explicit --file-list overrides the positional target for scanning.
        if scan_list:
            result = scan_targets(scan_list, options=options)
        else:
            result = scan_path(target, options=options)
        # Paths are made relative to the scan root so baselines are portable.
        baseline_root = target if target.is_dir() else target.parent
        normalized = result.relative_to(baseline_root)

        # Filter order: severity, then confidence, then include/exclude rules.
        severity_filtered = _filter_by_min_severity(normalized, min_severity)
        confidence_filtered = _filter_by_min_confidence(severity_filtered, min_confidence)
        rule_filtered = _filter_by_rules(confidence_filtered, include_rules, exclude_rules)

        try:
            baseline = _load_baseline(baseline_in)
        except ValueError as exc:
            parser.error(str(exc))

        filtered_result = _filter_by_baseline(rule_filtered, baseline)

        # Baseline output is taken before baseline suppression so it always
        # reflects the full current state of the scan.
        if args.baseline_out:
            _write_baseline(args.baseline_out, rule_filtered)

        if args.format == "sarif":
            body = to_sarif(filtered_result)
        else:
            body = to_json(filtered_result)

        if args.output:
            Path(args.output).write_text(body + "\n", encoding="utf-8")
        else:
            print(body)

        # Non-zero exit when findings exist so it can gate CI.
        return 1 if filtered_result.total > 0 else 0

    parser.print_help()
    return 2
115
+
116
+
117
+ def _fingerprint(finding: Finding) -> str:
118
+ return "|".join(
119
+ [
120
+ str(getattr(finding, "rule_id")),
121
+ str(getattr(finding, "file_path")),
122
+ str(getattr(finding, "line")),
123
+ str(getattr(finding, "message")),
124
+ ]
125
+ )
126
+
127
+
128
+ def _load_baseline(path: str | None) -> set[str]:
129
+ if not path:
130
+ return set()
131
+ baseline_file = Path(path)
132
+ if not baseline_file.exists():
133
+ return set()
134
+ try:
135
+ data = json.loads(baseline_file.read_text(encoding="utf-8"))
136
+ except json.JSONDecodeError as exc:
137
+ raise ValueError(f"Invalid baseline JSON in {baseline_file}: {exc.msg}") from exc
138
+ if not isinstance(data, list):
139
+ raise ValueError(f"Invalid baseline format in {baseline_file}: expected a JSON array")
140
+ return {str(item) for item in data}
141
+
142
+
143
def _write_baseline(path: str, result: ScanResult) -> None:
    """Persist the sorted, unique fingerprints of *result* as a JSON array."""
    unique = {_fingerprint(finding) for finding in result.findings}
    body = json.dumps(sorted(unique), indent=2) + "\n"
    Path(path).write_text(body, encoding="utf-8")
146
+
147
+
148
def _filter_by_baseline(result: ScanResult, baseline: set[str]) -> ScanResult:
    """Drop findings whose fingerprint appears in *baseline* (known issues)."""
    if not baseline:
        return result
    kept = [finding for finding in result.findings if _fingerprint(finding) not in baseline]
    return ScanResult(findings=kept)
154
+
155
+
156
def _filter_by_min_severity(result: ScanResult, min_severity: str) -> ScanResult:
    """Keep findings ranked at or above *min_severity* (unknowns rank 0)."""
    threshold = SEVERITY_RANK[min_severity]
    kept = [
        finding
        for finding in result.findings
        if SEVERITY_RANK.get(finding.severity, 0) >= threshold
    ]
    return ScanResult(findings=kept)
160
+
161
+
162
def _filter_by_min_confidence(result: ScanResult, min_confidence: float) -> ScanResult:
    """Drop findings whose confidence is below *min_confidence*."""
    kept = [finding for finding in result.findings if finding.confidence >= min_confidence]
    return ScanResult(findings=kept)
165
+
166
+
167
def _filter_by_rules(result: ScanResult, include_rules: set[str], exclude_rules: set[str]) -> ScanResult:
    """Apply the include allow-list, then the exclude deny-list, by rule ID."""

    def _keeps(finding: Finding) -> bool:
        rule = finding.rule_id.upper()
        if include_rules and rule not in include_rules:
            return False
        return not (exclude_rules and rule in exclude_rules)

    return ScanResult(findings=[f for f in result.findings if _keeps(f)])
174
+
175
+
176
+ def _load_config(path: str | None, target: Path) -> dict:
177
+ config_path: Path | None = None
178
+ if path:
179
+ config_path = Path(path)
180
+ else:
181
+ root = target if target.is_dir() else target.parent
182
+ candidate = root / ".devguard.json"
183
+ if candidate.exists():
184
+ config_path = candidate
185
+
186
+ if config_path is None or not config_path.exists():
187
+ return {}
188
+
189
+ try:
190
+ content = json.loads(config_path.read_text(encoding="utf-8"))
191
+ except json.JSONDecodeError as exc:
192
+ raise ValueError(f"Invalid config JSON in {config_path}: {exc.msg}") from exc
193
+
194
+ if not isinstance(content, dict):
195
+ raise ValueError(f"Invalid config format in {config_path}: expected object")
196
+ return content
197
+
198
+
199
+ def _pick(cli_value, config: dict, key: str, default):
200
+ if cli_value is not None:
201
+ return cli_value
202
+ if key in config:
203
+ return config[key]
204
+ return default
205
+
206
+
207
+ def _pick_list(cli_value, config: dict, key: str, default: list[str]) -> list[str]:
208
+ if cli_value is not None:
209
+ return list(cli_value)
210
+ if key in config:
211
+ value = config[key]
212
+ if isinstance(value, list):
213
+ return [str(item) for item in value]
214
+ return [str(value)]
215
+ return list(default)
216
+
217
+
218
+ def _normalized_rule_list(values: list[str]) -> set[str]:
219
+ return {str(v).strip().upper() for v in values if str(v).strip()}
220
+
221
+
222
+ def _load_file_list(path: str | None) -> list[Path]:
223
+ if not path:
224
+ return []
225
+
226
+ file_list = Path(path)
227
+ if not file_list.exists():
228
+ raise ValueError(f"File list does not exist: {file_list}")
229
+
230
+ items: list[Path] = []
231
+ for raw in file_list.read_text(encoding="utf-8").splitlines():
232
+ line = raw.strip()
233
+ if not line or line.startswith("#"):
234
+ continue
235
+ p = Path(line)
236
+ items.append(p if p.is_absolute() else Path.cwd() / p)
237
+ return items
238
+
239
+
240
+ if __name__ == "__main__":
241
+ raise SystemExit(main())
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass, field
5
+
6
+
7
# Directory names never descended into during directory scans.
DEFAULT_EXCLUDED_DIRS = {
    ".git",
    ".hg",
    ".svn",
    "node_modules",
    "dist",
    "build",
    "venv",
    ".venv",
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
}


@dataclass(frozen=True)
class ScanOptions:
    """Immutable scan configuration with validated limits."""

    # Directory names skipped during traversal (fresh copy per instance).
    excluded_dirs: set[str] = field(default_factory=lambda: set(DEFAULT_EXCLUDED_DIRS))
    # Files larger than this many bytes are skipped entirely.
    max_file_size_bytes: int = 512 * 1024
    # Worker thread count: twice the CPU count, clamped to [1, 32].
    workers: int = max(1, min(32, (os.cpu_count() or 2) * 2))

    def __post_init__(self) -> None:
        """Reject non-positive limits as early as possible."""
        if self.max_file_size_bytes <= 0:
            raise ValueError("max_file_size_bytes must be > 0")
        if self.workers <= 0:
            raise ValueError("workers must be > 0")

    def merged_exclusions(self, extra: list[str] | None) -> set[str]:
        """Return excluded_dirs plus *extra* as a new set (inputs untouched)."""
        combined = set(self.excluded_dirs)
        if extra:
            combined.update(extra)
        return combined
@@ -0,0 +1,88 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from datetime import datetime, timezone
5
+
6
+ from .models import ScanResult
7
+
8
+
9
def to_json(result: ScanResult) -> str:
    """Serialize a ScanResult as pretty-printed JSON (total + findings)."""

    def _encode(finding) -> dict:
        return {
            "rule_id": finding.rule_id,
            "severity": finding.severity,
            "file_path": finding.file_path,
            "line": finding.line,
            "message": finding.message,
            "recommendation": finding.recommendation,
            "language": finding.language,
            "confidence": finding.confidence,
        }

    payload = {
        "total": result.total,
        "findings": [_encode(finding) for finding in result.findings],
    }
    return json.dumps(payload, indent=2)
27
+
28
+
29
def to_sarif(result: ScanResult) -> str:
    """Serialize a ScanResult as a SARIF 2.1.0 log.

    Rule metadata is keyed by rule ID, so the LAST finding for each rule
    supplies its shortDescription/help text (same as the original loop).
    """
    rule_index: dict[str, dict] = {}
    sarif_results: list[dict] = []

    for finding in result.findings:
        rule_index[finding.rule_id] = {
            "id": finding.rule_id,
            "name": finding.rule_id,
            "shortDescription": {"text": finding.message},
            "help": {"text": finding.recommendation},
            "properties": {"severity": finding.severity, "confidence": finding.confidence},
        }
        location = {
            "physicalLocation": {
                "artifactLocation": {"uri": finding.file_path},
                "region": {"startLine": finding.line},
            }
        }
        sarif_results.append(
            {
                "ruleId": finding.rule_id,
                "message": {"text": finding.message},
                "locations": [location],
                "level": _to_sarif_level(finding.severity),
            }
        )

    driver = {
        "name": "devguard-core",
        "version": "0.1.0",
        "informationUri": "https://github.com/upendra-manike/developer-problem-solvers",
        "rules": list(rule_index.values()),
    }
    run = {
        "tool": {"driver": driver},
        "invocations": [
            {
                "executionSuccessful": True,
                "endTimeUtc": datetime.now(timezone.utc).isoformat(),
            }
        ],
        "results": sarif_results,
    }
    payload = {
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "version": "2.1.0",
        "runs": [run],
    }
    return json.dumps(payload, indent=2)
81
+
82
+
83
+ def _to_sarif_level(severity: str) -> str:
84
+ if severity == "high":
85
+ return "error"
86
+ if severity == "medium":
87
+ return "warning"
88
+ return "note"
@@ -0,0 +1,61 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+
7
@dataclass(frozen=True)
class Rule:
    """Static metadata for one built-in detection rule."""

    id: str  # rule identifier, e.g. "DG001"
    severity: str  # "low" | "medium" | "high" (validated on Finding, not here)
    description: str  # human-readable summary of what the rule detects
    fix: str  # remediation text copied into Finding.recommendation
    match_type: str = "regex"  # how the rule matches; "regex" by default
14
+
15
+
16
@dataclass(frozen=True)
class Finding:
    """A single reported issue; immutable and validated on construction.

    Severity must be low/medium/high, confidence must lie in [0.0, 1.0],
    and line numbers are 1-based.
    """

    rule_id: str  # ID of the producing Rule, e.g. "DG001"
    severity: str  # copied from the rule: "low" | "medium" | "high"
    file_path: str  # path as scanned; may be rewritten by ScanResult.relative_to
    line: int  # 1-based line number of the detection
    message: str  # human-readable description of this occurrence
    recommendation: str  # remediation text copied from Rule.fix
    language: str  # detected language label, e.g. "python"
    confidence: float  # detector certainty in [0.0, 1.0]

    def __post_init__(self) -> None:
        # Validate eagerly so malformed findings never propagate downstream.
        if self.severity not in {"low", "medium", "high"}:
            raise ValueError(f"Invalid severity: {self.severity}")
        if not (0.0 <= self.confidence <= 1.0):
            raise ValueError(f"Invalid confidence: {self.confidence}")
        if self.line <= 0:
            raise ValueError(f"Invalid line: {self.line}")
34
+
35
+
36
@dataclass(frozen=True)
class ScanResult:
    """Container for the findings of one scan."""

    findings: list[Finding]

    @property
    def total(self) -> int:
        """Number of findings in this result."""
        return len(self.findings)

    def relative_to(self, root: Path) -> "ScanResult":
        """Return a copy whose absolute paths strictly under *root* become relative.

        Paths outside *root*, equal to it, or already relative are untouched.
        """

        def _rebase(finding: Finding) -> Finding:
            path = Path(finding.file_path)
            if path.is_absolute() and root in path.parents:
                rebased = str(path.relative_to(root))
            else:
                rebased = finding.file_path
            return Finding(
                rule_id=finding.rule_id,
                severity=finding.severity,
                file_path=rebased,
                line=finding.line,
                message=finding.message,
                recommendation=finding.recommendation,
                language=finding.language,
                confidence=finding.confidence,
            )

        return ScanResult(findings=[_rebase(finding) for finding in self.findings])
devguard_core/rules.py ADDED
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ from .models import Rule
4
+
5
+
6
_ALL_RULES: tuple[Rule, ...] = (
    Rule(
        id="DG001",
        severity="high",
        description="Potential SQL injection via string concatenation in query execution.",
        fix="Use parameterized queries/placeholders instead of concatenation.",
    ),
    Rule(
        id="DG002",
        severity="high",
        description="Potential unsafe deserialization call.",
        fix="Use safe loaders/whitelists and validate input before deserialization.",
    ),
    Rule(
        id="DG003",
        severity="high",
        description="Potential hardcoded secret in source code.",
        fix="Move secrets to environment variables or a secrets manager.",
    ),
    Rule(
        id="DG004",
        severity="medium",
        description="Potential inefficient object creation inside hot loops.",
        fix="Move expensive allocation/compilation outside loops or cache it.",
    ),
    Rule(
        id="DG005",
        severity="medium",
        description="Network/async call without local error handling.",
        fix="Wrap risky network calls in try/except and handle expected failures.",
    ),
)

# Registry keyed by rule ID; looked up by the _make_finding helpers.
BUILTIN_RULES: dict[str, Rule] = {rule.id: rule for rule in _ALL_RULES}
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+
3
+ from concurrent.futures import ThreadPoolExecutor
4
+ from pathlib import Path
5
+
6
+ from .checks import SUPPORTED_EXTENSIONS, run_builtin_checks
7
+ from .config import ScanOptions
8
+ from .models import ScanResult
9
+
10
+
11
def iter_source_files(path: Path, options: ScanOptions) -> list[Path]:
    """Collect scannable files under *path*, honoring size and dir exclusions.

    A single file is returned as-is (size-checked only); a directory is
    walked once per supported extension, skipping excluded directory names
    and symlinks.  Results are sorted and de-duplicated.
    """

    def _fits(candidate: Path) -> bool:
        # Unreadable stat (races, permissions) means the file is skipped.
        try:
            return candidate.stat().st_size <= options.max_file_size_bytes
        except OSError:
            return False

    if path.is_file():
        return [path] if _fits(path) else []

    selected: set[Path] = set()
    for ext in SUPPORTED_EXTENSIONS:
        for candidate in path.rglob(f"*{ext}"):
            if any(part in options.excluded_dirs for part in candidate.parts):
                continue
            if candidate.is_symlink():
                continue
            if _fits(candidate):
                selected.add(candidate)
    return sorted(selected)
35
+
36
+
37
def scan_path(path: Path, options: ScanOptions | None = None) -> ScanResult:
    """Scan one file or directory; thin convenience wrapper over scan_targets."""
    return scan_targets([path], options=options)
39
+
40
+
41
def scan_targets(paths: list[Path], options: ScanOptions | None = None) -> ScanResult:
    """Scan many targets concurrently and return sorted findings.

    Files are processed in a thread pool; findings are ordered by
    (path, line, rule, message) for deterministic output.
    """
    opts = options or ScanOptions()
    targets = _expand_paths(paths, opts)

    collected = []
    with ThreadPoolExecutor(max_workers=opts.workers) as pool:
        for per_file in pool.map(_scan_file, targets):
            collected.extend(per_file)

    ordered = sorted(collected, key=lambda f: (f.file_path, f.line, f.rule_id, f.message))
    return ScanResult(findings=ordered)
52
+
53
+
54
def _scan_file(file: Path) -> list:
    """Read one file as UTF-8 and run the built-in checks on it.

    Binary/mis-encoded files and transient read errors yield no findings
    rather than aborting the whole scan.
    """
    try:
        source = file.read_text(encoding="utf-8")
    except (UnicodeDecodeError, OSError):
        return []
    return run_builtin_checks(file, source)
61
+
62
+
63
def _expand_paths(paths: list[Path], options: ScanOptions) -> list[Path]:
    """Normalize a mixed list of files/dirs into sorted, size-checked files.

    Missing entries and files with unsupported extensions are dropped;
    directories are expanded via iter_source_files.
    """
    collected: set[Path] = set()
    known_exts = set(SUPPORTED_EXTENSIONS)
    for entry in paths:
        if not entry.exists():
            continue
        if entry.is_dir():
            collected.update(iter_source_files(entry, options))
        elif entry.suffix.lower() in known_exts:
            try:
                if entry.stat().st_size <= options.max_file_size_bytes:
                    collected.add(entry)
            except OSError:
                # Unreadable stat: skip the entry, keep scanning the rest.
                continue
    return sorted(collected)
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: devguard-core
3
+ Version: 0.1.2
4
+ Summary: Core analysis engine for DevGuard modules
5
+ Author: DevGuard Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/upendra-manike/developer-problem-solvers
8
+ Project-URL: Repository, https://github.com/upendra-manike/developer-problem-solvers
9
+ Project-URL: Issues, https://github.com/upendra-manike/developer-problem-solvers/issues
10
+ Keywords: static-analysis,security,reliability,ai-code
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Operating System :: OS Independent
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest>=8.0; extra == "dev"
18
+ Requires-Dist: ruff>=0.8.0; extra == "dev"
19
+ Requires-Dist: build>=1.2.0; extra == "dev"
20
+ Dynamic: license-file
21
+
22
+ # devguard-core
23
+
24
+ Shared scanning engine and rule framework for DevGuard modules.
25
+
26
+ ## Features
27
+
28
+ - Rule metadata model (`id`, `severity`, `match_type`, `description`, `fix`)
29
+ - File walker with language detection
30
+ - Built-in checks for common AI-code risks
31
+ - AST-backed Python checks for SQL injection, unsafe deserialization, and hardcoded secrets
32
+ - JSON and SARIF output
33
+ - Baseline input/output for incremental CI rollout
34
+
35
+ ## Quick Run
36
+
37
+ ```bash
38
+ PYTHONPATH=src python -m devguard_core.cli scan ../../examples/sample_insecure.py --format json
39
+ ```
@@ -0,0 +1,15 @@
1
+ devguard_core/__init__.py,sha256=s26MH9BH9DeL426cRkAd78CCFhfsUgmAHotZoNMYKkU,212
2
+ devguard_core/ast_checks.py,sha256=mDVWEUCMN3vi6ZelDlfyPa9TLF9GKd44qrznG5m9J-4,4333
3
+ devguard_core/checks.py,sha256=bFd_y1l_nSmxze8ZbYicsEzKDfIlxRGxiHH-xU6PxhA,8331
4
+ devguard_core/cli.py,sha256=5VNEHu0nKOYgee_Ou9iRtzG_z2KPfNeY8CCjv6M1oKw,9160
5
+ devguard_core/config.py,sha256=gTRbvw9-eISqO7QGZE_J0dagLoxy__UYbWeQKyj_C10,972
6
+ devguard_core/formatters.py,sha256=WbOpp0rru5WnqBQeiMmo5Idj7BO4EENsgCvsqU0PyWI,2656
7
+ devguard_core/models.py,sha256=6S4aPnwH3HsaBHzh1eDSQJerVJvsD9eaTpL1qukLUDw,1674
8
+ devguard_core/rules.py,sha256=YuP9ZQcJDvU0bsG87z10ig2f5aJlaela1ms3OlqwCkA,1235
9
+ devguard_core/scanner.py,sha256=3P1eBYt8kkZUa7IshioL2LwJZnWkK4e1KwaYSIs5ROA,2518
10
+ devguard_core-0.1.2.dist-info/licenses/LICENSE,sha256=ocTW19_cq2E_BQfxThjsFR0yQXqXrs40F0bj6fwP_X0,1078
11
+ devguard_core-0.1.2.dist-info/METADATA,sha256=LqgyPJS2vUVfL9I8qFK84PLpxA0UoKrW94nPQXYBPpw,1370
12
+ devguard_core-0.1.2.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
13
+ devguard_core-0.1.2.dist-info/entry_points.txt,sha256=V--mfplD7ykBZoYKTqE6k5w0YuNrmAR98yDi3PsemPA,57
14
+ devguard_core-0.1.2.dist-info/top_level.txt,sha256=lTH7LXvLfAgltM_HreHh-BeDUt8qqDa1QczqClFkrl0,14
15
+ devguard_core-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ devguard-core = devguard_core.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 DevGuard Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ devguard_core