devguard-core 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 DevGuard Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: devguard-core
3
+ Version: 0.1.2
4
+ Summary: Core analysis engine for DevGuard modules
5
+ Author: DevGuard Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/upendra-manike/developer-problem-solvers
8
+ Project-URL: Repository, https://github.com/upendra-manike/developer-problem-solvers
9
+ Project-URL: Issues, https://github.com/upendra-manike/developer-problem-solvers/issues
10
+ Keywords: static-analysis,security,reliability,ai-code
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Operating System :: OS Independent
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest>=8.0; extra == "dev"
18
+ Requires-Dist: ruff>=0.8.0; extra == "dev"
19
+ Requires-Dist: build>=1.2.0; extra == "dev"
20
+ Dynamic: license-file
21
+
22
+ # devguard-core
23
+
24
+ Shared scanning engine and rule framework for DevGuard modules.
25
+
26
+ ## Features
27
+
28
+ - Rule metadata model (`id`, `severity`, `match_type`, `description`, `fix`)
29
+ - File walker with language detection
30
+ - Built-in checks for common AI-code risks
31
+ - AST-backed Python checks for SQL injection, unsafe deserialization, and hardcoded secrets
32
+ - JSON and SARIF output
33
+ - Baseline input/output for incremental CI rollout
34
+
35
+ ## Quick Run
36
+
37
+ ```bash
38
+ PYTHONPATH=src python -m devguard_core.cli scan ../../examples/sample_insecure.py --format json
39
+ ```
@@ -0,0 +1,18 @@
1
+ # devguard-core
2
+
3
+ Shared scanning engine and rule framework for DevGuard modules.
4
+
5
+ ## Features
6
+
7
+ - Rule metadata model (`id`, `severity`, `match_type`, `description`, `fix`)
8
+ - File walker with language detection
9
+ - Built-in checks for common AI-code risks
10
+ - AST-backed Python checks for SQL injection, unsafe deserialization, and hardcoded secrets
11
+ - JSON and SARIF output
12
+ - Baseline input/output for incremental CI rollout
13
+
14
+ ## Quick Run
15
+
16
+ ```bash
17
+ PYTHONPATH=src python -m devguard_core.cli scan ../../examples/sample_insecure.py --format json
18
+ ```
@@ -0,0 +1,43 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "devguard-core"
7
+ version = "0.1.2"
8
+ description = "Core analysis engine for DevGuard modules"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ authors = [{ name = "DevGuard Contributors" }]
12
+ license = "MIT"
13
+ license-files = ["LICENSE"]
14
+ keywords = ["static-analysis", "security", "reliability", "ai-code"]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Operating System :: OS Independent"
18
+ ]
19
+
20
+ [project.urls]
21
+ Homepage = "https://github.com/upendra-manike/developer-problem-solvers"
22
+ Repository = "https://github.com/upendra-manike/developer-problem-solvers"
23
+ Issues = "https://github.com/upendra-manike/developer-problem-solvers/issues"
24
+
25
+ [project.optional-dependencies]
26
+ dev = ["pytest>=8.0", "ruff>=0.8.0", "build>=1.2.0"]
27
+
28
+ [project.scripts]
29
+ devguard-core = "devguard_core.cli:main"
30
+
31
+ [tool.setuptools]
32
+ package-dir = {"" = "src"}
33
+
34
+ [tool.setuptools.packages.find]
35
+ where = ["src"]
36
+
37
+ [tool.pytest.ini_options]
38
+ addopts = "-q"
39
+ testpaths = ["tests"]
40
+
41
+ [tool.ruff]
42
+ line-length = 100
43
+ target-version = "py310"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,7 @@
1
+ """DevGuard core package."""
2
+
3
+ from .config import ScanOptions
4
+ from .models import Finding, Rule, ScanResult
5
+ from .scanner import scan_path
6
+
7
+ __all__ = ["Rule", "Finding", "ScanResult", "ScanOptions", "scan_path"]
@@ -0,0 +1,150 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from pathlib import Path
5
+
6
+ from .models import Finding
7
+ from .rules import BUILTIN_RULES
8
+
9
+ SQL_PREFIXES = ("SELECT", "INSERT", "UPDATE", "DELETE")
10
+ SECRET_NAMES = {"api_key", "apikey", "secret", "token", "password", "access_token"}
11
+ UNSAFE_DESER_CALLS = {"pickle.loads", "yaml.load", "jsonpickle.decode"}
12
+
13
+
14
def run_python_ast_checks(file_path: Path, text: str) -> tuple[list[Finding], bool]:
    """Run the AST-based Python detectors over *text*.

    Returns ``(findings, parsed)`` where *parsed* is False when the source
    could not be parsed; callers use that flag to fall back to regex checks.
    """
    try:
        tree = ast.parse(text)
    except SyntaxError:
        # Unparseable source: signal the caller to use the regex fallback.
        return [], False

    tainted_sql_vars: set[str] = set()
    collected: list[Finding] = []

    for node in ast.walk(tree):
        # Each detector returns a Finding or None for this node.
        for candidate in (
            _detect_sql_injection(node, file_path, tainted_sql_vars),
            _detect_unsafe_deser(node, file_path),
            _detect_hardcoded_secrets(node, file_path),
        ):
            if candidate is not None:
                collected.append(candidate)

    return collected, True
37
+
38
+
39
def _detect_sql_injection(node: ast.AST, file_path: Path, tainted_sql_vars: set[str]) -> Finding | None:
    """Flag execute/query calls whose SQL is built via string concatenation.

    Simple assignments like ``q = "SELECT ..." + x`` are recorded into
    *tainted_sql_vars* so that a later ``execute(q)`` can also be flagged.
    """
    # Track assignments of concatenated SQL strings to plain names.
    if isinstance(node, ast.Assign):
        targets = node.targets
        if len(targets) == 1 and isinstance(targets[0], ast.Name) and _is_sql_concat(node.value):
            tainted_sql_vars.add(targets[0].id)

    # Only calls with at least one positional argument are interesting.
    if not isinstance(node, ast.Call) or not node.args:
        return None
    if _dotted_name(node.func) not in {"execute", "query", "cursor.execute", "cursor.query"}:
        return None

    arg = node.args[0]
    lineno = getattr(node, "lineno", 1)

    # Case 1: executing a variable previously assigned a concatenated query.
    if isinstance(arg, ast.Name) and arg.id in tainted_sql_vars:
        return _make_finding(
            "DG001",
            file_path,
            lineno,
            "Potential SQL injection pattern found in query execution.",
            0.9,
        )

    # Case 2: the concatenation is built directly inside the call.
    if _is_sql_concat(arg):
        return _make_finding(
            "DG001",
            file_path,
            lineno,
            "Potential SQL injection pattern found in query construction.",
            0.91,
        )

    return None
76
+
77
+
78
def _detect_unsafe_deser(node: ast.AST, file_path: Path) -> Finding | None:
    """Flag calls to known-unsafe deserialization entry points (DG002)."""
    if isinstance(node, ast.Call) and _dotted_name(node.func) in UNSAFE_DESER_CALLS:
        return _make_finding(
            "DG002",
            file_path,
            getattr(node, "lineno", 1),
            "Potential unsafe deserialization call detected.",
            0.92,
        )
    return None
93
+
94
+
95
def _detect_hardcoded_secrets(node: ast.AST, file_path: Path) -> Finding | None:
    """Flag ``secret_name = "literal"`` assignments to secret-like names (DG003)."""
    if not isinstance(node, ast.Assign) or len(node.targets) != 1:
        return None
    target = node.targets[0]
    if not isinstance(target, ast.Name) or target.id.lower() not in SECRET_NAMES:
        return None

    value = node.value
    # Only long string literals count; short ones are likely placeholders.
    looks_like_secret = (
        isinstance(value, ast.Constant)
        and isinstance(value.value, str)
        and len(value.value) >= 8
    )
    if not looks_like_secret:
        return None

    return _make_finding(
        "DG003",
        file_path,
        getattr(node, "lineno", 1),
        "Potential hardcoded secret detected.",
        0.94,
    )
116
+
117
+
118
def _is_sql_concat(node: ast.AST) -> bool:
    """Return True when *node* is a ``+`` expression whose leftmost operand
    is a string literal starting with a SQL keyword.

    Fix: ``"SELECT ... " + a + b`` parses as ``("SELECT ... " + a) + b``,
    so the string literal sits at the bottom of the left spine of nested
    ``BinOp`` nodes. The original only inspected the immediate left operand
    and therefore missed any chain of more than one ``+``.
    """
    if not isinstance(node, ast.BinOp) or not isinstance(node.op, ast.Add):
        return False

    # Walk down the left spine of the + chain to the leftmost operand.
    left = node.left
    while isinstance(left, ast.BinOp) and isinstance(left.op, ast.Add):
        left = left.left

    if not isinstance(left, ast.Constant) or not isinstance(left.value, str):
        return False

    return left.value.strip().upper().startswith(SQL_PREFIXES)
128
+
129
+
130
+ def _dotted_name(node: ast.AST) -> str:
131
+ if isinstance(node, ast.Name):
132
+ return node.id
133
+ if isinstance(node, ast.Attribute):
134
+ left = _dotted_name(node.value)
135
+ return f"{left}.{node.attr}" if left else node.attr
136
+ return ""
137
+
138
+
139
def _make_finding(rule_id: str, file_path: Path, line: int, message: str, confidence: float) -> Finding:
    """Build a python-language Finding from the built-in rule table entry."""
    rule = BUILTIN_RULES[rule_id]
    details = {
        "rule_id": rule.id,
        "severity": rule.severity,
        "file_path": str(file_path),
        "line": line,
        "message": message,
        "recommendation": rule.fix,
        "language": "python",
        "confidence": confidence,
    }
    return Finding(**details)
@@ -0,0 +1,237 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ from .ast_checks import run_python_ast_checks
7
+ from .models import Finding
8
+ from .rules import BUILTIN_RULES
9
+
10
+ SQL_INJECTION_PATTERN = re.compile(r"(?:execute|query)\s*\([^\n]*[\"'][^\"']*[\"']\s*\+", re.IGNORECASE)
11
+ SQL_ASSIGN_CONCAT_PATTERN = re.compile(
12
+ r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*[\"']\s*(SELECT|INSERT|UPDATE|DELETE)\b[^\"']*[\"']\s*\+",
13
+ re.IGNORECASE,
14
+ )
15
+ EXECUTE_VAR_PATTERN = re.compile(r"(?:execute|query)\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)", re.IGNORECASE)
16
+ UNSAFE_DESER_PATTERN = re.compile(
17
+ r"pickle\.loads\(|yaml\.load\(|ObjectInputStream\(|BinaryFormatter|jsonpickle\.decode\(",
18
+ re.IGNORECASE,
19
+ )
20
+ HARDCODED_SECRET_PATTERN = re.compile(
21
+ r"(?i)(api[_-]?key|secret|token|password)\s*[:=]\s*[\"'][A-Za-z0-9_\-\./+=]{8,}[\"']"
22
+ )
23
+ LOOP_HEADER_PATTERN = re.compile(r"^\s*(for|while)\b")
24
+ EXPENSIVE_IN_LOOP_PATTERN = re.compile(r"(re\.compile\(|new\s+Regex\(|json\.loads\(|datetime\.strptime\()")
25
+ ASYNC_DEF_PATTERN = re.compile(r"^\s*async\s+def\b")
26
+ NETWORK_CALL_PATTERN = re.compile(r"\b(requests\.|httpx\.|aiohttp\.|fetch\(|axios\.)")
27
+ TRY_PATTERN = re.compile(r"^\s*try\s*:")
28
+ IGNORE_INLINE_PATTERN = re.compile(r"devguard-ignore\s*:\s*(.+)", re.IGNORECASE)
29
+ IGNORE_NEXT_LINE_PATTERN = re.compile(r"devguard-ignore-next-line\s*:\s*(.+)", re.IGNORECASE)
30
+
31
+
32
+ SUPPORTED_EXTENSIONS = {
33
+ ".py": "python",
34
+ ".js": "javascript",
35
+ ".ts": "typescript",
36
+ ".java": "java",
37
+ ".go": "go",
38
+ ".rs": "rust",
39
+ }
40
+
41
+
42
def detect_language(path: Path) -> str:
    """Map a file extension to its language name, or "unknown" if unsupported."""
    suffix = path.suffix.lower()
    return SUPPORTED_EXTENSIONS.get(suffix, "unknown")
44
+
45
+
46
+ def _make_finding(
47
+ rule_id: str,
48
+ file_path: Path,
49
+ line: int,
50
+ language: str,
51
+ message: str,
52
+ confidence: float,
53
+ ) -> Finding:
54
+ rule = BUILTIN_RULES[rule_id]
55
+ return Finding(
56
+ rule_id=rule.id,
57
+ severity=rule.severity,
58
+ file_path=str(file_path),
59
+ line=line,
60
+ message=message,
61
+ recommendation=rule.fix,
62
+ language=language,
63
+ confidence=confidence,
64
+ )
65
+
66
+
67
def run_builtin_checks(file_path: Path, text: str) -> list[Finding]:
    """Run every built-in detector against *text* and return filtered findings.

    Python sources are analyzed with the AST checks first; the regex line
    scan only runs as a fallback when the source cannot be parsed (or for
    non-Python languages). Loop-allocation and async-network heuristics
    always run, then findings are deduplicated and inline ``devguard-ignore``
    suppressions are applied.
    """
    language = detect_language(file_path)
    findings: list[Finding] = []
    lines = text.splitlines()

    ast_parsed = False
    if language == "python":
        ast_findings, ast_parsed = run_python_ast_checks(file_path, text)
        findings.extend(ast_findings)

    # Fix: the original evaluated the "skip regex for parsed Python" guard
    # once per line inside the loop; hoist it so the scan is skipped wholesale.
    if not (language == "python" and ast_parsed):
        findings.extend(_run_line_regex_checks(file_path, language, lines))

    findings.extend(_detect_expensive_allocations_in_loops(file_path, language, lines))
    findings.extend(_detect_network_calls_without_local_try(file_path, language, lines))
    deduped = _dedupe_findings(findings)
    return _apply_suppressions(deduped, lines)


def _run_line_regex_checks(file_path: Path, language: str, lines: list[str]) -> list[Finding]:
    """Regex fallback scan: SQL injection, unsafe deserialization, secrets."""
    findings: list[Finding] = []
    tainted_sql_vars: set[str] = set()

    for idx, line in enumerate(lines, start=1):
        # Record names assigned a concatenated SQL string for later execute().
        assign_match = SQL_ASSIGN_CONCAT_PATTERN.search(line)
        if assign_match:
            tainted_sql_vars.add(assign_match.group(1))

        if SQL_INJECTION_PATTERN.search(line):
            findings.append(
                _make_finding(
                    "DG001",
                    file_path,
                    idx,
                    language,
                    "Potential SQL injection pattern found in query construction.",
                    0.88,
                )
            )

        exec_match = EXECUTE_VAR_PATTERN.search(line)
        if exec_match and exec_match.group(1) in tainted_sql_vars:
            findings.append(
                _make_finding(
                    "DG001",
                    file_path,
                    idx,
                    language,
                    "Potential SQL injection pattern found in query execution.",
                    0.84,
                )
            )

        if UNSAFE_DESER_PATTERN.search(line):
            findings.append(
                _make_finding(
                    "DG002",
                    file_path,
                    idx,
                    language,
                    "Potential unsafe deserialization call detected.",
                    0.87,
                )
            )

        if HARDCODED_SECRET_PATTERN.search(line):
            findings.append(
                _make_finding(
                    "DG003",
                    file_path,
                    idx,
                    language,
                    "Potential hardcoded secret detected.",
                    0.91,
                )
            )

    return findings
138
+
139
+
140
+ def _dedupe_findings(findings: list[Finding]) -> list[Finding]:
141
+ deduped: list[Finding] = []
142
+ seen: set[tuple[str, str, int, str]] = set()
143
+ for finding in findings:
144
+ key = (finding.rule_id, finding.file_path, finding.line, finding.message)
145
+ if key in seen:
146
+ continue
147
+ seen.add(key)
148
+ deduped.append(finding)
149
+ return deduped
150
+
151
+
152
def _detect_expensive_allocations_in_loops(file_path: Path, language: str, lines: list[str]) -> list[Finding]:
    """Flag the first expensive-looking call in a small window after a loop header."""
    results: list[Finding] = []
    total = len(lines)
    for line_no, text_line in enumerate(lines, start=1):
        if not LOOP_HEADER_PATTERN.search(text_line):
            continue
        # Scan a short window beginning at the loop header itself.
        window_end = min(total, line_no + 6)
        for probe in range(line_no, window_end):
            if EXPENSIVE_IN_LOOP_PATTERN.search(lines[probe - 1]):
                results.append(
                    _make_finding(
                        "DG004",
                        file_path,
                        probe,
                        language,
                        "Potential repeated expensive allocation inside loop.",
                        0.72,
                    )
                )
                break  # report only the first hit per loop window
    return results
171
+
172
+
173
def _detect_network_calls_without_local_try(file_path: Path, language: str, lines: list[str]) -> list[Finding]:
    """Flag the first network call in an ``async def`` body lacking a try block.

    Each ``async def`` header opens a fixed 20-line window; if no ``try:``
    appears anywhere in that window, the first network-library call inside
    it is reported (at most one finding per window).
    """
    findings: list[Finding] = []
    for idx, line in enumerate(lines, start=1):
        if not ASYNC_DEF_PATTERN.search(line):
            continue
        block_end = min(len(lines), idx + 20)
        block = lines[idx - 1:block_end]
        # Fix: the original re-tested the loop-invariant ``has_try`` flag on
        # every line of the window; decide once and skip the scan entirely.
        if any(TRY_PATTERN.search(item) for item in block):
            continue
        for local_idx, block_line in enumerate(block, start=idx):
            if NETWORK_CALL_PATTERN.search(block_line):
                findings.append(
                    _make_finding(
                        "DG005",
                        file_path,
                        local_idx,
                        language,
                        "Async/network call found without local try/except handling.",
                        0.68,
                    )
                )
                break
    return findings
194
+
195
+
196
def _apply_suppressions(findings: list[Finding], lines: list[str]) -> list[Finding]:
    """Remove findings muted by ``devguard-ignore`` comments found in *lines*."""
    line_suppressions, file_suppressions = _collect_suppressions(lines)

    def _is_muted(finding: Finding) -> bool:
        # File-wide suppression beats everything else.
        if "all" in file_suppressions or finding.rule_id in file_suppressions:
            return True
        on_line = line_suppressions.get(finding.line, set())
        return "all" in on_line or finding.rule_id in on_line

    return [finding for finding in findings if not _is_muted(finding)]
207
+
208
+
209
def _collect_suppressions(lines: list[str]) -> tuple[dict[int, set[str]], set[str]]:
    """Parse ``devguard-ignore`` comments out of *lines*.

    Returns ``(per_line, whole_file)``: per-line rule sets keyed by 1-based
    line number, and file-wide suppressions. A ``file`` token anywhere in an
    inline comment mutes the entire file.
    """
    by_line: dict[int, set[str]] = {}
    whole_file: set[str] = set()

    for line_no, raw_line in enumerate(lines, start=1):
        # "devguard-ignore-next-line: ..." suppresses rules on the line below.
        next_line = IGNORE_NEXT_LINE_PATTERN.search(raw_line)
        if next_line:
            by_line.setdefault(line_no + 1, set()).update(_parse_rule_list(next_line.group(1)))

        # "devguard-ignore: ..." suppresses rules on this same line.
        inline = IGNORE_INLINE_PATTERN.search(raw_line)
        if inline:
            rules = _parse_rule_list(inline.group(1))
            by_line.setdefault(line_no, set()).update(rules)
            if "file" in rules:
                whole_file.add("all")

    return by_line, whole_file
227
+
228
+
229
+ def _parse_rule_list(raw: str) -> set[str]:
230
+ # Accept comma or whitespace delimited rule IDs.
231
+ items = [token.strip().upper() for token in re.split(r"[,\s]+", raw.strip()) if token.strip()]
232
+ normalized = set(items)
233
+ if "ALL" in normalized:
234
+ return {"all"}
235
+ if "FILE" in normalized:
236
+ return {"file"}
237
+ return normalized