devguard-core 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ """DevGuard core package."""
2
+
3
+ from .config import ScanOptions
4
+ from .models import Finding, Rule, ScanResult
5
+ from .scanner import scan_path
6
+
7
+ __all__ = ["Rule", "Finding", "ScanResult", "ScanOptions", "scan_path"]
@@ -0,0 +1,150 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from pathlib import Path
5
+
6
+ from .models import Finding
7
+ from .rules import BUILTIN_RULES
8
+
9
+ SQL_PREFIXES = ("SELECT", "INSERT", "UPDATE", "DELETE")
10
+ SECRET_NAMES = {"api_key", "apikey", "secret", "token", "password", "access_token"}
11
+ UNSAFE_DESER_CALLS = {"pickle.loads", "yaml.load", "jsonpickle.decode"}
12
+
13
+
14
def run_python_ast_checks(file_path: Path, text: str) -> tuple[list[Finding], bool]:
    """Run the AST-based Python detectors over *text*.

    Returns (findings, parsed): *parsed* is False when the source does not
    parse as Python, in which case callers fall back to regex line checks.
    """
    try:
        module = ast.parse(text)
    except SyntaxError:
        return [], False

    collected: list[Finding] = []
    tainted: set[str] = set()
    # NOTE(review): ast.walk is breadth-first, so taint-recording assignments
    # are generally seen before the calls that use them, but this is not a
    # strict source-order guarantee — confirm if ordering ever matters.
    for node in ast.walk(module):
        detections = (
            _detect_sql_injection(node, file_path, tainted),
            _detect_unsafe_deser(node, file_path),
            _detect_hardcoded_secrets(node, file_path),
        )
        collected.extend(item for item in detections if item is not None)

    return collected, True
37
+
38
+
39
def _detect_sql_injection(node: ast.AST, file_path: Path, tainted_sql_vars: set[str]) -> Finding | None:
    """Flag execute()/query() calls fed by SQL string concatenation.

    Side effect: simple `var = "SELECT ..." + ...` assignments are recorded
    in *tainted_sql_vars* so later executions of that variable are flagged.
    """
    if isinstance(node, ast.Assign):
        targets = node.targets
        if len(targets) == 1 and isinstance(targets[0], ast.Name) and _is_sql_concat(node.value):
            tainted_sql_vars.add(targets[0].id)

    if not isinstance(node, ast.Call):
        return None
    if _dotted_name(node.func) not in {"execute", "query", "cursor.execute", "cursor.query"}:
        return None
    if not node.args:
        return None

    arg = node.args[0]
    lineno = getattr(node, "lineno", 1)

    # Case 1: executing a previously tainted variable.
    if isinstance(arg, ast.Name) and arg.id in tainted_sql_vars:
        return _make_finding(
            "DG001",
            file_path,
            lineno,
            "Potential SQL injection pattern found in query execution.",
            0.9,
        )

    # Case 2: concatenation built directly inside the call.
    if _is_sql_concat(arg):
        return _make_finding(
            "DG001",
            file_path,
            lineno,
            "Potential SQL injection pattern found in query construction.",
            0.91,
        )

    return None
76
+
77
+
78
def _detect_unsafe_deser(node: ast.AST, file_path: Path) -> Finding | None:
    """Flag calls whose dotted name is a known unsafe deserializer."""
    if isinstance(node, ast.Call) and _dotted_name(node.func) in UNSAFE_DESER_CALLS:
        return _make_finding(
            "DG002",
            file_path,
            getattr(node, "lineno", 1),
            "Potential unsafe deserialization call detected.",
            0.92,
        )
    return None
93
+
94
+
95
def _detect_hardcoded_secrets(node: ast.AST, file_path: Path) -> Finding | None:
    """Flag `secret_name = "literal"` assignments with long string values."""
    if not isinstance(node, ast.Assign) or len(node.targets) != 1:
        return None
    target = node.targets[0]
    if not isinstance(target, ast.Name) or target.id.lower() not in SECRET_NAMES:
        return None

    value = node.value
    # Only string literals of 8+ chars look like real credentials.
    looks_like_secret = (
        isinstance(value, ast.Constant)
        and isinstance(value.value, str)
        and len(value.value) >= 8
    )
    if looks_like_secret:
        return _make_finding(
            "DG003",
            file_path,
            getattr(node, "lineno", 1),
            "Potential hardcoded secret detected.",
            0.94,
        )
    return None
116
+
117
+
118
def _is_sql_concat(node: ast.AST) -> bool:
    """True when *node* is `"<SQL> ..." + <expr>` with a string-literal lhs."""
    if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
        lhs = node.left
        if isinstance(lhs, ast.Constant) and isinstance(lhs.value, str):
            return lhs.value.strip().upper().startswith(SQL_PREFIXES)
    return False
128
+
129
+
130
+ def _dotted_name(node: ast.AST) -> str:
131
+ if isinstance(node, ast.Name):
132
+ return node.id
133
+ if isinstance(node, ast.Attribute):
134
+ left = _dotted_name(node.value)
135
+ return f"{left}.{node.attr}" if left else node.attr
136
+ return ""
137
+
138
+
139
def _make_finding(rule_id: str, file_path: Path, line: int, message: str, confidence: float) -> Finding:
    """Build a Finding for *rule_id*, filling rule metadata from the registry."""
    rule = BUILTIN_RULES[rule_id]
    details = dict(
        rule_id=rule.id,
        severity=rule.severity,
        file_path=str(file_path),
        line=line,
        message=message,
        recommendation=rule.fix,
        language="python",  # AST checks only ever run on Python sources
        confidence=confidence,
    )
    return Finding(**details)
@@ -0,0 +1,237 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ from .ast_checks import run_python_ast_checks
7
+ from .models import Finding
8
+ from .rules import BUILTIN_RULES
9
+
10
+ SQL_INJECTION_PATTERN = re.compile(r"(?:execute|query)\s*\([^\n]*[\"'][^\"']*[\"']\s*\+", re.IGNORECASE)
11
+ SQL_ASSIGN_CONCAT_PATTERN = re.compile(
12
+ r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*[\"']\s*(SELECT|INSERT|UPDATE|DELETE)\b[^\"']*[\"']\s*\+",
13
+ re.IGNORECASE,
14
+ )
15
+ EXECUTE_VAR_PATTERN = re.compile(r"(?:execute|query)\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)", re.IGNORECASE)
16
+ UNSAFE_DESER_PATTERN = re.compile(
17
+ r"pickle\.loads\(|yaml\.load\(|ObjectInputStream\(|BinaryFormatter|jsonpickle\.decode\(",
18
+ re.IGNORECASE,
19
+ )
20
+ HARDCODED_SECRET_PATTERN = re.compile(
21
+ r"(?i)(api[_-]?key|secret|token|password)\s*[:=]\s*[\"'][A-Za-z0-9_\-\./+=]{8,}[\"']"
22
+ )
23
+ LOOP_HEADER_PATTERN = re.compile(r"^\s*(for|while)\b")
24
+ EXPENSIVE_IN_LOOP_PATTERN = re.compile(r"(re\.compile\(|new\s+Regex\(|json\.loads\(|datetime\.strptime\()")
25
+ ASYNC_DEF_PATTERN = re.compile(r"^\s*async\s+def\b")
26
+ NETWORK_CALL_PATTERN = re.compile(r"\b(requests\.|httpx\.|aiohttp\.|fetch\(|axios\.)")
27
+ TRY_PATTERN = re.compile(r"^\s*try\s*:")
28
+ IGNORE_INLINE_PATTERN = re.compile(r"devguard-ignore\s*:\s*(.+)", re.IGNORECASE)
29
+ IGNORE_NEXT_LINE_PATTERN = re.compile(r"devguard-ignore-next-line\s*:\s*(.+)", re.IGNORECASE)
30
+
31
+
32
# Extension -> language label; also consumed by scanner.iter_source_files.
SUPPORTED_EXTENSIONS = {
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".java": "java",
    ".go": "go",
    ".rs": "rust",
}


def detect_language(path: Path) -> str:
    """Map a file extension (case-insensitive) to its language label."""
    suffix = path.suffix.lower()
    return SUPPORTED_EXTENSIONS.get(suffix, "unknown")
44
+
45
+
46
def _make_finding(
    rule_id: str,
    file_path: Path,
    line: int,
    language: str,
    message: str,
    confidence: float,
) -> Finding:
    """Build a Finding for *rule_id* at the given location and language."""
    rule = BUILTIN_RULES[rule_id]
    details = dict(
        rule_id=rule.id,
        severity=rule.severity,
        file_path=str(file_path),
        line=line,
        message=message,
        recommendation=rule.fix,
        language=language,
        confidence=confidence,
    )
    return Finding(**details)
65
+
66
+
67
def run_builtin_checks(file_path: Path, text: str) -> list[Finding]:
    """Run all built-in detectors on one file and return filtered findings.

    Python sources are checked with the AST detectors when they parse; the
    regex line checks then run only as a fallback (unparsable Python) or for
    the other supported languages.  Loop/network heuristics always run.
    Results are de-duplicated and devguard-ignore suppressions applied last.
    """
    language = detect_language(file_path)
    findings: list[Finding] = []
    lines = text.splitlines()

    ast_parsed = False
    if language == "python":
        ast_findings, ast_parsed = run_python_ast_checks(file_path, text)
        findings.extend(ast_findings)

    # FIX: this condition is loop-invariant — the original re-tested it (and
    # iterated every line for nothing) when the AST pass had already run.
    if not (language == "python" and ast_parsed):
        tainted_sql_vars: set[str] = set()
        for idx, line in enumerate(lines, start=1):
            # Track `var = "SELECT ..." + ...` so later execute(var) is flagged.
            assign_match = SQL_ASSIGN_CONCAT_PATTERN.search(line)
            if assign_match:
                tainted_sql_vars.add(assign_match.group(1))

            if SQL_INJECTION_PATTERN.search(line):
                findings.append(
                    _make_finding(
                        "DG001",
                        file_path,
                        idx,
                        language,
                        "Potential SQL injection pattern found in query construction.",
                        0.88,
                    )
                )

            exec_match = EXECUTE_VAR_PATTERN.search(line)
            if exec_match and exec_match.group(1) in tainted_sql_vars:
                findings.append(
                    _make_finding(
                        "DG001",
                        file_path,
                        idx,
                        language,
                        "Potential SQL injection pattern found in query execution.",
                        0.84,
                    )
                )

            if UNSAFE_DESER_PATTERN.search(line):
                findings.append(
                    _make_finding(
                        "DG002",
                        file_path,
                        idx,
                        language,
                        "Potential unsafe deserialization call detected.",
                        0.87,
                    )
                )

            if HARDCODED_SECRET_PATTERN.search(line):
                findings.append(
                    _make_finding(
                        "DG003",
                        file_path,
                        idx,
                        language,
                        "Potential hardcoded secret detected.",
                        0.91,
                    )
                )

    findings.extend(_detect_expensive_allocations_in_loops(file_path, language, lines))
    findings.extend(_detect_network_calls_without_local_try(file_path, language, lines))
    return _apply_suppressions(_dedupe_findings(findings), lines)
138
+
139
+
140
+ def _dedupe_findings(findings: list[Finding]) -> list[Finding]:
141
+ deduped: list[Finding] = []
142
+ seen: set[tuple[str, str, int, str]] = set()
143
+ for finding in findings:
144
+ key = (finding.rule_id, finding.file_path, finding.line, finding.message)
145
+ if key in seen:
146
+ continue
147
+ seen.add(key)
148
+ deduped.append(finding)
149
+ return deduped
150
+
151
+
152
def _detect_expensive_allocations_in_loops(file_path: Path, language: str, lines: list[str]) -> list[Finding]:
    """Flag expensive constructions within a 6-line window of a loop header.

    The window covers the `for`/`while` header line itself plus the next
    five lines; only the first hit per loop header is reported.
    """
    findings: list[Finding] = []
    for idx, line in enumerate(lines, start=1):
        if not LOOP_HEADER_PATTERN.search(line):
            continue
        # BUG FIX: the previous `range(idx, min(len(lines), idx + 6))`
        # stopped one line short when the window was clipped at end-of-file,
        # so a match on the final line of the file was never reported.
        last = min(len(lines), idx + 5)
        for look_ahead in range(idx, last + 1):
            if EXPENSIVE_IN_LOOP_PATTERN.search(lines[look_ahead - 1]):
                findings.append(
                    _make_finding(
                        "DG004",
                        file_path,
                        look_ahead,
                        language,
                        "Potential repeated expensive allocation inside loop.",
                        0.72,
                    )
                )
                break
    return findings
171
+
172
+
173
def _detect_network_calls_without_local_try(file_path: Path, language: str, lines: list[str]) -> list[Finding]:
    """Flag the first network call inside an `async def` block lacking a try.

    The "block" is approximated as the header line plus the following 20
    lines; if any line in that window opens a `try:`, the window is skipped.
    Only the first offending call per window is reported.
    """
    findings: list[Finding] = []
    for idx, line in enumerate(lines, start=1):
        if not ASYNC_DEF_PATTERN.search(line):
            continue
        window_end = min(len(lines), idx + 20)
        window = lines[idx - 1:window_end]
        if any(TRY_PATTERN.search(entry) for entry in window):
            continue
        for offset, window_line in enumerate(window):
            if NETWORK_CALL_PATTERN.search(window_line):
                findings.append(
                    _make_finding(
                        "DG005",
                        file_path,
                        idx + offset,
                        language,
                        "Async/network call found without local try/except handling.",
                        0.68,
                    )
                )
                break
    return findings
194
+
195
+
196
def _apply_suppressions(findings: list[Finding], lines: list[str]) -> list[Finding]:
    """Remove findings muted by devguard-ignore comments in *lines*."""
    line_suppressions, file_suppressions = _collect_suppressions(lines)
    suppress_everything = "all" in file_suppressions

    def _is_muted(finding: Finding) -> bool:
        if suppress_everything or finding.rule_id in file_suppressions:
            return True
        on_line = line_suppressions.get(finding.line, set())
        return "all" in on_line or finding.rule_id in on_line

    return [finding for finding in findings if not _is_muted(finding)]
207
+
208
+
209
def _collect_suppressions(lines: list[str]) -> tuple[dict[int, set[str]], set[str]]:
    """Gather per-line and file-wide rule suppressions from comments.

    `devguard-ignore-next-line: <rules>` mutes the following line;
    `devguard-ignore: <rules>` mutes its own line, and the special token
    `file` promotes the suppression to the whole file.
    """
    per_line: dict[int, set[str]] = {}
    whole_file: set[str] = set()

    for number, content in enumerate(lines, start=1):
        next_line_match = IGNORE_NEXT_LINE_PATTERN.search(content)
        if next_line_match:
            per_line.setdefault(number + 1, set()).update(
                _parse_rule_list(next_line_match.group(1))
            )

        inline_match = IGNORE_INLINE_PATTERN.search(content)
        if inline_match:
            rules = _parse_rule_list(inline_match.group(1))
            per_line.setdefault(number, set()).update(rules)
            if "file" in rules:
                whole_file.add("all")

    return per_line, whole_file
227
+
228
+
229
+ def _parse_rule_list(raw: str) -> set[str]:
230
+ # Accept comma or whitespace delimited rule IDs.
231
+ items = [token.strip().upper() for token in re.split(r"[,\s]+", raw.strip()) if token.strip()]
232
+ normalized = set(items)
233
+ if "ALL" in normalized:
234
+ return {"all"}
235
+ if "FILE" in normalized:
236
+ return {"file"}
237
+ return normalized
devguard_core/cli.py ADDED
@@ -0,0 +1,241 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from .config import ScanOptions
8
+ from .formatters import to_json, to_sarif
9
+ from .models import Finding, ScanResult
10
+ from .scanner import scan_path, scan_targets
11
+
12
+ SEVERITY_RANK = {"low": 1, "medium": 2, "high": 3}
13
+
14
+
15
def build_parser() -> argparse.ArgumentParser:
    """Build the devguard-core CLI parser: one required `scan` subcommand."""
    parser = argparse.ArgumentParser(description="DevGuard core scanner")
    subcommands = parser.add_subparsers(dest="command", required=True)

    scan_cmd = subcommands.add_parser("scan", help="Scan a file or directory")
    add = scan_cmd.add_argument
    add("target", help="Target file or directory")
    add("--file-list", help="Path to newline-delimited file list to scan")
    add("--format", choices=["json", "sarif"], default="json")
    add("--output", help="Output file path (optional)")
    add("--config", help="Path to .devguard.json config file")
    add("--exclude-dir", action="append", help="Directory name to exclude")
    add("--max-file-size-kb", type=int, help="Skip files larger than this size")
    add("--workers", type=int, help="Number of worker threads")
    add("--min-severity", choices=["low", "medium", "high"], help="Minimum severity to report")
    add("--min-confidence", type=float, help="Minimum confidence to report (0.0-1.0)")
    add("--include-rule", action="append", help="Only include matching rule ID (repeatable)")
    add("--exclude-rule", action="append", help="Exclude matching rule ID (repeatable)")
    add("--baseline-in", help="JSON file with known finding fingerprints to suppress")
    add("--baseline-out", help="Write current finding fingerprints to this JSON file")
    return parser
35
+
36
+
37
def main() -> int:
    """CLI entry point.

    Exit codes: 0 = scan ran cleanly with no findings, 1 = findings were
    reported (so the command can gate CI), 2 = no subcommand handled.
    parser.error() raises SystemExit(2) for invalid input.
    """
    parser = build_parser()
    args = parser.parse_args()

    if args.command == "scan":
        target = Path(args.target).resolve()
        if not target.exists():
            parser.error(f"Target does not exist: {target}")

        try:
            config = _load_config(args.config, target)
        except ValueError as exc:
            parser.error(str(exc))

        # Resolve each setting with precedence: CLI flag > config file > default.
        max_file_size_kb = _pick(args.max_file_size_kb, config, "max_file_size_kb", 512)
        workers = _pick(args.workers, config, "workers", ScanOptions().workers)
        min_severity = _pick(args.min_severity, config, "min_severity", "low")
        min_confidence = _pick(args.min_confidence, config, "min_confidence", 0.0)
        exclude_dirs = _pick_list(args.exclude_dir, config, "exclude_dir", [])
        include_rules = _normalized_rule_list(_pick_list(args.include_rule, config, "include_rule", []))
        exclude_rules = _normalized_rule_list(_pick_list(args.exclude_rule, config, "exclude_rule", []))
        baseline_in = _pick(args.baseline_in, config, "baseline_in", None)

        # Validate merged settings (config values bypass argparse's checks).
        if max_file_size_kb <= 0:
            parser.error("--max-file-size-kb must be > 0")
        if workers <= 0:
            parser.error("--workers must be > 0")
        if not (0.0 <= min_confidence <= 1.0):
            parser.error("--min-confidence must be between 0.0 and 1.0")
        overlap = include_rules.intersection(exclude_rules)
        if overlap:
            parser.error(f"Rule IDs cannot be both included and excluded: {sorted(overlap)}")

        options = ScanOptions(
            excluded_dirs=ScanOptions().merged_exclusions(exclude_dirs),
            max_file_size_bytes=max_file_size_kb * 1024,
            workers=workers,
        )
        try:
            scan_list = _load_file_list(args.file_list)
        except ValueError as exc:
            parser.error(str(exc))
        # An explicit --file-list overrides the positional target for scanning.
        if scan_list:
            result = scan_targets(scan_list, options=options)
        else:
            result = scan_path(target, options=options)
        # Paths are made relative to the scan root so baselines are portable.
        baseline_root = target if target.is_dir() else target.parent
        normalized = result.relative_to(baseline_root)

        # Filter order: severity, then confidence, then include/exclude rules.
        severity_filtered = _filter_by_min_severity(normalized, min_severity)
        confidence_filtered = _filter_by_min_confidence(severity_filtered, min_confidence)
        rule_filtered = _filter_by_rules(confidence_filtered, include_rules, exclude_rules)

        try:
            baseline = _load_baseline(baseline_in)
        except ValueError as exc:
            parser.error(str(exc))

        filtered_result = _filter_by_baseline(rule_filtered, baseline)

        # Baseline output is taken before baseline suppression so it always
        # reflects the full current state of the scan.
        if args.baseline_out:
            _write_baseline(args.baseline_out, rule_filtered)

        if args.format == "sarif":
            body = to_sarif(filtered_result)
        else:
            body = to_json(filtered_result)

        if args.output:
            Path(args.output).write_text(body + "\n", encoding="utf-8")
        else:
            print(body)

        # Non-zero exit when findings exist so it can gate CI.
        return 1 if filtered_result.total > 0 else 0

    parser.print_help()
    return 2
115
+
116
+
117
+ def _fingerprint(finding: Finding) -> str:
118
+ return "|".join(
119
+ [
120
+ str(getattr(finding, "rule_id")),
121
+ str(getattr(finding, "file_path")),
122
+ str(getattr(finding, "line")),
123
+ str(getattr(finding, "message")),
124
+ ]
125
+ )
126
+
127
+
128
+ def _load_baseline(path: str | None) -> set[str]:
129
+ if not path:
130
+ return set()
131
+ baseline_file = Path(path)
132
+ if not baseline_file.exists():
133
+ return set()
134
+ try:
135
+ data = json.loads(baseline_file.read_text(encoding="utf-8"))
136
+ except json.JSONDecodeError as exc:
137
+ raise ValueError(f"Invalid baseline JSON in {baseline_file}: {exc.msg}") from exc
138
+ if not isinstance(data, list):
139
+ raise ValueError(f"Invalid baseline format in {baseline_file}: expected a JSON array")
140
+ return {str(item) for item in data}
141
+
142
+
143
def _write_baseline(path: str, result: ScanResult) -> None:
    """Persist the sorted, unique fingerprints of *result* as a JSON array."""
    unique = {_fingerprint(finding) for finding in result.findings}
    body = json.dumps(sorted(unique), indent=2) + "\n"
    Path(path).write_text(body, encoding="utf-8")
146
+
147
+
148
def _filter_by_baseline(result: ScanResult, baseline: set[str]) -> ScanResult:
    """Drop findings whose fingerprint appears in *baseline* (known issues)."""
    if not baseline:
        return result
    kept = [finding for finding in result.findings if _fingerprint(finding) not in baseline]
    return ScanResult(findings=kept)
154
+
155
+
156
def _filter_by_min_severity(result: ScanResult, min_severity: str) -> ScanResult:
    """Keep findings ranked at or above *min_severity* (unknowns rank 0)."""
    threshold = SEVERITY_RANK[min_severity]
    kept = [
        finding
        for finding in result.findings
        if SEVERITY_RANK.get(finding.severity, 0) >= threshold
    ]
    return ScanResult(findings=kept)
160
+
161
+
162
def _filter_by_min_confidence(result: ScanResult, min_confidence: float) -> ScanResult:
    """Drop findings whose confidence is below *min_confidence*."""
    kept = [finding for finding in result.findings if finding.confidence >= min_confidence]
    return ScanResult(findings=kept)
165
+
166
+
167
def _filter_by_rules(result: ScanResult, include_rules: set[str], exclude_rules: set[str]) -> ScanResult:
    """Apply the include allow-list, then the exclude deny-list, by rule ID."""

    def _keeps(finding: Finding) -> bool:
        rule = finding.rule_id.upper()
        if include_rules and rule not in include_rules:
            return False
        return not (exclude_rules and rule in exclude_rules)

    return ScanResult(findings=[f for f in result.findings if _keeps(f)])
174
+
175
+
176
+ def _load_config(path: str | None, target: Path) -> dict:
177
+ config_path: Path | None = None
178
+ if path:
179
+ config_path = Path(path)
180
+ else:
181
+ root = target if target.is_dir() else target.parent
182
+ candidate = root / ".devguard.json"
183
+ if candidate.exists():
184
+ config_path = candidate
185
+
186
+ if config_path is None or not config_path.exists():
187
+ return {}
188
+
189
+ try:
190
+ content = json.loads(config_path.read_text(encoding="utf-8"))
191
+ except json.JSONDecodeError as exc:
192
+ raise ValueError(f"Invalid config JSON in {config_path}: {exc.msg}") from exc
193
+
194
+ if not isinstance(content, dict):
195
+ raise ValueError(f"Invalid config format in {config_path}: expected object")
196
+ return content
197
+
198
+
199
+ def _pick(cli_value, config: dict, key: str, default):
200
+ if cli_value is not None:
201
+ return cli_value
202
+ if key in config:
203
+ return config[key]
204
+ return default
205
+
206
+
207
+ def _pick_list(cli_value, config: dict, key: str, default: list[str]) -> list[str]:
208
+ if cli_value is not None:
209
+ return list(cli_value)
210
+ if key in config:
211
+ value = config[key]
212
+ if isinstance(value, list):
213
+ return [str(item) for item in value]
214
+ return [str(value)]
215
+ return list(default)
216
+
217
+
218
+ def _normalized_rule_list(values: list[str]) -> set[str]:
219
+ return {str(v).strip().upper() for v in values if str(v).strip()}
220
+
221
+
222
+ def _load_file_list(path: str | None) -> list[Path]:
223
+ if not path:
224
+ return []
225
+
226
+ file_list = Path(path)
227
+ if not file_list.exists():
228
+ raise ValueError(f"File list does not exist: {file_list}")
229
+
230
+ items: list[Path] = []
231
+ for raw in file_list.read_text(encoding="utf-8").splitlines():
232
+ line = raw.strip()
233
+ if not line or line.startswith("#"):
234
+ continue
235
+ p = Path(line)
236
+ items.append(p if p.is_absolute() else Path.cwd() / p)
237
+ return items
238
+
239
+
240
+ if __name__ == "__main__":
241
+ raise SystemExit(main())
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass, field
5
+
6
+
7
# Directory names never descended into during directory scans.
DEFAULT_EXCLUDED_DIRS = {
    ".git",
    ".hg",
    ".svn",
    "node_modules",
    "dist",
    "build",
    "venv",
    ".venv",
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
}


@dataclass(frozen=True)
class ScanOptions:
    """Immutable scan configuration with validated limits."""

    # Directory names skipped during traversal (fresh copy per instance).
    excluded_dirs: set[str] = field(default_factory=lambda: set(DEFAULT_EXCLUDED_DIRS))
    # Files larger than this many bytes are skipped entirely.
    max_file_size_bytes: int = 512 * 1024
    # Worker thread count: twice the CPU count, clamped to [1, 32].
    workers: int = max(1, min(32, (os.cpu_count() or 2) * 2))

    def __post_init__(self) -> None:
        """Reject non-positive limits as early as possible."""
        if self.max_file_size_bytes <= 0:
            raise ValueError("max_file_size_bytes must be > 0")
        if self.workers <= 0:
            raise ValueError("workers must be > 0")

    def merged_exclusions(self, extra: list[str] | None) -> set[str]:
        """Return excluded_dirs plus *extra* as a new set (inputs untouched)."""
        combined = set(self.excluded_dirs)
        if extra:
            combined.update(extra)
        return combined
@@ -0,0 +1,88 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from datetime import datetime, timezone
5
+
6
+ from .models import ScanResult
7
+
8
+
9
def to_json(result: ScanResult) -> str:
    """Serialize a ScanResult as pretty-printed JSON (total + findings)."""

    def _encode(finding) -> dict:
        return {
            "rule_id": finding.rule_id,
            "severity": finding.severity,
            "file_path": finding.file_path,
            "line": finding.line,
            "message": finding.message,
            "recommendation": finding.recommendation,
            "language": finding.language,
            "confidence": finding.confidence,
        }

    payload = {
        "total": result.total,
        "findings": [_encode(finding) for finding in result.findings],
    }
    return json.dumps(payload, indent=2)
27
+
28
+
29
def to_sarif(result: ScanResult) -> str:
    """Serialize a ScanResult as a SARIF 2.1.0 log.

    Rule metadata is keyed by rule ID, so the LAST finding for each rule
    supplies its shortDescription/help text (same as the original loop).
    """
    rule_index: dict[str, dict] = {}
    sarif_results: list[dict] = []

    for finding in result.findings:
        rule_index[finding.rule_id] = {
            "id": finding.rule_id,
            "name": finding.rule_id,
            "shortDescription": {"text": finding.message},
            "help": {"text": finding.recommendation},
            "properties": {"severity": finding.severity, "confidence": finding.confidence},
        }
        location = {
            "physicalLocation": {
                "artifactLocation": {"uri": finding.file_path},
                "region": {"startLine": finding.line},
            }
        }
        sarif_results.append(
            {
                "ruleId": finding.rule_id,
                "message": {"text": finding.message},
                "locations": [location],
                "level": _to_sarif_level(finding.severity),
            }
        )

    driver = {
        "name": "devguard-core",
        "version": "0.1.0",
        "informationUri": "https://github.com/upendra-manike/developer-problem-solvers",
        "rules": list(rule_index.values()),
    }
    run = {
        "tool": {"driver": driver},
        "invocations": [
            {
                "executionSuccessful": True,
                "endTimeUtc": datetime.now(timezone.utc).isoformat(),
            }
        ],
        "results": sarif_results,
    }
    payload = {
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "version": "2.1.0",
        "runs": [run],
    }
    return json.dumps(payload, indent=2)
81
+
82
+
83
+ def _to_sarif_level(severity: str) -> str:
84
+ if severity == "high":
85
+ return "error"
86
+ if severity == "medium":
87
+ return "warning"
88
+ return "note"
@@ -0,0 +1,61 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+
7
@dataclass(frozen=True)
class Rule:
    """Static metadata for one built-in detection rule."""

    id: str  # rule identifier, e.g. "DG001"
    severity: str  # "low" | "medium" | "high" (validated on Finding, not here)
    description: str  # human-readable summary of what the rule detects
    fix: str  # remediation text copied into Finding.recommendation
    match_type: str = "regex"  # how the rule matches; "regex" by default
14
+
15
+
16
@dataclass(frozen=True)
class Finding:
    """A single reported issue; immutable and validated on construction.

    Severity must be low/medium/high, confidence must lie in [0.0, 1.0],
    and line numbers are 1-based.
    """

    rule_id: str  # ID of the producing Rule, e.g. "DG001"
    severity: str  # copied from the rule: "low" | "medium" | "high"
    file_path: str  # path as scanned; may be rewritten by ScanResult.relative_to
    line: int  # 1-based line number of the detection
    message: str  # human-readable description of this occurrence
    recommendation: str  # remediation text copied from Rule.fix
    language: str  # detected language label, e.g. "python"
    confidence: float  # detector certainty in [0.0, 1.0]

    def __post_init__(self) -> None:
        # Validate eagerly so malformed findings never propagate downstream.
        if self.severity not in {"low", "medium", "high"}:
            raise ValueError(f"Invalid severity: {self.severity}")
        if not (0.0 <= self.confidence <= 1.0):
            raise ValueError(f"Invalid confidence: {self.confidence}")
        if self.line <= 0:
            raise ValueError(f"Invalid line: {self.line}")
34
+
35
+
36
@dataclass(frozen=True)
class ScanResult:
    """Container for the findings of one scan."""

    findings: list[Finding]

    @property
    def total(self) -> int:
        """Number of findings in this result."""
        return len(self.findings)

    def relative_to(self, root: Path) -> "ScanResult":
        """Return a copy whose absolute paths strictly under *root* become relative.

        Paths outside *root*, equal to it, or already relative are untouched.
        """

        def _rebase(finding: Finding) -> Finding:
            path = Path(finding.file_path)
            if path.is_absolute() and root in path.parents:
                rebased = str(path.relative_to(root))
            else:
                rebased = finding.file_path
            return Finding(
                rule_id=finding.rule_id,
                severity=finding.severity,
                file_path=rebased,
                line=finding.line,
                message=finding.message,
                recommendation=finding.recommendation,
                language=finding.language,
                confidence=finding.confidence,
            )

        return ScanResult(findings=[_rebase(finding) for finding in self.findings])
devguard_core/rules.py ADDED
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ from .models import Rule
4
+
5
+
6
_ALL_RULES: tuple[Rule, ...] = (
    Rule(
        id="DG001",
        severity="high",
        description="Potential SQL injection via string concatenation in query execution.",
        fix="Use parameterized queries/placeholders instead of concatenation.",
    ),
    Rule(
        id="DG002",
        severity="high",
        description="Potential unsafe deserialization call.",
        fix="Use safe loaders/whitelists and validate input before deserialization.",
    ),
    Rule(
        id="DG003",
        severity="high",
        description="Potential hardcoded secret in source code.",
        fix="Move secrets to environment variables or a secrets manager.",
    ),
    Rule(
        id="DG004",
        severity="medium",
        description="Potential inefficient object creation inside hot loops.",
        fix="Move expensive allocation/compilation outside loops or cache it.",
    ),
    Rule(
        id="DG005",
        severity="medium",
        description="Network/async call without local error handling.",
        fix="Wrap risky network calls in try/except and handle expected failures.",
    ),
)

# Registry keyed by rule ID; looked up by the _make_finding helpers.
BUILTIN_RULES: dict[str, Rule] = {rule.id: rule for rule in _ALL_RULES}
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+
3
+ from concurrent.futures import ThreadPoolExecutor
4
+ from pathlib import Path
5
+
6
+ from .checks import SUPPORTED_EXTENSIONS, run_builtin_checks
7
+ from .config import ScanOptions
8
+ from .models import ScanResult
9
+
10
+
11
def iter_source_files(path: Path, options: ScanOptions) -> list[Path]:
    """Collect scannable files under *path*, honoring size and dir exclusions.

    A single file is returned as-is (size-checked only); a directory is
    walked once per supported extension, skipping excluded directory names
    and symlinks.  Results are sorted and de-duplicated.
    """

    def _fits(candidate: Path) -> bool:
        # Unreadable stat (races, permissions) means the file is skipped.
        try:
            return candidate.stat().st_size <= options.max_file_size_bytes
        except OSError:
            return False

    if path.is_file():
        return [path] if _fits(path) else []

    selected: set[Path] = set()
    for ext in SUPPORTED_EXTENSIONS:
        for candidate in path.rglob(f"*{ext}"):
            if any(part in options.excluded_dirs for part in candidate.parts):
                continue
            if candidate.is_symlink():
                continue
            if _fits(candidate):
                selected.add(candidate)
    return sorted(selected)
35
+
36
+
37
def scan_path(path: Path, options: ScanOptions | None = None) -> ScanResult:
    """Scan one file or directory; thin convenience wrapper over scan_targets."""
    return scan_targets([path], options=options)
39
+
40
+
41
def scan_targets(paths: list[Path], options: ScanOptions | None = None) -> ScanResult:
    """Scan many targets concurrently and return sorted findings.

    Files are processed in a thread pool; findings are ordered by
    (path, line, rule, message) for deterministic output.
    """
    opts = options or ScanOptions()
    targets = _expand_paths(paths, opts)

    collected = []
    with ThreadPoolExecutor(max_workers=opts.workers) as pool:
        for per_file in pool.map(_scan_file, targets):
            collected.extend(per_file)

    ordered = sorted(collected, key=lambda f: (f.file_path, f.line, f.rule_id, f.message))
    return ScanResult(findings=ordered)
52
+
53
+
54
def _scan_file(file: Path) -> list:
    """Read one file as UTF-8 and run the built-in checks on it.

    Binary/mis-encoded files and transient read errors yield no findings
    rather than aborting the whole scan.
    """
    try:
        source = file.read_text(encoding="utf-8")
    except (UnicodeDecodeError, OSError):
        return []
    return run_builtin_checks(file, source)
61
+
62
+
63
def _expand_paths(paths: list[Path], options: ScanOptions) -> list[Path]:
    """Normalize a mixed list of files/dirs into sorted, size-checked files.

    Missing entries and files with unsupported extensions are dropped;
    directories are expanded via iter_source_files.
    """
    collected: set[Path] = set()
    known_exts = set(SUPPORTED_EXTENSIONS)
    for entry in paths:
        if not entry.exists():
            continue
        if entry.is_dir():
            collected.update(iter_source_files(entry, options))
        elif entry.suffix.lower() in known_exts:
            try:
                if entry.stat().st_size <= options.max_file_size_bytes:
                    collected.add(entry)
            except OSError:
                # Unreadable stat: skip the entry, keep scanning the rest.
                continue
    return sorted(collected)
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: devguard-core
3
+ Version: 0.1.2
4
+ Summary: Core analysis engine for DevGuard modules
5
+ Author: DevGuard Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/upendra-manike/developer-problem-solvers
8
+ Project-URL: Repository, https://github.com/upendra-manike/developer-problem-solvers
9
+ Project-URL: Issues, https://github.com/upendra-manike/developer-problem-solvers/issues
10
+ Keywords: static-analysis,security,reliability,ai-code
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Operating System :: OS Independent
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest>=8.0; extra == "dev"
18
+ Requires-Dist: ruff>=0.8.0; extra == "dev"
19
+ Requires-Dist: build>=1.2.0; extra == "dev"
20
+ Dynamic: license-file
21
+
22
+ # devguard-core
23
+
24
+ Shared scanning engine and rule framework for DevGuard modules.
25
+
26
+ ## Features
27
+
28
+ - Rule metadata model (`id`, `severity`, `match_type`, `description`, `fix`)
29
+ - File walker with language detection
30
+ - Built-in checks for common AI-code risks
31
+ - AST-backed Python checks for SQL injection, unsafe deserialization, and hardcoded secrets
32
+ - JSON and SARIF output
33
+ - Baseline input/output for incremental CI rollout
34
+
35
+ ## Quick Run
36
+
37
+ ```bash
38
+ PYTHONPATH=src python -m devguard_core.cli scan ../../examples/sample_insecure.py --format json
39
+ ```
@@ -0,0 +1,15 @@
1
+ devguard_core/__init__.py,sha256=s26MH9BH9DeL426cRkAd78CCFhfsUgmAHotZoNMYKkU,212
2
+ devguard_core/ast_checks.py,sha256=mDVWEUCMN3vi6ZelDlfyPa9TLF9GKd44qrznG5m9J-4,4333
3
+ devguard_core/checks.py,sha256=bFd_y1l_nSmxze8ZbYicsEzKDfIlxRGxiHH-xU6PxhA,8331
4
+ devguard_core/cli.py,sha256=5VNEHu0nKOYgee_Ou9iRtzG_z2KPfNeY8CCjv6M1oKw,9160
5
+ devguard_core/config.py,sha256=gTRbvw9-eISqO7QGZE_J0dagLoxy__UYbWeQKyj_C10,972
6
+ devguard_core/formatters.py,sha256=WbOpp0rru5WnqBQeiMmo5Idj7BO4EENsgCvsqU0PyWI,2656
7
+ devguard_core/models.py,sha256=6S4aPnwH3HsaBHzh1eDSQJerVJvsD9eaTpL1qukLUDw,1674
8
+ devguard_core/rules.py,sha256=YuP9ZQcJDvU0bsG87z10ig2f5aJlaela1ms3OlqwCkA,1235
9
+ devguard_core/scanner.py,sha256=3P1eBYt8kkZUa7IshioL2LwJZnWkK4e1KwaYSIs5ROA,2518
10
+ devguard_core-0.1.2.dist-info/licenses/LICENSE,sha256=ocTW19_cq2E_BQfxThjsFR0yQXqXrs40F0bj6fwP_X0,1078
11
+ devguard_core-0.1.2.dist-info/METADATA,sha256=LqgyPJS2vUVfL9I8qFK84PLpxA0UoKrW94nPQXYBPpw,1370
12
+ devguard_core-0.1.2.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
13
+ devguard_core-0.1.2.dist-info/entry_points.txt,sha256=V--mfplD7ykBZoYKTqE6k5w0YuNrmAR98yDi3PsemPA,57
14
+ devguard_core-0.1.2.dist-info/top_level.txt,sha256=lTH7LXvLfAgltM_HreHh-BeDUt8qqDa1QczqClFkrl0,14
15
+ devguard_core-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ devguard-core = devguard_core.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 DevGuard Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ devguard_core