PyPI - ragsec - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ragsec 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

ragguard/__init__.py +3 -0
ragguard/cli.py +86 -0
ragguard/engine.py +51 -0
ragguard/finding.py +15 -0
ragguard/report/__init__.py +0 -0
ragguard/report/html.py +32 -0
ragguard/report/markdown.py +61 -0
ragguard/report/template.html +164 -0
ragguard/scanners/__init__.py +25 -0
ragguard/scanners/auth_gaps.py +77 -0
ragguard/scanners/base.py +16 -0
ragguard/scanners/filter_injection.py +77 -0
ragguard/scanners/nosql_injection.py +68 -0
ragguard/scanners/resource_safety.py +80 -0
ragguard/scanners/secret_logging.py +65 -0
ragguard/scanners/sql_injection.py +80 -0
ragsec-0.1.0.dist-info/METADATA +108 -0
ragsec-0.1.0.dist-info/RECORD +20 -0
ragsec-0.1.0.dist-info/WHEEL +4 -0
ragsec-0.1.0.dist-info/entry_points.txt +2 -0

ragguard/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""RAGGuard -- static security scanner for RAG pipelines."""
+__version__ = "0.1.0"

ragguard/cli.py ADDED Viewed

@@ -0,0 +1,86 @@
+import os
+import click
+from rich.console import Console
+from rich.table import Table
+from ragguard.engine import run_scan
+from ragguard.report.html import write_html_report
+from ragguard.report.markdown import write_markdown_report
+from ragguard.scanners import ALL_SCANNERS
+console = Console()
+# Root click command group; subcommands (such as `scan`) attach to it via @main.command().
+# The docstring below doubles as the CLI help text, so it is kept verbatim.
+@click.group()
+@click.version_option()
+def main():
+    """RAGGuard -- static security scanner for RAG pipelines."""
+@main.command()
+@click.argument("target", type=click.Path(exists=True))
+@click.option("--output", "-o", type=click.Path(), help="Output file path (auto-detects format from extension).")
+@click.option(
+    "--format",
+    "fmt",
+    type=click.Choice(["markdown", "html", "terminal"]),
+    default=None,
+    help="Report format. Defaults to the --output extension, or 'terminal' when it cannot be inferred.",
+)
+@click.option(
+    "--severity", type=click.Choice(["high", "medium", "low"], case_sensitive=False), help="Filter by severity."
+)
+@click.option("--category", help="Filter by category (e.g. filter-injection, nosql-injection).")
+def scan(target: str, output: str | None, fmt: str | None, severity: str | None, category: str | None):
+    """Scan a codebase for RAG security vulnerabilities."""
+    # NOTE: the docstring above is the click help text; implementation notes live in comments.
+    #
+    # Format resolution:
+    #   1. an explicit --format always wins;
+    #   2. otherwise the --output extension decides (.html -> html, .md -> markdown);
+    #   3. otherwise the findings are printed to the terminal.
+    # Previously --format defaulted to "terminal", which made the extension auto-detection
+    # unreachable and let the file extension silently override an explicit --format.
+    target = os.path.abspath(target)
+    console.print(f"\n[bold blue]RAGGuard[/] scanning [cyan]{target}[/]\n")
+    scanners = [cls() for cls in ALL_SCANNERS]
+    findings = run_scan(target, scanners, severity_filter=severity, category_filter=category)
+    if fmt is None:
+        if output and output.endswith(".html"):
+            fmt = "html"
+        elif output and output.endswith(".md"):
+            fmt = "markdown"
+        else:
+            fmt = "terminal"
+    if fmt == "html":
+        path = output or "ragguard-report.html"
+        write_html_report(findings, target, path)
+        console.print(f"\n[green]HTML report written to {path}[/]")
+    elif fmt == "markdown":
+        path = output or "ragguard-report.md"
+        write_markdown_report(findings, target, path)
+        console.print(f"\n[green]Markdown report written to {path}[/]")
+    else:
+        if output:
+            # Terminal output never writes a file; say so instead of dropping --output silently.
+            console.print("[yellow]--output is ignored when the report format is 'terminal'.[/]")
+        _print_terminal(findings, target)
+    # The severity summary table is printed regardless of the chosen report format.
+    _print_summary(findings)
+def _print_terminal(findings: list, target: str):
+    """Print each finding to the console, or a "no findings" notice when the list is empty.
+
+    Args:
+        findings: Finding objects to display (id, severity, title, file_path,
+            line_number, description and code_snippet are read).
+        target: Scanned path; accepted for signature compatibility, not used here.
+    """
+    # Local import keeps this block self-contained; rich is already a dependency of this module.
+    from rich.markup import escape
+
+    if not findings:
+        console.print("[green]No findings.[/]")
+        return
+    sev_colors = {"HIGH": "red", "MEDIUM": "yellow", "LOW": "blue"}
+    for f in findings:
+        sev_color = sev_colors.get(f.severity, "white")
+        # Titles, paths, descriptions and snippets come from the scanned code base. They are
+        # escaped so text such as `payload[key]` or `[/x]` is shown literally instead of being
+        # parsed (and swallowed, or rejected with MarkupError) as Rich console markup.
+        console.print(f"\n[bold {sev_color}]{f.id} [{f.severity}][/] {escape(f.title)}")
+        console.print(f"  [dim]{escape(f.file_path)}:{f.line_number}[/]")
+        console.print(f"  {escape(f.description)}")
+        if f.code_snippet:
+            # Snippets are trimmed to 120 characters to keep the listing compact.
+            console.print(f"  [dim]> {escape(f.code_snippet.strip()[:120])}[/]")
+def _print_summary(findings: list):
+    """Render a Rich table with the number of findings per severity plus the total."""
+    tally = {
+        level: sum(1 for item in findings if item.severity == level)
+        for level in ("HIGH", "MEDIUM", "LOW")
+    }
+    console.print()
+    summary = Table(title="Summary", show_header=True)
+    summary.add_column("Severity", style="bold")
+    summary.add_column("Count", justify="right")
+    # One row per severity, in descending order of importance, then the grand total.
+    for label, level in (("[red]HIGH[/]", "HIGH"), ("[yellow]MEDIUM[/]", "MEDIUM"), ("[blue]LOW[/]", "LOW")):
+        summary.add_row(label, str(tally[level]))
+    summary.add_row("[bold]Total[/]", f"[bold]{len(findings)}[/]")
+    console.print(summary)

ragguard/engine.py ADDED Viewed

@@ -0,0 +1,51 @@
+import os
+from pathlib import Path
+from ragguard.finding import Finding
+from ragguard.scanners.base import BaseScanner
+def discover_python_files(root: str) -> list[str]:
+    """Return the sorted paths of the ``.py`` files to scan under *root*.
+
+    Directories named ``__pycache__``, ``.git``, ``node_modules``, ``.venv`` or
+    ``venv`` are not descended into. Names are compared exactly, so look-alikes
+    such as ``.github`` or ``convenient`` are still scanned, and a *root* that
+    itself lives below e.g. ``venv`` is not silently skipped.
+
+    Args:
+        root: Directory to walk, or a single file.
+
+    Returns:
+        Sorted list of file paths (each joined onto *root*). A single ``.py``
+        file passed as *root* yields ``[root]``; any other file yields ``[]``.
+    """
+    skip_dirs = {"__pycache__", ".git", "node_modules", ".venv", "venv"}
+    # os.walk() yields nothing for a non-directory, so handle a file target explicitly.
+    if os.path.isfile(root):
+        return [root] if root.endswith(".py") else []
+    files = []
+    for dirpath, dirnames, filenames in os.walk(root):
+        # Prune in place: os.walk (top-down) then never enters the skipped trees at all.
+        dirnames[:] = [d for d in dirnames if d not in skip_dirs]
+        files.extend(os.path.join(dirpath, f) for f in filenames if f.endswith(".py"))
+    return sorted(files)
+def run_scan(
+    target: str,
+    scanners: list[BaseScanner],
+    severity_filter: str | None = None,
+    category_filter: str | None = None,
+) -> list[Finding]:
+    """Run the scanners over every Python file under *target* and collect their findings.
+
+    Args:
+        target: Directory (or single file) to scan.
+        scanners: Scanner instances; each is asked to ``scan_file`` every discovered file.
+        severity_filter: When given, keep only findings of this severity (case-insensitive).
+        category_filter: When given, run only scanners whose ``category`` equals it.
+
+    Returns:
+        Findings sorted by severity (HIGH, MEDIUM, LOW, then anything else), file path
+        and line number. IDs ``RG-001``, ``RG-002``, ... are assigned in that order.
+    """
+    root = os.path.abspath(target)
+    # Paths are reported relative to the scanned directory; for a single-file target
+    # use its parent so the relative path is the file name rather than ".".
+    base = root if os.path.isdir(root) else os.path.dirname(root)
+    wanted_severity = severity_filter.lower() if severity_filter else None
+    # The category filter does not depend on the file, so apply it once up front.
+    active = [s for s in scanners if not category_filter or s.category == category_filter]
+    findings: list[Finding] = []
+    for file_path in discover_python_files(root):
+        try:
+            content = Path(file_path).read_text(encoding="utf-8", errors="replace")
+        except OSError:
+            # Best effort: unreadable files are skipped rather than aborting the scan.
+            continue
+        lines = content.splitlines()
+        rel_path = os.path.relpath(file_path, base)
+        for scanner in active:
+            for finding in scanner.scan_file(rel_path, content, lines):
+                if wanted_severity and finding.severity.lower() != wanted_severity:
+                    continue
+                findings.append(finding)
+    findings.sort(key=lambda f: ({"HIGH": 0, "MEDIUM": 1, "LOW": 2}.get(f.severity, 3), f.file_path, f.line_number))
+    # Number after sorting so the IDs read sequentially in every report.
+    for counter, finding in enumerate(findings, 1):
+        finding.id = f"RG-{counter:03d}"
+    return findings

ragguard/finding.py ADDED Viewed

@@ -0,0 +1,15 @@
+from dataclasses import dataclass
+@dataclass
+class Finding:
+    """A single issue reported by a scanner for one line of one file."""
+    # Report identifier such as "RG-001"; scanners create findings with "" and
+    # the engine fills it in.
+    id: str
+    # "HIGH", "MEDIUM" or "LOW" (the values the scanners and reports use).
+    severity: str
+    # Category slug of the scanner that produced the finding, e.g. "auth-gaps".
+    category: str
+    # Short headline shown in reports.
+    title: str
+    # Path of the offending file as passed to the scanner.
+    file_path: str
+    # 1-based line number of the offending line.
+    line_number: int
+    # The offending source line (scanners pass it stripped of surrounding whitespace).
+    code_snippet: str
+    # Explanation of the risk.
+    description: str
+    # Suggested fix.
+    remediation: str
+    # Optional CWE identifier such as "CWE-89".
+    cwe_id: str | None = None

ragguard/report/__init__.py ADDED Viewed

File without changes

ragguard/report/html.py ADDED Viewed

@@ -0,0 +1,32 @@
+import html
+from datetime import datetime, timezone
+from pathlib import Path
+from jinja2 import Template
+from ragguard.finding import Finding
+_TEMPLATE_PATH = Path(__file__).parent / "template.html"
+def write_html_report(findings: list[Finding], target: str, output_path: str) -> None:
+    """Render the findings into the bundled HTML template and write the result to disk.
+
+    Args:
+        findings: Findings to include in the report.
+        target: Scanned path, shown in the report header.
+        output_path: File to write (UTF-8); an existing file is overwritten.
+
+    Raises:
+        OSError: If the template cannot be read or *output_path* cannot be written.
+    """
+    high = sum(1 for f in findings if f.severity == "HIGH")
+    med = sum(1 for f in findings if f.severity == "MEDIUM")
+    low = sum(1 for f in findings if f.severity == "LOW")
+    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
+    template_str = _TEMPLATE_PATH.read_text(encoding="utf-8")
+    # Titles, paths, descriptions and remediation text derive from the scanned code base.
+    # Autoescaping makes every {{ ... }} expression HTML-safe, not only the ones that carry
+    # an explicit `| e` filter in the template (the filter's output is not escaped twice).
+    template = Template(template_str, autoescape=True)
+    rendered = template.render(
+        # Passed raw: autoescaping handles it, and pre-escaping would double-escape.
+        target=target,
+        timestamp=timestamp,
+        total=len(findings),
+        high=high,
+        medium=med,
+        low=low,
+        findings=findings,
+    )
+    with open(output_path, "w", encoding="utf-8") as fp:
+        fp.write(rendered)

ragguard/report/markdown.py ADDED Viewed

@@ -0,0 +1,61 @@
+from datetime import datetime, timezone
+from ragguard.finding import Finding
+def write_markdown_report(findings: list[Finding], target: str, output_path: str) -> None:
+    """Write a Markdown report (header, severity summary table, one section per finding).
+
+    Args:
+        findings: Findings to include in the report.
+        target: Scanned path, shown in the report header.
+        output_path: File to write (UTF-8); an existing file is overwritten.
+    """
+    per_level = {
+        level: sum(1 for item in findings if item.severity == level)
+        for level in ("HIGH", "MEDIUM", "LOW")
+    }
+    high, med, low = per_level["HIGH"], per_level["MEDIUM"], per_level["LOW"]
+    total = len(findings)
+    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
+    # Header and summary table.
+    out = [
+        "# RAGGuard Security Report",
+        "",
+        f"**Target**: `{target}`",
+        f"**Date**: {timestamp}",
+        f"**Findings**: {total} ({high} high, {med} medium, {low} low)",
+        "",
+        "---",
+        "",
+        "## Summary",
+        "",
+        "| Severity | Count |",
+        "|----------|-------|",
+        f"| HIGH     | {high}   |",
+        f"| MEDIUM   | {med}   |",
+        f"| LOW      | {low}   |",
+        f"| **Total**| **{total}** |",
+        "",
+    ]
+    if findings:
+        out += ["## Findings", ""]
+        for item in findings:
+            cwe = f" ({item.cwe_id})" if item.cwe_id else ""
+            out += [
+                f"### {item.id} [{item.severity}] {item.title}{cwe}",
+                "",
+                f"**File**: `{item.file_path}:{item.line_number}`",
+                f"**Category**: {item.category}",
+                "",
+                f"{item.description}",
+                "",
+            ]
+            # The snippet block is omitted entirely when the finding has no snippet.
+            if item.code_snippet:
+                out += ["```python", f"{item.code_snippet}", "```", ""]
+            out += [f"**Remediation**: {item.remediation}", "", "---", ""]
+    else:
+        out.append("No security findings detected.")
+    out += ["", "*Generated by [RAGGuard](https://github.com/HrushiYadav/ragguard)*"]
+    with open(output_path, "w", encoding="utf-8") as fp:
+        fp.write("\n".join(out))

ragguard/report/template.html ADDED Viewed

@@ -0,0 +1,164 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>RAGGuard Security Report</title>
+<style>
+  :root {
+    --bg: #0d1117;
+    --card: #161b22;
+    --border: #30363d;
+    --text: #e6edf3;
+    --dim: #8b949e;
+    --high: #f85149;
+    --medium: #d29922;
+    --low: #58a6ff;
+    --green: #3fb950;
+  }
+  * { margin: 0; padding: 0; box-sizing: border-box; }
+  body {
+    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
+    background: var(--bg);
+    color: var(--text);
+    line-height: 1.6;
+    padding: 2rem;
+    max-width: 960px;
+    margin: 0 auto;
+  }
+  h1 { font-size: 1.8rem; margin-bottom: 0.5rem; }
+  .meta { color: var(--dim); margin-bottom: 2rem; font-size: 0.9rem; }
+  .stats {
+    display: flex;
+    gap: 1rem;
+    margin-bottom: 2rem;
+  }
+  .stat {
+    background: var(--card);
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    padding: 1rem 1.5rem;
+    text-align: center;
+    flex: 1;
+  }
+  .stat .number { font-size: 2rem; font-weight: 700; }
+  .stat .label { font-size: 0.8rem; color: var(--dim); text-transform: uppercase; letter-spacing: 0.05em; }
+  .stat.high .number { color: var(--high); }
+  .stat.medium .number { color: var(--medium); }
+  .stat.low .number { color: var(--low); }
+  .stat.total .number { color: var(--text); }
+  .finding {
+    background: var(--card);
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    padding: 1.25rem;
+    margin-bottom: 1rem;
+    border-left: 4px solid var(--border);
+  }
+  .finding.sev-HIGH { border-left-color: var(--high); }
+  .finding.sev-MEDIUM { border-left-color: var(--medium); }
+  .finding.sev-LOW { border-left-color: var(--low); }
+  .finding-header {
+    display: flex;
+    align-items: center;
+    gap: 0.75rem;
+    margin-bottom: 0.5rem;
+  }
+  .badge {
+    font-size: 0.7rem;
+    font-weight: 700;
+    padding: 0.15rem 0.5rem;
+    border-radius: 4px;
+    text-transform: uppercase;
+    letter-spacing: 0.05em;
+  }
+  .badge.HIGH { background: var(--high); color: #fff; }
+  .badge.MEDIUM { background: var(--medium); color: #000; }
+  .badge.LOW { background: var(--low); color: #000; }
+  .finding-id { color: var(--dim); font-size: 0.85rem; font-weight: 600; }
+  .finding-title { font-weight: 600; }
+  .finding-location { color: var(--dim); font-size: 0.85rem; margin-bottom: 0.5rem; }
+  .finding-desc { margin-bottom: 0.75rem; font-size: 0.9rem; }
+  .code {
+    background: #0d1117;
+    border: 1px solid var(--border);
+    border-radius: 4px;
+    padding: 0.75rem;
+    font-family: 'SF Mono', 'Fira Code', monospace;
+    font-size: 0.8rem;
+    overflow-x: auto;
+    margin-bottom: 0.75rem;
+    color: var(--dim);
+  }
+  .remediation {
+    font-size: 0.85rem;
+    color: var(--green);
+  }
+  .remediation::before { content: "Fix: "; font-weight: 600; }
+  .footer {
+    text-align: center;
+    color: var(--dim);
+    margin-top: 2rem;
+    font-size: 0.8rem;
+    padding-top: 1rem;
+    border-top: 1px solid var(--border);
+  }
+  .footer a { color: var(--blue, #58a6ff); text-decoration: none; }
+  .no-findings {
+    text-align: center;
+    padding: 3rem;
+    color: var(--green);
+    font-size: 1.2rem;
+  }
+</style>
+</head>
+<body>
+  <h1>RAGGuard Security Report</h1>
+  <div class="meta">
+    Target: <code>{{ target }}</code> &middot; {{ timestamp }}
+  </div>
+  <div class="stats">
+    <div class="stat high">
+      <div class="number">{{ high }}</div>
+      <div class="label">High</div>
+    </div>
+    <div class="stat medium">
+      <div class="number">{{ medium }}</div>
+      <div class="label">Medium</div>
+    </div>
+    <div class="stat low">
+      <div class="number">{{ low }}</div>
+      <div class="label">Low</div>
+    </div>
+    <div class="stat total">
+      <div class="number">{{ total }}</div>
+      <div class="label">Total</div>
+    </div>
+  </div>
+  {% if not findings %}
+  <div class="no-findings">No security findings detected.</div>
+  {% endif %}
+  {% for f in findings %}
+  <div class="finding sev-{{ f.severity }}">
+    <div class="finding-header">
+      <span class="finding-id">{{ f.id }}</span>
+      <span class="badge {{ f.severity }}">{{ f.severity }}</span>
+      <span class="finding-title">{{ f.title }}</span>
+    </div>
+    <div class="finding-location">{{ f.file_path }}:{{ f.line_number }}{% if f.cwe_id %} &middot; {{ f.cwe_id }}{% endif %}</div>
+    <div class="finding-desc">{{ f.description }}</div>
+    {% if f.code_snippet %}
+    <div class="code">{{ f.code_snippet | e }}</div>
+    {% endif %}
+    <div class="remediation">{{ f.remediation }}</div>
+  </div>
+  {% endfor %}
+  <div class="footer">
+    Generated by <a href="https://github.com/HrushiYadav/ragguard">RAGGuard</a>
+  </div>
+</body>
+</html>

ragguard/scanners/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+from ragguard.scanners.auth_gaps import AuthGapsScanner
+from ragguard.scanners.filter_injection import FilterInjectionScanner
+from ragguard.scanners.nosql_injection import NoSQLInjectionScanner
+from ragguard.scanners.resource_safety import ResourceSafetyScanner
+from ragguard.scanners.secret_logging import SecretLoggingScanner
+from ragguard.scanners.sql_injection import SQLInjectionScanner
+# Scanner classes (not instances) in the order they are run; callers instantiate each one.
+ALL_SCANNERS = [
+    FilterInjectionScanner,
+    NoSQLInjectionScanner,
+    SQLInjectionScanner,
+    SecretLoggingScanner,
+    AuthGapsScanner,
+    ResourceSafetyScanner,
+]
+# Public API of the package: the registry plus every scanner class it contains.
+__all__ = [
+    "ALL_SCANNERS",
+    "FilterInjectionScanner",
+    "NoSQLInjectionScanner",
+    "SQLInjectionScanner",
+    "SecretLoggingScanner",
+    "AuthGapsScanner",
+    "ResourceSafetyScanner",
+]

ragguard/scanners/auth_gaps.py ADDED Viewed

@@ -0,0 +1,77 @@
+import re
+from ragguard.finding import Finding
+from ragguard.scanners.base import BaseScanner
+# FastAPI-style route decorators: `@<name>.get(`, `.post(`, `.put(`, `.delete(` or `.patch(`.
+# The object name is not checked, so any decorator of that shape matches.
+_ROUTE_DECORATOR = re.compile(r"@\w+\.(get|post|put|delete|patch)\(")
+# Auth-related patterns that indicate authorization is present
+# (dependency injection, security helpers, or auth decorators); matched case-insensitively.
+_AUTH_PATTERNS = re.compile(
+    r"Depends\(.*auth|Depends\(.*verify|Depends\(.*current_user"
+    r"|Security\(|HTTPBearer|OAuth2|api_key.*Header"
+    r"|@require_auth|@login_required|@authenticated",
+    re.IGNORECASE,
+)
+# Client-controlled ID in request body (IDOR risk): an attribute ending in
+# user_id / org_id / tenant_id read from an object named body, request, payload or data.
+_IDOR_PATTERN = re.compile(r"(?:body|request|payload|data)\.\w*(?:user_id|org_id|tenant_id)")
+class AuthGapsScanner(BaseScanner):
+    """Flags API routes in files with no auth marker, and client-supplied user/tenant IDs."""
+    @property
+    def name(self) -> str:
+        return "Auth Gaps"
+    @property
+    def category(self) -> str:
+        return "auth-gaps"
+    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
+        """Return route findings first (file order), followed by IDOR findings (file order)."""
+        # Files whose name contains "test" are not scanned.
+        basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
+        if "test" in basename.lower():
+            return []
+        numbered = [(number, raw.strip()) for number, raw in enumerate(lines, 1)]
+        results = []
+        # Routes are only reported when the file shows no auth marker anywhere.
+        if _AUTH_PATTERNS.search(content) is None:
+            results.extend(
+                Finding(
+                    id="",
+                    severity="MEDIUM",
+                    category=self.category,
+                    title="API route without authentication middleware",
+                    file_path=file_path,
+                    line_number=number,
+                    code_snippet=text,
+                    description="This API endpoint has no visible authentication dependency. Any caller can access it.",
+                    remediation="Add authentication middleware (e.g., Depends(verify_token)) to protect this endpoint.",
+                    cwe_id="CWE-306",
+                )
+                for number, text in numbered
+                if _ROUTE_DECORATOR.search(text) and not text.startswith("@mock")
+            )
+        # Client-controlled user/tenant IDs are reported regardless of auth markers.
+        results.extend(
+            Finding(
+                id="",
+                severity="MEDIUM",
+                category=self.category,
+                title="Client-controlled user/tenant ID (potential IDOR)",
+                file_path=file_path,
+                line_number=number,
+                code_snippet=text,
+                description="User/tenant ID is taken from the request body, "
+                "allowing clients to impersonate other users.",
+                remediation="Derive user_id from the authenticated session/token, not from the request body.",
+                cwe_id="CWE-639",
+            )
+            for number, text in numbered
+            if _IDOR_PATTERN.search(text)
+        )
+        return results

ragguard/scanners/base.py ADDED Viewed

@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+from ragguard.finding import Finding
+class BaseScanner(ABC):
+    """Abstract interface every scanner implements: a name, a category and ``scan_file``."""
+    # Human-readable scanner name, e.g. "Auth Gaps".
+    @property
+    @abstractmethod
+    def name(self) -> str: ...
+    # Category slug, e.g. "auth-gaps"; the engine compares it with the category filter.
+    @property
+    @abstractmethod
+    def category(self) -> str: ...
+    # Inspect one file and return its findings. `content` is the full file text and
+    # `lines` is the same text split into lines; `file_path` is used for reporting.
+    @abstractmethod
+    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]: ...

ragguard/scanners/filter_injection.py ADDED Viewed

@@ -0,0 +1,77 @@
+import re
+from ragguard.finding import Finding
+from ragguard.scanners.base import BaseScanner
+# Patterns where user-controlled values are interpolated into filter/query expressions
+# Each entry is (compiled regex, description, remediation). Entries are tried in order
+# and only the first match per line is reported, so the generic pattern stays last.
+_PATTERNS = [
+    # Milvus/Baidu: f'(metadata["{key}"] == "{value}")'
+    (
+        re.compile(r'''f['"].*metadata\[.*\{.*\}.*==.*\{.*\}'''),
+        "User-controlled value interpolated into metadata filter expression",
+        "Validate filter values are primitives and escape quotes before interpolation.",
+    ),
+    # Valkey/Redis: f'@{key}:{{{value}}}'
+    (
+        re.compile(r'''f['"].*@\{.*\}:\{\{\{?\w*\}?\}\}'''),
+        "User-controlled value interpolated into FT.SEARCH tag query without escaping",
+        "Escape Valkey/Redis FT.SEARCH special characters in tag filter values.",
+    ),
+    # Azure: f"{key} eq '{value}'"
+    (
+        re.compile(r'''f['"].*\{.*\}\s+eq\s+['\"]?\{.*\}'''),
+        "User-controlled value interpolated into OData filter expression",
+        "Use parameterized filters or escape single quotes in values.",
+    ),
+    # Neptune: f'{{equals:{{property: \'{k}\', value: \'{v}\'}}}}'
+    (
+        re.compile(r'''f['"].*equals.*property.*\{.*\}.*value.*\{.*\}'''),
+        "User-controlled value interpolated into graph query filter",
+        "Use parameterized queries instead of string interpolation.",
+    ),
+    # Upstash: f"{k} = {self._stringify(v)}"
+    (
+        re.compile(r'''f['"].*\{.*\}\s*=\s*\{.*stringify.*\}'''),
+        "User-controlled value passed through stringify into filter expression",
+        "Validate filter values are safe primitives before interpolation.",
+    ),
+    # Generic: any f-string building a filter with user value interpolation
+    (
+        re.compile(r'''f['"].*filter.*\{.*value.*\}|f['"].*\{.*key.*\}.*\{.*value.*\}'''),
+        "Possible filter expression injection via f-string interpolation",
+        "Validate and escape user-controlled values before building filter expressions.",
+    ),
+]
+class FilterInjectionScanner(BaseScanner):
+    """Flags f-strings that interpolate values into vector-store filter expressions."""
+    @property
+    def name(self) -> str:
+        return "Filter Expression Injection"
+    @property
+    def category(self) -> str:
+        return "filter-injection"
+    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
+        """Return at most one HIGH finding per line: the first pattern in _PATTERNS that matches."""
+        results = []
+        for line_no, raw in enumerate(lines, 1):
+            text = raw.strip()
+            # Blank lines and comment lines are never reported.
+            if text == "" or text.startswith("#"):
+                continue
+            hit = next(((d, r) for regex, d, r in _PATTERNS if regex.search(text)), None)
+            if hit is None:
+                continue
+            desc, remediation = hit
+            results.append(
+                Finding(
+                    id="",
+                    severity="HIGH",
+                    category=self.category,
+                    title=f"Filter injection: {desc.split('.')[0]}",
+                    file_path=file_path,
+                    line_number=line_no,
+                    code_snippet=text,
+                    description=desc,
+                    remediation=remediation,
+                    cwe_id="CWE-943",
+                )
+            )
+        return results

ragguard/scanners/nosql_injection.py ADDED Viewed

@@ -0,0 +1,68 @@
+import re
+from ragguard.finding import Finding
+from ragguard.scanners.base import BaseScanner
+# Each entry is (compiled regex, description, remediation, CWE id). Entries are tried
+# in order and only the first match per line is reported.
+_PATTERNS = [
+    # MongoDB: {"payload." + key: value} where value comes from filters dict without validation
+    (
+        re.compile(r'''["']payload\.\s*["']\s*\+\s*\w+\s*:\s*\w+'''),
+        "Filter value passed directly into MongoDB query without type validation",
+        "Reject dict values that could contain MongoDB operators ($ne, $gt, $regex).",
+        "CWE-943",
+    ),
+    # Elasticsearch/OpenSearch: {"term": {f"metadata.{key}": value}}
+    (
+        re.compile(r'''["']term["']\s*:\s*\{.*f["'].*\{.*\}.*["']\s*:\s*\w+\s*\}'''),
+        "Filter value passed directly into Elasticsearch term query",
+        "Validate that filter values are scalars, not nested query objects.",
+        "CWE-943",
+    ),
+    # Generic: an .append({...}) call that builds a "payload." / "metadata." filter condition
+    (
+        re.compile(r'''\.append\(\{.*["']payload\.|\.append\(\{.*["']metadata\.'''),
+        "User-controlled value appended to query filter conditions without validation",
+        "Validate filter values are scalars before constructing query conditions.",
+        "CWE-943",
+    ),
+]
+# Marker that the file already validates filter values. The scanner searches the whole
+# file content for it (not just nearby lines) and reports nothing when it is present.
+_VALIDATION_PATTERN = re.compile(r"_validate_filter|_sanitize|_escape|isinstance.*dict.*raise")
+class NoSQLInjectionScanner(BaseScanner):
+    """Flags query filters built from unvalidated values (MongoDB / Elasticsearch style)."""
+    @property
+    def name(self) -> str:
+        return "NoSQL Operator Injection"
+    @property
+    def category(self) -> str:
+        return "nosql-injection"
+    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
+        """Return at most one HIGH finding per line, or nothing if the file has a validation marker."""
+        # Any validation/sanitisation marker anywhere in the file suppresses all findings.
+        if _VALIDATION_PATTERN.search(content):
+            return []
+        results = []
+        for line_no, raw in enumerate(lines, 1):
+            text = raw.strip()
+            if text == "" or text.startswith("#"):
+                continue
+            hit = next(((d, r, c) for regex, d, r, c in _PATTERNS if regex.search(text)), None)
+            if hit is None:
+                continue
+            desc, remediation, cwe = hit
+            results.append(
+                Finding(
+                    id="",
+                    severity="HIGH",
+                    category=self.category,
+                    title=f"NoSQL injection: {desc.split('.')[0]}",
+                    file_path=file_path,
+                    line_number=line_no,
+                    code_snippet=text,
+                    description=desc,
+                    remediation=remediation,
+                    cwe_id=cwe,
+                )
+            )
+        return results

ragguard/scanners/resource_safety.py ADDED Viewed

@@ -0,0 +1,80 @@
+import re
+from ragguard.finding import Finding
+from ragguard.scanners.base import BaseScanner
+# Each entry is (compiled regex, description, remediation, CWE id, severity). Entries are
+# tried in order and only the first match per line is reported. The regexes match on the
+# call itself; they do not check where the data comes from.
+_PATTERNS = [
+    # pickle.load / pickle.loads on untrusted data
+    (
+        re.compile(r"pickle\.loads?\("),
+        "Deserialization of untrusted data via pickle",
+        "Use safe deserialization formats (JSON, msgpack) instead of pickle for untrusted data.",
+        "CWE-502",
+        "HIGH",
+    ),
+    # zipfile without size limit
+    (
+        re.compile(r"zipfile\.ZipFile\("),
+        "ZIP file extraction without apparent size validation",
+        "Check uncompressed sizes before extraction to prevent zip bomb attacks.",
+        "CWE-409",
+        "MEDIUM",
+    ),
+    # tarfile.open
+    (
+        re.compile(r"tarfile\.open\("),
+        "TAR file extraction (potential path traversal and zip bomb)",
+        "Validate member paths and sizes before extraction. Use data_filter on Python 3.12+.",
+        "CWE-409",
+        "MEDIUM",
+    ),
+    # .read() called with no size argument (matches any such call, whatever the context)
+    (
+        re.compile(r"\.read\(\s*\)"),
+        "Unbounded file read (no size limit)",
+        "Pass a max size argument to .read(max_bytes) to prevent memory exhaustion.",
+        "CWE-400",
+        "LOW",
+    ),
+    # eval() or exec() calls; the lookbehind excludes method calls such as obj.eval(
+    (
+        re.compile(r"(?<!\.)\beval\s*\(|(?<!\.)\bexec\s*\("),
+        "Dynamic code execution via eval/exec",
+        "Avoid eval/exec on user-controlled input. Use ast.literal_eval for safe parsing.",
+        "CWE-95",
+        "HIGH",
+    ),
+]
+class ResourceSafetyScanner(BaseScanner):
+    """Flags risky resource handling: pickle, archive opening, unbounded reads, eval/exec."""
+    @property
+    def name(self) -> str:
+        return "Resource Safety"
+    @property
+    def category(self) -> str:
+        return "resource-safety"
+    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
+        """Return at most one finding per line; severity and CWE come from the matching pattern."""
+        results = []
+        for line_no, raw in enumerate(lines, 1):
+            text = raw.strip()
+            if text == "" or text.startswith("#"):
+                continue
+            hit = next((entry[1:] for entry in _PATTERNS if entry[0].search(text)), None)
+            if hit is None:
+                continue
+            desc, remediation, cwe, severity = hit
+            results.append(
+                Finding(
+                    id="",
+                    severity=severity,
+                    category=self.category,
+                    # The title drops any parenthesised detail from the description.
+                    title=f"Resource safety: {desc.split('(')[0].strip()}",
+                    file_path=file_path,
+                    line_number=line_no,
+                    code_snippet=text,
+                    description=desc,
+                    remediation=remediation,
+                    cwe_id=cwe,
+                )
+            )
+        return results

ragguard/scanners/secret_logging.py ADDED Viewed

@@ -0,0 +1,65 @@
+import re
+from ragguard.finding import Finding
+from ragguard.scanners.base import BaseScanner
+# Identifiers that end in a secret-like word (password, token, api_key, ...) or in a
+# connection-setting name (mongo_uri, redis_url, ...); matched case-insensitively.
+_SECRET_VARS = re.compile(
+    r"\b\w*(?:password|secret|api_key|api_secret|token|private_key|credentials"
+    r"|mongo_uri|valkey_url|redis_url|connection_string|conn_str|service_account_json)\b",
+    re.IGNORECASE,
+)
+# A call to any method on an object literally named `logger`, e.g. logger.info(
+_LOGGER_CALL = re.compile(r"logger\.\w+\(")
+# Connection strings with embedded credentials: scheme://user:pass@host
+# The second alternative also matches any f-string placeholder whose expression contains "url".
+_CONN_STRING_IN_FSTRING = re.compile(r'''f["'].*://.*\{.*\}.*@|f["'].*\{.*url.*\}''', re.IGNORECASE)
+class SecretLoggingScanner(BaseScanner):
+    """Flags logger calls that reference secret-like names or credential-bearing URLs."""
+    @property
+    def name(self) -> str:
+        return "Secret Logging"
+    @property
+    def category(self) -> str:
+        return "secret-logging"
+    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
+        """Return one MEDIUM finding per offending logger line; secret names take precedence."""
+        results = []
+        for line_no, raw in enumerate(lines, 1):
+            text = raw.strip()
+            # Only non-blank, non-comment lines that contain a logger call are considered.
+            if text == "" or text.startswith("#") or not _LOGGER_CALL.search(text):
+                continue
+            secret = _SECRET_VARS.search(text)
+            if secret is not None:
+                var_name = secret.group(0)
+                results.append(
+                    Finding(
+                        id="",
+                        severity="MEDIUM",
+                        category=self.category,
+                        title=f"Possible secret '{var_name}' in log output",
+                        file_path=file_path,
+                        line_number=line_no,
+                        code_snippet=text,
+                        description=f"Logger call references variable '{var_name}' which may contain sensitive data.",
+                        remediation="Mask or omit secrets from log messages. Use a redaction utility.",
+                        cwe_id="CWE-532",
+                    )
+                )
+            elif _CONN_STRING_IN_FSTRING.search(text):
+                results.append(
+                    Finding(
+                        id="",
+                        severity="MEDIUM",
+                        category=self.category,
+                        title="Connection string with credentials in log output",
+                        file_path=file_path,
+                        line_number=line_no,
+                        code_snippet=text,
+                        description="Logger call includes a connection URL that may contain embedded credentials.",
+                        remediation="Redact credentials from connection strings before logging.",
+                        cwe_id="CWE-532",
+                    )
+                )
+        return results

ragguard/scanners/sql_injection.py ADDED Viewed

@@ -0,0 +1,80 @@
+import re
+from ragguard.finding import Finding
+from ragguard.scanners.base import BaseScanner
+# Detection table. Each entry is a 4-tuple:
+#   (compiled regex, finding description, remediation advice, CWE id).
+# SQLInjectionScanner.scan_file tries the entries in order against each
+# stripped source line and reports only the first one that matches.
+_PATTERNS = [
+    # f"INSERT INTO {table} ... VALUES {values}"
+    (
+        re.compile(r'''f["'].*INSERT\s+INTO\s+.*\{.*\}.*VALUES\s+.*\{.*\}''', re.IGNORECASE),
+        "SQL INSERT built via f-string with interpolated values",
+        "Use parameterized queries with placeholders instead of f-string interpolation.",
+        "CWE-89",
+    ),
+    # f"DELETE FROM {table} WHERE ..."
+    (
+        re.compile(r'''f["'].*DELETE\s+FROM\s+.*\{.*\}.*WHERE.*\{.*\}''', re.IGNORECASE),
+        "SQL DELETE built via f-string with interpolated values",
+        "Use parameterized queries with placeholders instead of f-string interpolation.",
+        "CWE-89",
+    ),
+    # f"SELECT ... FROM {table} WHERE {condition}"
+    (
+        re.compile(r'''f["'].*SELECT\s+.*FROM\s+.*\{.*\}.*WHERE.*\{.*\}''', re.IGNORECASE),
+        "SQL SELECT built via f-string with interpolated values",
+        "Use parameterized queries with placeholders instead of f-string interpolation.",
+        "CWE-89",
+    ),
+    # execute(f"...") pattern
+    # Unlike the other entries this one is compiled without re.IGNORECASE and
+    # does not require a "{...}" placeholder inside the f-string.
+    (
+        re.compile(r'''\.execute\(\s*f["']'''),
+        "SQL query executed via f-string interpolation",
+        "Use parameterized queries (execute with %s or ? placeholders).",
+        "CWE-89",
+    ),
+    # f"... SET {col} = {val}" (UPDATE)
+    (
+        re.compile(r'''f["'].*SET\s+.*\{.*\}\s*=\s*\{.*\}''', re.IGNORECASE),
+        "SQL SET clause built via f-string with interpolated values",
+        "Use parameterized queries with placeholders instead of f-string interpolation.",
+        "CWE-89",
+    ),
+]
+# Matches a literal "%s" or "?" anywhere in a line. scan_file skips any line
+# where this matches, treating it as already parameterized.
+# NOTE(review): an f-string that mixes placeholders with "{...}" interpolation
+# is skipped as well -- confirm this false-negative trade-off is intended.
+_PARAMETERIZED_RE = re.compile(r"%s|\?")
+class SQLInjectionScanner(BaseScanner):
+    @property
+    def name(self) -> str:
+        return "SQL Injection"
+    @property
+    def category(self) -> str:
+        return "sql-injection"
+    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
+        findings = []
+        for i, line in enumerate(lines, 1):
+            stripped = line.strip()
+            if not stripped or stripped.startswith("#"):
+                continue
+            for pattern, desc, remediation, cwe in _PATTERNS:
+                if pattern.search(stripped):
+                    if _PARAMETERIZED_RE.search(stripped):
+                        continue
+                    findings.append(Finding(
+                        id="",
+                        severity="HIGH",
+                        category=self.category,
+                        title=f"SQL injection: {desc.split('.')[0]}",
+                        file_path=file_path,
+                        line_number=i,
+                        code_snippet=stripped,
+                        description=desc,
+                        remediation=remediation,
+                        cwe_id=cwe,
+                    ))
+                    break
+        return findings

ragsec-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,108 @@
+Metadata-Version: 2.4
+Name: ragsec
+Version: 0.1.0
+Summary: Static security scanner for RAG pipelines
+Author-email: Hrushikesh Yadav <yadavhrushikesh65@gmail.com>
+License-Expression: Apache-2.0
+Keywords: rag,scanner,security,static-analysis,vector-store
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Security
+Classifier: Topic :: Software Development :: Quality Assurance
+Requires-Python: >=3.10
+Requires-Dist: click>=8.0
+Requires-Dist: jinja2>=3.0
+Requires-Dist: rich>=13.0
+Description-Content-Type: text/markdown
+# RAGGuard
+Static security scanner for RAG pipelines. Finds injection vulnerabilities, secret logging, auth gaps, and resource safety issues in Python codebases.
+Built from real-world security audits of production RAG frameworks.
+## Install
+```bash
+pip install ragsec
+```
+Or from source:
+```bash
+git clone https://github.com/HrushiYadav/ragGuard.git
+cd ragguard
+pip install -e .
+```
+## Usage
+```bash
+# Terminal output (default)
+ragguard scan ./path/to/codebase
+# Generate reports
+ragguard scan ./path/to/codebase --output report.md --format markdown
+ragguard scan ./path/to/codebase --output report.html --format html
+# Filter by severity or category
+ragguard scan ./path/to/codebase --severity high
+ragguard scan ./path/to/codebase --category filter-injection
+```
+## What it detects
+| Scanner | Severity | What it finds |
+|---------|----------|---------------|
+| Filter Injection | HIGH | f-string interpolation in Milvus, Valkey, Azure, Elasticsearch filter expressions |
+| NoSQL Injection | HIGH | Unvalidated dict values in MongoDB/Elasticsearch queries |
+| SQL Injection | HIGH | f-string SQL construction (INSERT, DELETE, SELECT, UPDATE) |
+| Secret Logging | MEDIUM | API keys, passwords, connection strings in logger calls |
+| Auth Gaps | MEDIUM | FastAPI/Flask routes without auth, client-controlled user IDs (IDOR) |
+| Resource Safety | HIGH/MEDIUM/LOW | pickle deserialization, zip bombs, eval/exec, unbounded reads |
+## Example output
+```
+RAGGuard scanning ./my-rag-app
+RG-001 [HIGH] Filter injection: Possible filter expression injection
+  vector_stores/store.py:42
+  > conditions.append(f'(metadata["{key}"] == "{value}")')
+RG-002 [HIGH] NoSQL injection: Filter value passed into query
+  vector_stores/mongo.py:89
+  > filter_dict["payload." + key] = value
+      Summary
++------------------+
+| Severity | Count |
+|----------+-------|
+| HIGH     |     5 |
+| MEDIUM   |     8 |
+| LOW      |     3 |
+| Total    |    16 |
++------------------+
+```
+## HTML Report
+Generate a styled HTML report for sharing:
+```bash
+ragguard scan ./my-rag-app --output report.html --format html
+```
+Dark theme with severity badges, code snippets, and remediation guidance.
+## Development
+```bash
+pip install -e .
+pytest tests/ -v
+ruff check ragguard/
+```
+## License
+Apache-2.0

ragsec-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,20 @@
+ragguard/__init__.py,sha256=rviieV-F0UzVQ7RrWeYP6Q2imo_u4-sEud6VbZWmRyY,84
+ragguard/cli.py,sha256=RitCClCIPUYHIi0poITGLKyCQ2XraVhfMHCv9_k-7KE,3301
+ragguard/engine.py,sha256=suQgiALPXpmUPemOl-PFIRq-9h3R74CCzRll_HsdvyE,1634
+ragguard/finding.py,sha256=o78W4dHOGax8XBQvCVSC04XxzBiNzD8mweKcKwAyTMg,259
+ragguard/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ragguard/report/html.py,sha256=AHJqzYM8NvZNpRPOaJcfnd1VfAMjNR9yXej8kLF-xx8,946
+ragguard/report/markdown.py,sha256=v2O0aEkgW-a1iMfrAzA0N5V-eZ9FF8yxOZHk1DvX7JU,2053
+ragguard/report/template.html,sha256=lEgtl2ZBHJCd5j8sdCCHsYMHzQw88q2Taph3wanxuts,4774
+ragguard/scanners/__init__.py,sha256=1lcO2Ki5jDCbMzXk0MgsJ046Wu1oKK-Q8lK5B6EvbA4,767
+ragguard/scanners/auth_gaps.py,sha256=rnby9mEpdeuDhtAWTmkDYZZADcd0CiLdFtVDzz6Wiyg,2986
+ragguard/scanners/base.py,sha256=OuVTehXuv-RZ7VAw-7VzWU8dTcLmSbbz2_RfNpkEYHw,350
+ragguard/scanners/filter_injection.py,sha256=xpkz5DEaLjy1KWE5Jcju3y2MWIWB_gWoI15Kt7_YZms,3128
+ragguard/scanners/nosql_injection.py,sha256=e0pXFXO0wCVPTJHxc9-sOb_Q7J1FQwOU62RybRsnmP0,2598
+ragguard/scanners/resource_safety.py,sha256=zNR1AC7nG1-dYHsMlc0ndf31gD2cKxfjzdfGxGTM19w,2682
+ragguard/scanners/secret_logging.py,sha256=MvomXuTiYmVrfcZggpepY_wG7t4U3VdkBp15v9igUR4,2566
+ragguard/scanners/sql_injection.py,sha256=-7MQe5rEq7YfXlZtc68hTYIBNGefpn6I3S4VqPz3oL4,2911
+ragsec-0.1.0.dist-info/METADATA,sha256=xUYvPHYOnXVKD38UZlZZ-eU2sMmW8CJipoQaMpyJ3Tc,2870
+ragsec-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+ragsec-0.1.0.dist-info/entry_points.txt,sha256=oYwCjXfUIlPsH8vqc7J29sfcIgsZtWKGUQ6JpvxqtqQ,47
+ragsec-0.1.0.dist-info/RECORD,,

ragsec-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

ragsec-0.1.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ ragguard = ragguard.cli:main