ragsec 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragguard/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """RAGGuard -- static security scanner for RAG pipelines."""
2
+
3
+ __version__ = "0.1.0"
ragguard/cli.py ADDED
@@ -0,0 +1,86 @@
1
+ import os
2
+
3
+ import click
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+
7
+ from ragguard.engine import run_scan
8
+ from ragguard.report.html import write_html_report
9
+ from ragguard.report.markdown import write_markdown_report
10
+ from ragguard.scanners import ALL_SCANNERS
11
+
12
+ console = Console()
13
+
14
+
15
@click.group()
# The import package is "ragguard" but the installed distribution is "ragsec".
# Without an explicit package_name, click looks up metadata for the import
# package name and ``--version`` fails because no such distribution exists.
@click.version_option(package_name="ragsec")
def main():
    """RAGGuard -- static security scanner for RAG pipelines."""
19
+
20
+
21
@main.command()
@click.argument("target", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), help="Output file path (auto-detects format from extension).")
@click.option("--format", "fmt", type=click.Choice(["markdown", "html", "terminal"]), default="terminal")
@click.option(
    "--severity", type=click.Choice(["high", "medium", "low"], case_sensitive=False), help="Filter by severity."
)
@click.option("--category", help="Filter by category (e.g. filter-injection, nosql-injection).")
def scan(target: str, output: str | None, fmt: str, severity: str | None, category: str | None):
    """Scan a codebase for RAG security vulnerabilities."""
    # NOTE: the docstring above is the command's --help text, so it stays short.
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    from rich.markup import escape

    target = os.path.abspath(target)
    # Paths may contain "[...]", which Rich would otherwise parse as markup.
    console.print(f"\n[bold blue]RAGGuard[/] scanning [cyan]{escape(target)}[/]\n")

    scanners = [cls() for cls in ALL_SCANNERS]
    findings = run_scan(target, scanners, severity_filter=severity, category_filter=category)

    # --format defaults to "terminal", so it is never empty. Only infer the
    # report format from the output extension when the format is still the
    # default; an explicit "html"/"markdown" choice always wins over the
    # extension.
    if fmt == "terminal" and output:
        if output.endswith(".html"):
            fmt = "html"
        elif output.endswith(".md"):
            fmt = "markdown"

    if fmt == "html":
        path = output or "ragguard-report.html"
        write_html_report(findings, target, path)
        console.print(f"\n[green]HTML report written to {escape(path)}[/]")
    elif fmt == "markdown":
        path = output or "ragguard-report.md"
        write_markdown_report(findings, target, path)
        console.print(f"\n[green]Markdown report written to {escape(path)}[/]")
    else:
        # Terminal format, including the case of an output path whose
        # extension is neither .html nor .md.
        _print_terminal(findings, target)

    _print_summary(findings)
57
+
58
+
59
def _print_terminal(findings: list, target: str):
    """Print each finding to the console, or a "No findings." notice.

    Args:
        findings: Finding objects to display, in the order given.
        target: Scan root; accepted for signature symmetry but not used here.
    """
    # Local import: rich is already a dependency of this module.
    from rich.markup import escape

    if not findings:
        console.print("[green]No findings.[/]")
        return

    severity_colors = {"HIGH": "red", "MEDIUM": "yellow", "LOW": "blue"}
    for f in findings:
        sev_color = severity_colors.get(f.severity, "white")
        # Title, path, description and snippet originate from the scanned code
        # base. Escape them so text such as "items[key]" or "[/x]" is printed
        # literally instead of being interpreted as Rich markup tags.
        console.print(f"\n[bold {sev_color}]{f.id} [{f.severity}][/] {escape(f.title)}")
        console.print(f" [dim]{escape(f.file_path)}:{f.line_number}[/]")
        console.print(f" {escape(f.description)}")
        if f.code_snippet:
            # Long lines are truncated to 120 characters before escaping.
            console.print(f" [dim]> {escape(f.code_snippet.strip()[:120])}[/]")
71
+
72
+
73
def _print_summary(findings: list):
    """Print a table with the number of HIGH, MEDIUM and LOW findings and the total."""
    tally = {"HIGH": 0, "MEDIUM": 0, "LOW": 0}
    for item in findings:
        if item.severity in tally:
            tally[item.severity] += 1

    console.print()
    summary = Table(title="Summary", show_header=True)
    summary.add_column("Severity", style="bold")
    summary.add_column("Count", justify="right")

    # One row per severity, in fixed HIGH -> MEDIUM -> LOW order.
    labelled = (
        ("[red]HIGH[/]", "HIGH"),
        ("[yellow]MEDIUM[/]", "MEDIUM"),
        ("[blue]LOW[/]", "LOW"),
    )
    for label, key in labelled:
        summary.add_row(label, str(tally[key]))

    # The total counts every finding, including any with an unrecognised severity.
    summary.add_row("[bold]Total[/]", f"[bold]{len(findings)}[/]")
    console.print(summary)
ragguard/engine.py ADDED
@@ -0,0 +1,51 @@
1
+ import os
2
+ from pathlib import Path
3
+
4
+ from ragguard.finding import Finding
5
+ from ragguard.scanners.base import BaseScanner
6
+
7
+
8
def discover_python_files(root: str) -> list[str]:
    """Return the sorted paths of all ``.py`` files below *root*.

    Directories named exactly ``__pycache__``, ``.git``, ``node_modules``,
    ``.venv`` or ``venv`` are excluded together with everything beneath them.
    Names are compared per directory component below *root*, so look-alikes
    such as ``.github`` or a scan root located under ``venv-projects/`` are
    still scanned.

    Args:
        root: Directory to walk. A missing directory yields an empty list.

    Returns:
        Sorted list of file paths, each prefixed with *root*.
    """
    skip_dirs = {"__pycache__", ".git", "node_modules", ".venv", "venv"}
    files = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk never descends into skipped directories.
        dirnames[:] = [name for name in dirnames if name not in skip_dirs]
        files.extend(os.path.join(dirpath, name) for name in filenames if name.endswith(".py"))
    return sorted(files)
17
+
18
+
19
def run_scan(
    target: str,
    scanners: list[BaseScanner],
    severity_filter: str | None = None,
    category_filter: str | None = None,
) -> list[Finding]:
    """Run the given scanners over every Python file under *target*.

    Args:
        target: Directory to scan; converted to an absolute path.
        scanners: Scanner instances to apply to each file.
        severity_filter: If given, keep only findings whose severity matches
            (case-insensitive).
        category_filter: If given, run only scanners whose ``category`` equals
            this value.

    Returns:
        Findings sorted by severity (HIGH, MEDIUM, LOW, then anything else),
        file path and line number. IDs ``RG-001``, ``RG-002``, ... are assigned
        in that final order so the report reads sequentially.
    """
    root = os.path.abspath(target)

    # The category check does not depend on the file, so apply it once.
    active = [s for s in scanners if not category_filter or s.category == category_filter]
    wanted_severity = severity_filter.lower() if severity_filter else None

    findings: list[Finding] = []
    if not active:
        return findings

    for file_path in discover_python_files(root):
        try:
            content = Path(file_path).read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Best effort: unreadable files are skipped rather than aborting the scan.
            continue

        lines = content.splitlines()
        # Scanners receive the path relative to the scan root.
        rel_path = os.path.relpath(file_path, root)

        for scanner in active:
            for finding in scanner.scan_file(rel_path, content, lines):
                if wanted_severity and finding.severity.lower() != wanted_severity:
                    continue
                findings.append(finding)

    severity_rank = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
    findings.sort(key=lambda f: (severity_rank.get(f.severity, 3), f.file_path, f.line_number))

    # Number after sorting; numbering during discovery left the IDs shuffled
    # once the list was reordered by severity.
    for counter, finding in enumerate(findings, 1):
        finding.id = f"RG-{counter:03d}"
    return findings
ragguard/finding.py ADDED
@@ -0,0 +1,15 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class Finding:
    """A single issue reported by a scanner for one line of one file."""

    # Report identifier. Scanners create findings with "" and run_scan
    # overwrites it with an "RG-NNN" value.
    id: str
    # Severity label; the bundled scanners use "HIGH", "MEDIUM" or "LOW".
    severity: str
    # Category slug of the scanner that produced the finding (e.g. "auth-gaps").
    category: str
    # Short headline shown in terminal, Markdown and HTML output.
    title: str
    # Path of the offending file as passed to the scanner (run_scan passes a
    # path relative to the scan root).
    file_path: str
    # Line number of the match; scanners count from 1.
    line_number: int
    # The matched source line with surrounding whitespace stripped.
    code_snippet: str
    # Explanation of the problem.
    description: str
    # Suggested fix.
    remediation: str
    # Optional CWE identifier such as "CWE-89".
    cwe_id: str | None = None
File without changes
@@ -0,0 +1,32 @@
1
+ import html
2
+ from datetime import datetime, timezone
3
+ from pathlib import Path
4
+
5
+ from jinja2 import Template
6
+
7
+ from ragguard.finding import Finding
8
+
9
# The report template ships next to this module.
_TEMPLATE_PATH = Path(__file__).parent / "template.html"


def write_html_report(findings: list[Finding], target: str, output_path: str) -> None:
    """Render the findings into the bundled HTML template and write the result.

    Args:
        findings: Findings to list in the report, in the order given.
        target: Scanned path shown in the report header.
        output_path: File to write; created or overwritten, UTF-8 encoded.
    """
    high = sum(1 for f in findings if f.severity == "HIGH")
    med = sum(1 for f in findings if f.severity == "MEDIUM")
    low = sum(1 for f in findings if f.severity == "LOW")
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    template_str = _TEMPLATE_PATH.read_text(encoding="utf-8")
    # Finding fields (file paths, titles, descriptions, snippets) come from the
    # scanned code base and must not be emitted as raw HTML. Autoescaping
    # covers every ``{{ ... }}`` expression in the template, not only the ones
    # that happen to carry an explicit ``| e`` filter.
    template = Template(template_str, autoescape=True)

    rendered = template.render(
        # Passed unescaped: autoescaping handles it, and escaping here as well
        # would double-encode characters such as "&".
        target=target,
        timestamp=timestamp,
        total=len(findings),
        high=high,
        medium=med,
        low=low,
        findings=findings,
    )

    with open(output_path, "w", encoding="utf-8") as fp:
        fp.write(rendered)
@@ -0,0 +1,61 @@
1
+ from datetime import datetime, timezone
2
+
3
+ from ragguard.finding import Finding
4
+
5
+
6
def write_markdown_report(findings: list[Finding], target: str, output_path: str) -> None:
    """Write a Markdown report of the findings to *output_path*.

    The document has a header with target, UTC timestamp and counts, a summary
    table, one section per finding (or a "no findings" notice), and a footer.
    """
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    total = len(findings)

    # Per-severity counts; findings with any other severity only count towards the total.
    n_high = n_med = n_low = 0
    for item in findings:
        if item.severity == "HIGH":
            n_high += 1
        elif item.severity == "MEDIUM":
            n_med += 1
        elif item.severity == "LOW":
            n_low += 1

    doc = [
        "# RAGGuard Security Report",
        "",
        f"**Target**: `{target}`",
        f"**Date**: {generated_at}",
        f"**Findings**: {total} ({n_high} high, {n_med} medium, {n_low} low)",
        "",
        "---",
        "",
        "## Summary",
        "",
        "| Severity | Count |",
        "|----------|-------|",
        f"| HIGH | {n_high} |",
        f"| MEDIUM | {n_med} |",
        f"| LOW | {n_low} |",
        f"| **Total**| **{total}** |",
        "",
    ]

    if findings:
        doc += ["## Findings", ""]
        for item in findings:
            cwe_suffix = f" ({item.cwe_id})" if item.cwe_id else ""
            doc.extend(
                [
                    f"### {item.id} [{item.severity}] {item.title}{cwe_suffix}",
                    "",
                    f"**File**: `{item.file_path}:{item.line_number}`",
                    f"**Category**: {item.category}",
                    "",
                    f"{item.description}",
                    "",
                ]
            )
            # The snippet is optional and rendered as a fenced code block.
            if item.code_snippet:
                doc.extend(["```python", f"{item.code_snippet}", "```", ""])
            doc.extend([f"**Remediation**: {item.remediation}", "", "---", ""])
    else:
        doc.append("No security findings detected.")

    doc.extend(["", "*Generated by [RAGGuard](https://github.com/HrushiYadav/ragguard)*"])

    with open(output_path, "w", encoding="utf-8") as fp:
        fp.write("\n".join(doc))
@@ -0,0 +1,164 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>RAGGuard Security Report</title>
7
+ <style>
8
+ :root {
9
+ --bg: #0d1117;
10
+ --card: #161b22;
11
+ --border: #30363d;
12
+ --text: #e6edf3;
13
+ --dim: #8b949e;
14
+ --high: #f85149;
15
+ --medium: #d29922;
16
+ --low: #58a6ff;
17
+ --green: #3fb950;
18
+ }
19
+ * { margin: 0; padding: 0; box-sizing: border-box; }
20
+ body {
21
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;
22
+ background: var(--bg);
23
+ color: var(--text);
24
+ line-height: 1.6;
25
+ padding: 2rem;
26
+ max-width: 960px;
27
+ margin: 0 auto;
28
+ }
29
+ h1 { font-size: 1.8rem; margin-bottom: 0.5rem; }
30
+ .meta { color: var(--dim); margin-bottom: 2rem; font-size: 0.9rem; }
31
+ .stats {
32
+ display: flex;
33
+ gap: 1rem;
34
+ margin-bottom: 2rem;
35
+ }
36
+ .stat {
37
+ background: var(--card);
38
+ border: 1px solid var(--border);
39
+ border-radius: 8px;
40
+ padding: 1rem 1.5rem;
41
+ text-align: center;
42
+ flex: 1;
43
+ }
44
+ .stat .number { font-size: 2rem; font-weight: 700; }
45
+ .stat .label { font-size: 0.8rem; color: var(--dim); text-transform: uppercase; letter-spacing: 0.05em; }
46
+ .stat.high .number { color: var(--high); }
47
+ .stat.medium .number { color: var(--medium); }
48
+ .stat.low .number { color: var(--low); }
49
+ .stat.total .number { color: var(--text); }
50
+ .finding {
51
+ background: var(--card);
52
+ border: 1px solid var(--border);
53
+ border-radius: 8px;
54
+ padding: 1.25rem;
55
+ margin-bottom: 1rem;
56
+ border-left: 4px solid var(--border);
57
+ }
58
+ .finding.sev-HIGH { border-left-color: var(--high); }
59
+ .finding.sev-MEDIUM { border-left-color: var(--medium); }
60
+ .finding.sev-LOW { border-left-color: var(--low); }
61
+ .finding-header {
62
+ display: flex;
63
+ align-items: center;
64
+ gap: 0.75rem;
65
+ margin-bottom: 0.5rem;
66
+ }
67
+ .badge {
68
+ font-size: 0.7rem;
69
+ font-weight: 700;
70
+ padding: 0.15rem 0.5rem;
71
+ border-radius: 4px;
72
+ text-transform: uppercase;
73
+ letter-spacing: 0.05em;
74
+ }
75
+ .badge.HIGH { background: var(--high); color: #fff; }
76
+ .badge.MEDIUM { background: var(--medium); color: #000; }
77
+ .badge.LOW { background: var(--low); color: #000; }
78
+ .finding-id { color: var(--dim); font-size: 0.85rem; font-weight: 600; }
79
+ .finding-title { font-weight: 600; }
80
+ .finding-location { color: var(--dim); font-size: 0.85rem; margin-bottom: 0.5rem; }
81
+ .finding-desc { margin-bottom: 0.75rem; font-size: 0.9rem; }
82
+ .code {
83
+ background: #0d1117;
84
+ border: 1px solid var(--border);
85
+ border-radius: 4px;
86
+ padding: 0.75rem;
87
+ font-family: 'SF Mono', 'Fira Code', monospace;
88
+ font-size: 0.8rem;
89
+ overflow-x: auto;
90
+ margin-bottom: 0.75rem;
91
+ color: var(--dim);
92
+ }
93
+ .remediation {
94
+ font-size: 0.85rem;
95
+ color: var(--green);
96
+ }
97
+ .remediation::before { content: "Fix: "; font-weight: 600; }
98
+ .footer {
99
+ text-align: center;
100
+ color: var(--dim);
101
+ margin-top: 2rem;
102
+ font-size: 0.8rem;
103
+ padding-top: 1rem;
104
+ border-top: 1px solid var(--border);
105
+ }
106
+ .footer a { color: var(--blue, #58a6ff); text-decoration: none; }
107
+ .no-findings {
108
+ text-align: center;
109
+ padding: 3rem;
110
+ color: var(--green);
111
+ font-size: 1.2rem;
112
+ }
113
+ </style>
114
+ </head>
115
+ <body>
116
+ <h1>RAGGuard Security Report</h1>
117
+ <div class="meta">
118
+ Target: <code>{{ target }}</code> &middot; {{ timestamp }}
119
+ </div>
120
+
121
+ <div class="stats">
122
+ <div class="stat high">
123
+ <div class="number">{{ high }}</div>
124
+ <div class="label">High</div>
125
+ </div>
126
+ <div class="stat medium">
127
+ <div class="number">{{ medium }}</div>
128
+ <div class="label">Medium</div>
129
+ </div>
130
+ <div class="stat low">
131
+ <div class="number">{{ low }}</div>
132
+ <div class="label">Low</div>
133
+ </div>
134
+ <div class="stat total">
135
+ <div class="number">{{ total }}</div>
136
+ <div class="label">Total</div>
137
+ </div>
138
+ </div>
139
+
140
+ {% if not findings %}
141
+ <div class="no-findings">No security findings detected.</div>
142
+ {% endif %}
143
+
144
+ {% for f in findings %}
145
+ <div class="finding sev-{{ f.severity }}">
146
+ <div class="finding-header">
147
+ <span class="finding-id">{{ f.id }}</span>
148
+ <span class="badge {{ f.severity }}">{{ f.severity }}</span>
149
+ <span class="finding-title">{{ f.title }}</span>
150
+ </div>
151
+ <div class="finding-location">{{ f.file_path }}:{{ f.line_number }}{% if f.cwe_id %} &middot; {{ f.cwe_id }}{% endif %}</div>
152
+ <div class="finding-desc">{{ f.description }}</div>
153
+ {% if f.code_snippet %}
154
+ <div class="code">{{ f.code_snippet | e }}</div>
155
+ {% endif %}
156
+ <div class="remediation">{{ f.remediation }}</div>
157
+ </div>
158
+ {% endfor %}
159
+
160
+ <div class="footer">
161
+ Generated by <a href="https://github.com/HrushiYadav/ragguard">RAGGuard</a>
162
+ </div>
163
+ </body>
164
+ </html>
@@ -0,0 +1,25 @@
1
+ from ragguard.scanners.auth_gaps import AuthGapsScanner
2
+ from ragguard.scanners.filter_injection import FilterInjectionScanner
3
+ from ragguard.scanners.nosql_injection import NoSQLInjectionScanner
4
+ from ragguard.scanners.resource_safety import ResourceSafetyScanner
5
+ from ragguard.scanners.secret_logging import SecretLoggingScanner
6
+ from ragguard.scanners.sql_injection import SQLInjectionScanner
7
+
8
# Scanner classes (not instances) that make up the default scan; the CLI
# instantiates each one with no arguments.
ALL_SCANNERS = [
    FilterInjectionScanner,
    NoSQLInjectionScanner,
    SQLInjectionScanner,
    SecretLoggingScanner,
    AuthGapsScanner,
    ResourceSafetyScanner,
]

# Public names of this package.
__all__ = [
    "ALL_SCANNERS",
    "FilterInjectionScanner",
    "NoSQLInjectionScanner",
    "SQLInjectionScanner",
    "SecretLoggingScanner",
    "AuthGapsScanner",
    "ResourceSafetyScanner",
]
@@ -0,0 +1,77 @@
1
+ import re
2
+
3
+ from ragguard.finding import Finding
4
+ from ragguard.scanners.base import BaseScanner
5
+
6
+ # FastAPI route decorators
7
+ _ROUTE_DECORATOR = re.compile(r"@\w+\.(get|post|put|delete|patch)\(")
8
+
9
+ # Auth-related patterns that indicate authorization is present
10
+ _AUTH_PATTERNS = re.compile(
11
+ r"Depends\(.*auth|Depends\(.*verify|Depends\(.*current_user"
12
+ r"|Security\(|HTTPBearer|OAuth2|api_key.*Header"
13
+ r"|@require_auth|@login_required|@authenticated",
14
+ re.IGNORECASE,
15
+ )
16
+
17
+ # Client-controlled ID in request body (IDOR risk)
18
+ _IDOR_PATTERN = re.compile(r"(?:body|request|payload|data)\.\w*(?:user_id|org_id|tenant_id)")
19
+
20
+
21
class AuthGapsScanner(BaseScanner):
    """Flag route decorators in files with no auth marker, and request-supplied user/tenant IDs."""

    @property
    def name(self) -> str:
        return "Auth Gaps"

    @property
    def category(self) -> str:
        return "auth-gaps"

    @staticmethod
    def _is_test_file(file_path: str) -> bool:
        """Return True when the file name marks a test module.

        The base name is split on ``_``, ``-`` and ``.`` and compared token by
        token, so ``test_api.py``, ``api_test.py``, ``tests.py`` and
        ``conftest.py`` match while ``latest.py`` or ``contest.py`` do not.
        """
        base = file_path.replace("\\", "/").split("/")[-1].lower()
        return any(token in ("test", "tests", "conftest") for token in re.split(r"[_\-.]", base))

    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
        """Return auth-related findings for one file.

        Args:
            file_path: Path recorded on each finding; also used to skip test files.
            content: Full file text, searched once for any auth marker.
            lines: The file split into lines; line numbers are reported 1-based.

        Returns:
            Route findings first (only when the file contains no auth marker at
            all), followed by IDOR findings, each group in line order.
        """
        if self._is_test_file(file_path):
            return []

        # A single auth marker anywhere in the file suppresses all route findings.
        has_any_auth = bool(_AUTH_PATTERNS.search(content))

        route_findings = []
        idor_findings = []
        for line_num, line in enumerate(lines, 1):
            stripped = line.strip()
            # Skip blank and comment lines so commented-out code is not reported.
            if not stripped or stripped.startswith("#"):
                continue

            if (
                not has_any_auth
                and _ROUTE_DECORATOR.search(stripped)
                # "@mock.patch(" would otherwise look like a PATCH route.
                and not stripped.startswith("@mock")
            ):
                route_findings.append(Finding(
                    id="",
                    severity="MEDIUM",
                    category=self.category,
                    title="API route without authentication middleware",
                    file_path=file_path,
                    line_number=line_num,
                    code_snippet=stripped,
                    description="This API endpoint has no visible authentication dependency. Any caller can access it.",
                    remediation="Add authentication middleware (e.g., Depends(verify_token)) to protect this endpoint.",
                    cwe_id="CWE-306",
                ))

            # Client-controlled user/tenant ID (IDOR); checked regardless of auth markers.
            if _IDOR_PATTERN.search(stripped):
                idor_findings.append(Finding(
                    id="",
                    severity="MEDIUM",
                    category=self.category,
                    title="Client-controlled user/tenant ID (potential IDOR)",
                    file_path=file_path,
                    line_number=line_num,
                    code_snippet=stripped,
                    description="User/tenant ID is taken from the request body, "
                    "allowing clients to impersonate other users.",
                    remediation="Derive user_id from the authenticated session/token, not from the request body.",
                    cwe_id="CWE-639",
                ))

        return route_findings + idor_findings
@@ -0,0 +1,16 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ from ragguard.finding import Finding
4
+
5
+
6
class BaseScanner(ABC):
    """Abstract interface that every scanner implements."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Human-readable scanner name."""

    @property
    @abstractmethod
    def category(self) -> str:
        """Category slug identifying this scanner's findings."""

    @abstractmethod
    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
        """Return the findings for one file.

        Args:
            file_path: Path to record on each finding.
            content: Full text of the file.
            lines: The same text split into lines.
        """
@@ -0,0 +1,77 @@
1
+ import re
2
+
3
+ from ragguard.finding import Finding
4
+ from ragguard.scanners.base import BaseScanner
5
+
6
+ # Patterns where user-controlled values are interpolated into filter/query expressions
7
+ _PATTERNS = [
8
+ # Milvus/Baidu: f'(metadata["{key}"] == "{value}")'
9
+ (
10
+ re.compile(r'''f['"].*metadata\[.*\{.*\}.*==.*\{.*\}'''),
11
+ "User-controlled value interpolated into metadata filter expression",
12
+ "Validate filter values are primitives and escape quotes before interpolation.",
13
+ ),
14
+ # Valkey/Redis: f'@{key}:{{{value}}}'
15
+ (
16
+ re.compile(r'''f['"].*@\{.*\}:\{\{\{?\w*\}?\}\}'''),
17
+ "User-controlled value interpolated into FT.SEARCH tag query without escaping",
18
+ "Escape Valkey/Redis FT.SEARCH special characters in tag filter values.",
19
+ ),
20
+ # Azure: f"{key} eq '{value}'"
21
+ (
22
+ re.compile(r'''f['"].*\{.*\}\s+eq\s+['\"]?\{.*\}'''),
23
+ "User-controlled value interpolated into OData filter expression",
24
+ "Use parameterized filters or escape single quotes in values.",
25
+ ),
26
+ # Neptune: f'{{equals:{{property: \'{k}\', value: \'{v}\'}}}}'
27
+ (
28
+ re.compile(r'''f['"].*equals.*property.*\{.*\}.*value.*\{.*\}'''),
29
+ "User-controlled value interpolated into graph query filter",
30
+ "Use parameterized queries instead of string interpolation.",
31
+ ),
32
+ # Upstash: f"{k} = {self._stringify(v)}"
33
+ (
34
+ re.compile(r'''f['"].*\{.*\}\s*=\s*\{.*stringify.*\}'''),
35
+ "User-controlled value passed through stringify into filter expression",
36
+ "Validate filter values are safe primitives before interpolation.",
37
+ ),
38
+ # Generic: any f-string building a filter with user value interpolation
39
+ (
40
+ re.compile(r'''f['"].*filter.*\{.*value.*\}|f['"].*\{.*key.*\}.*\{.*value.*\}'''),
41
+ "Possible filter expression injection via f-string interpolation",
42
+ "Validate and escape user-controlled values before building filter expressions.",
43
+ ),
44
+ ]
45
+
46
+
47
class FilterInjectionScanner(BaseScanner):
    """Report lines that match one of the filter-expression interpolation patterns."""

    @property
    def name(self) -> str:
        return "Filter Expression Injection"

    @property
    def category(self) -> str:
        return "filter-injection"

    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
        """Return one HIGH finding per non-comment line that matches a pattern.

        Args:
            file_path: Path recorded on each finding.
            content: Full file text (unused; matching is per line).
            lines: File lines; line numbers are reported 1-based.

        Returns:
            Findings in line order. Only the first matching pattern is
            reported for any given line.
        """
        findings = []
        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue
            for pattern, desc, remediation in _PATTERNS:
                if pattern.search(stripped):
                    # Use the first sentence as the title. Split on ". " rather
                    # than "." so names such as "FT.SEARCH" are not cut in half.
                    headline = desc.split(". ")[0].rstrip(".")
                    findings.append(Finding(
                        id="",
                        severity="HIGH",
                        category=self.category,
                        title=f"Filter injection: {headline}",
                        file_path=file_path,
                        line_number=i,
                        code_snippet=stripped,
                        description=desc,
                        remediation=remediation,
                        cwe_id="CWE-943",
                    ))
                    break
        return findings
@@ -0,0 +1,68 @@
1
+ import re
2
+
3
+ from ragguard.finding import Finding
4
+ from ragguard.scanners.base import BaseScanner
5
+
6
# Each entry: (compiled pattern, description, remediation, CWE id).
_PATTERNS = [
    # MongoDB: {"payload." + key: value} or d["payload." + key] = value, where
    # value comes from a filters dict without validation. The subscript form
    # requires a single "=" so that "==" comparisons are not reported.
    (
        re.compile(r'''["']payload\.\s*["']\s*\+\s*\w+\s*(?::|\]\s*=(?!=))\s*\w+'''),
        "Filter value passed directly into MongoDB query without type validation",
        "Reject dict values that could contain MongoDB operators ($ne, $gt, $regex).",
        "CWE-943",
    ),
    # Elasticsearch/OpenSearch: {"term": {f"metadata.{key}": value}}
    (
        re.compile(r'''["']term["']\s*:\s*\{.*f["'].*\{.*\}.*["']\s*:\s*\w+\s*\}'''),
        "Filter value passed directly into Elasticsearch term query",
        "Validate that filter values are scalars, not nested query objects.",
        "CWE-943",
    ),
    # Generic: a dict appended to a list of query conditions with a
    # "payload." / "metadata." key
    (
        re.compile(r'''\.append\(\{.*["']payload\.|\.append\(\{.*["']metadata\.'''),
        "User-controlled value appended to query filter conditions without validation",
        "Validate filter values are scalars before constructing query conditions.",
        "CWE-943",
    ),
]
29
+
30
+ # Check if a validation function exists nearby
31
+ _VALIDATION_PATTERN = re.compile(r"_validate_filter|_sanitize|_escape|isinstance.*dict.*raise")
32
+
33
+
34
class NoSQLInjectionScanner(BaseScanner):
    """Report lines matching a NoSQL query-construction pattern, unless the file shows validation."""

    @property
    def name(self) -> str:
        return "NoSQL Operator Injection"

    @property
    def category(self) -> str:
        return "nosql-injection"

    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
        """Return one HIGH finding per matching line; none if the file contains a validation marker."""
        # A validation/sanitising helper anywhere in the file suppresses every finding.
        if _VALIDATION_PATTERN.search(content):
            return []

        hits = []
        for line_no, raw in enumerate(lines, start=1):
            text = raw.strip()
            if text == "" or text.startswith("#"):
                continue

            # Only the first pattern that matches the line is reported.
            matched = next((entry for entry in _PATTERNS if entry[0].search(text)), None)
            if matched is None:
                continue

            _, desc, remediation, cwe = matched
            hits.append(
                Finding(
                    id="",
                    severity="HIGH",
                    category=self.category,
                    title=f"NoSQL injection: {desc.split('.')[0]}",
                    file_path=file_path,
                    line_number=line_no,
                    code_snippet=text,
                    description=desc,
                    remediation=remediation,
                    cwe_id=cwe,
                )
            )
        return hits
@@ -0,0 +1,80 @@
1
+ import re
2
+
3
+ from ragguard.finding import Finding
4
+ from ragguard.scanners.base import BaseScanner
5
+
6
+ _PATTERNS = [
7
+ # pickle.load / pickle.loads on untrusted data
8
+ (
9
+ re.compile(r"pickle\.loads?\("),
10
+ "Deserialization of untrusted data via pickle",
11
+ "Use safe deserialization formats (JSON, msgpack) instead of pickle for untrusted data.",
12
+ "CWE-502",
13
+ "HIGH",
14
+ ),
15
+ # zipfile without size limit
16
+ (
17
+ re.compile(r"zipfile\.ZipFile\("),
18
+ "ZIP file extraction without apparent size validation",
19
+ "Check uncompressed sizes before extraction to prevent zip bomb attacks.",
20
+ "CWE-409",
21
+ "MEDIUM",
22
+ ),
23
+ # tarfile.open
24
+ (
25
+ re.compile(r"tarfile\.open\("),
26
+ "TAR file extraction (potential path traversal and zip bomb)",
27
+ "Validate member paths and sizes before extraction. Use data_filter on Python 3.12+.",
28
+ "CWE-409",
29
+ "MEDIUM",
30
+ ),
31
+ # file.read() without size limit (in upload/import contexts)
32
+ (
33
+ re.compile(r"\.read\(\s*\)"),
34
+ "Unbounded file read (no size limit)",
35
+ "Pass a max size argument to .read(max_bytes) to prevent memory exhaustion.",
36
+ "CWE-400",
37
+ "LOW",
38
+ ),
39
+ # eval() or exec() calls
40
+ (
41
+ re.compile(r"(?<!\.)\beval\s*\(|(?<!\.)\bexec\s*\("),
42
+ "Dynamic code execution via eval/exec",
43
+ "Avoid eval/exec on user-controlled input. Use ast.literal_eval for safe parsing.",
44
+ "CWE-95",
45
+ "HIGH",
46
+ ),
47
+ ]
48
+
49
+
50
class ResourceSafetyScanner(BaseScanner):
    """Report lines matching a resource-safety pattern (pickle, archives, unbounded reads, eval/exec)."""

    @property
    def name(self) -> str:
        return "Resource Safety"

    @property
    def category(self) -> str:
        return "resource-safety"

    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
        """Return one finding per non-comment line, using the first pattern that matches it."""
        hits = []
        for line_no, raw in enumerate(lines, start=1):
            text = raw.strip()
            if text == "" or text.startswith("#"):
                continue

            matched = next((entry for entry in _PATTERNS if entry[0].search(text)), None)
            if matched is None:
                continue

            _, desc, remediation, cwe, severity = matched
            # The title is the description up to its first parenthesis.
            headline = desc.split("(")[0].strip()
            hits.append(
                Finding(
                    id="",
                    severity=severity,
                    category=self.category,
                    title=f"Resource safety: {headline}",
                    file_path=file_path,
                    line_number=line_no,
                    code_snippet=text,
                    description=desc,
                    remediation=remediation,
                    cwe_id=cwe,
                )
            )
        return hits
@@ -0,0 +1,65 @@
1
+ import re
2
+
3
+ from ragguard.finding import Finding
4
+ from ragguard.scanners.base import BaseScanner
5
+
6
+ _SECRET_VARS = re.compile(
7
+ r"\b\w*(?:password|secret|api_key|api_secret|token|private_key|credentials"
8
+ r"|mongo_uri|valkey_url|redis_url|connection_string|conn_str|service_account_json)\b",
9
+ re.IGNORECASE,
10
+ )
11
+
12
+ _LOGGER_CALL = re.compile(r"logger\.\w+\(")
13
+
14
+ # Connection strings with embedded credentials: scheme://user:pass@host
15
+ _CONN_STRING_IN_FSTRING = re.compile(r'''f["'].*://.*\{.*\}.*@|f["'].*\{.*url.*\}''', re.IGNORECASE)
16
+
17
+
18
class SecretLoggingScanner(BaseScanner):
    """Report logger calls that mention a secret-like name or a connection URL f-string."""

    @property
    def name(self) -> str:
        return "Secret Logging"

    @property
    def category(self) -> str:
        return "secret-logging"

    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
        """Return at most one MEDIUM finding per line that contains a ``logger.<method>(`` call."""
        hits = []
        for line_no, raw in enumerate(lines, start=1):
            text = raw.strip()
            # Only non-comment lines with a logger call are of interest.
            if text == "" or text.startswith("#") or not _LOGGER_CALL.search(text):
                continue

            secret_hit = _SECRET_VARS.search(text)
            if secret_hit is not None:
                # A secret-like name takes precedence over the connection-string check.
                var_name = secret_hit.group(0)
                hits.append(
                    Finding(
                        id="",
                        severity="MEDIUM",
                        category=self.category,
                        title=f"Possible secret '{var_name}' in log output",
                        file_path=file_path,
                        line_number=line_no,
                        code_snippet=text,
                        description=f"Logger call references variable '{var_name}' which may contain sensitive data.",
                        remediation="Mask or omit secrets from log messages. Use a redaction utility.",
                        cwe_id="CWE-532",
                    )
                )
                continue

            if _CONN_STRING_IN_FSTRING.search(text):
                hits.append(
                    Finding(
                        id="",
                        severity="MEDIUM",
                        category=self.category,
                        title="Connection string with credentials in log output",
                        file_path=file_path,
                        line_number=line_no,
                        code_snippet=text,
                        description="Logger call includes a connection URL that may contain embedded credentials.",
                        remediation="Redact credentials from connection strings before logging.",
                        cwe_id="CWE-532",
                    )
                )
        return hits
@@ -0,0 +1,80 @@
1
+ import re
2
+
3
+ from ragguard.finding import Finding
4
+ from ragguard.scanners.base import BaseScanner
5
+
6
+ _PATTERNS = [
7
+ # f"INSERT INTO {table} ... VALUES {values}"
8
+ (
9
+ re.compile(r'''f["'].*INSERT\s+INTO\s+.*\{.*\}.*VALUES\s+.*\{.*\}''', re.IGNORECASE),
10
+ "SQL INSERT built via f-string with interpolated values",
11
+ "Use parameterized queries with placeholders instead of f-string interpolation.",
12
+ "CWE-89",
13
+ ),
14
+ # f"DELETE FROM {table} WHERE ..."
15
+ (
16
+ re.compile(r'''f["'].*DELETE\s+FROM\s+.*\{.*\}.*WHERE.*\{.*\}''', re.IGNORECASE),
17
+ "SQL DELETE built via f-string with interpolated values",
18
+ "Use parameterized queries with placeholders instead of f-string interpolation.",
19
+ "CWE-89",
20
+ ),
21
+ # f"SELECT ... FROM {table} WHERE {condition}"
22
+ (
23
+ re.compile(r'''f["'].*SELECT\s+.*FROM\s+.*\{.*\}.*WHERE.*\{.*\}''', re.IGNORECASE),
24
+ "SQL SELECT built via f-string with interpolated values",
25
+ "Use parameterized queries with placeholders instead of f-string interpolation.",
26
+ "CWE-89",
27
+ ),
28
+ # execute(f"...") pattern
29
+ (
30
+ re.compile(r'''\.execute\(\s*f["']'''),
31
+ "SQL query executed via f-string interpolation",
32
+ "Use parameterized queries (execute with %s or ? placeholders).",
33
+ "CWE-89",
34
+ ),
35
+ # f"... SET {col} = {val}" (UPDATE)
36
+ (
37
+ re.compile(r'''f["'].*SET\s+.*\{.*\}\s*=\s*\{.*\}''', re.IGNORECASE),
38
+ "SQL SET clause built via f-string with interpolated values",
39
+ "Use parameterized queries with placeholders instead of f-string interpolation.",
40
+ "CWE-89",
41
+ ),
42
+ ]
43
+
44
+
45
+ _PARAMETERIZED_RE = re.compile(r"%s|\?")
46
+
47
+
48
class SQLInjectionScanner(BaseScanner):
    """Report f-string SQL construction on lines that show no placeholder marker."""

    @property
    def name(self) -> str:
        return "SQL Injection"

    @property
    def category(self) -> str:
        return "sql-injection"

    def scan_file(self, file_path: str, content: str, lines: list[str]) -> list[Finding]:
        """Return one HIGH finding per matching line that contains neither ``%s`` nor ``?``."""
        hits = []
        for line_no, raw in enumerate(lines, start=1):
            text = raw.strip()
            if text == "" or text.startswith("#"):
                continue

            # The first matching pattern decides the description.
            matched = next((entry for entry in _PATTERNS if entry[0].search(text)), None)
            if matched is None:
                continue
            # A "%s" or "?" on the line is treated as a sign of parameterization,
            # which suppresses the finding whichever pattern matched.
            if _PARAMETERIZED_RE.search(text):
                continue

            _, desc, remediation, cwe = matched
            hits.append(
                Finding(
                    id="",
                    severity="HIGH",
                    category=self.category,
                    title=f"SQL injection: {desc.split('.')[0]}",
                    file_path=file_path,
                    line_number=line_no,
                    code_snippet=text,
                    description=desc,
                    remediation=remediation,
                    cwe_id=cwe,
                )
            )
        return hits
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: ragsec
3
+ Version: 0.1.0
4
+ Summary: Static security scanner for RAG pipelines
5
+ Author-email: Hrushikesh Yadav <yadavhrushikesh65@gmail.com>
6
+ License-Expression: Apache-2.0
7
+ Keywords: rag,scanner,security,static-analysis,vector-store
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: Security
11
+ Classifier: Topic :: Software Development :: Quality Assurance
12
+ Requires-Python: >=3.10
13
+ Requires-Dist: click>=8.0
14
+ Requires-Dist: jinja2>=3.0
15
+ Requires-Dist: rich>=13.0
16
+ Description-Content-Type: text/markdown
17
+
18
+ # RAGGuard
19
+
20
+ Static security scanner for RAG pipelines. Finds injection vulnerabilities, secret logging, auth gaps, and resource safety issues in Python codebases.
21
+
22
+ Built from real-world security audits of production RAG frameworks.
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ pip install ragsec
28
+ ```
29
+
30
+ Or from source:
31
+
32
+ ```bash
33
+ git clone https://github.com/HrushiYadav/ragGuard.git
34
+ cd ragGuard
35
+ pip install -e .
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ ```bash
41
+ # Terminal output (default)
42
+ ragguard scan ./path/to/codebase
43
+
44
+ # Generate reports
45
+ ragguard scan ./path/to/codebase --output report.md --format markdown
46
+ ragguard scan ./path/to/codebase --output report.html --format html
47
+
48
+ # Filter by severity or category
49
+ ragguard scan ./path/to/codebase --severity high
50
+ ragguard scan ./path/to/codebase --category filter-injection
51
+ ```
52
+
53
+ ## What it detects
54
+
55
+ | Scanner | Severity | What it finds |
56
+ |---------|----------|---------------|
57
+ | Filter Injection | HIGH | f-string interpolation in Milvus, Valkey, Azure, Elasticsearch filter expressions |
58
+ | NoSQL Injection | HIGH | Unvalidated dict values in MongoDB/Elasticsearch queries |
59
+ | SQL Injection | HIGH | f-string SQL construction (INSERT, DELETE, SELECT, UPDATE) |
60
+ | Secret Logging | MEDIUM | API keys, passwords, connection strings in logger calls |
61
+ | Auth Gaps | MEDIUM | FastAPI/Flask routes without auth, client-controlled user IDs (IDOR) |
62
+ | Resource Safety | HIGH/MEDIUM/LOW | pickle deserialization, zip bombs, eval/exec, unbounded reads |
63
+
64
+ ## Example output
65
+
66
+ ```
67
+ RAGGuard scanning ./my-rag-app
68
+
69
+ RG-001 [HIGH] Filter injection: Possible filter expression injection
70
+ vector_stores/store.py:42
71
+ > conditions.append(f'(metadata["{key}"] == "{value}")')
72
+
73
+ RG-002 [HIGH] NoSQL injection: Filter value passed into query
74
+ vector_stores/mongo.py:89
75
+ > filter_dict["payload." + key] = value
76
+
77
+ Summary
78
+ +------------------+
79
+ | Severity | Count |
80
+ |----------+-------|
81
+ | HIGH | 5 |
82
+ | MEDIUM | 8 |
83
+ | LOW | 3 |
84
+ | Total | 16 |
85
+ +------------------+
86
+ ```
87
+
88
+ ## HTML Report
89
+
90
+ Generate a styled HTML report for sharing:
91
+
92
+ ```bash
93
+ ragguard scan ./my-rag-app --output report.html --format html
94
+ ```
95
+
96
+ Dark theme with severity badges, code snippets, and remediation guidance.
97
+
98
+ ## Development
99
+
100
+ ```bash
101
+ pip install -e .
102
+ pytest tests/ -v
103
+ ruff check ragguard/
104
+ ```
105
+
106
+ ## License
107
+
108
+ Apache-2.0
@@ -0,0 +1,20 @@
1
+ ragguard/__init__.py,sha256=rviieV-F0UzVQ7RrWeYP6Q2imo_u4-sEud6VbZWmRyY,84
2
+ ragguard/cli.py,sha256=RitCClCIPUYHIi0poITGLKyCQ2XraVhfMHCv9_k-7KE,3301
3
+ ragguard/engine.py,sha256=suQgiALPXpmUPemOl-PFIRq-9h3R74CCzRll_HsdvyE,1634
4
+ ragguard/finding.py,sha256=o78W4dHOGax8XBQvCVSC04XxzBiNzD8mweKcKwAyTMg,259
5
+ ragguard/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ ragguard/report/html.py,sha256=AHJqzYM8NvZNpRPOaJcfnd1VfAMjNR9yXej8kLF-xx8,946
7
+ ragguard/report/markdown.py,sha256=v2O0aEkgW-a1iMfrAzA0N5V-eZ9FF8yxOZHk1DvX7JU,2053
8
+ ragguard/report/template.html,sha256=lEgtl2ZBHJCd5j8sdCCHsYMHzQw88q2Taph3wanxuts,4774
9
+ ragguard/scanners/__init__.py,sha256=1lcO2Ki5jDCbMzXk0MgsJ046Wu1oKK-Q8lK5B6EvbA4,767
10
+ ragguard/scanners/auth_gaps.py,sha256=rnby9mEpdeuDhtAWTmkDYZZADcd0CiLdFtVDzz6Wiyg,2986
11
+ ragguard/scanners/base.py,sha256=OuVTehXuv-RZ7VAw-7VzWU8dTcLmSbbz2_RfNpkEYHw,350
12
+ ragguard/scanners/filter_injection.py,sha256=xpkz5DEaLjy1KWE5Jcju3y2MWIWB_gWoI15Kt7_YZms,3128
13
+ ragguard/scanners/nosql_injection.py,sha256=e0pXFXO0wCVPTJHxc9-sOb_Q7J1FQwOU62RybRsnmP0,2598
14
+ ragguard/scanners/resource_safety.py,sha256=zNR1AC7nG1-dYHsMlc0ndf31gD2cKxfjzdfGxGTM19w,2682
15
+ ragguard/scanners/secret_logging.py,sha256=MvomXuTiYmVrfcZggpepY_wG7t4U3VdkBp15v9igUR4,2566
16
+ ragguard/scanners/sql_injection.py,sha256=-7MQe5rEq7YfXlZtc68hTYIBNGefpn6I3S4VqPz3oL4,2911
17
+ ragsec-0.1.0.dist-info/METADATA,sha256=xUYvPHYOnXVKD38UZlZZ-eU2sMmW8CJipoQaMpyJ3Tc,2870
18
+ ragsec-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
19
+ ragsec-0.1.0.dist-info/entry_points.txt,sha256=oYwCjXfUIlPsH8vqc7J29sfcIgsZtWKGUQ6JpvxqtqQ,47
20
+ ragsec-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ ragguard = ragguard.cli:main