refutescan 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- refutescan/__init__.py +38 -0
- refutescan/adapters/__init__.py +11 -0
- refutescan/adapters/openai_chat.py +51 -0
- refutescan/cli.py +116 -0
- refutescan/codemap.py +84 -0
- refutescan/fileaccess.py +142 -0
- refutescan/models.py +91 -0
- refutescan/providers.py +32 -0
- refutescan/safety.py +89 -0
- refutescan/sandbox/Dockerfile +26 -0
- refutescan/sandbox/__init__.py +10 -0
- refutescan/sandbox/build.sh +11 -0
- refutescan/sandbox/docker.py +146 -0
- refutescan/sandbox/toolrunner.py +214 -0
- refutescan/scanner.py +462 -0
- refutescan/vulns.py +29 -0
- refutescan-0.1.0.dist-info/METADATA +132 -0
- refutescan-0.1.0.dist-info/RECORD +22 -0
- refutescan-0.1.0.dist-info/WHEEL +5 -0
- refutescan-0.1.0.dist-info/entry_points.txt +3 -0
- refutescan-0.1.0.dist-info/licenses/LICENSE +21 -0
- refutescan-0.1.0.dist-info/top_level.txt +1 -0
refutescan/__init__.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""refutescan — a two-LLM, refute-first agentic source-code vulnerability scanner.
|
|
2
|
+
|
|
3
|
+
A fast model NAVIGATES the repo with read-only tools and casts a wide net of
|
|
4
|
+
candidate findings; a stronger model then REFUTES each one against the real code
|
|
5
|
+
slice before it surfaces — so you get the recall of an agentic scanner without
|
|
6
|
+
the false-positive flood. Scans run jailed in an ephemeral docker sandbox
|
|
7
|
+
(clone-in-container, no network, read-only, non-root) by default. Bring your own
|
|
8
|
+
LLMs.
|
|
9
|
+
|
|
10
|
+
from refutescan import scan
|
|
11
|
+
from refutescan.adapters import openai_navigator_factory, openai_judge_factory
|
|
12
|
+
|
|
13
|
+
result = scan(
|
|
14
|
+
"https://github.com/owner/repo",
|
|
15
|
+
navigator_factory=openai_navigator_factory(),
|
|
16
|
+
judge_factory=openai_judge_factory(),
|
|
17
|
+
)
|
|
18
|
+
for f in result.findings:
|
|
19
|
+
print(f["severity"], f["title"], f["file"], f["line"])
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from .models import ScanConfig, ScanResult, Verdict
|
|
23
|
+
from .scanner import derive_title, looks_like_git_url, scan
|
|
24
|
+
from .vulns import SEVERITIES, VULN_CLASSES
|
|
25
|
+
|
|
26
|
+
# Version string reported by ``refutescan --version``.
__version__ = "0.1.0"

# Public API of the package: the scan entry point, its data shapes, the
# vulnerability vocab, and two small helpers re-exported from ``scanner``.
__all__ = [
    "scan",
    "ScanConfig", "ScanResult", "Verdict",
    "VULN_CLASSES", "SEVERITIES",
    "looks_like_git_url", "derive_title",
    "__version__",
]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Ready-made model factories for common providers.
|
|
2
|
+
|
|
3
|
+
These are thin conveniences over LangChain chat models so you don't have to wire
|
|
4
|
+
the injection seam yourself. Import the one you want; each needs its provider
|
|
5
|
+
extra installed (e.g. ``pip install refutescan[openai]``). Bring your own by
|
|
6
|
+
passing any LangChain-style chat model — see ``refutescan.providers``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .openai_chat import openai_judge_factory, openai_navigator_factory
|
|
10
|
+
|
|
11
|
+
# Factories re-exported from ``openai_chat`` as this subpackage's public API.
__all__ = [
    "openai_navigator_factory",
    "openai_judge_factory",
]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""OpenAI (or any OpenAI-compatible endpoint) model factories.
|
|
2
|
+
|
|
3
|
+
Requires ``langchain-openai`` (the ``openai`` extra). Set ``OPENAI_API_KEY``, or
|
|
4
|
+
pass ``base_url`` to point at a compatible gateway (vLLM, Ollama's OpenAI shim,
|
|
5
|
+
Azure OpenAI, a local proxy, …).
|
|
6
|
+
|
|
7
|
+
from refutescan import scan
|
|
8
|
+
from refutescan.adapters import openai_navigator_factory, openai_judge_factory
|
|
9
|
+
|
|
10
|
+
result = scan(
|
|
11
|
+
"https://github.com/owner/repo",
|
|
12
|
+
navigator_factory=openai_navigator_factory("gpt-4o-mini"),
|
|
13
|
+
judge_factory=openai_judge_factory("gpt-4o"),
|
|
14
|
+
)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from ..providers import JudgeFactory, NavigatorFactory
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _chat(model: str, **kwargs: Any):
|
|
25
|
+
try:
|
|
26
|
+
from langchain_openai import ChatOpenAI
|
|
27
|
+
except ImportError as e: # pragma: no cover
|
|
28
|
+
raise ImportError(
|
|
29
|
+
"refutescan's OpenAI adapter needs langchain-openai. "
|
|
30
|
+
"Install it with: pip install 'refutescan[openai]'"
|
|
31
|
+
) from e
|
|
32
|
+
return ChatOpenAI(model=model, temperature=0, **kwargs)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def openai_navigator_factory(model: str = "gpt-4o-mini", **kwargs: Any) -> NavigatorFactory:
    """Return a zero-argument factory for the navigator (wide-net pass) model.

    Nothing is constructed here: each call of the returned factory builds a
    chat model via ``_chat(model, **kwargs)``.
    """
    return lambda: _chat(model, **kwargs)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def openai_judge_factory(model: str = "gpt-4o", **kwargs: Any) -> JudgeFactory:
    """Return a zero-argument factory for the judge (refute pass) callable.

    Calling the returned factory builds one chat model via
    ``_chat(model, **kwargs)`` and hands back ``judge(prompt, schema)``, which
    binds ``schema`` with ``with_structured_output`` and invokes the model on
    ``prompt``.
    """
    def build_judge():
        chat_model = _chat(model, **kwargs)

        def run_judge(prompt: str, schema: type):
            # Bind the schema per call so one model can serve any schema.
            structured = chat_model.with_structured_output(schema)
            return structured.invoke(prompt)

        return run_judge

    return build_judge
|
refutescan/cli.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Command-line entry points.
|
|
2
|
+
|
|
3
|
+
refutescan <path-or-git-url> [options] — run a scan, print findings
|
|
4
|
+
refutescan-build-sandbox — build the docker jail image
|
|
5
|
+
|
|
6
|
+
The scan CLI uses the OpenAI adapter by default (needs the ``openai`` extra and
|
|
7
|
+
OPENAI_API_KEY, or --base-url for a compatible gateway). For other providers,
|
|
8
|
+
call ``refutescan.scan`` directly with your own factories.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import argparse
|
|
14
|
+
import subprocess
|
|
15
|
+
import sys
|
|
16
|
+
|
|
17
|
+
from . import __version__
|
|
18
|
+
from .models import ScanConfig
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _sev_marker(sev: str) -> str:
|
|
22
|
+
return {"critical": "[CRIT]", "high": "[HIGH]", "medium": "[MED ]",
|
|
23
|
+
"low": "[LOW ]", "info": "[INFO]"}.get(sev, "[????]")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def main(argv=None) -> int:
    """Run the ``refutescan`` scan CLI and return the process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when the scan confirmed no findings, ``1`` when findings exist
        or the scan raised, ``2`` when the OpenAI adapter failed to import.
    """
    parser = argparse.ArgumentParser(
        prog="refutescan",
        description="Two-LLM, refute-first agentic source-code vulnerability scanner.",
    )
    parser.add_argument("source", help="a local directory path or a git URL to clone")
    parser.add_argument("--branch", default="", help="git branch to clone (git sources only)")
    parser.add_argument(
        "--sandbox",
        default="auto",
        choices=["auto", "docker", "inprocess"],
        help="isolation backend (default: auto)",
    )
    parser.add_argument(
        "--navigator-model",
        default="gpt-4o-mini",
        help="navigator (wide-net) model (default: gpt-4o-mini)",
    )
    parser.add_argument(
        "--judge-model",
        default="gpt-4o",
        help="judge (refute) model (default: gpt-4o)",
    )
    parser.add_argument(
        "--base-url",
        default=None,
        help="OpenAI-compatible base URL (vLLM, Azure, local proxy, …)",
    )
    parser.add_argument("--json", action="store_true", help="emit the full result as JSON")
    parser.add_argument("--version", action="version", version=f"refutescan {__version__}")
    opts = parser.parse_args(argv)

    # Imported after argument parsing so --help/--version work even when the
    # adapter package cannot be imported.
    try:
        from .adapters import openai_judge_factory, openai_navigator_factory
    except Exception as e:  # pragma: no cover
        print(f"error: {e}", file=sys.stderr)
        return 2

    # Forward base_url only when given, so the adapter default applies otherwise.
    adapter_kwargs = {}
    if opts.base_url:
        adapter_kwargs["base_url"] = opts.base_url
    from . import scan

    def _progress(phase: str) -> None:
        # Progress goes to stderr and is silenced in --json mode.
        if opts.json:
            return
        print(f" … {phase}", file=sys.stderr)

    try:
        result = scan(
            opts.source,
            navigator_factory=openai_navigator_factory(opts.navigator_model, **adapter_kwargs),
            judge_factory=openai_judge_factory(opts.judge_model, **adapter_kwargs),
            branch=opts.branch,
            config=ScanConfig(sandbox=opts.sandbox),
            progress=_progress,
        )
    except Exception as e:
        print(f"scan failed: {type(e).__name__}: {e}", file=sys.stderr)
        return 1

    if opts.json:
        print(result.model_dump_json(indent=2))
        return 1 if result.findings else 0

    stats = result.summary
    print(f"\nScanned {stats.get('files_scanned')} files / {stats.get('total_loc')} LOC"
          f" · {'sandboxed' if result.sandboxed else 'in-process'}")
    print(f"{result.candidate_count} candidate(s) → {len(result.findings)} confirmed, "
          f"{len(result.culled)} culled\n")

    if not result.findings:
        print("No confirmed findings.")
        return 0

    for finding in result.findings:
        confidence = finding.get("confidence")
        if isinstance(confidence, (int, float)):
            conf_suffix = f" (conf {confidence:.2f})"
        else:
            conf_suffix = ""
        print(f"{_sev_marker(finding.get('severity', ''))} {finding.get('title')}{conf_suffix}")
        print(f" {finding.get('file')}:{finding.get('line')} [{finding.get('vuln_class')}]")
        if finding.get("reasoning"):
            print(f" {finding['reasoning']}")
        print()
    return 1  # non-zero exit when findings exist (CI-friendly)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def build_sandbox(argv=None) -> int:
    """Build the docker sandbox image (the ``refutescan-build-sandbox`` command).

    Runs ``docker build -t <image> <sandbox dir>``, where the directory comes
    from ``sandbox_dir()``.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` on success, ``2`` when the ``docker`` executable is not found,
        otherwise the exit status of the failed ``docker build``.
    """
    from .sandbox.docker import sandbox_dir

    parser = argparse.ArgumentParser(
        prog="refutescan-build-sandbox",
        description="Build the refutescan docker sandbox image.",
    )
    parser.add_argument("--image", default="refutescan-sandbox:current", help="image tag to build")
    opts = parser.parse_args(argv)

    context_dir = sandbox_dir()
    print(f"Building {opts.image} from {context_dir} ...")
    try:
        # Argument list (no shell), so the image tag is never shell-interpreted.
        subprocess.run(["docker", "build", "-t", opts.image, str(context_dir)], check=True)
    except FileNotFoundError:
        print("error: docker not found on PATH", file=sys.stderr)
        return 2
    except subprocess.CalledProcessError as e:
        print(f"docker build failed ({e.returncode})", file=sys.stderr)
        return e.returncode

    print(f"Done. Image: {opts.image}")
    return 0
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Script entry point: exit with main()'s return value as the status code.
if __name__ == "__main__":
    raise SystemExit(main())
|
refutescan/codemap.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Deterministic, LLM-free repository map.
|
|
2
|
+
|
|
3
|
+
A single os.walk that classifies files (source / manifest / entrypoint), counts
|
|
4
|
+
LOC, and returns the relative source-file list the navigator will reach. Cheap,
|
|
5
|
+
reproducible, and the same logic that runs inside the sandbox toolrunner.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Dict, List, Tuple
|
|
13
|
+
|
|
14
|
+
# Source file extensions worth scanning. Anything else (assets, lockfiles,
# binaries, minified bundles) is counted as context, not handed to the LLM.
SOURCE_EXTS = {
    ".py", ".js", ".jsx", ".ts", ".tsx", ".java", ".go", ".rb", ".php", ".cs",
    ".c", ".cc", ".cpp", ".h", ".hpp", ".rs", ".kt", ".scala", ".swift", ".sh",
    ".bash", ".pl", ".pm", ".sql", ".html", ".vue", ".lua", ".groovy", ".tf",
}
# Directories never worth walking — vendored / generated / VCS metadata.
SKIP_DIRS = {
    ".git", ".hg", ".svn", "node_modules", "vendor", "venv", ".venv", "env",
    "__pycache__", ".mypy_cache", ".pytest_cache", "dist", "build", ".next",
    "site-packages", "target", ".idea", ".vscode", ".gradle", "bin", "obj",
    "coverage", ".tox", ".cache", "bower_components",
}
# Dependency manifests we surface in the map (signal for what the app is).
MANIFESTS = {
    "requirements.txt", "pyproject.toml", "setup.py", "Pipfile", "package.json",
    "go.mod", "pom.xml", "build.gradle", "Gemfile", "composer.json", "Cargo.toml",
    "csproj",
}
# Entry-point filename hints (heuristic — where untrusted input often lands).
# All entries are lowercase; build_map compares against the lowercased name.
ENTRY_HINTS = {
    "app.py", "main.py", "manage.py", "wsgi.py", "asgi.py", "server.py",
    "index.js", "server.js", "app.js", "main.go", "main.rs", "index.php",
    "application.java",
}


def build_map(root: Path, max_map_files: int = 400,
              max_file_bytes: int = 200_000) -> Tuple[Dict[str, Any], List[str]]:
    """Walk the tree once and summarise it.

    Args:
        root: Directory to map; resolved before walking.
        max_map_files: Cap on the number of source files returned. Source
            files met after the cap is reached are neither listed nor counted
            towards ``languages`` / ``total_loc``.
        max_file_bytes: Files larger than this stay in the list but are left
            out of the LOC count.

    Returns:
        ``(code_map, source_files)``. ``source_files`` holds paths relative to
        ``root``. ``code_map`` carries ``root_name``, ``total_files``,
        ``source_files_scanned``, ``truncated``, ``total_loc``, ``languages``
        (extension -> count, most frequent first), ``manifests`` and
        ``entrypoints`` (each capped at 30 entries).
    """
    # NOTE(review): the module docstring says the sandbox toolrunner runs the
    # same logic — mirror these changes there if it keeps its own copy.
    root = root.resolve()
    langs: Dict[str, int] = {}
    manifests: List[str] = []
    entrypoints: List[str] = []
    source_files: List[str] = []
    total_files = 0
    total_loc = 0
    truncated = False

    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk never descends into skipped or hidden
        # directories. Sorting makes the traversal (and therefore which files
        # survive the cap) independent of filesystem enumeration order.
        dirnames[:] = sorted(
            d for d in dirnames if d not in SKIP_DIRS and not d.startswith(".")
        )
        for fn in sorted(filenames):
            total_files += 1
            ext = os.path.splitext(fn)[1].lower()
            full = Path(dirpath) / fn
            rel = str(full.relative_to(root))
            if fn in MANIFESTS or fn.endswith(".csproj"):
                manifests.append(rel)
            # Case-insensitive: the hint set is lowercase, while real files
            # such as ``Application.java`` are not.
            if fn.lower() in ENTRY_HINTS:
                entrypoints.append(rel)
            if ext not in SOURCE_EXTS:
                continue
            if len(source_files) >= max_map_files:
                # Only an actually dropped source file marks the map truncated.
                truncated = True
                continue
            source_files.append(rel)
            langs[ext] = langs.get(ext, 0) + 1
            try:
                if full.stat().st_size <= max_file_bytes:
                    with open(full, "r", errors="ignore") as fh:
                        total_loc += sum(1 for _ in fh)
            except OSError:
                # Best-effort LOC count: an unreadable file stays listed.
                pass

    code_map = {
        "root_name": root.name,
        "total_files": total_files,
        "source_files_scanned": len(source_files),
        "truncated": truncated,
        "total_loc": total_loc,
        "languages": dict(sorted(langs.items(), key=lambda kv: -kv[1])),
        "manifests": manifests[:30],
        "entrypoints": entrypoints[:30],
    }
    return code_map, source_files
|
refutescan/fileaccess.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""File access behind one interface, two backends.
|
|
2
|
+
|
|
3
|
+
The navigator's tools and the validator's slice re-reads go through a
|
|
4
|
+
``FileAccess`` object, so the same scan logic runs either in-process (guarded
|
|
5
|
+
host) or via the docker sandbox without branching all over the pipeline.
|
|
6
|
+
|
|
7
|
+
• ``LocalFileAccess`` — reads the host filesystem, confined by safe_path()
|
|
8
|
+
+ the secret denylist (defense-in-depth fallback).
|
|
9
|
+
• ``SandboxFileAccess`` — delegates every touch to the toolrunner inside the
|
|
10
|
+
locked-down container via the sandbox handle.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any, Dict, List, Tuple
|
|
18
|
+
|
|
19
|
+
from .codemap import SKIP_DIRS, build_map
|
|
20
|
+
from .models import ScanConfig
|
|
21
|
+
from .safety import safe_path
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class LocalFileAccess:
    """In-process file access on the host, confined by safe_path() + denylist.

    Every path argument is routed through ``safe_path(self.root, ...)`` before
    it is touched. The tool-style methods (``list_dir``, ``read_file``,
    ``grep``) report failures as ``"error: ..."`` strings instead of raising.
    """

    def __init__(self, root: Path, config: ScanConfig) -> None:
        """Store the resolved scan root and the bounds configuration."""
        self.root = root.resolve()
        self.cfg = config

    def build_map(self) -> Tuple[Dict[str, Any], List[str]]:
        """Return ``(code_map, source_files)`` for the root, using the config bounds."""
        return build_map(self.root, self.cfg.max_map_files, self.cfg.max_file_bytes)

    def list_dir(self, path: str = "") -> str:
        """List a directory as newline-separated names (directories end in ``/``).

        Names found in ``SKIP_DIRS`` are hidden and at most 300 entries are
        returned. An empty listing yields ``"(empty)"``.
        """
        try:
            p = safe_path(self.root, path)
        except Exception as e:
            return f"error: {e}"
        if not p.is_dir():
            return f"error: not a directory: {path}"
        try:
            children = sorted(p.iterdir())
        except OSError as e:
            # e.g. permission denied: report it like every other tool failure.
            return f"error: {e}"
        entries = [
            child.name + ("/" if child.is_dir() else "")
            for child in children
            if child.name not in SKIP_DIRS
        ]
        return "\n".join(entries[:300]) or "(empty)"

    def read_file(self, path: str, start_line: int = 1, max_lines: int = 200) -> str:
        """Return a line-numbered window of a file.

        Args:
            path: File path relative to the root.
            start_line: 1-based first line; values below 1 (or falsy) mean 1.
            max_lines: Requested line count; falsy means 200. The effective
                count is clamped to ``[0, cfg.max_read_lines]``.

        Returns:
            The numbered lines (cut at ``cfg.max_file_bytes`` characters),
            ``"(no lines in range)"`` when the window is empty, or an
            ``"error: ..."`` string on failure (including non-numeric
            ``start_line`` / ``max_lines``).
        """
        try:
            start = max(1, int(start_line or 1))
            # Clamp at zero: a negative count would turn the slice below into
            # lines[a:-k] and return nearly the whole file, bypassing
            # max_read_lines.
            n = max(0, min(int(max_lines or 200), self.cfg.max_read_lines))
            p = safe_path(self.root, path)
            with open(p, "r", errors="ignore") as fh:
                lines = fh.readlines()
        except Exception as e:
            return f"error: {e}"
        chunk = lines[start - 1: start - 1 + n]
        body = "".join(f"{start+i:>5} {ln.rstrip()}\n" for i, ln in enumerate(chunk))
        return body[:self.cfg.max_file_bytes] or "(no lines in range)"

    def grep(self, pattern: str, files: List[str], path_contains: str = "") -> str:
        """Regex-search the given files line by line.

        Args:
            pattern: Regular expression compiled with ``re.compile``.
            files: Relative paths to search.
            path_contains: When non-empty, only paths containing this
                substring are searched.

        Returns:
            ``rel:line: text`` hits joined by newlines (text cut at 200
            characters), ending in ``…(truncated)`` once ``cfg.max_grep_hits``
            is reached; ``"(no matches)"`` when nothing matched; or an
            ``"error: bad regex: ..."`` string.
        """
        try:
            rx = re.compile(pattern)
        except re.error as e:
            return f"error: bad regex: {e}"
        hits: List[str] = []
        for rel in files:
            if path_contains and path_contains not in rel:
                continue
            try:
                p = safe_path(self.root, rel)
                with open(p, "r", errors="ignore") as fh:
                    for i, ln in enumerate(fh, 1):
                        if rx.search(ln):
                            hits.append(f"{rel}:{i}: {ln.strip()[:200]}")
                            if len(hits) >= self.cfg.max_grep_hits:
                                return "\n".join(hits) + "\n…(truncated)"
            except Exception:
                # Deliberate best effort: a denied or unreadable file is
                # skipped so one bad path cannot abort the whole search.
                continue
        return "\n".join(hits) or "(no matches)"

    def read_slice(self, rel: str, line: int, ctx: int = 25) -> str:
        """Return numbered lines around ``line`` (``ctx`` lines on each side).

        With a falsy or non-positive ``line`` the first ``2 * ctx`` lines are
        returned instead. Output is cut at 8000 characters. Failures yield a
        ``"<could not read ...>"`` placeholder rather than raising.
        """
        try:
            p = safe_path(self.root, rel)
            with open(p, "r", errors="ignore") as fh:
                lines = fh.readlines()
        except Exception as e:
            return f"<could not read {rel}: {type(e).__name__}>"
        if line and line > 0:
            lo = max(0, line - ctx - 1)
            hi = min(len(lines), line + ctx)
        else:
            lo, hi = 0, min(len(lines), 2 * ctx)
        numbered = [f"{i+1:>5} {lines[i].rstrip()}" for i in range(lo, hi)]
        return "\n".join(numbered)[:8000]

    def stop(self) -> None:
        """No-op: the in-process backend holds nothing to release."""
        pass
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class SandboxFileAccess:
    """File access delegated to the docker sandbox via the toolrunner.

    Each method forwards to ``sandbox.call(tool_name, payload)`` and turns the
    reply dict (``"result"`` / ``"error"`` keys) into the same string contract
    the in-process backend uses.
    """

    def __init__(self, sandbox) -> None:
        """Keep the sandbox handle; it must offer ``call()`` and ``stop()``."""
        self.sb = sandbox

    @staticmethod
    def _as_text(reply) -> str:
        """Render a reply: a truthy ``error`` wins, else ``result`` (default ``""``)."""
        if reply.get("error"):
            return f"error: {reply['error']}"
        return reply.get("result", "")

    def _tool(self, name: str, payload: Dict[str, Any]) -> str:
        """Call one string-returning tool; transport failures become ``error: ...``."""
        try:
            reply = self.sb.call(name, payload)
        except Exception as e:
            return f"error: {e}"
        return self._as_text(reply)

    def build_map(self) -> Tuple[Dict[str, Any], List[str]]:
        """Return ``(map, files)`` from the sandbox; raise RuntimeError on an error reply."""
        reply = self.sb.call("build_map", {})
        if "error" not in reply:
            return reply["map"], reply["files"]
        raise RuntimeError(f"sandbox build_map: {reply['error']}")

    def list_dir(self, path: str = "") -> str:
        """List a directory inside the sandbox."""
        return self._tool("list_dir", {"path": path})

    def read_file(self, path: str, start_line: int = 1, max_lines: int = 200) -> str:
        """Read a window of a file inside the sandbox."""
        payload = {"path": path, "start_line": start_line, "max_lines": max_lines}
        return self._tool("read_file", payload)

    def grep(self, pattern: str, files: List[str], path_contains: str = "") -> str:
        """Regex-search files inside the sandbox."""
        payload = {"pattern": pattern, "files": files, "path_contains": path_contains}
        return self._tool("grep", payload)

    def read_slice(self, rel: str, line: int, ctx: int = 25) -> str:
        """Fetch the code slice around ``line``; never raises on call failure.

        A failed call or an empty/missing ``result`` yields a
        ``"<could not read ...>"`` placeholder.
        """
        try:
            reply = self.sb.call("read_slice", {"path": rel, "line": line, "ctx": ctx})
        except Exception as e:
            return f"<could not read {rel}: {type(e).__name__}>"
        text = reply.get("result")
        return text if text else f"<could not read {rel}>"

    def stop(self) -> None:
        """Stop the underlying sandbox."""
        self.sb.stop()
|
refutescan/models.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Typed data shapes for the scan pipeline.
|
|
2
|
+
|
|
3
|
+
``ScanConfig`` is a plain dataclass (bounds + isolation knobs). The finding/
|
|
4
|
+
verdict shapes are pydantic so the judge's structured-output call validates and
|
|
5
|
+
retries on a malformed model response. ``Verdict`` is the schema the kernel
|
|
6
|
+
hands to the injected judge; everything else is what the kernel returns.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, List, Literal
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ── Configuration ───────────────────────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
@dataclass
class ScanConfig:
    """Tunable limits and isolation knobs for one scan.

    The defaults target a typical web-app repository. Raising the bounds lets
    larger codebases be covered at the price of more LLM calls and wall-clock.
    """

    # --- Walk / read bounds (keep a scan from running away on a huge repo) ---
    # Source files the map tracks / the navigator can reach.
    max_map_files: int = 400
    # Per-file read ceiling.
    max_file_bytes: int = 200_000
    # Lines per read_file call.
    max_read_lines: int = 600
    # Hits per grep call.
    max_grep_hits: int = 80

    # --- Navigator (the wide-net pass) ---
    # Tool-loop turns.
    nav_max_iters: int = 40
    # Wall-clock budget in seconds.
    nav_deadline: float = 300.0
    # Candidates handed to the judge.
    max_candidates: int = 60

    # --- Validator (the refute-first pass) ---
    # Per-candidate judge deadline.
    judge_timeout: float = 60.0

    # --- Source preparation ---
    # git clone budget in seconds.
    clone_timeout: int = 120
    git_allowed_protocols: str = "http:https:ssh:git"

    # --- Isolation ---
    # "auto" (docker if available, else in-process), "docker" (force; error
    # if unavailable), or "inprocess" (force guarded host).
    sandbox: str = "auto"
    sandbox_image: str = "refutescan-sandbox:current"
    sandbox_uid: str = "10001"
    sandbox_pids: str = "256"
    sandbox_memory: str = "2g"
    sandbox_cpus: str = "2"
    docker_bin: str = "docker"
    # Per docker-exec tool call.
    exec_timeout: int = 90
    # Audit container sleep budget (> nav_deadline).
    container_ttl: int = 1200

    # In-process backend only: confine local-path scans to these roots. Empty
    # means unrestricted (the docker jail is the boundary). The secret
    # denylist is always on.
    allowed_roots: List[Path] = field(default_factory=list)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ── Judge I/O ───────────────────────────────────────────────────────────────────
|
|
62
|
+
|
|
63
|
+
class Verdict(BaseModel):
    """Structured answer the judge returns for a single candidate finding.

    This is the schema passed to the injected judge callable. The field
    descriptions are part of that schema, so they double as instructions.
    """

    is_real: bool = Field(
        description=(
            "True ONLY if the code shown demonstrates a real, "
            "exploitable vulnerability with a concrete untrusted "
            "source reaching a dangerous sink. Default to false when unsure."
        ),
    )
    reachable: bool = Field(
        description=(
            "True if untrusted input can actually reach this sink "
            "(not gated by validation/auth that neutralizes it)."
        ),
    )
    confidence: float = Field(
        description="0.0–1.0 confidence that this is a true positive.",
    )
    severity: Literal["critical", "high", "medium", "low", "info"] = Field(
        description="Impact severity if exploitable.",
    )
    reasoning: str = Field(
        description=(
            "2–4 sentences: the data flow you confirmed, or precisely "
            "why this is a false positive."
        ),
    )
    repro: str = Field(
        description="If real: concrete repro / exploitation steps. Else: empty.",
    )
    recommendation: str = Field(
        description="If real: the targeted fix. Else: empty.",
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ── Result shapes (returned to the caller) ──────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
class ScanResult(BaseModel):
    """The full output of one scan, handed back to the caller to persist or render."""

    # Repository map produced before navigation.
    code_map: Dict[str, Any]
    # Confirmed findings, sorted worst-first.
    findings: List[Dict[str, Any]]
    # Refuted / timed-out candidates, with reasons.
    culled: List[Dict[str, Any]]
    summary: Dict[str, Any]
    candidate_count: int
    sandboxed: bool
|
refutescan/providers.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""The model-injection seam — how a caller plugs its own LLMs into the scan.
|
|
2
|
+
|
|
3
|
+
refutescan is model-agnostic: it never imports a provider SDK in the core. The
|
|
4
|
+
caller passes two factories.
|
|
5
|
+
|
|
6
|
+
navigator_factory() -> a chat model
|
|
7
|
+
A LangChain-style chat model that supports ``.bind_tools(tools)`` and
|
|
8
|
+
``.invoke(messages)`` returning a message whose ``.tool_calls`` the loop
|
|
9
|
+
reads. This drives the wide-net pass. A fast/cheap tool-calling model is the
|
|
10
|
+
right fit (it is fine that it over-reports — the judge culls).
|
|
11
|
+
|
|
12
|
+
judge_factory() -> judge(prompt: str, schema: type[BaseModel]) -> BaseModel
|
|
13
|
+
A callable that runs ONE structured-output completion: given a prompt and a
|
|
14
|
+
pydantic model, return a validated instance. This is the refute-first pass;
|
|
15
|
+
a stronger model is the right fit. The kernel wraps each call in a hard
|
|
16
|
+
timeout, so the callable itself need not.
|
|
17
|
+
|
|
18
|
+
See ``refutescan.adapters`` for ready-made factories (e.g. OpenAI).
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from typing import Any, Callable
|
|
24
|
+
|
|
25
|
+
from pydantic import BaseModel
|
|
26
|
+
|
|
27
|
+
# Zero-argument factory producing a tool-capable chat model, i.e. one that
# supports bind_tools() and invoke().
NavigatorFactory = Callable[[], Any]

# The judge itself: called as judge(prompt, schema), returns a validated instance.
JudgeCallable = Callable[[str, type], BaseModel]

# Zero-argument factory producing a JudgeCallable.
JudgeFactory = Callable[[], JudgeCallable]
|
refutescan/safety.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Path-safety and secret-denylist guards shared by every file-access backend.
|
|
2
|
+
|
|
3
|
+
These are defense-in-depth. The real isolation boundary is the docker sandbox
|
|
4
|
+
(``--network none``, read-only, non-root, only the target mounted). But the
|
|
5
|
+
in-process backend reads files as the host user, so the same guards run there —
|
|
6
|
+
and they run *inside* the container too (see ``sandbox/toolrunner.py``), so a
|
|
7
|
+
single bypassed layer never exposes credentials.
|
|
8
|
+
|
|
9
|
+
The constants here are deliberately conservative defaults. ``toolrunner.py``
|
|
10
|
+
keeps its own copies because it ships into a minimal image with no refutescan
|
|
11
|
+
install; keep the two in sync.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import fnmatch
|
|
17
|
+
import os
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Iterable, List, Optional
|
|
20
|
+
|
|
21
|
+
# Directory names that are never entered or read from: credential stores plus
# VCS / runtime metadata.
SECRET_DIR_NAMES = {
    ".ssh",
    ".gnupg",
    ".aws",
    ".azure",
    ".kube",
    ".docker",
    ".secrets",
    "secrets",
    ".git",
}

# Basename patterns that are never read, even inside an allowed root.
SECRET_FILE_GLOBS = (
    # key / certificate containers
    "*.key", "*.pem", "*.age", "*.p12", "*.pfx", "*.keystore", "*.jks",
    # environment files
    ".env", ".env.*", "*.env",
    # SSH private keys
    "id_rsa*", "id_ed25519*", "id_ecdsa*", "id_dsa*",
    # tool credential files
    ".netrc", ".pgpass", ".htpasswd", "credentials",
    "*.secret", "*.secrets",
)

# Transports git may use when cloning — excludes ext::/fd:: (command execution).
GIT_ALLOWED_PROTOCOLS = "http:https:ssh:git"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def parse_roots(spec: Optional[str]) -> List[Path]:
    """Parse a colon-separated roots string into resolved Paths.

    Args:
        spec: Paths separated by ``:``, e.g. ``"~/code:/srv/repos"``.
            Whitespace around each entry is ignored and empty entries are
            dropped.

    Returns:
        One resolved, ``~``-expanded ``Path`` per entry; ``[]`` when ``spec``
        is falsy.
    """
    if not spec:
        return []
    # Strip before use, not only for the emptiness test: an entry such as
    # " /srv" would otherwise be treated as a relative path under the CWD.
    entries = (part.strip() for part in spec.split(":"))
    return [
        Path(os.path.expanduser(entry)).resolve()
        for entry in entries
        if entry
    ]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def within_allowed_roots(p: Path, roots: Optional[Iterable[Path]]) -> bool:
    """Return True iff ``p`` resolves to, or inside, one of ``roots``.

    An empty/None ``roots`` means "unrestricted" — the right default for a
    library running on the user's own machine, where the docker jail (or the
    user's own intent) is the boundary, not a host allowlist.

    Args:
        p: Path to test; resolved before comparison.
        roots: Allowed root directories. Each one is resolved here as well, so
            roots that are relative, contain ``..`` or are symlinks compare
            correctly against the resolved ``p``.
    """
    # Path comparison is lexical, so both sides must be resolved the same way.
    resolved_roots = [Path(r).resolve() for r in (roots or [])]
    if not resolved_roots:
        return True
    target = p.resolve()
    return any(target == r or r in target.parents for r in resolved_roots)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def deny_secret(p: Path, root: Path) -> None:
    """Reject credential-, key- or env-shaped paths, even inside an allowed root.

    Args:
        p: Path to vet.
        root: Root that ``p`` is made relative to before its components are
            checked; if ``p`` is not under ``root``, all of ``p``'s components
            are checked instead.

    Raises:
        ValueError: If any checked component is in ``SECRET_DIR_NAMES`` or the
            basename matches one of ``SECRET_FILE_GLOBS``.
    """
    try:
        relative = p.relative_to(root)
    except ValueError:
        relative = p

    blocked = next((part for part in relative.parts if part in SECRET_DIR_NAMES), None)
    if blocked is not None:
        raise ValueError(f"refusing to read sensitive path '{blocked}/'")

    name = p.name
    if any(fnmatch.fnmatch(name, pattern) for pattern in SECRET_FILE_GLOBS):
        raise ValueError(f"refusing to read sensitive file '{name}'")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def safe_path(root: Path, rel: str) -> Path:
    """Resolve ``rel`` beneath ``root``, refusing escapes and secret-shaped paths.

    This is the shared choke point for the navigator tools (list_dir /
    read_file / grep) and the judge's slice re-read, so the denylist applies
    to all of them.

    Args:
        root: Repository root; resolved before use.
        rel: Path relative to ``root``. Leading ``/`` characters are dropped
            and a falsy value means the root itself.

    Returns:
        The resolved absolute path.

    Raises:
        ValueError: If the resolved path lies outside ``root`` or is rejected
            by ``deny_secret``.
    """
    base = root.resolve()
    relative = (rel or "").lstrip("/")
    # Resolve first so ``..`` segments and symlinks are judged by where they
    # actually land, not by how the path is spelled.
    candidate = (base / relative).resolve()
    inside = candidate == base or base in candidate.parents
    if not inside:
        raise ValueError("path escapes the repository root")
    deny_secret(candidate, base)
    return candidate
|