run-codeql 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- run_codeql/__init__.py +1 -0
- run_codeql/__main__.py +3 -0
- run_codeql/cli.py +224 -0
- run_codeql/download.py +158 -0
- run_codeql/logging_utils.py +28 -0
- run_codeql/sarif.py +138 -0
- run_codeql/scanner.py +144 -0
- run_codeql/settings.py +78 -0
- run_codeql-1.0.0.dist-info/METADATA +275 -0
- run_codeql-1.0.0.dist-info/RECORD +13 -0
- run_codeql-1.0.0.dist-info/WHEEL +4 -0
- run_codeql-1.0.0.dist-info/entry_points.txt +3 -0
- run_codeql-1.0.0.dist-info/licenses/LICENSE +21 -0
run_codeql/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""run_codeql package."""
|
run_codeql/__main__.py
ADDED
run_codeql/cli.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""CLI entrypoint and orchestration for run_codeql."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import concurrent.futures
|
|
5
|
+
import os
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from run_codeql.download import fetch_codeql
|
|
11
|
+
from run_codeql.logging_utils import configure_logging, err, log
|
|
12
|
+
from run_codeql.sarif import build_sarif_summary
|
|
13
|
+
from run_codeql.scanner import cleanup_reports, detect_langs, run_lang
|
|
14
|
+
from run_codeql.settings import TOOLS_DIR
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def main() -> None:
    """CLI main function.

    Parses arguments, then runs one of two modes:
      * --report-only: summarize SARIF files left by a previous scan and exit.
      * default: detect/collect languages, fetch CodeQL, scan each language in
        a thread pool, print per-language summaries, and exit nonzero when
        findings or scan errors exist (unless --no-fail).
    """
    parser = argparse.ArgumentParser(
        description="Run CodeQL code-quality analysis locally (mirrors GitHub 'Code Quality' check)",  # noqa: E501
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Outputs:\n"
            " Databases: .codeql/db-<lang>/\n"
            " SARIF: .codeql/reports/<lang>-code-quality.sarif\n\n"
            "CodeQL CLI is auto-downloaded to ~/.codeql-tools/ if not on PATH.\n"
            "Run from the root of any repository."
        ),
    )
    parser.add_argument(
        "--lang",
        default=None,
        help="Comma-separated languages to scan (default: auto-detected from repo contents)",
    )
    parser.add_argument(
        "--keep-db",
        action="store_true",
        help="Reuse existing databases instead of recreating",
    )
    parser.add_argument(
        "--keep-reports",
        action="store_true",
        help="Do not delete prior SARIF reports before running",
    )
    parser.add_argument(
        "--no-fail",
        action="store_true",
        help="Exit 0 even if findings or scan errors exist",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Print each finding with rule, location, and message",
    )
    parser.add_argument(
        "--report-only",
        action="store_true",
        help="Skip scanning; summarize existing SARIF reports from the last run",
    )
    parser.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Suppress log output; print only the final summaries (useful for agent/scripted use)",
    )
    parser.add_argument(
        "--files",
        default=None,
        help=(
            "Comma-separated file paths (or fnmatch patterns) to restrict findings to. "
            "Paths are matched against the end of the SARIF artifact URI "
            "(e.g. 'src/foo.py' or 'src/*.py')"
        ),
    )
    parser.add_argument(
        "--rule",
        default=None,
        help=(
            "Comma-separated rule IDs (or fnmatch patterns) to restrict findings to "
            "(e.g. 'py/unused-import' or 'py/*')"
        ),
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        metavar="N",
        help="Return at most N findings across all files (after --files filtering)",
    )
    parser.add_argument(
        "--offset",
        type=int,
        default=0,
        metavar="N",
        help="Skip the first N findings before applying --limit (for pagination)",
    )
    args = parser.parse_args()

    configure_logging(args.quiet)
    if args.quiet:
        # Logging is suppressed in quiet mode, so announce the mode directly.
        print("[codeql-local] running in quiet mode", file=sys.stderr, flush=True)

    # Split the comma-separated CLI filters into pattern lists (None = no filter).
    file_patterns = [p.strip() for p in args.files.split(",") if p.strip()] if args.files else None
    rule_patterns = [p.strip() for p in args.rule.split(",") if p.strip()] if args.rule else None

    repo_root = Path.cwd()
    work_dir = repo_root / ".codeql"
    report_dir = work_dir / "reports"

    if args.report_only:
        # Report-only mode: re-summarize SARIF files from a previous run.
        filter_langs = (
            {lang_name.strip() for lang_name in args.lang.split(",")} if args.lang else None
        )
        sarif_files = sorted(report_dir.glob("*.sarif"))
        if filter_langs:
            # Report file names follow "<lang>-code-quality.sarif".
            sarif_files = [
                f for f in sarif_files if f.stem.removesuffix("-code-quality") in filter_langs
            ]
        if not sarif_files:
            err(f"No SARIF files found in {report_dir}. Run without --report-only first.")
            sys.exit(1)
        log("===== Summaries (from previous scan) =====")
        report_failed = False
        findings_found = False
        for sarif in sarif_files:
            lang = sarif.stem.removesuffix("-code-quality")
            summary = build_sarif_summary(
                sarif,
                verbose=args.verbose,
                files=file_patterns,
                rules=rule_patterns,
                limit=args.limit,
                offset=args.offset,
            )
            findings_found = findings_found or summary.total_findings > 0
            report_failed = report_failed or summary.read_error
            # With filters active, only print languages that had matches.
            if (file_patterns is None and rule_patterns is None) or summary.matched_findings > 0:
                print(f"[{lang}] SARIF: {sarif}\n{summary.text}")
        should_fail = report_failed or findings_found
        sys.exit(0 if args.no_fail else int(should_fail))

    config_file = repo_root / ".github" / "codeql" / "codeql-config.yml"

    if args.lang:
        langs = [lang_name.strip() for lang_name in args.lang.split(",") if lang_name.strip()]
    else:
        langs = detect_langs(repo_root)
    if not langs:
        err("No languages detected. Use --lang to specify one or add supported source files.")
        sys.exit(1)

    TOOLS_DIR.mkdir(parents=True, exist_ok=True)
    work_dir.mkdir(parents=True, exist_ok=True)
    report_dir.mkdir(parents=True, exist_ok=True)

    # Keep the scratch directory out of version control.
    gitignore = work_dir / ".gitignore"
    if not gitignore.exists():
        gitignore.write_text("*\n")

    codeql = fetch_codeql()
    # Only clear reports for explicitly requested languages; wipe all otherwise.
    cleanup_reports(report_dir, args.keep_reports, langs=langs if args.lang else None)

    scan_failed = False
    findings_found = False
    # Divide CPU threads across languages; 0 lets CodeQL pick for a single language.
    threads_per_lang = max(1, (os.cpu_count() or 1) // len(langs)) if len(langs) > 1 else 0
    log(f"Running {len(langs)} language(s) in parallel with {threads_per_lang} thread(s) each")
    summaries: dict[str, tuple[str, int]] = {}  # lang -> (text, matched_findings)

    def scan(lang: str) -> tuple[str, str, int, int, bool]:
        """Scan one language; returns (lang, summary text, total, matched, failed)."""
        try:
            sarif = run_lang(
                lang,
                codeql,
                args.keep_db,
                repo_root,
                work_dir,
                report_dir,
                config_file,
                threads=threads_per_lang,
                quiet=args.quiet,
            )
            summary = build_sarif_summary(
                sarif,
                verbose=args.verbose,
                files=file_patterns,
                rules=rule_patterns,
                limit=args.limit,
                offset=args.offset,
            )
            return (
                lang,
                f"[{lang}] SARIF: {sarif}\n{summary.text}",
                summary.total_findings,
                summary.matched_findings,
                False,
            )
        except subprocess.CalledProcessError as exc:
            # A failed codeql subprocess marks the scan failed but does not abort others.
            err(f"{lang} failed (exit {exc.returncode})")
            return (lang, f"[{lang}] FAILED (exit {exc.returncode})", 0, 0, True)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = {executor.submit(scan, lang): lang for lang in langs}
        for future in concurrent.futures.as_completed(futures):
            if future.exception():
                # Unexpected (non-CalledProcessError) crash inside scan().
                scan_failed = True
                lang = futures[future]
                err(f"{lang} crashed unexpectedly: {future.exception()}")
                summaries[lang] = (f"[{lang}] FAILED (unexpected exception)", 0)
                continue
            lang, text, finding_count, matched_count, failed = future.result()
            summaries[lang] = (text, matched_count)
            findings_found = findings_found or finding_count > 0
            scan_failed = scan_failed or failed

    log("===== Summaries =====")
    # Print in stable (requested/detected) language order, not completion order.
    for lang in langs:
        if lang in summaries:
            text, matched_count = summaries[lang]
            if (file_patterns is None and rule_patterns is None) or matched_count > 0:
                print(text)

    should_fail = scan_failed or findings_found
    sys.exit(0 if args.no_fail else int(should_fail))
|
run_codeql/download.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""CodeQL download, integrity verification, and safe extraction."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import os
|
|
5
|
+
import platform
|
|
6
|
+
import re
|
|
7
|
+
import shutil
|
|
8
|
+
import sys
|
|
9
|
+
import tarfile
|
|
10
|
+
import time
|
|
11
|
+
import urllib.error
|
|
12
|
+
import urllib.request
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Callable, TypeVar
|
|
15
|
+
|
|
16
|
+
from run_codeql.logging_utils import LOGGER, err, log
|
|
17
|
+
from run_codeql.settings import (
|
|
18
|
+
CODEQL_BIN,
|
|
19
|
+
CODEQL_VERSION,
|
|
20
|
+
DOWNLOAD_RETRY_ATTEMPTS,
|
|
21
|
+
DOWNLOAD_RETRY_SLEEP_SECONDS,
|
|
22
|
+
DOWNLOAD_TIMEOUT_SECONDS,
|
|
23
|
+
TOOLS_DIR,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
T = TypeVar("T")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def fetch_codeql() -> Path:
    """Resolve an executable CodeQL binary, downloading and verifying if needed.

    Resolution order: a ``codeql`` on PATH, then a previously downloaded
    binary under TOOLS_DIR, then a fresh checksum-verified download of the
    CodeQL bundle. Exits the process on unsupported platforms or download
    failure.

    Returns:
        Path to an executable ``codeql`` binary.
    """
    # 1) Prefer a codeql already installed on PATH.
    which = shutil.which("codeql")
    if which:
        log(f"Using system CodeQL: {which}")
        return Path(which)

    # 2) Reuse a previously downloaded binary if it is present and executable.
    if CODEQL_BIN.is_file() and os.access(CODEQL_BIN, os.X_OK):
        log(f"Using downloaded CodeQL: {CODEQL_BIN}")
        return CODEQL_BIN

    # 3) Download: map the host OS to the release asset's platform suffix.
    system = platform.system()
    if system == "Linux":
        plat = "linux64"
    elif system == "Darwin":
        plat = "osx64"
    else:
        err(f"Unsupported platform for CodeQL auto-download: {system}")
        sys.exit(1)

    log(f"Downloading CodeQL CLI {CODEQL_VERSION} to {TOOLS_DIR}")
    archive_name = f"codeql-bundle-{plat}.tar.gz"
    url = (
        f"https://github.com/github/codeql-action/releases/download/"
        f"codeql-bundle-v{CODEQL_VERSION}/{archive_name}"
    )
    checksum_url = f"{url}.checksum.txt"
    # ".part" marks an in-progress download; removed in the finally block below.
    tmp = TOOLS_DIR / f"{archive_name}.part"
    TOOLS_DIR.mkdir(parents=True, exist_ok=True)
    try:
        download_file_with_retry(url, tmp)
        # Verify the archive against its published SHA-256 before extracting.
        checksum_text = download_text_with_retry(checksum_url)
        expected_checksum = parse_sha256_checksum(checksum_text, archive_name)
        actual_checksum = compute_sha256(tmp)
        if actual_checksum != expected_checksum:
            raise ValueError(
                f"Checksum mismatch for {archive_name}: expected {expected_checksum}, got {actual_checksum}"  # noqa: E501
            )
        with tarfile.open(tmp, "r:gz") as tar:
            # safe_extract_tar blocks path traversal and link entries.
            safe_extract_tar(tar, TOOLS_DIR)
    except Exception as exc:
        err(f"Failed to download/install CodeQL: {exc}")
        sys.exit(1)
    finally:
        # Always clean up the temporary archive, on success or failure.
        tmp.unlink(missing_ok=True)

    if not (CODEQL_BIN.is_file() and os.access(CODEQL_BIN, os.X_OK)):
        err(f"Downloaded CodeQL bundle missing binary at {CODEQL_BIN}")
        sys.exit(1)

    # Ensure execute bits are set for user/group/other.
    CODEQL_BIN.chmod(CODEQL_BIN.stat().st_mode | 0o111)
    log(f"Downloaded CodeQL to {CODEQL_BIN}")
    return CODEQL_BIN
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _with_retries(action: str, operation: Callable[[], T]) -> T:
    """Invoke *operation* with a bounded number of attempts and a fixed pause.

    Transient network/IO errors trigger a retry; the last attempt re-raises.
    """
    attempt = 1
    while attempt <= DOWNLOAD_RETRY_ATTEMPTS:
        try:
            return operation()
        except (OSError, TimeoutError, urllib.error.URLError, ValueError) as exc:
            # Out of attempts: surface the original exception to the caller.
            if attempt == DOWNLOAD_RETRY_ATTEMPTS:
                raise
            LOGGER.warning(
                "%s failed (%s), retrying in %ss (%s/%s)",
                action,
                exc,
                DOWNLOAD_RETRY_SLEEP_SECONDS,
                attempt,
                DOWNLOAD_RETRY_ATTEMPTS,
            )
            time.sleep(DOWNLOAD_RETRY_SLEEP_SECONDS)
            attempt += 1
    # Unreachable while DOWNLOAD_RETRY_ATTEMPTS is positive; kept as a guard.
    raise RuntimeError(f"{action} failed")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def download_file_with_retry(url: str, destination: Path) -> None:
    """Fetch *url* and stream the response body into *destination* on disk."""

    def _fetch() -> None:
        # Stream the response to disk to avoid buffering the archive in memory.
        with urllib.request.urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECONDS) as response:
            with destination.open("wb") as sink:
                shutil.copyfileobj(response, sink)

    _with_retries(f"Download failed for {url}", _fetch)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def download_text_with_retry(url: str) -> str:
    """Fetch *url* and return its body decoded as UTF-8 text."""

    def _read_body() -> str:
        with urllib.request.urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECONDS) as response:
            raw = response.read()
        return raw.decode("utf-8")

    return _with_retries(f"Download failed for {url}", _read_body)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def parse_sha256_checksum(checksum_text: str, filename: str) -> str:
    """Extract the expected SHA-256 digest for *filename* from checksum text.

    Args:
        checksum_text: Contents of a ``sha256sum``-style file, one
            ``<digest>  <name>`` entry per line. A leading ``*`` on the name
            (sha256sum's binary-mode marker) is ignored.
        filename: The archive name whose digest should be returned.

    Returns:
        The digest in lowercase hex.

    Raises:
        ValueError: If no well-formed entry for *filename* is found.
    """
    for line in checksum_text.splitlines():
        parts = line.strip().split()
        if len(parts) < 2:
            continue
        digest, name = parts[0], parts[1].lstrip("*")
        # Only accept a full 64-char hex digest for the exact filename.
        if name == filename and re.fullmatch(r"[A-Fa-f0-9]{64}", digest):
            return digest.lower()
    # Bug fix: the message previously read "Checksum for (unknown) not found"
    # (an f-string with no placeholder); name the file that was missing.
    raise ValueError(f"Checksum for {filename} not found")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def compute_sha256(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*."""
    hasher = hashlib.sha256()
    # Hash in fixed-size chunks so large archives never load fully into memory.
    with path.open("rb") as handle:
        while chunk := handle.read(8192):
            hasher.update(chunk)
    return hasher.hexdigest()
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def safe_extract_tar(tar: tarfile.TarFile, destination: Path) -> None:
    """Extract *tar* into *destination*, rejecting link entries and traversal.

    Every member is validated before anything is written: symlinks/hardlinks
    are refused outright, and any member whose resolved path would land
    outside *destination* is refused.
    """
    root = destination.resolve()
    for member in tar.getmembers():
        if member.issym() or member.islnk():
            raise ValueError(f"Refusing to extract link from archive: {member.name}")
        resolved = (root / member.name).resolve()
        if resolved != root and root not in resolved.parents:
            raise ValueError(f"Refusing to extract path outside destination: {member.name}")
    try:
        # Python 3.12+: the "data" filter adds further extraction hardening.
        tar.extractall(root, filter="data")
    except TypeError:
        # Older Pythons do not accept the ``filter`` keyword.
        tar.extractall(root)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Logging setup for run_codeql CLI output."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
# Package-wide logger. A NullHandler keeps it silent until configure_logging
# runs; propagation is disabled so the root logger never duplicates output.
LOGGER = logging.getLogger("codeql-local")
LOGGER.addHandler(logging.NullHandler())
LOGGER.propagate = False


def configure_logging(quiet: bool) -> None:
    """Install a single timestamped stderr handler and set the verbosity."""
    # Drop any previously installed handlers (including the NullHandler).
    while LOGGER.handlers:
        LOGGER.removeHandler(LOGGER.handlers[0])
    stream_handler = logging.StreamHandler(sys.stderr)
    formatter = logging.Formatter("[codeql-local %(asctime)s] %(message)s", "%H:%M:%S")
    stream_handler.setFormatter(formatter)
    LOGGER.addHandler(stream_handler)
    LOGGER.setLevel(logging.ERROR if quiet else logging.INFO)


def err(msg: str) -> None:
    """Log an error with the CLI prefix convention."""
    LOGGER.error("[error] %s", msg)


def log(msg: str) -> None:
    """Log an informational CLI message."""
    LOGGER.info(msg)
|
run_codeql/sarif.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""SARIF parsing and summary rendering."""
|
|
2
|
+
|
|
3
|
+
import fnmatch
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
|
|
11
|
+
class SarifSummary:
|
|
12
|
+
"""Rendered SARIF summary and metadata needed for exit semantics."""
|
|
13
|
+
|
|
14
|
+
text: str
|
|
15
|
+
total_findings: int
|
|
16
|
+
read_error: bool
|
|
17
|
+
matched_findings: int = 0 # findings that passed --files filter (before limit/offset)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _uri_matches(uri: str, patterns: list[str]) -> bool:
|
|
21
|
+
"""Return True if *uri* matches any of the fnmatch *patterns*.
|
|
22
|
+
|
|
23
|
+
Each pattern is tested against both the full URI and the basename so
|
|
24
|
+
callers can pass either ``src/foo.py`` or just ``foo.py``.
|
|
25
|
+
"""
|
|
26
|
+
for pat in patterns:
|
|
27
|
+
if fnmatch.fnmatch(uri, pat):
|
|
28
|
+
return True
|
|
29
|
+
if fnmatch.fnmatch(uri, f"*/{pat}"):
|
|
30
|
+
return True
|
|
31
|
+
return False
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def build_sarif_summary(
|
|
35
|
+
sarif: Path,
|
|
36
|
+
verbose: bool = False,
|
|
37
|
+
files: list[str] | None = None,
|
|
38
|
+
rules: list[str] | None = None,
|
|
39
|
+
limit: int | None = None,
|
|
40
|
+
offset: int = 0,
|
|
41
|
+
) -> SarifSummary:
|
|
42
|
+
"""Build a rendered SARIF summary and metadata for control flow.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
sarif: Path to the ``.sarif`` file.
|
|
46
|
+
verbose: Include per-finding details in the output text.
|
|
47
|
+
files: Optional list of fnmatch patterns matched against artifact URIs.
|
|
48
|
+
rules: Optional list of fnmatch patterns matched against rule IDs
|
|
49
|
+
(e.g. ``['py/unused-import']`` or ``['py/*']``).
|
|
50
|
+
limit: If set, return at most this many findings (after *offset*).
|
|
51
|
+
offset: Skip this many findings before applying *limit* (pagination).
|
|
52
|
+
"""
|
|
53
|
+
try:
|
|
54
|
+
data = json.loads(sarif.read_text(encoding="utf-8"))
|
|
55
|
+
except Exception as exc:
|
|
56
|
+
return SarifSummary(
|
|
57
|
+
text=f" (could not read SARIF: {exc})", total_findings=0, read_error=True
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Collect all matching results first so we can apply offset/limit uniformly.
|
|
61
|
+
matched: list[dict] = []
|
|
62
|
+
rules_map: dict[str, dict] = {}
|
|
63
|
+
|
|
64
|
+
for run in data.get("runs", []):
|
|
65
|
+
rules_map.update(
|
|
66
|
+
{r["id"]: r for r in run.get("tool", {}).get("driver", {}).get("rules", [])}
|
|
67
|
+
)
|
|
68
|
+
for result in run.get("results", []):
|
|
69
|
+
if files is not None:
|
|
70
|
+
loc = result.get("locations", [{}])[0]
|
|
71
|
+
uri = loc.get("physicalLocation", {}).get("artifactLocation", {}).get("uri", "")
|
|
72
|
+
if not _uri_matches(uri, files):
|
|
73
|
+
continue
|
|
74
|
+
if rules is not None:
|
|
75
|
+
rule_id = result.get("ruleId", "")
|
|
76
|
+
if not any(fnmatch.fnmatch(rule_id, pat) for pat in rules):
|
|
77
|
+
continue
|
|
78
|
+
matched.append(result)
|
|
79
|
+
|
|
80
|
+
# Apply pagination.
|
|
81
|
+
paginated = matched[offset:]
|
|
82
|
+
if limit is not None:
|
|
83
|
+
paginated = paginated[:limit]
|
|
84
|
+
|
|
85
|
+
counts: dict[str, int] = {}
|
|
86
|
+
finding_lines: list[str] = []
|
|
87
|
+
|
|
88
|
+
for result in paginated:
|
|
89
|
+
level = result.get("level", "warning")
|
|
90
|
+
counts[level] = counts.get(level, 0) + 1
|
|
91
|
+
|
|
92
|
+
if verbose:
|
|
93
|
+
rule_id = result.get("ruleId", "unknown")
|
|
94
|
+
rule = rules_map.get(rule_id, {})
|
|
95
|
+
short_desc = rule.get("shortDescription", {}).get("text", "")
|
|
96
|
+
message = result.get("message", {}).get("text", "")
|
|
97
|
+
message = re.sub(r"\[([^\]]+)\]\(\d+\)", r"\1", message)
|
|
98
|
+
loc = result.get("locations", [{}])[0]
|
|
99
|
+
phys = loc.get("physicalLocation", {})
|
|
100
|
+
uri = phys.get("artifactLocation", {}).get("uri", "")
|
|
101
|
+
line = phys.get("region", {}).get("startLine", "")
|
|
102
|
+
location = f"{uri}:{line}" if line else uri
|
|
103
|
+
finding_lines.append(
|
|
104
|
+
f" [{level}] {rule_id}\n"
|
|
105
|
+
f" {short_desc}\n"
|
|
106
|
+
f" {location}\n"
|
|
107
|
+
f" {message}"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
total_matched = len(matched)
|
|
111
|
+
total_shown = len(paginated)
|
|
112
|
+
total = sum(counts.values())
|
|
113
|
+
|
|
114
|
+
count_lines = [f" {level}: {counts[level]}" for level in sorted(counts)]
|
|
115
|
+
if files is not None or rules is not None or limit is not None or offset:
|
|
116
|
+
count_lines.append(f" Shown: {total_shown} (matched: {total_matched})")
|
|
117
|
+
else:
|
|
118
|
+
count_lines.append(f" Total: {total}")
|
|
119
|
+
|
|
120
|
+
if verbose and finding_lines:
|
|
121
|
+
return SarifSummary(
|
|
122
|
+
text="\n".join(count_lines) + "\n\n" + "\n\n".join(finding_lines),
|
|
123
|
+
total_findings=total,
|
|
124
|
+
read_error=False,
|
|
125
|
+
matched_findings=total_matched,
|
|
126
|
+
)
|
|
127
|
+
return SarifSummary(
|
|
128
|
+
text="\n".join(count_lines),
|
|
129
|
+
total_findings=total,
|
|
130
|
+
read_error=False,
|
|
131
|
+
matched_findings=total_matched,
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def summarize_sarif(sarif: Path, lang: str, verbose: bool = False) -> str:
    """Render a SARIF summary string for CLI output."""
    del lang  # reserved for future language-specific formatting
    summary = build_sarif_summary(sarif, verbose=verbose)
    return summary.text
|
run_codeql/scanner.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Language detection and CodeQL scan orchestration helpers."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from run_codeql.logging_utils import log
|
|
9
|
+
from run_codeql.settings import (
|
|
10
|
+
EXT_TO_LANG,
|
|
11
|
+
IGNORE_DIRS,
|
|
12
|
+
LANG_CONFIG,
|
|
13
|
+
PACKAGES_DIR,
|
|
14
|
+
TOOLS_DIR,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def detect_langs(repo_root: Path) -> list[str]:
    """Scan the repo for source files and return the CodeQL languages to run."""
    detected: set[str] = set()
    for _dirpath, dirnames, filenames in os.walk(repo_root):
        # Prune ignored directories in place so os.walk never descends into them.
        dirnames[:] = [name for name in dirnames if name not in IGNORE_DIRS]
        for filename in filenames:
            mapped = EXT_TO_LANG.get(Path(filename).suffix)
            if mapped:
                detected.add(mapped)

    # GitHub Actions workflows are recognized by YAML files in the workflows dir.
    workflows = repo_root / ".github" / "workflows"
    has_workflow = workflows.is_dir() and (
        any(workflows.glob("*.yml")) or any(workflows.glob("*.yaml"))
    )
    if has_workflow:
        detected.add("actions")

    langs = sorted(detected)
    log(f"Auto-detected languages: {', '.join(langs) if langs else '(none)'}")
    return langs
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def ensure_pack(pack_name: str, codeql: Path, quiet: bool) -> None:
    """Download a CodeQL query pack if it is not already in the local cache."""
    if (PACKAGES_DIR / pack_name).exists():
        return
    log(f"Downloading missing pack: {pack_name}")
    # In quiet mode, discard the subprocess's output streams entirely.
    sink = subprocess.DEVNULL if quiet else None
    subprocess.run(
        [str(codeql), "pack", "download", pack_name],
        check=True,
        stdout=sink,
        stderr=sink,
    )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def cleanup_reports(report_dir: Path, keep: bool, langs: list[str] | None = None) -> None:
|
|
52
|
+
"""Clean reports before scanning based on target language scope."""
|
|
53
|
+
if keep:
|
|
54
|
+
return
|
|
55
|
+
report_dir.mkdir(parents=True, exist_ok=True)
|
|
56
|
+
if langs is None:
|
|
57
|
+
if report_dir.exists():
|
|
58
|
+
shutil.rmtree(report_dir)
|
|
59
|
+
report_dir.mkdir(parents=True, exist_ok=True)
|
|
60
|
+
return
|
|
61
|
+
for lang in set(langs):
|
|
62
|
+
target = report_dir / f"{lang}-code-quality.sarif"
|
|
63
|
+
target.unlink(missing_ok=True)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def cleanup_db(work_dir: Path, lang: str, keep: bool) -> None:
    """Delete the per-language CodeQL database unless *keep* requests reuse."""
    if keep:
        return
    database_dir = work_dir / f"db-{lang}"
    if database_dir.exists():
        shutil.rmtree(database_dir)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def run_lang(
    lang: str,
    codeql: Path,
    keep_db: bool,
    repo_root: Path,
    work_dir: Path,
    report_dir: Path,
    config_file: Path,
    threads: int = 0,
    quiet: bool = False,
) -> Path:
    """Run DB creation and analysis for one language and return SARIF path.

    Args:
        lang: CodeQL language name (e.g. ``python``, ``actions``).
        codeql: Path to the CodeQL CLI binary.
        keep_db: Reuse an existing database instead of recreating it.
        repo_root: Root of the repository to scan.
        work_dir: Scratch directory holding ``db-<lang>`` databases.
        report_dir: Directory receiving ``<lang>-code-quality.sarif``.
        config_file: Optional code-scanning config; passed only if it exists.
        threads: ``--threads`` value for create/analyze (0 = CodeQL default).
        quiet: Discard subprocess output streams when True.

    Raises:
        subprocess.CalledProcessError: If either codeql subprocess fails.
    """
    # Per-language overrides; the default suite is "<lang>-code-quality".
    cfg = LANG_CONFIG.get(lang, {})
    lang_arg = cfg.get("lang_arg", lang)
    suite = cfg.get("suite", f"codeql/{lang}-queries:codeql-suites/{lang}-code-quality.qls")
    build_command = cfg.get("build_command")

    db_dir = work_dir / f"db-{lang}"
    sarif = report_dir / f"{lang}-code-quality.sarif"

    # Remove any stale database unless the caller asked to keep it.
    cleanup_db(work_dir, lang, keep_db)

    log(f"Creating DB for {lang}")
    create_cmd = [
        str(codeql),
        "database",
        "create",
        str(db_dir),
        f"--language={lang_arg}",
        f"--source-root={repo_root}",
        "--overwrite",
        f"--threads={threads}",
        "--no-run-unnecessary-builds",
    ]
    if config_file.is_file():
        create_cmd += ["--codescanning-config", str(config_file)]
    if build_command:
        # Compiled languages need an explicit build command for extraction.
        create_cmd += ["--command", build_command]

    subprocess.run(
        create_cmd,
        check=True,
        stdout=subprocess.DEVNULL if quiet else None,
        stderr=subprocess.DEVNULL if quiet else None,
    )

    # Suite references are "<pack>:<suite path>"; the pack must be cached locally.
    pack_name = suite.split(":")[0]
    ensure_pack(pack_name, codeql, quiet=quiet)

    log(f"Analyzing {lang}")
    analyze_cmd = [
        str(codeql),
        "database",
        "analyze",
        str(db_dir),
        suite,
        "--format=sarif-latest",
        f"--output={sarif}",
        f"--threads={threads}",
        "--ram=6144",
        f"--search-path={TOOLS_DIR / 'codeql'}",
    ]
    subprocess.run(
        analyze_cmd,
        check=True,
        stdout=subprocess.DEVNULL if quiet else None,
        stderr=subprocess.DEVNULL if quiet else None,
    )

    return sarif
|
run_codeql/settings.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Shared configuration and defaults for run_codeql."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _int_env(name: str, default: int) -> int:
|
|
8
|
+
"""Read a positive integer from environment with safe fallback."""
|
|
9
|
+
raw = os.getenv(name)
|
|
10
|
+
if not raw:
|
|
11
|
+
return default
|
|
12
|
+
try:
|
|
13
|
+
value = int(raw)
|
|
14
|
+
except ValueError:
|
|
15
|
+
return default
|
|
16
|
+
return value if value > 0 else default
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Pinned CodeQL bundle version used for auto-download.
CODEQL_VERSION = "2.24.2"
# Where auto-downloaded tooling lives.
TOOLS_DIR = Path.home() / ".codeql-tools"
# The CLI binary inside the extracted bundle.
CODEQL_BIN = TOOLS_DIR / "codeql" / "codeql"
# CodeQL's default query-pack cache location.
PACKAGES_DIR = Path.home() / ".codeql" / "packages"
# Download tuning, overridable via RCQL_* environment variables
# (non-positive or malformed values fall back to these defaults).
DOWNLOAD_TIMEOUT_SECONDS = _int_env("RCQL_DOWNLOAD_TIMEOUT_SECONDS", 60)
DOWNLOAD_RETRY_ATTEMPTS = _int_env("RCQL_DOWNLOAD_RETRY_ATTEMPTS", 3)
DOWNLOAD_RETRY_SLEEP_SECONDS = _int_env("RCQL_DOWNLOAD_RETRY_SLEEP_SECONDS", 2)

# Directories to skip when scanning for source files.
IGNORE_DIRS = {
    ".git",
    ".codeql",
    ".venv",
    "venv",
    "env",
    ".env",
    "node_modules",
    "vendor",
    "target",
    "__pycache__",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    "dist",
    "build",
}

# Maps file extensions to CodeQL language names.
EXT_TO_LANG: dict[str, str] = {
    ".py": "python",
    ".rs": "rust",
    ".js": "javascript-typescript",
    ".jsx": "javascript-typescript",
    ".ts": "javascript-typescript",
    ".tsx": "javascript-typescript",
    ".go": "go",
    ".java": "java",
    ".kt": "java",
    ".cs": "csharp",
    ".cpp": "cpp",
    ".cc": "cpp",
    ".cxx": "cpp",
    ".c": "cpp",
    ".rb": "ruby",
    ".swift": "swift",
}

# Per-language overrides for scanning: "lang_arg" renames the --language
# value, "suite" replaces the default code-quality suite, and
# "build_command" supplies an extraction build step where required.
LANG_CONFIG = {
    "javascript-typescript": {
        "lang_arg": "javascript",
        "suite": "codeql/javascript-queries:codeql-suites/javascript-code-quality.qls",
    },
    "rust": {
        "suite": "codeql/rust-queries:codeql-suites/rust-security-and-quality.qls",
        "build_command": "cd rust && cargo build --workspace --all-targets --locked",
    },
    "actions": {
        "suite": "codeql/actions-queries:codeql-suites/actions-security-and-quality.qls",
    },
}
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: run-codeql
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Run CodeQL code-quality analysis locally, mirroring the GitHub 'Code Quality' check
|
|
5
|
+
Project-URL: Homepage, https://github.com/dereknorrbom/run-codeql
|
|
6
|
+
Project-URL: Repository, https://github.com/dereknorrbom/run-codeql
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/dereknorrbom/run-codeql/issues
|
|
8
|
+
Author-email: Derek Norrbom <dereknorrbom@gmail.com>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2026 Derek Norrbom
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: code-quality,codeql,linting,security,static-analysis
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Environment :: Console
|
|
34
|
+
Classifier: Intended Audience :: Developers
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Operating System :: OS Independent
|
|
37
|
+
Classifier: Programming Language :: Python :: 3
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
41
|
+
Classifier: Topic :: Security
|
|
42
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
43
|
+
Requires-Python: >=3.10
|
|
44
|
+
Provides-Extra: dev
|
|
45
|
+
Requires-Dist: black; extra == 'dev'
|
|
46
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
47
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
48
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
49
|
+
Description-Content-Type: text/markdown
|
|
50
|
+
|
|
51
|
+
# run-codeql
|
|
52
|
+
|
|
53
|
+
A pip-installable CLI tool that runs [CodeQL](https://codeql.github.com/) code-quality analysis locally, mirroring the GitHub "Code Quality" check. Install once, run from any repository.
|
|
54
|
+
|
|
55
|
+
## Installation
|
|
56
|
+
|
|
57
|
+
```sh
|
|
58
|
+
pip install run-codeql
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
This installs two commands: `run-codeql` and the shorthand `rcql`.
|
|
62
|
+
|
|
63
|
+
## Requirements
|
|
64
|
+
|
|
65
|
+
- Python 3.10+
|
|
66
|
+
- CodeQL CLI — auto-downloaded to `~/.codeql-tools/` on first run if not already on `PATH` (SHA-256 verified, with retry/timeout policy)
|
|
67
|
+
|
|
68
|
+
## Usage
|
|
69
|
+
|
|
70
|
+
Run from the root of any repository:
|
|
71
|
+
|
|
72
|
+
```sh
|
|
73
|
+
rcql # auto-detect languages, run full scan
|
|
74
|
+
rcql --lang python # scan only Python
|
|
75
|
+
rcql --lang python,actions # scan multiple specific languages
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Options
|
|
79
|
+
|
|
80
|
+
| Flag | Description |
|
|
81
|
+
|------|-------------|
|
|
82
|
+
| `--lang` | Comma-separated languages to scan (default: auto-detected) |
|
|
83
|
+
| `--report-only` | Skip scanning; summarize existing SARIF reports from the last run |
|
|
84
|
+
| `--verbose`, `-v` | Print each finding with rule ID, location, and message |
|
|
85
|
+
| `--quiet`, `-q` | Suppress log output; print only final summaries (for agent/scripted use) |
|
|
86
|
+
| `--files` | Comma-separated file paths or fnmatch patterns to restrict findings to (e.g. `src/foo.py` or `src/*.py`) |
|
|
87
|
+
| `--rule` | Comma-separated rule IDs or fnmatch patterns to restrict findings to (e.g. `py/unused-import` or `py/*`) |
|
|
88
|
+
| `--limit N` | Return at most N findings (after `--files`/`--rule` filtering) |
|
|
89
|
+
| `--offset N` | Skip the first N findings before applying `--limit` (for pagination) |
|
|
90
|
+
| `--keep-db` | Reuse existing databases instead of recreating them |
|
|
91
|
+
| `--keep-reports` | Do not delete prior SARIF reports before running |
|
|
92
|
+
| `--no-fail` | Exit 0 even if findings or scan errors exist |
|
|
93
|
+
|
|
94
|
+
Download behavior can be tuned with environment variables:
|
|
95
|
+
`RCQL_DOWNLOAD_TIMEOUT_SECONDS`, `RCQL_DOWNLOAD_RETRY_ATTEMPTS`, and `RCQL_DOWNLOAD_RETRY_SLEEP_SECONDS`.
|
|
96
|
+
|
|
97
|
+
Report cleanup behavior before scans:
|
|
98
|
+
- with `--lang`, only the matching `<lang>-code-quality.sarif` reports are replaced
|
|
99
|
+
- without `--lang`, all prior SARIF reports are cleared first
|
|
100
|
+
- with `--keep-reports`, no reports are deleted
|
|
101
|
+
|
|
102
|
+
### Language auto-detection
|
|
103
|
+
|
|
104
|
+
When `--lang` is not specified, the tool scans the repo for source files and detects which CodeQL languages to run. Common dependency directories are skipped (`node_modules`, `vendor`, `target`, `.venv`, etc.).
|
|
105
|
+
|
|
106
|
+
Supported languages: `python`, `rust`, `javascript-typescript`, `go`, `java`, `csharp`, `cpp`, `ruby`, `swift`, `actions`
|
|
107
|
+
|
|
108
|
+
GitHub Actions workflows (`.github/workflows/*.yml` and `.github/workflows/*.yaml`) are detected automatically and trigger the `actions` scanner.
|
|
109
|
+
|
|
110
|
+
### Outputs
|
|
111
|
+
|
|
112
|
+
- Databases: `.codeql/db-<lang>/`
|
|
113
|
+
- SARIF reports: `.codeql/reports/<lang>-code-quality.sarif`
|
|
114
|
+
|
|
115
|
+
A `.codeql/.gitignore` with `*` is created automatically on first run so these artifacts are not committed.
|
|
116
|
+
|
|
117
|
+
By default, `rcql` exits non-zero if any findings are present or any language scan fails. Use `--no-fail` to force a zero exit code for informational/reporting workflows.
|
|
118
|
+
|
|
119
|
+
## Common workflows
|
|
120
|
+
|
|
121
|
+
### Full scan
|
|
122
|
+
|
|
123
|
+
```sh
|
|
124
|
+
cd ~/projects/my-repo
|
|
125
|
+
rcql
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Quick re-summary after a previous scan
|
|
129
|
+
|
|
130
|
+
```sh
|
|
131
|
+
rcql --report-only
|
|
132
|
+
rcql --report-only --verbose
|
|
133
|
+
rcql --report-only --lang rust
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Agent-friendly output
|
|
137
|
+
|
|
138
|
+
Produces clean, structured output suitable for an AI agent — no log noise, findings include rule ID, file location, and message:
|
|
139
|
+
|
|
140
|
+
```sh
|
|
141
|
+
rcql -q -v --report-only
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Example output:
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
[python] SARIF: /path/to/.codeql/reports/python-code-quality.sarif
|
|
148
|
+
error: 1
|
|
149
|
+
warning: 2
|
|
150
|
+
Total: 3
|
|
151
|
+
|
|
152
|
+
[error] py/sql-injection
|
|
153
|
+
SQL injection
|
|
154
|
+
src/db.py:42
|
|
155
|
+
This query depends on a user-provided value.
|
|
156
|
+
|
|
157
|
+
[warning] py/unused-import
|
|
158
|
+
Unused import
|
|
159
|
+
src/utils.py:3
|
|
160
|
+
Import of 'os' is not used.
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Filtering findings for large codebases
|
|
164
|
+
|
|
165
|
+
When a scan returns hundreds or thousands of findings, use `--files`, `--rule`, `--limit`, and `--offset` to slice the results. These flags work with both `--report-only` and live scans.
|
|
166
|
+
|
|
167
|
+
**Filter to a specific file:**
|
|
168
|
+
|
|
169
|
+
```sh
|
|
170
|
+
rcql -q -v --report-only --files src/models/user.py
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
**Filter using a glob pattern:**
|
|
174
|
+
|
|
175
|
+
```sh
|
|
176
|
+
rcql -q -v --report-only --files 'src/api/*.py'
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
**Filter to a specific rule:**
|
|
180
|
+
|
|
181
|
+
```sh
|
|
182
|
+
rcql -q -v --report-only --rule py/unused-import
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
**Filter to an entire rule category:**
|
|
186
|
+
|
|
187
|
+
```sh
|
|
188
|
+
rcql -q -v --report-only --rule 'py/*'
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
**Combine file and rule filters:**
|
|
192
|
+
|
|
193
|
+
```sh
|
|
194
|
+
rcql -q -v --report-only --files src/models/user.py --rule py/unused-import
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
**Paginate through a large result set:**
|
|
198
|
+
|
|
199
|
+
```sh
|
|
200
|
+
# First 20 findings
|
|
201
|
+
rcql -q -v --report-only --limit 20
|
|
202
|
+
|
|
203
|
+
# Next 20
|
|
204
|
+
rcql -q -v --report-only --limit 20 --offset 20
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
When any filter or pagination flag is active, the summary line changes from `Total: N` to `Shown: X (matched: Y)` so you can see both how many were returned and how many matched in total.
|
|
208
|
+
|
|
209
|
+
Language blocks with zero matching findings are automatically suppressed when `--files` or `--rule` is active, so only relevant output is shown.
|
|
210
|
+
|
|
211
|
+
### Single-language scan
|
|
212
|
+
|
|
213
|
+
```sh
|
|
214
|
+
rcql --lang actions --no-fail
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## Parallel execution
|
|
218
|
+
|
|
219
|
+
When scanning multiple languages, all scans run in parallel with CPU threads divided evenly across languages. Log timestamps make this visible.
|
|
220
|
+
|
|
221
|
+
## Upgrading CodeQL
|
|
222
|
+
|
|
223
|
+
The CodeQL version is pinned in the package. The checksum for each release is fetched live from GitHub at download time, so no manual SHA updates are needed. To use a newer CodeQL version, update `CODEQL_VERSION` in `run_codeql/settings.py` and delete `~/.codeql-tools/` to trigger a fresh download on next run.
|
|
224
|
+
|
|
225
|
+
## Development
|
|
226
|
+
|
|
227
|
+
```sh
|
|
228
|
+
git clone https://github.com/dereknorrbom/run-codeql
|
|
229
|
+
cd run-codeql
|
|
230
|
+
pip install -e ".[dev]"
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### Make targets
|
|
234
|
+
|
|
235
|
+
| Target | Description |
|
|
236
|
+
|--------|-------------|
|
|
237
|
+
| `make test` | Run the test suite |
|
|
238
|
+
| `make cov` | Run tests with coverage report |
|
|
239
|
+
| `make lint` | Run ruff (check only) |
|
|
240
|
+
| `make fmt` | Auto-format with black and ruff --fix |
|
|
241
|
+
| `make fmt-check` | Check formatting without modifying files |
|
|
242
|
+
| `make check` | fmt-check + lint (CI-safe, no modifications) |
|
|
243
|
+
| `make fix` | lint + fmt combined (auto-fix everything) |
|
|
244
|
+
| `make install` | Install in editable mode with dev deps |
|
|
245
|
+
|
|
246
|
+
### Running tests
|
|
247
|
+
|
|
248
|
+
```sh
|
|
249
|
+
make test # run all 100+ tests
|
|
250
|
+
make cov # with per-line coverage report
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
Tests cover SARIF filtering, language detection, download integrity, extraction safety, and CLI behavior using fixture SARIF files. No CodeQL installation is required to run the tests.
|
|
254
|
+
|
|
255
|
+
### Package layout
|
|
256
|
+
|
|
257
|
+
| File | Purpose |
|
|
258
|
+
|------|---------|
|
|
259
|
+
| `run_codeql/cli.py` | Argument parsing and orchestration |
|
|
260
|
+
| `run_codeql/download.py` | CodeQL download, retry, checksum, extraction |
|
|
261
|
+
| `run_codeql/scanner.py` | Language detection and per-language scan execution |
|
|
262
|
+
| `run_codeql/sarif.py` | SARIF parsing, filtering, and summary rendering |
|
|
263
|
+
| `run_codeql/settings.py` | Constants and environment-tunable defaults |
|
|
264
|
+
|
|
265
|
+
## Contributing
|
|
266
|
+
|
|
267
|
+
Contributions are welcome. Please:
|
|
268
|
+
|
|
269
|
+
1. Fork the repo and create a feature branch
|
|
270
|
+
2. Run `make check` and `make test` before submitting
|
|
271
|
+
3. Open a pull request with a clear description of the change
|
|
272
|
+
|
|
273
|
+
## License
|
|
274
|
+
|
|
275
|
+
MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
run_codeql/__init__.py,sha256=AR8O-VPOn7oYYfbgnTsl9x4NNnJvPGv6_m2H5UYz40s,26
|
|
2
|
+
run_codeql/__main__.py,sha256=qypp4Fi9rWOw64J7kVgXr1r2JtFPd1F2tziFYThqjt4,40
|
|
3
|
+
run_codeql/cli.py,sha256=ouEUyHlR7IF8i3mncQTj7GLhl7ZmNNYPe7aVTx_C6NI,8158
|
|
4
|
+
run_codeql/download.py,sha256=_aV629-NBZGPgBJEkUI6xZJkMMAECOcmMfotbmSFvNY,5483
|
|
5
|
+
run_codeql/logging_utils.py,sha256=y4p7bqUkTMJoaEl3dCgL6vjjkA6Lu-FvkdvITKhJwwc,827
|
|
6
|
+
run_codeql/sarif.py,sha256=pjnXLZIc_Pm0bZtnuopx5w25JE0t08-J3AN38weTcgs,4924
|
|
7
|
+
run_codeql/scanner.py,sha256=PqdZIav3RHWN7NwVLg9Vw7tOhwnQ4vyJlt2md5pOEH0,4173
|
|
8
|
+
run_codeql/settings.py,sha256=0iDXgWJC_5Nc9Lc0UhQqXV16uDYN1YGMJbupqSUJL8I,2028
|
|
9
|
+
run_codeql-1.0.0.dist-info/METADATA,sha256=S2-IkXx88SZe-zqJjoVLccvbQ0QB5VbWkoaGP-XW0tg,9638
|
|
10
|
+
run_codeql-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
11
|
+
run_codeql-1.0.0.dist-info/entry_points.txt,sha256=LWD5_50tO1UVkYF2_k3MtCNcSsxUnzx48Vr0oEwFtp8,78
|
|
12
|
+
run_codeql-1.0.0.dist-info/licenses/LICENSE,sha256=jsTyDqXTGaB22CrjsfiI6aTXheUkbVf3-PymiZk6pH4,1070
|
|
13
|
+
run_codeql-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Derek Norrbom
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|