run-codeql 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
run_codeql/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """run_codeql package."""
run_codeql/__main__.py ADDED
@@ -0,0 +1,3 @@
1
"""Module entry point: run the CLI when this file is executed as a script."""

from run_codeql.cli import main

# Guarded so that merely importing this module does not start a scan.
if __name__ == "__main__":
    main()
run_codeql/cli.py ADDED
@@ -0,0 +1,224 @@
1
+ """CLI entrypoint and orchestration for run_codeql."""
2
+
3
+ import argparse
4
+ import concurrent.futures
5
+ import os
6
+ import subprocess
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from run_codeql.download import fetch_codeql
11
+ from run_codeql.logging_utils import configure_logging, err, log
12
+ from run_codeql.sarif import build_sarif_summary
13
+ from run_codeql.scanner import cleanup_reports, detect_langs, run_lang
14
+ from run_codeql.settings import TOOLS_DIR
15
+
16
+
17
def main() -> None:
    """Parse CLI arguments, run or re-summarize CodeQL scans, then exit.

    Two modes are supported:

    * ``--report-only``: summarize the ``*.sarif`` files already present in
      ``.codeql/reports`` under the current working directory.
    * default: resolve the CodeQL binary, scan each requested (or
      auto-detected) language on a thread pool, and print one summary per
      language in a stable order.

    The function always terminates the process via ``sys.exit`` (or
    ``parser.error`` for invalid pagination values). The final status is 1
    when a scan/SARIF read failed or a summary reported findings, unless
    ``--no-fail`` was given; setup errors (no SARIF files, no languages)
    exit 1 regardless of ``--no-fail``.
    """
    parser = argparse.ArgumentParser(
        description="Run CodeQL code-quality analysis locally (mirrors GitHub 'Code Quality' check)",  # noqa: E501
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Outputs:\n"
            " Databases: .codeql/db-<lang>/\n"
            " SARIF: .codeql/reports/<lang>-code-quality.sarif\n\n"
            "CodeQL CLI is auto-downloaded to ~/.codeql-tools/ if not on PATH.\n"
            "Run from the root of any repository."
        ),
    )
    parser.add_argument(
        "--lang",
        default=None,
        help="Comma-separated languages to scan (default: auto-detected from repo contents)",
    )
    parser.add_argument(
        "--keep-db",
        action="store_true",
        help="Reuse existing databases instead of recreating",
    )
    parser.add_argument(
        "--keep-reports",
        action="store_true",
        help="Do not delete prior SARIF reports before running",
    )
    parser.add_argument(
        "--no-fail",
        action="store_true",
        help="Exit 0 even if findings or scan errors exist",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Print each finding with rule, location, and message",
    )
    parser.add_argument(
        "--report-only",
        action="store_true",
        help="Skip scanning; summarize existing SARIF reports from the last run",
    )
    parser.add_argument(
        "--quiet",
        "-q",
        action="store_true",
        help="Suppress log output; print only the final summaries (useful for agent/scripted use)",
    )
    parser.add_argument(
        "--files",
        default=None,
        help=(
            "Comma-separated file paths (or fnmatch patterns) to restrict findings to. "
            "Paths are matched against the end of the SARIF artifact URI "
            "(e.g. 'src/foo.py' or 'src/*.py')"
        ),
    )
    parser.add_argument(
        "--rule",
        default=None,
        help=(
            "Comma-separated rule IDs (or fnmatch patterns) to restrict findings to "
            "(e.g. 'py/unused-import' or 'py/*')"
        ),
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        metavar="N",
        help="Return at most N findings across all files (after --files filtering)",
    )
    parser.add_argument(
        "--offset",
        type=int,
        default=0,
        metavar="N",
        help="Skip the first N findings before applying --limit (for pagination)",
    )
    args = parser.parse_args()

    # Negative values would otherwise be applied as Python negative slices
    # and silently select findings counted from the end of the list.
    if args.offset < 0:
        parser.error("--offset must be >= 0")
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must be >= 0")

    configure_logging(args.quiet)
    if args.quiet:
        print("[codeql-local] running in quiet mode", file=sys.stderr, flush=True)

    file_patterns = [p.strip() for p in args.files.split(",") if p.strip()] if args.files else None
    rule_patterns = [p.strip() for p in args.rule.split(",") if p.strip()] if args.rule else None

    # Normalize --lang once: drop blanks and duplicates (keeping first-seen
    # order). Duplicates matter because each language is scanned on its own
    # thread against a per-language DB directory and SARIF file.
    requested_langs: list[str] | None = None
    if args.lang:
        requested_langs = list(
            dict.fromkeys(name.strip() for name in args.lang.split(",") if name.strip())
        )
        if not requested_langs:
            err("--lang was given but contains no language names.")
            sys.exit(1)

    repo_root = Path.cwd()
    work_dir = repo_root / ".codeql"
    report_dir = work_dir / "reports"

    if args.report_only:
        sarif_files = sorted(report_dir.glob("*.sarif"))
        if requested_langs is not None:
            wanted = set(requested_langs)
            sarif_files = [
                f for f in sarif_files if f.stem.removesuffix("-code-quality") in wanted
            ]
        if not sarif_files:
            err(f"No SARIF files found in {report_dir}. Run without --report-only first.")
            sys.exit(1)
        log("===== Summaries (from previous scan) =====")
        report_failed = False
        findings_found = False
        for sarif in sarif_files:
            lang = sarif.stem.removesuffix("-code-quality")
            summary = build_sarif_summary(
                sarif,
                verbose=args.verbose,
                files=file_patterns,
                rules=rule_patterns,
                limit=args.limit,
                offset=args.offset,
            )
            findings_found = findings_found or summary.total_findings > 0
            report_failed = report_failed or summary.read_error
            # With an active filter, stay silent for languages with no match.
            if (file_patterns is None and rule_patterns is None) or summary.matched_findings > 0:
                print(f"[{lang}] SARIF: {sarif}\n{summary.text}")
        should_fail = report_failed or findings_found
        sys.exit(0 if args.no_fail else int(should_fail))

    config_file = repo_root / ".github" / "codeql" / "codeql-config.yml"

    langs = requested_langs if requested_langs is not None else detect_langs(repo_root)
    if not langs:
        err("No languages detected. Use --lang to specify one or add supported source files.")
        sys.exit(1)

    TOOLS_DIR.mkdir(parents=True, exist_ok=True)
    work_dir.mkdir(parents=True, exist_ok=True)
    report_dir.mkdir(parents=True, exist_ok=True)

    # Keep the working directory out of version control.
    gitignore = work_dir / ".gitignore"
    if not gitignore.exists():
        gitignore.write_text("*\n")

    codeql = fetch_codeql()
    # Only an explicit --lang narrows the cleanup to those languages' reports.
    cleanup_reports(report_dir, args.keep_reports, langs=requested_langs)

    scan_failed = False
    findings_found = False
    # A single language is passed --threads=0; the CodeQL CLI documents 0 as
    # "one thread per core". Several languages split the cores between them.
    threads_per_lang = max(1, (os.cpu_count() or 1) // len(langs)) if len(langs) > 1 else 0
    thread_label = str(threads_per_lang) if threads_per_lang else "all available"
    log(f"Running {len(langs)} language(s) in parallel with {thread_label} thread(s) each")
    summaries: dict[str, tuple[str, int]] = {}  # lang -> (text, matched_findings)

    def scan(lang: str) -> tuple[str, str, int, int, bool]:
        """Scan one language; return (lang, text, total, matched, failed)."""
        try:
            sarif = run_lang(
                lang,
                codeql,
                args.keep_db,
                repo_root,
                work_dir,
                report_dir,
                config_file,
                threads=threads_per_lang,
                quiet=args.quiet,
            )
            summary = build_sarif_summary(
                sarif,
                verbose=args.verbose,
                files=file_patterns,
                rules=rule_patterns,
                limit=args.limit,
                offset=args.offset,
            )
            return (
                lang,
                f"[{lang}] SARIF: {sarif}\n{summary.text}",
                summary.total_findings,
                summary.matched_findings,
                False,
            )
        except subprocess.CalledProcessError as exc:
            err(f"{lang} failed (exit {exc.returncode})")
            return (lang, f"[{lang}] FAILED (exit {exc.returncode})", 0, 0, True)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = {executor.submit(scan, lang): lang for lang in langs}
        for future in concurrent.futures.as_completed(futures):
            crash = future.exception()
            if crash is not None:
                # Anything other than CalledProcessError escaped scan().
                scan_failed = True
                lang = futures[future]
                err(f"{lang} crashed unexpectedly: {crash}")
                summaries[lang] = (f"[{lang}] FAILED (unexpected exception)", 0)
                continue
            lang, text, finding_count, matched_count, failed = future.result()
            summaries[lang] = (text, matched_count)
            findings_found = findings_found or finding_count > 0
            scan_failed = scan_failed or failed

    log("===== Summaries =====")
    # Print in the order of `langs`, not in completion order.
    for lang in langs:
        if lang in summaries:
            text, matched_count = summaries[lang]
            if (file_patterns is None and rule_patterns is None) or matched_count > 0:
                print(text)

    should_fail = scan_failed or findings_found
    sys.exit(0 if args.no_fail else int(should_fail))
run_codeql/download.py ADDED
@@ -0,0 +1,158 @@
1
+ """CodeQL download, integrity verification, and safe extraction."""
2
+
3
+ import hashlib
4
+ import os
5
+ import platform
6
+ import re
7
+ import shutil
8
+ import sys
9
+ import tarfile
10
+ import time
11
+ import urllib.error
12
+ import urllib.request
13
+ from pathlib import Path
14
+ from typing import Callable, TypeVar
15
+
16
+ from run_codeql.logging_utils import LOGGER, err, log
17
+ from run_codeql.settings import (
18
+ CODEQL_BIN,
19
+ CODEQL_VERSION,
20
+ DOWNLOAD_RETRY_ATTEMPTS,
21
+ DOWNLOAD_RETRY_SLEEP_SECONDS,
22
+ DOWNLOAD_TIMEOUT_SECONDS,
23
+ TOOLS_DIR,
24
+ )
25
+
26
+ T = TypeVar("T")
27
+
28
+
29
def fetch_codeql() -> Path:
    """Resolve an executable CodeQL binary, downloading and verifying if needed.

    Resolution order:

    1. a ``codeql`` executable found on ``PATH``;
    2. an executable file already present at ``CODEQL_BIN``;
    3. a fresh download of the ``CODEQL_VERSION`` bundle into ``TOOLS_DIR``,
       checked against its published SHA-256 before extraction.

    Returns:
        Path to the CodeQL executable.

    Exits the process with status 1 (after logging an error) when the
    platform is neither Linux nor macOS, when download/verification/
    extraction fails, or when the extracted bundle has no usable binary.
    """
    on_path = shutil.which("codeql")
    if on_path:
        log(f"Using system CodeQL: {on_path}")
        return Path(on_path)

    if CODEQL_BIN.is_file() and os.access(CODEQL_BIN, os.X_OK):
        log(f"Using downloaded CodeQL: {CODEQL_BIN}")
        return CODEQL_BIN

    system = platform.system()
    if system == "Linux":
        plat = "linux64"
    elif system == "Darwin":
        plat = "osx64"
    else:
        err(f"Unsupported platform for CodeQL auto-download: {system}")
        sys.exit(1)

    log(f"Downloading CodeQL CLI {CODEQL_VERSION} to {TOOLS_DIR}")
    archive_name = f"codeql-bundle-{plat}.tar.gz"
    url = (
        f"https://github.com/github/codeql-action/releases/download/"
        f"codeql-bundle-v{CODEQL_VERSION}/{archive_name}"
    )
    checksum_url = f"{url}.checksum.txt"
    tmp = TOOLS_DIR / f"{archive_name}.part"
    TOOLS_DIR.mkdir(parents=True, exist_ok=True)
    try:
        # Fetch the small checksum file first so a missing or malformed
        # checksum fails before the large archive is transferred.
        checksum_text = download_text_with_retry(checksum_url)
        expected_checksum = parse_sha256_checksum(checksum_text, archive_name)
        download_file_with_retry(url, tmp)
        actual_checksum = compute_sha256(tmp)
        if actual_checksum != expected_checksum:
            raise ValueError(
                f"Checksum mismatch for {archive_name}: expected {expected_checksum}, got {actual_checksum}"  # noqa: E501
            )
        with tarfile.open(tmp, "r:gz") as tar:
            safe_extract_tar(tar, TOOLS_DIR)
    except Exception as exc:
        err(f"Failed to download/install CodeQL: {exc}")
        sys.exit(1)
    finally:
        # The partial/complete archive is never kept, on success or failure.
        tmp.unlink(missing_ok=True)

    if not CODEQL_BIN.is_file():
        err(f"Downloaded CodeQL bundle missing binary at {CODEQL_BIN}")
        sys.exit(1)

    # Add execute bits *before* testing them; doing it afterwards could never
    # repair a binary that was extracted without them.
    CODEQL_BIN.chmod(CODEQL_BIN.stat().st_mode | 0o111)
    if not os.access(CODEQL_BIN, os.X_OK):
        err(f"Downloaded CodeQL binary is not executable: {CODEQL_BIN}")
        sys.exit(1)

    log(f"Downloaded CodeQL to {CODEQL_BIN}")
    return CODEQL_BIN
82
+
83
+
84
def _with_retries(action: str, operation: Callable[[], T]) -> T:
    """Call *operation* up to ``DOWNLOAD_RETRY_ATTEMPTS`` times.

    Args:
        action: Label used in the retry warning and the fallback error.
        operation: Zero-argument callable whose result is returned as-is.

    Returns:
        Whatever *operation* returns on its first successful call.

    Raises:
        OSError, TimeoutError, urllib.error.URLError, ValueError: re-raised
            unchanged when the final attempt fails.
        RuntimeError: if the loop ends without any attempt returning.
    """
    retryable = (OSError, TimeoutError, urllib.error.URLError, ValueError)
    attempt = 0
    while attempt < DOWNLOAD_RETRY_ATTEMPTS:
        attempt += 1
        try:
            return operation()
        except retryable as exc:
            out_of_attempts = attempt >= DOWNLOAD_RETRY_ATTEMPTS
            if out_of_attempts:
                raise
            LOGGER.warning(
                "%s failed (%s), retrying in %ss (%s/%s)",
                action,
                exc,
                DOWNLOAD_RETRY_SLEEP_SECONDS,
                attempt,
                DOWNLOAD_RETRY_ATTEMPTS,
            )
            # Fixed (non-growing) pause between attempts.
            time.sleep(DOWNLOAD_RETRY_SLEEP_SECONDS)
    raise RuntimeError(f"{action} failed")
102
+
103
+
104
def download_file_with_retry(url: str, destination: Path) -> None:
    """Stream *url* into *destination*, retrying via ``_with_retries``.

    Args:
        url: Address to fetch; opened with ``DOWNLOAD_TIMEOUT_SECONDS``.
        destination: File that is (re)opened in binary write mode on every
            attempt, so a retry starts from an empty file.

    Raises:
        Whatever ``_with_retries`` raises once its attempts are exhausted.
    """

    def _download() -> None:
        with urllib.request.urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECONDS) as response, destination.open(
            "wb"
        ) as out:
            shutil.copyfileobj(response, out)

    # _with_retries appends " failed" to this label in its messages, so the
    # label itself must not already say "failed".
    _with_retries(f"Download of {url}", _download)
113
+
114
+
115
def download_text_with_retry(url: str) -> str:
    """Fetch *url* and return its body decoded as UTF-8, with retries.

    Args:
        url: Address to fetch; opened with ``DOWNLOAD_TIMEOUT_SECONDS``.

    Returns:
        The full response body as text.

    Raises:
        Whatever ``_with_retries`` raises once its attempts are exhausted
        (a ``UnicodeDecodeError`` is a ``ValueError`` and is retried too).
    """

    def _download() -> str:
        with urllib.request.urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECONDS) as response:
            return response.read().decode("utf-8")

    # _with_retries appends " failed" to this label in its messages, so the
    # label itself must not already say "failed".
    return _with_retries(f"Download of {url}", _download)
123
+
124
+
125
def parse_sha256_checksum(checksum_text: str, filename: str) -> str:
    """Return the lowercase SHA-256 hex digest listed for *filename*.

    Each line is split on whitespace into ``<digest> <name> ...``; a leading
    ``*`` on the name is ignored. The first line whose name equals *filename*
    and whose digest is exactly 64 hex characters wins.

    Raises:
        ValueError: if no line provides a valid digest for *filename*.
    """
    sha256_hex = re.compile(r"[A-Fa-f0-9]{64}")
    for raw_line in checksum_text.splitlines():
        fields = raw_line.split()
        if len(fields) >= 2:
            candidate = fields[0]
            listed_name = fields[1].lstrip("*")
            if listed_name == filename and sha256_hex.fullmatch(candidate):
                return candidate.lower()
    raise ValueError(f"Checksum for {filename} not found")
135
+
136
+
137
def compute_sha256(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*.

    The file is read in 8192-byte pieces, so it is never held in memory
    as a whole.
    """
    hasher = hashlib.sha256()
    with path.open("rb") as handle:
        while piece := handle.read(8192):
            hasher.update(piece)
    return hasher.hexdigest()
144
+
145
+
146
def safe_extract_tar(tar: tarfile.TarFile, destination: Path) -> None:
    """Extract *tar* into *destination*, rejecting links and path traversal.

    Every member is validated before anything is written: symbolic and hard
    links are refused outright, and each member's resolved path must be the
    destination itself or lie beneath it.

    Args:
        tar: An open archive.
        destination: Directory to extract into.

    Raises:
        ValueError: if any member is a link or would land outside
            *destination*.
    """
    dest = destination.resolve()
    for member in tar.getmembers():
        if member.issym() or member.islnk():
            raise ValueError(f"Refusing to extract link from archive: {member.name}")
        member_path = (dest / member.name).resolve()
        if member_path != dest and dest not in member_path.parents:
            raise ValueError(f"Refusing to extract path outside destination: {member.name}")

    # Feature-detect extraction filters instead of catching TypeError: a
    # TypeError raised *during* a filtered extraction must not trigger a
    # second, unfiltered extraction.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(dest, filter="data")
    else:
        tar.extractall(dest)
@@ -0,0 +1,28 @@
1
+ """Logging setup for run_codeql CLI output."""
2
+
3
+ import logging
4
+ import sys
5
+
6
+ LOGGER = logging.getLogger("codeql-local")
7
+ LOGGER.addHandler(logging.NullHandler())
8
+ LOGGER.propagate = False
9
+
10
+
11
def configure_logging(quiet: bool) -> None:
    """Reset ``LOGGER`` to a single timestamped stderr handler.

    Args:
        quiet: When true only ERROR and above are emitted; otherwise INFO
            and above.
    """
    # Drop every handler currently attached so repeated calls do not stack.
    for stale in tuple(LOGGER.handlers):
        LOGGER.removeHandler(stale)

    stream_handler = logging.StreamHandler(sys.stderr)
    stream_handler.setFormatter(
        logging.Formatter(fmt="[codeql-local %(asctime)s] %(message)s", datefmt="%H:%M:%S")
    )
    LOGGER.addHandler(stream_handler)

    level = logging.ERROR if quiet else logging.INFO
    LOGGER.setLevel(level)
19
+
20
+
21
def err(msg: str) -> None:
    """Emit *msg* on ``LOGGER`` at ERROR level, prefixed with ``[error]``."""
    LOGGER.log(logging.ERROR, "[error] %s", msg)
24
+
25
+
26
def log(msg: str) -> None:
    """Emit *msg* on ``LOGGER`` at INFO level."""
    LOGGER.log(logging.INFO, msg)
run_codeql/sarif.py ADDED
@@ -0,0 +1,138 @@
1
+ """SARIF parsing and summary rendering."""
2
+
3
+ import fnmatch
4
+ import json
5
+ import re
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+
9
+
10
@dataclass(frozen=True)
class SarifSummary:
    """Immutable result of summarizing one SARIF file.

    Carries the rendered text together with the counters and error flag that
    callers use to decide what to print and which exit status to use.
    """

    # Rendered summary text, ready to print.
    text: str
    # Number of findings counted in the rendered summary.
    total_findings: int
    # True when the SARIF file could not be read or parsed.
    read_error: bool
    # Findings that passed the file/rule filters (before limit/offset).
    matched_findings: int = 0
18
+
19
+
20
+ def _uri_matches(uri: str, patterns: list[str]) -> bool:
21
+ """Return True if *uri* matches any of the fnmatch *patterns*.
22
+
23
+ Each pattern is tested against both the full URI and the basename so
24
+ callers can pass either ``src/foo.py`` or just ``foo.py``.
25
+ """
26
+ for pat in patterns:
27
+ if fnmatch.fnmatch(uri, pat):
28
+ return True
29
+ if fnmatch.fnmatch(uri, f"*/{pat}"):
30
+ return True
31
+ return False
32
+
33
+
34
def _first_physical_location(result: dict) -> dict:
    """Return the first location's ``physicalLocation`` of a SARIF result.

    Yields an empty dict when the result has no ``locations`` key *or* an
    empty ``locations`` list, so callers can chain ``.get`` safely.
    """
    locations = result.get("locations") or [{}]
    return locations[0].get("physicalLocation", {})


def build_sarif_summary(
    sarif: Path,
    verbose: bool = False,
    files: list[str] | None = None,
    rules: list[str] | None = None,
    limit: int | None = None,
    offset: int = 0,
) -> SarifSummary:
    """Build a rendered SARIF summary and metadata for control flow.

    Args:
        sarif: Path to the ``.sarif`` file.
        verbose: Include per-finding details in the output text.
        files: Optional list of fnmatch patterns matched against artifact URIs.
        rules: Optional list of fnmatch patterns matched against rule IDs
            (e.g. ``['py/unused-import']`` or ``['py/*']``).
        limit: If set, return at most this many findings (after *offset*).
            Negative values are treated as 0.
        offset: Skip this many findings before applying *limit* (pagination).
            Negative values are treated as 0.

    Returns:
        A ``SarifSummary``. ``total_findings`` counts the findings left after
        filtering *and* pagination; ``matched_findings`` counts those left
        after filtering only. If the file cannot be read or parsed, the
        summary has ``read_error=True`` and zero findings.
    """
    try:
        data = json.loads(sarif.read_text(encoding="utf-8"))
    except Exception as exc:
        return SarifSummary(
            text=f" (could not read SARIF: {exc})", total_findings=0, read_error=True
        )

    # Collect all matching results first so we can apply offset/limit uniformly.
    matched: list[dict] = []
    rules_map: dict[str, dict] = {}

    for run in data.get("runs", []):
        rules_map.update(
            {r["id"]: r for r in run.get("tool", {}).get("driver", {}).get("rules", [])}
        )
        for result in run.get("results", []):
            if files is not None:
                uri = _first_physical_location(result).get("artifactLocation", {}).get("uri", "")
                if not _uri_matches(uri, files):
                    continue
            if rules is not None:
                rule_id = result.get("ruleId", "")
                if not any(fnmatch.fnmatch(rule_id, pat) for pat in rules):
                    continue
            matched.append(result)

    # Apply pagination. Clamp to zero so negative values cannot turn into
    # Python "count from the end" slices.
    start = max(offset, 0)
    paginated = matched[start:]
    if limit is not None:
        paginated = paginated[: max(limit, 0)]

    counts: dict[str, int] = {}
    finding_lines: list[str] = []

    for result in paginated:
        # Results without an explicit level are counted as warnings.
        level = result.get("level", "warning")
        counts[level] = counts.get(level, 0) + 1

        if verbose:
            rule_id = result.get("ruleId", "unknown")
            rule = rules_map.get(rule_id, {})
            short_desc = rule.get("shortDescription", {}).get("text", "")
            message = result.get("message", {}).get("text", "")
            # Reduce "[label](N)" links in the message to just "label".
            message = re.sub(r"\[([^\]]+)\]\(\d+\)", r"\1", message)
            phys = _first_physical_location(result)
            uri = phys.get("artifactLocation", {}).get("uri", "")
            line = phys.get("region", {}).get("startLine", "")
            location = f"{uri}:{line}" if line else uri
            finding_lines.append(
                f" [{level}] {rule_id}\n"
                f" {short_desc}\n"
                f" {location}\n"
                f" {message}"
            )

    total_matched = len(matched)
    total_shown = len(paginated)
    total = sum(counts.values())

    count_lines = [f" {level}: {counts[level]}" for level in sorted(counts)]
    if files is not None or rules is not None or limit is not None or start:
        count_lines.append(f" Shown: {total_shown} (matched: {total_matched})")
    else:
        count_lines.append(f" Total: {total}")

    text = "\n".join(count_lines)
    if verbose and finding_lines:
        text += "\n\n" + "\n\n".join(finding_lines)
    return SarifSummary(
        text=text,
        total_findings=total,
        read_error=False,
        matched_findings=total_matched,
    )
133
+
134
+
135
def summarize_sarif(sarif: Path, lang: str, verbose: bool = False) -> str:
    """Return only the rendered text of ``build_sarif_summary`` for *sarif*.

    *lang* is accepted but not used.
    """
    del lang  # reserved for future language-specific formatting
    summary = build_sarif_summary(sarif, verbose=verbose)
    return summary.text
run_codeql/scanner.py ADDED
@@ -0,0 +1,144 @@
1
+ """Language detection and CodeQL scan orchestration helpers."""
2
+
3
+ import os
4
+ import shutil
5
+ import subprocess
6
+ from pathlib import Path
7
+
8
+ from run_codeql.logging_utils import log
9
+ from run_codeql.settings import (
10
+ EXT_TO_LANG,
11
+ IGNORE_DIRS,
12
+ LANG_CONFIG,
13
+ PACKAGES_DIR,
14
+ TOOLS_DIR,
15
+ )
16
+
17
+
18
def detect_langs(repo_root: Path) -> list[str]:
    """Return the sorted CodeQL languages suggested by the files under *repo_root*.

    File suffixes are mapped through ``EXT_TO_LANG``; directories named in
    ``IGNORE_DIRS`` are not descended into. ``"actions"`` is added when
    ``.github/workflows`` contains at least one ``*.yml`` or ``*.yaml`` file.
    The result is also written to the log.
    """
    detected: set[str] = set()
    for _root, subdirs, files in os.walk(repo_root):
        # Prune in place: os.walk only skips directories removed from this list.
        subdirs[:] = [name for name in subdirs if name not in IGNORE_DIRS]
        mapped = (EXT_TO_LANG.get(Path(name).suffix) for name in files)
        detected.update(language for language in mapped if language)

    workflows_dir = repo_root / ".github" / "workflows"
    if workflows_dir.is_dir():
        has_workflow = any(workflows_dir.glob("*.yml")) or any(workflows_dir.glob("*.yaml"))
        if has_workflow:
            detected.add("actions")

    ordered = sorted(detected)
    log(f"Auto-detected languages: {', '.join(ordered) if ordered else '(none)'}")
    return ordered
35
+
36
+
37
def ensure_pack(pack_name: str, codeql: Path, quiet: bool) -> None:
    """Run ``codeql pack download`` unless ``PACKAGES_DIR / pack_name`` exists.

    Args:
        pack_name: Pack identifier, also used as the cache sub-path.
        codeql: Path to the CodeQL executable.
        quiet: When true, the subprocess's stdout and stderr are discarded.

    Raises:
        subprocess.CalledProcessError: if the download command exits non-zero.
    """
    if (PACKAGES_DIR / pack_name).exists():
        return

    log(f"Downloading missing pack: {pack_name}")
    sink = subprocess.DEVNULL if quiet else None
    command = [str(codeql), "pack", "download", pack_name]
    subprocess.run(command, check=True, stdout=sink, stderr=sink)
49
+
50
+
51
+ def cleanup_reports(report_dir: Path, keep: bool, langs: list[str] | None = None) -> None:
52
+ """Clean reports before scanning based on target language scope."""
53
+ if keep:
54
+ return
55
+ report_dir.mkdir(parents=True, exist_ok=True)
56
+ if langs is None:
57
+ if report_dir.exists():
58
+ shutil.rmtree(report_dir)
59
+ report_dir.mkdir(parents=True, exist_ok=True)
60
+ return
61
+ for lang in set(langs):
62
+ target = report_dir / f"{lang}-code-quality.sarif"
63
+ target.unlink(missing_ok=True)
64
+
65
+
66
def cleanup_db(work_dir: Path, lang: str, keep: bool) -> None:
    """Delete ``work_dir / "db-<lang>"`` if it exists, unless *keep* is true."""
    stale_db = work_dir / f"db-{lang}"
    if not keep and stale_db.exists():
        shutil.rmtree(stale_db)
73
+
74
+
75
def run_lang(
    lang: str,
    codeql: Path,
    keep_db: bool,
    repo_root: Path,
    work_dir: Path,
    report_dir: Path,
    config_file: Path,
    threads: int = 0,
    quiet: bool = False,
) -> Path:
    """Run DB creation and analysis for one language and return SARIF path.

    Args:
        lang: Language key; per-language overrides come from ``LANG_CONFIG``.
        codeql: Path to the CodeQL executable.
        keep_db: Reuse ``work_dir / "db-<lang>"`` when it already holds a
            database instead of creating it again.
        repo_root: Passed to CodeQL as ``--source-root``.
        work_dir: Parent directory of the per-language database.
        report_dir: Directory the SARIF file is written to.
        config_file: Passed as ``--codescanning-config`` when it is a file.
        threads: Forwarded verbatim as ``--threads`` to both commands.
        quiet: When true, subprocess stdout and stderr are discarded.

    Returns:
        ``report_dir / "<lang>-code-quality.sarif"``.

    Raises:
        subprocess.CalledProcessError: if any CodeQL command exits non-zero.
    """
    cfg = LANG_CONFIG.get(lang, {})
    lang_arg = cfg.get("lang_arg", lang)
    suite = cfg.get("suite", f"codeql/{lang}-queries:codeql-suites/{lang}-code-quality.qls")
    build_command = cfg.get("build_command")

    db_dir = work_dir / f"db-{lang}"
    sarif = report_dir / f"{lang}-code-quality.sarif"
    sink = subprocess.DEVNULL if quiet else None

    # `database create --overwrite` rebuilds the database unconditionally, so
    # honoring keep_db means skipping the create step altogether.
    # NOTE(review): codeql-database.yml is taken as the marker of an existing
    # database; it does not prove the database was finalized.
    reuse_db = keep_db and (db_dir / "codeql-database.yml").is_file()
    if reuse_db:
        log(f"Reusing existing DB for {lang}")
    else:
        cleanup_db(work_dir, lang, keep_db)

        log(f"Creating DB for {lang}")
        create_cmd = [
            str(codeql),
            "database",
            "create",
            str(db_dir),
            f"--language={lang_arg}",
            f"--source-root={repo_root}",
            "--overwrite",
            f"--threads={threads}",
            "--no-run-unnecessary-builds",
        ]
        if config_file.is_file():
            create_cmd += ["--codescanning-config", str(config_file)]
        if build_command:
            create_cmd += ["--command", build_command]

        subprocess.run(create_cmd, check=True, stdout=sink, stderr=sink)

    # The suite string has the form "<pack>:<path inside pack>".
    pack_name = suite.split(":")[0]
    ensure_pack(pack_name, codeql, quiet=quiet)

    log(f"Analyzing {lang}")
    analyze_cmd = [
        str(codeql),
        "database",
        "analyze",
        str(db_dir),
        suite,
        "--format=sarif-latest",
        f"--output={sarif}",
        f"--threads={threads}",
        "--ram=6144",
        f"--search-path={TOOLS_DIR / 'codeql'}",
    ]
    subprocess.run(analyze_cmd, check=True, stdout=sink, stderr=sink)

    return sarif
run_codeql/settings.py ADDED
@@ -0,0 +1,78 @@
1
+ """Shared configuration and defaults for run_codeql."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+
6
+
7
+ def _int_env(name: str, default: int) -> int:
8
+ """Read a positive integer from environment with safe fallback."""
9
+ raw = os.getenv(name)
10
+ if not raw:
11
+ return default
12
+ try:
13
+ value = int(raw)
14
+ except ValueError:
15
+ return default
16
+ return value if value > 0 else default
17
+
18
+
19
+ CODEQL_VERSION = "2.24.2"
20
+ TOOLS_DIR = Path.home() / ".codeql-tools"
21
+ CODEQL_BIN = TOOLS_DIR / "codeql" / "codeql"
22
+ PACKAGES_DIR = Path.home() / ".codeql" / "packages"
23
+ DOWNLOAD_TIMEOUT_SECONDS = _int_env("RCQL_DOWNLOAD_TIMEOUT_SECONDS", 60)
24
+ DOWNLOAD_RETRY_ATTEMPTS = _int_env("RCQL_DOWNLOAD_RETRY_ATTEMPTS", 3)
25
+ DOWNLOAD_RETRY_SLEEP_SECONDS = _int_env("RCQL_DOWNLOAD_RETRY_SLEEP_SECONDS", 2)
26
+
27
+ # Directories to skip when scanning for source files.
28
+ IGNORE_DIRS = {
29
+ ".git",
30
+ ".codeql",
31
+ ".venv",
32
+ "venv",
33
+ "env",
34
+ ".env",
35
+ "node_modules",
36
+ "vendor",
37
+ "target",
38
+ "__pycache__",
39
+ ".tox",
40
+ ".mypy_cache",
41
+ ".pytest_cache",
42
+ "dist",
43
+ "build",
44
+ }
45
+
46
+ # Maps file extensions to CodeQL language names.
47
+ EXT_TO_LANG: dict[str, str] = {
48
+ ".py": "python",
49
+ ".rs": "rust",
50
+ ".js": "javascript-typescript",
51
+ ".jsx": "javascript-typescript",
52
+ ".ts": "javascript-typescript",
53
+ ".tsx": "javascript-typescript",
54
+ ".go": "go",
55
+ ".java": "java",
56
+ ".kt": "java",
57
+ ".cs": "csharp",
58
+ ".cpp": "cpp",
59
+ ".cc": "cpp",
60
+ ".cxx": "cpp",
61
+ ".c": "cpp",
62
+ ".rb": "ruby",
63
+ ".swift": "swift",
64
+ }
65
+
66
+ LANG_CONFIG = {
67
+ "javascript-typescript": {
68
+ "lang_arg": "javascript",
69
+ "suite": "codeql/javascript-queries:codeql-suites/javascript-code-quality.qls",
70
+ },
71
+ "rust": {
72
+ "suite": "codeql/rust-queries:codeql-suites/rust-security-and-quality.qls",
73
+ "build_command": "cd rust && cargo build --workspace --all-targets --locked",
74
+ },
75
+ "actions": {
76
+ "suite": "codeql/actions-queries:codeql-suites/actions-security-and-quality.qls",
77
+ },
78
+ }
@@ -0,0 +1,275 @@
1
+ Metadata-Version: 2.4
2
+ Name: run-codeql
3
+ Version: 1.0.0
4
+ Summary: Run CodeQL code-quality analysis locally, mirroring the GitHub 'Code Quality' check
5
+ Project-URL: Homepage, https://github.com/dereknorrbom/run-codeql
6
+ Project-URL: Repository, https://github.com/dereknorrbom/run-codeql
7
+ Project-URL: Bug Tracker, https://github.com/dereknorrbom/run-codeql/issues
8
+ Author-email: Derek Norrbom <dereknorrbom@gmail.com>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 Derek Norrbom
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: code-quality,codeql,linting,security,static-analysis
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Environment :: Console
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Operating System :: OS Independent
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Programming Language :: Python :: 3.10
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
41
+ Classifier: Topic :: Security
42
+ Classifier: Topic :: Software Development :: Quality Assurance
43
+ Requires-Python: >=3.10
44
+ Provides-Extra: dev
45
+ Requires-Dist: black; extra == 'dev'
46
+ Requires-Dist: pytest; extra == 'dev'
47
+ Requires-Dist: pytest-cov; extra == 'dev'
48
+ Requires-Dist: ruff; extra == 'dev'
49
+ Description-Content-Type: text/markdown
50
+
51
+ # run-codeql
52
+
53
+ A pip-installable CLI tool that runs [CodeQL](https://codeql.github.com/) code-quality analysis locally, mirroring the GitHub "Code Quality" check. Install once, run from any repository.
54
+
55
+ ## Installation
56
+
57
+ ```sh
58
+ pip install run-codeql
59
+ ```
60
+
61
+ This installs two commands: `run-codeql` and the shorthand `rcql`.
62
+
63
+ ## Requirements
64
+
65
+ - Python 3.10+
66
+ - CodeQL CLI — auto-downloaded to `~/.codeql-tools/` on first run if not already on `PATH` (SHA-256 verified, with retry/timeout policy)
67
+
68
+ ## Usage
69
+
70
+ Run from the root of any repository:
71
+
72
+ ```sh
73
+ rcql # auto-detect languages, run full scan
74
+ rcql --lang python # scan only Python
75
+ rcql --lang python,actions # scan multiple specific languages
76
+ ```
77
+
78
+ ### Options
79
+
80
+ | Flag | Description |
81
+ |------|-------------|
82
+ | `--lang` | Comma-separated languages to scan (default: auto-detected) |
83
+ | `--report-only` | Skip scanning; summarize existing SARIF reports from the last run |
84
+ | `--verbose`, `-v` | Print each finding with rule ID, location, and message |
85
+ | `--quiet`, `-q` | Suppress log output; print only final summaries (for agent/scripted use) |
86
+ | `--files` | Comma-separated file paths or fnmatch patterns to restrict findings to (e.g. `src/foo.py` or `src/*.py`) |
87
+ | `--rule` | Comma-separated rule IDs or fnmatch patterns to restrict findings to (e.g. `py/unused-import` or `py/*`) |
88
+ | `--limit N` | Return at most N findings (after `--files`/`--rule` filtering) |
89
+ | `--offset N` | Skip the first N findings before applying `--limit` (for pagination) |
90
+ | `--keep-db` | Reuse existing databases instead of recreating them |
91
+ | `--keep-reports` | Do not delete prior SARIF reports before running |
92
+ | `--no-fail` | Exit 0 even if findings or scan errors exist |
93
+
94
+ Download behavior can be tuned with environment variables:
95
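+ `RCQL_DOWNLOAD_TIMEOUT_SECONDS`, `RCQL_DOWNLOAD_RETRY_ATTEMPTS`, and `RCQL_DOWNLOAD_RETRY_SLEEP_SECONDS`. Each must be a positive integer; unset, empty, non-integer, or non-positive values fall back to the defaults (60, 3, and 2, respectively).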
96
+
97
+ Report cleanup behavior before scans:
98
+ - with `--lang`, only the matching `<lang>-code-quality.sarif` reports are replaced
99
+ - without `--lang`, all prior SARIF reports are cleared first
100
+ - with `--keep-reports`, no reports are deleted
101
+
102
+ ### Language auto-detection
103
+
104
+ When `--lang` is not specified, the tool scans the repo for source files and detects which CodeQL languages to run. Common dependency directories are skipped (`node_modules`, `vendor`, `target`, `.venv`, etc.).
105
+
106
+ Supported languages: `python`, `rust`, `javascript-typescript`, `go`, `java`, `csharp`, `cpp`, `ruby`, `swift`, `actions`
107
+
108
+ GitHub Actions workflows (`.github/workflows/*.yml` and `.github/workflows/*.yaml`) are detected automatically and trigger the `actions` scanner.
109
+
110
+ ### Outputs
111
+
112
+ - Databases: `.codeql/db-<lang>/`
113
+ - SARIF reports: `.codeql/reports/<lang>-code-quality.sarif`
114
+
115
+ A `.codeql/.gitignore` with `*` is created automatically on first run so these artifacts are not committed.
116
+
117
+ By default, `rcql` exits non-zero if any findings are present or any language scan fails. Use `--no-fail` to force a zero exit code for informational/reporting workflows.
118
+
119
+ ## Common workflows
120
+
121
+ ### Full scan
122
+
123
+ ```sh
124
+ cd ~/projects/my-repo
125
+ rcql
126
+ ```
127
+
128
+ ### Quick re-summary after a previous scan
129
+
130
+ ```sh
131
+ rcql --report-only
132
+ rcql --report-only --verbose
133
+ rcql --report-only --lang rust
134
+ ```
135
+
136
+ ### Agent-friendly output
137
+
138
+ Produces clean, structured output suitable for an AI agent — no log noise, and each finding includes its rule ID, file location, and message:
139
+
140
+ ```sh
141
+ rcql -q -v --report-only
142
+ ```
143
+
144
+ Example output:
145
+
146
+ ```
147
+ [python] SARIF: /path/to/.codeql/reports/python-code-quality.sarif
148
+ error: 1
149
+ warning: 2
150
+ Total: 3
151
+
152
+ [error] py/sql-injection
153
+ SQL injection
154
+ src/db.py:42
155
+ This query depends on user-provided value.
156
+
157
+ [warning] py/unused-import
158
+ Unused import
159
+ src/utils.py:3
160
+ Import of 'os' is not used.
161
+ ```
162
+
163
+ ### Filtering findings for large codebases
164
+
165
+ When a scan returns hundreds or thousands of findings, use `--files`, `--rule`, `--limit`, and `--offset` to slice the results. These flags work with both `--report-only` and live scans.
166
+
167
+ **Filter to a specific file:**
168
+
169
+ ```sh
170
+ rcql -q -v --report-only --files src/models/user.py
171
+ ```
172
+
173
+ **Filter using a glob pattern:**
174
+
175
+ ```sh
176
+ rcql -q -v --report-only --files 'src/api/*.py'
177
+ ```
178
+
179
+ **Filter to a specific rule:**
180
+
181
+ ```sh
182
+ rcql -q -v --report-only --rule py/unused-import
183
+ ```
184
+
185
+ **Filter to an entire rule category:**
186
+
187
+ ```sh
188
+ rcql -q -v --report-only --rule 'py/*'
189
+ ```
190
+
191
+ **Combine file and rule filters:**
192
+
193
+ ```sh
194
+ rcql -q -v --report-only --files src/models/user.py --rule py/unused-import
195
+ ```
196
+
197
+ **Paginate through a large result set:**
198
+
199
+ ```sh
200
+ # First 20 findings
201
+ rcql -q -v --report-only --limit 20
202
+
203
+ # Next 20
204
+ rcql -q -v --report-only --limit 20 --offset 20
205
+ ```
206
+
207
+ When any filter or pagination flag is active, the summary line changes from `Total: N` to `Shown: X (matched: Y)` so you can see both how many were returned and how many matched in total.
208
+
209
+ Language blocks with zero matching findings are automatically suppressed when `--files` or `--rule` is active, so only relevant output is shown.
210
+
211
+ ### Single-language scan
212
+
213
+ ```sh
214
+ rcql --lang actions --no-fail
215
+ ```
216
+
217
+ ## Parallel execution
218
+
219
+ When scanning multiple languages, all scans run in parallel with CPU threads divided evenly across languages. Log timestamps make this visible.
220
+
221
+ ## Upgrading CodeQL
222
+
223
+ The CodeQL version is pinned in the package. The checksum for each release is fetched live from GitHub at download time, so no manual SHA updates are needed. To use a newer CodeQL version, update `CODEQL_VERSION` in `run_codeql/settings.py` and delete `~/.codeql-tools/` to trigger a fresh download on next run.
224
+
225
+ ## Development
226
+
227
+ ```sh
228
+ git clone https://github.com/dereknorrbom/run-codeql
229
+ cd run-codeql
230
+ pip install -e ".[dev]"
231
+ ```
232
+
233
+ ### Make targets
234
+
235
+ | Target | Description |
236
+ |--------|-------------|
237
+ | `make test` | Run the test suite |
238
+ | `make cov` | Run tests with coverage report |
239
+ | `make lint` | Run ruff (check only) |
240
+ | `make fmt` | Auto-format with black and ruff --fix |
241
+ | `make fmt-check` | Check formatting without modifying files |
242
+ | `make check` | fmt-check + lint (CI-safe, no modifications) |
243
+ | `make fix` | lint + fmt combined (auto-fix everything) |
244
+ | `make install` | Install in editable mode with dev deps |
245
+
246
+ ### Running tests
247
+
248
+ ```sh
249
+ make test # run all 100+ tests
250
+ make cov # with per-line coverage report
251
+ ```
252
+
253
+ Tests cover SARIF filtering, language detection, download integrity, extraction safety, and CLI behavior using fixture SARIF files. No CodeQL installation is required to run the tests.
254
+
255
+ ### Package layout
256
+
257
+ | File | Purpose |
258
+ |------|---------|
259
+ | `run_codeql/cli.py` | Argument parsing and orchestration |
260
+ | `run_codeql/download.py` | CodeQL download, retry, checksum, extraction |
261
+ | `run_codeql/scanner.py` | Language detection and per-language scan execution |
262
+ | `run_codeql/sarif.py` | SARIF parsing, filtering, and summary rendering |
263
+ | `run_codeql/settings.py` | Constants and environment-tunable defaults |
264
+
265
+ ## Contributing
266
+
267
+ Contributions are welcome. Please:
268
+
269
+ 1. Fork the repo and create a feature branch
270
+ 2. Run `make check` and `make test` before submitting
271
+ 3. Open a pull request with a clear description of the change
272
+
273
+ ## License
274
+
275
+ MIT
@@ -0,0 +1,13 @@
1
+ run_codeql/__init__.py,sha256=AR8O-VPOn7oYYfbgnTsl9x4NNnJvPGv6_m2H5UYz40s,26
2
+ run_codeql/__main__.py,sha256=qypp4Fi9rWOw64J7kVgXr1r2JtFPd1F2tziFYThqjt4,40
3
+ run_codeql/cli.py,sha256=ouEUyHlR7IF8i3mncQTj7GLhl7ZmNNYPe7aVTx_C6NI,8158
4
+ run_codeql/download.py,sha256=_aV629-NBZGPgBJEkUI6xZJkMMAECOcmMfotbmSFvNY,5483
5
+ run_codeql/logging_utils.py,sha256=y4p7bqUkTMJoaEl3dCgL6vjjkA6Lu-FvkdvITKhJwwc,827
6
+ run_codeql/sarif.py,sha256=pjnXLZIc_Pm0bZtnuopx5w25JE0t08-J3AN38weTcgs,4924
7
+ run_codeql/scanner.py,sha256=PqdZIav3RHWN7NwVLg9Vw7tOhwnQ4vyJlt2md5pOEH0,4173
8
+ run_codeql/settings.py,sha256=0iDXgWJC_5Nc9Lc0UhQqXV16uDYN1YGMJbupqSUJL8I,2028
9
+ run_codeql-1.0.0.dist-info/METADATA,sha256=S2-IkXx88SZe-zqJjoVLccvbQ0QB5VbWkoaGP-XW0tg,9638
10
+ run_codeql-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
11
+ run_codeql-1.0.0.dist-info/entry_points.txt,sha256=LWD5_50tO1UVkYF2_k3MtCNcSsxUnzx48Vr0oEwFtp8,78
12
+ run_codeql-1.0.0.dist-info/licenses/LICENSE,sha256=jsTyDqXTGaB22CrjsfiI6aTXheUkbVf3-PymiZk6pH4,1070
13
+ run_codeql-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ rcql = run_codeql.cli:main
3
+ run-codeql = run_codeql.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Derek Norrbom
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.