dravix-agent 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.example.json +30 -0
- package/ARCHITECTURE.md +410 -0
- package/LICENSE +21 -0
- package/README.md +153 -0
- package/ROADMAP.md +117 -0
- package/data/vulnkb.json +666 -0
- package/dist/bin/aegis.d.ts +3 -0
- package/dist/bin/aegis.d.ts.map +1 -0
- package/dist/bin/aegis.js +489 -0
- package/dist/bin/aegis.js.map +1 -0
- package/dist/cache.d.ts +9 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +146 -0
- package/dist/cache.js.map +1 -0
- package/dist/engines/ai-sinks.d.ts +52 -0
- package/dist/engines/ai-sinks.d.ts.map +1 -0
- package/dist/engines/ai-sinks.js +204 -0
- package/dist/engines/ai-sinks.js.map +1 -0
- package/dist/engines/eslint.d.ts +9 -0
- package/dist/engines/eslint.d.ts.map +1 -0
- package/dist/engines/eslint.js +245 -0
- package/dist/engines/eslint.js.map +1 -0
- package/dist/engines/joern.d.ts +3 -0
- package/dist/engines/joern.d.ts.map +1 -0
- package/dist/engines/joern.js +98 -0
- package/dist/engines/joern.js.map +1 -0
- package/dist/engines/js-sinks.d.ts +70 -0
- package/dist/engines/js-sinks.d.ts.map +1 -0
- package/dist/engines/js-sinks.js +370 -0
- package/dist/engines/js-sinks.js.map +1 -0
- package/dist/engines/llm-critic.d.ts +130 -0
- package/dist/engines/llm-critic.d.ts.map +1 -0
- package/dist/engines/llm-critic.js +551 -0
- package/dist/engines/llm-critic.js.map +1 -0
- package/dist/engines/pragma.d.ts +20 -0
- package/dist/engines/pragma.d.ts.map +1 -0
- package/dist/engines/pragma.js +83 -0
- package/dist/engines/pragma.js.map +1 -0
- package/dist/engines/property-test.d.ts +3 -0
- package/dist/engines/property-test.d.ts.map +1 -0
- package/dist/engines/property-test.js +134 -0
- package/dist/engines/property-test.js.map +1 -0
- package/dist/engines/pyright.d.ts +10 -0
- package/dist/engines/pyright.d.ts.map +1 -0
- package/dist/engines/pyright.js +143 -0
- package/dist/engines/pyright.js.map +1 -0
- package/dist/engines/pysa.d.ts +3 -0
- package/dist/engines/pysa.d.ts.map +1 -0
- package/dist/engines/pysa.js +83 -0
- package/dist/engines/pysa.js.map +1 -0
- package/dist/engines/python-sinks.d.ts +82 -0
- package/dist/engines/python-sinks.d.ts.map +1 -0
- package/dist/engines/python-sinks.js +459 -0
- package/dist/engines/python-sinks.js.map +1 -0
- package/dist/engines/registry.d.ts +26 -0
- package/dist/engines/registry.d.ts.map +1 -0
- package/dist/engines/registry.js +70 -0
- package/dist/engines/registry.js.map +1 -0
- package/dist/engines/secret-scan.d.ts +22 -0
- package/dist/engines/secret-scan.d.ts.map +1 -0
- package/dist/engines/secret-scan.js +179 -0
- package/dist/engines/secret-scan.js.map +1 -0
- package/dist/engines/semgrep.d.ts +10 -0
- package/dist/engines/semgrep.d.ts.map +1 -0
- package/dist/engines/semgrep.js +200 -0
- package/dist/engines/semgrep.js.map +1 -0
- package/dist/engines/treesitter.d.ts +18 -0
- package/dist/engines/treesitter.d.ts.map +1 -0
- package/dist/engines/treesitter.js +135 -0
- package/dist/engines/treesitter.js.map +1 -0
- package/dist/engines/tsc.d.ts +10 -0
- package/dist/engines/tsc.d.ts.map +1 -0
- package/dist/engines/tsc.js +142 -0
- package/dist/engines/tsc.js.map +1 -0
- package/dist/engines/types.d.ts +47 -0
- package/dist/engines/types.d.ts.map +1 -0
- package/dist/engines/types.js +27 -0
- package/dist/engines/types.js.map +1 -0
- package/dist/findings.d.ts +121 -0
- package/dist/findings.d.ts.map +1 -0
- package/dist/findings.js +98 -0
- package/dist/findings.js.map +1 -0
- package/dist/hooks/claude-code.d.ts +3 -0
- package/dist/hooks/claude-code.d.ts.map +1 -0
- package/dist/hooks/claude-code.js +187 -0
- package/dist/hooks/claude-code.js.map +1 -0
- package/dist/index/context.d.ts +127 -0
- package/dist/index/context.d.ts.map +1 -0
- package/dist/index/context.js +267 -0
- package/dist/index/context.js.map +1 -0
- package/dist/index/embeddings.d.ts +68 -0
- package/dist/index/embeddings.d.ts.map +1 -0
- package/dist/index/embeddings.js +570 -0
- package/dist/index/embeddings.js.map +1 -0
- package/dist/index/graph_routing.d.ts +36 -0
- package/dist/index/graph_routing.d.ts.map +1 -0
- package/dist/index/graph_routing.js +170 -0
- package/dist/index/graph_routing.js.map +1 -0
- package/dist/index/joern.d.ts +76 -0
- package/dist/index/joern.d.ts.map +1 -0
- package/dist/index/joern.js +782 -0
- package/dist/index/joern.js.map +1 -0
- package/dist/index/property-test.d.ts +88 -0
- package/dist/index/property-test.d.ts.map +1 -0
- package/dist/index/property-test.js +466 -0
- package/dist/index/property-test.js.map +1 -0
- package/dist/index/proto/scip.proto +897 -0
- package/dist/index/pysa.d.ts +91 -0
- package/dist/index/pysa.d.ts.map +1 -0
- package/dist/index/pysa.js +617 -0
- package/dist/index/pysa.js.map +1 -0
- package/dist/index/scip.d.ts +76 -0
- package/dist/index/scip.d.ts.map +1 -0
- package/dist/index/scip.js +541 -0
- package/dist/index/scip.js.map +1 -0
- package/dist/index/vulrag.d.ts +86 -0
- package/dist/index/vulrag.d.ts.map +1 -0
- package/dist/index/vulrag.js +242 -0
- package/dist/index/vulrag.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/install/claude-code.d.ts +31 -0
- package/dist/install/claude-code.d.ts.map +1 -0
- package/dist/install/claude-code.js +447 -0
- package/dist/install/claude-code.js.map +1 -0
- package/dist/lang.d.ts +5 -0
- package/dist/lang.d.ts.map +1 -0
- package/dist/lang.js +52 -0
- package/dist/lang.js.map +1 -0
- package/dist/learning/suppressions.d.ts +70 -0
- package/dist/learning/suppressions.d.ts.map +1 -0
- package/dist/learning/suppressions.js +179 -0
- package/dist/learning/suppressions.js.map +1 -0
- package/dist/mcp/server.d.ts +2 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +187 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/tools/explain.d.ts +58 -0
- package/dist/mcp/tools/explain.d.ts.map +1 -0
- package/dist/mcp/tools/explain.js +60 -0
- package/dist/mcp/tools/explain.js.map +1 -0
- package/dist/mcp/tools/precheck.d.ts +29 -0
- package/dist/mcp/tools/precheck.d.ts.map +1 -0
- package/dist/mcp/tools/precheck.js +42 -0
- package/dist/mcp/tools/precheck.js.map +1 -0
- package/dist/mcp/tools/validate.d.ts +73 -0
- package/dist/mcp/tools/validate.d.ts.map +1 -0
- package/dist/mcp/tools/validate.js +66 -0
- package/dist/mcp/tools/validate.js.map +1 -0
- package/dist/mcp/warm.d.ts +88 -0
- package/dist/mcp/warm.d.ts.map +1 -0
- package/dist/mcp/warm.js +331 -0
- package/dist/mcp/warm.js.map +1 -0
- package/dist/orchestrator.d.ts +46 -0
- package/dist/orchestrator.d.ts.map +1 -0
- package/dist/orchestrator.js +596 -0
- package/dist/orchestrator.js.map +1 -0
- package/dist/policy.d.ts +51 -0
- package/dist/policy.d.ts.map +1 -0
- package/dist/policy.js +201 -0
- package/dist/policy.js.map +1 -0
- package/dist/risk.d.ts +31 -0
- package/dist/risk.d.ts.map +1 -0
- package/dist/risk.js +92 -0
- package/dist/risk.js.map +1 -0
- package/dist/stats.d.ts +72 -0
- package/dist/stats.d.ts.map +1 -0
- package/dist/stats.js +217 -0
- package/dist/stats.js.map +1 -0
- package/dist/telemetry/collector.d.ts +10 -0
- package/dist/telemetry/collector.d.ts.map +1 -0
- package/dist/telemetry/collector.js +75 -0
- package/dist/telemetry/collector.js.map +1 -0
- package/dist/telemetry/consent.d.ts +9 -0
- package/dist/telemetry/consent.d.ts.map +1 -0
- package/dist/telemetry/consent.js +42 -0
- package/dist/telemetry/consent.js.map +1 -0
- package/dist/telemetry/installation.d.ts +2 -0
- package/dist/telemetry/installation.d.ts.map +1 -0
- package/dist/telemetry/installation.js +32 -0
- package/dist/telemetry/installation.js.map +1 -0
- package/dist/telemetry/sanitizer.d.ts +5 -0
- package/dist/telemetry/sanitizer.d.ts.map +1 -0
- package/dist/telemetry/sanitizer.js +60 -0
- package/dist/telemetry/sanitizer.js.map +1 -0
- package/dist/telemetry/types.d.ts +39 -0
- package/dist/telemetry/types.d.ts.map +1 -0
- package/dist/telemetry/types.js +4 -0
- package/dist/telemetry/types.js.map +1 -0
- package/dist/telemetry/uploader.d.ts +12 -0
- package/dist/telemetry/uploader.d.ts.map +1 -0
- package/dist/telemetry/uploader.js +92 -0
- package/dist/telemetry/uploader.js.map +1 -0
- package/dist/util/logger.d.ts +19 -0
- package/dist/util/logger.d.ts.map +1 -0
- package/dist/util/logger.js +58 -0
- package/dist/util/logger.js.map +1 -0
- package/dist/util/safe-paths.d.ts +8 -0
- package/dist/util/safe-paths.d.ts.map +1 -0
- package/dist/util/safe-paths.js +102 -0
- package/dist/util/safe-paths.js.map +1 -0
- package/dist/util/subprocess.d.ts +32 -0
- package/dist/util/subprocess.d.ts.map +1 -0
- package/dist/util/subprocess.js +137 -0
- package/dist/util/subprocess.js.map +1 -0
- package/package.json +93 -0
|
@@ -0,0 +1,782 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Joern CPG (Code Property Graph) — Phase 2 P1 deep-semantic engine.
|
|
3
|
+
*
|
|
4
|
+
* Joern parses the project into a CPG (AST + CFG + PDG fused) and lets us
|
|
5
|
+
* query data-flow / call-graph / control-flow facts that no regex or
|
|
6
|
+
* type-checker can answer. Per the research (Macroscope 2026; arxiv
|
|
7
|
+
* 2603.24837 codebadger), CPG-backed reachability is the only OSS path to
|
|
8
|
+
* detecting cross-function dataflow bugs without a paid SAST.
|
|
9
|
+
*
|
|
10
|
+
* **Architecture: batch + cache, not realtime.**
|
|
11
|
+
* Joern's CPG build takes 10-30 s for a medium project plus ~5 s JVM
|
|
12
|
+
* cold-start per query. That's too slow for an inline gate (<1 s budget).
|
|
13
|
+
* We split the cost:
|
|
14
|
+
*
|
|
15
|
+
* `aegis index --joern` → invoke this module's ``buildJoernCpg``.
|
|
16
|
+
* Builds CPG via ``joern-parse``, runs the
|
|
17
|
+
* CPGQL security+logic query via ``joern --script``,
|
|
18
|
+
* writes ``findings.jsonl`` into the per-project
|
|
19
|
+
* cache dir. Slow (10-60 s) but once-per-edit.
|
|
20
|
+
* ``JoernEngine`` (realtime) → reads ``findings.jsonl``, filters to the
|
|
21
|
+
* current file, returns instantly.
|
|
22
|
+
*
|
|
23
|
+
* **Cache layout:** ``~/.aegis/joern/<sha256(root)[:16]>/``
|
|
24
|
+
* - ``cpg.bin`` the binary CPG (large)
|
|
25
|
+
* - ``findings.jsonl`` one JSON Finding per line
|
|
26
|
+
* - ``info.json`` {built_at, joern_path, jdk_path, n_findings, query_version}
|
|
27
|
+
*
|
|
28
|
+
* **JDK version constraint:** Joern's Scala scripting breaks on JDK 22+ —
|
|
29
|
+
* we explicitly look for 11/17/21 even if the host's default ``java`` is
|
|
30
|
+
* newer. ``AEGIS_JOERN_JDK`` env overrides discovery.
|
|
31
|
+
*
|
|
32
|
+
* **Joern binary discovery order:**
|
|
33
|
+
* 1. ``AEGIS_JOERN_DIR`` env (path to ``joern-cli/`` directory)
|
|
34
|
+
* 2. ``~/.aegis/joern/joern-cli/`` (where ``aegis install joern`` would land)
|
|
35
|
+
* 3. ``~/.argus/joern/joern-cli/`` (re-use the existing argus install if present)
|
|
36
|
+
*
|
|
37
|
+
* **CPGQL query strategy** (mirrors argus's proven pattern in
|
|
38
|
+
* ``argus/engines/joern.py``): each detector is wrapped in its own try/catch
|
|
39
|
+
* so a language-frontend missing a particular feature never aborts the rest.
|
|
40
|
+
* Output is tab-separated lines prefixed with ``AEGIS_SINK`` / ``AEGIS_LOGIC`` / ``AEGIS_RACE``,
|
|
41
|
+
* parsed line-by-line.
|
|
42
|
+
*/
|
|
43
|
+
import { createHash } from "node:crypto";
|
|
44
|
+
import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
|
|
45
|
+
import { homedir } from "node:os";
|
|
46
|
+
import { join, resolve as resolvePath } from "node:path";
|
|
47
|
+
import { FindingSchema, makeFindingId } from "../findings.js";
|
|
48
|
+
import { getLogger } from "../util/logger.js";
|
|
49
|
+
import { run as spawnRun } from "../util/subprocess.js";
|
|
50
|
+
// Module-level logger obtained from the project's getLogger helper under the name "aegis.joern".
const log = getLogger("aegis.joern");
|
|
51
|
+
// Version tag of the CPGQL query bundle. Any change to the query text needs a
// bump here so that previously cached findings files are invalidated.
//   v2 — race detectors added (asyncio.create_task fire-and-forget,
//        JS .then() without .catch()).
const QUERY_VERSION = "v2";

// Time budgets, in milliseconds. Building a CPG for a large project can be
// slow, so the build gets 10 minutes; the query pass gets 5 minutes.
const DEFAULT_CPG_BUILD_TIMEOUT_MS = 1000 * 60 * 10;
const DEFAULT_QUERY_TIMEOUT_MS = 1000 * 60 * 5;

// Known JDK install locations, probed in order. Only JDK 11..21 (inclusive)
// are listed. The Windows list interleaves Eclipse Adoptium and Amazon
// Corretto roots for each version.
const JDK_CANDIDATES_WINDOWS = [
    "C:\\Program Files\\Eclipse Adoptium\\jdk-21.0.6.7-hotspot",
    "C:\\Program Files\\Amazon Corretto\\jdk21.0.6_7",
    "C:\\Program Files\\Eclipse Adoptium\\jdk-17.0.14.7-hotspot",
    "C:\\Program Files\\Amazon Corretto\\jdk17.0.14_7",
    "C:\\Program Files\\Eclipse Adoptium\\jdk-11.0.26.4-hotspot",
    "C:\\Program Files\\Amazon Corretto\\jdk11.0.26_4",
    "C:\\Program Files\\Eclipse Adoptium\\jdk-21.0.5.11-hotspot",
    "C:\\Program Files\\Amazon Corretto\\jdk21.0.5_11",
];
const JDK_CANDIDATES_POSIX = [
    "/usr/lib/jvm/java-21-openjdk",
    "/usr/lib/jvm/java-17-openjdk",
    "/usr/lib/jvm/java-11-openjdk",
    "/Library/Java/JavaVirtualMachines/temurin-21.jdk/Contents/Home",
    "/Library/Java/JavaVirtualMachines/temurin-17.jdk/Contents/Home",
];

// joern-cli locations relative to the user's home directory, probed in order.
// The second entry re-uses an existing argus install when one is present.
const JOERN_DIR_CANDIDATES_RELATIVE = [
    ".aegis/joern/joern-cli",
    ".argus/joern/joern-cli",
];
|
|
81
|
+
// ── Discovery ─────────────────────────────────────────────────────────────
|
|
82
|
+
/**
 * Expand a leading ``~`` to the current user's home directory.
 *
 * Only the bare ``~`` and the ``~/…`` (or ``~\…``) forms are expanded. Any
 * other string — including ``~name/…`` forms — is returned unchanged, because
 * dropping the first two characters of such a value would silently produce a
 * wrong path under the home directory.
 *
 * @param {string} p  Path that may start with ``~``.
 * @returns {string}  Absolute path when expanded, otherwise ``p`` as given.
 */
function expandHome(p) {
    if (p === "~") {
        return resolvePath(homedir());
    }
    if (p.startsWith("~/") || p.startsWith("~\\")) {
        // Drop the "~" and its separator, then anchor the remainder at home.
        return resolvePath(homedir(), p.slice(2));
    }
    return p;
}
|
|
87
|
+
/**
 * Find the joern-cli directory, i.e. a directory whose ``bin/`` holds
 * ``joern-parse`` (or ``joern-parse.bat``).
 *
 * The ``AEGIS_JOERN_DIR`` environment variable is tried first; when it is set
 * but does not contain the launcher a warning is logged and discovery falls
 * through to the home-relative candidate list.
 *
 * @returns {string | null} The first matching directory, or ``null``.
 */
export function findJoernDir() {
    const containsJoernParse = (dir) => ["joern-parse", "joern-parse.bat"].some((exe) => existsSync(join(dir, "bin", exe)));
    const override = process.env.AEGIS_JOERN_DIR;
    if (override && override.trim()) {
        const overrideDir = expandHome(override.trim());
        if (containsJoernParse(overrideDir)) {
            return overrideDir;
        }
        log.warn("AEGIS_JOERN_DIR set but no joern-parse inside", { value: override });
    }
    for (const relative of JOERN_DIR_CANDIDATES_RELATIVE) {
        const candidate = join(homedir(), relative);
        if (containsJoernParse(candidate)) {
            return candidate;
        }
    }
    return null;
}
|
|
105
|
+
/** File-name suffix for Joern launcher scripts: ``.bat`` on Windows, empty otherwise. */
function joernBinSuffix() {
    if (process.platform === "win32") {
        return ".bat";
    }
    return "";
}
|
|
108
|
+
/**
 * Find the Joern REPL launcher inside a joern-cli directory.
 *
 * Three layouts are probed, in this order, each with the platform launcher
 * suffix appended:
 *   1. ``<joernDir>/joern``          — canonical layout, next to ``bin/``
 *   2. ``<joernDir>/bin/joern``      — installs that keep the launcher in bin/
 *   3. ``<joernDir>/bin/joern-cli``  — legacy / partial argus install
 *
 * @param {string} joernDir  The joern-cli directory.
 * @returns {string | null}  Path of the first launcher that exists, or ``null``.
 */
function findJoernReplBin(joernDir) {
    const ext = joernBinSuffix();
    const layouts = [
        ["joern"],
        ["bin", "joern"],
        ["bin", "joern-cli"],
    ];
    const launchers = layouts.map((segments) => {
        const dirs = segments.slice(0, -1);
        const base = segments[segments.length - 1];
        return join(joernDir, ...dirs, base + ext);
    });
    const found = launchers.find((launcher) => existsSync(launcher));
    return found === undefined ? null : found;
}
|
|
130
|
+
/**
 * Find a JDK home directory usable by Joern (the candidate lists cover 11..21).
 *
 * ``AEGIS_JOERN_JDK`` takes precedence when it points at a directory holding
 * ``bin/java`` (``bin/java.exe`` on Windows). If it is set but has no java
 * binary, a warning is logged and the platform's candidate list is probed.
 *
 * @returns {string | null} The first JDK home containing a java binary, or ``null``.
 */
export function findJoernJdk() {
    const onWindows = process.platform === "win32";
    const javaExe = onWindows ? "java.exe" : "java";
    const hasJava = (jdkHome) => existsSync(join(jdkHome, "bin", javaExe));
    const override = process.env.AEGIS_JOERN_JDK;
    if (override && override.trim()) {
        const overrideHome = expandHome(override.trim());
        if (hasJava(overrideHome)) {
            return overrideHome;
        }
        log.warn("AEGIS_JOERN_JDK set but no java inside", { value: override });
    }
    const knownHomes = onWindows ? JDK_CANDIDATES_WINDOWS : JDK_CANDIDATES_POSIX;
    for (const jdkHome of knownHomes) {
        if (hasJava(jdkHome)) {
            return jdkHome;
        }
    }
    return null;
}
|
|
148
|
+
// ── Cache layout ─────────────────────────────────────────────────────────-
|
|
149
|
+
/**
 * Root directory for aegis state.
 *
 * ``AEGIS_HOME`` overrides the default ``<home>/.aegis``. A value that is
 * empty or whitespace-only is treated as unset: accepting it would make the
 * derived cache paths relative to the current working directory. Surrounding
 * whitespace is trimmed, matching how the other environment overrides in this
 * module are read.
 *
 * @returns {string} The configured or default aegis home directory.
 */
function aegisHome() {
    const override = process.env.AEGIS_HOME?.trim();
    if (override) {
        return override;
    }
    return resolvePath(homedir(), ".aegis");
}
|
|
152
|
+
/**
 * Per-project Joern cache directory: ``<aegisHome>/joern/<key>``, where the
 * key is the first 16 hex characters of the SHA-256 of the resolved project root.
 *
 * @param {string} projectRoot  Project root; resolved to an absolute path before hashing.
 * @returns {string} The cache directory path (not created here).
 */
function cacheDirFor(projectRoot) {
    const hasher = createHash("sha256");
    hasher.update(resolvePath(projectRoot));
    const projectKey = hasher.digest("hex").slice(0, 16);
    return join(aegisHome(), "joern", projectKey);
}
|
|
156
|
+
/**
 * Path of the ``findings.jsonl`` file inside the project's Joern cache directory.
 *
 * @param {string} projectRoot  Project root used to derive the cache directory.
 * @returns {string} Path to ``findings.jsonl`` (the file may not exist).
 */
export function joernFindingsPath(projectRoot) {
    const cacheDir = cacheDirFor(projectRoot);
    return join(cacheDir, "findings.jsonl");
}
|
|
159
|
+
/**
 * Path of the ``info.json`` file inside the project's Joern cache directory.
 *
 * @param {string} projectRoot  Project root used to derive the cache directory.
 * @returns {string} Path to ``info.json`` (the file may not exist).
 */
export function joernInfoPath(projectRoot) {
    const cacheDir = cacheDirFor(projectRoot);
    return join(cacheDir, "info.json");
}
|
|
162
|
+
/**
 * Load the parsed contents of the project's Joern ``info.json``.
 *
 * @param {string} projectRoot  Project root used to locate the cache.
 * @returns {*} The parsed JSON value, or ``null`` when the file is missing,
 *   unreadable, or not valid JSON.
 */
export function readJoernInfo(projectRoot) {
    const infoFile = joernInfoPath(projectRoot);
    if (existsSync(infoFile)) {
        try {
            const raw = readFileSync(infoFile, "utf8");
            return JSON.parse(raw);
        }
        catch {
            // Unreadable or corrupt info file: report it the same way as a missing one.
        }
    }
    return null;
}
|
|
174
|
+
/**
 * Read the cached findings for a project from ``findings.jsonl``.
 *
 * The file holds one JSON object per line. Blank lines are ignored, and lines
 * that fail ``FindingSchema`` validation are dropped. Lines that are not valid
 * JSON (for example a final line truncated by an interrupted write) are also
 * skipped individually, so one corrupt line does not discard every valid
 * finding in the cache; a single warning reports how many were skipped.
 *
 * @param {string} projectRoot  Project root used to locate the cache.
 * @returns {Array} Validated findings; ``[]`` when the file is missing or unreadable.
 */
export function readJoernFindings(projectRoot) {
    const p = joernFindingsPath(projectRoot);
    if (!existsSync(p)) {
        return [];
    }
    let text;
    try {
        text = readFileSync(p, "utf8");
    }
    catch (err) {
        log.warn("joern findings cache unreadable", { err: String(err) });
        return [];
    }
    const out = [];
    let malformed = 0;
    for (const line of text.split(/\r?\n/)) {
        if (!line.trim()) {
            continue;
        }
        try {
            const parsed = FindingSchema.safeParse(JSON.parse(line));
            if (parsed.success) {
                out.push(parsed.data);
            }
        }
        catch {
            // Not parseable as JSON: skip this line only and keep the rest.
            malformed += 1;
        }
    }
    if (malformed > 0) {
        log.warn("joern findings cache has malformed lines; skipped", { skipped: malformed });
    }
    return out;
}
|
|
197
|
+
// Call names treated as security sinks, keyed by the bare callee name, with
// the CWE, severity and title attached to a finding for that call.
const SINKS = {
    // Dynamic code execution
    eval: {
        cwe: "CWE-95",
        severity: "high",
        title: "Code injection via eval",
    },
    exec: {
        cwe: "CWE-95",
        severity: "high",
        title: "Code injection via exec",
    },
    compile: {
        cwe: "CWE-95",
        severity: "medium",
        title: "Dynamic code compilation",
    },
    // OS command execution
    system: {
        cwe: "CWE-78",
        severity: "high",
        title: "OS command injection via system()",
    },
    popen: {
        cwe: "CWE-78",
        severity: "high",
        title: "OS command injection via popen",
    },
    Popen: {
        cwe: "CWE-78",
        severity: "high",
        title: "OS command injection via Popen",
    },
    check_output: {
        cwe: "CWE-78",
        severity: "high",
        title: "OS command injection via check_output",
    },
    check_call: {
        cwe: "CWE-78",
        severity: "high",
        title: "OS command injection via check_call",
    },
    // SQL execution
    execute: {
        cwe: "CWE-89",
        severity: "high",
        title: "SQL injection via execute()",
    },
    executemany: {
        cwe: "CWE-89",
        severity: "high",
        title: "SQL injection via executemany()",
    },
    raw: {
        cwe: "CWE-89",
        severity: "medium",
        title: "Raw SQL query",
    },
    // Deserialization
    loads: {
        cwe: "CWE-502",
        severity: "medium",
        title: "Unsafe deserialization",
    },
};
// Sink call names, in the declaration order of SINKS.
const SINK_NAMES = Object.keys(SINKS);
// Logic-bug detectors, keyed by the kind tag emitted on AEGIS_LOGIC lines.
const LOGIC = {
    empty_catch: {
        cwe: "CWE-703",
        severity: "medium",
        title: "Swallowed exception (empty catch)",
    },
    assign_in_condition: {
        cwe: "CWE-481",
        severity: "high",
        title: "Assignment in conditional (= instead of ==)",
    },
};
// Race detectors, keyed by the kind tag emitted on AEGIS_RACE lines.
const RACES = {
    // Python ``asyncio.create_task(...)`` whose result is not awaited, assigned
    // or returned (fire-and-forget). The task object can be garbage-collected
    // once the local scope exits, and its side effects can race with whatever
    // the caller does next; the Python docs have warned about this since 3.7.
    fire_and_forget_task: {
        cwe: "CWE-362",
        severity: "medium",
        title: "Fire-and-forget asyncio.create_task — race + GC hazard",
    },
    // JS/TS ``foo.then(handler)`` with no ``.catch`` in the same chain and not
    // awaited. On Node 15+ an unhandled rejection terminates the process; on
    // older Node the error is swallowed, and in both cases the caller's next
    // line runs before the promise settles, which is a sequencing race.
    floating_then: {
        cwe: "CWE-755",
        severity: "medium",
        title: "Floating Promise — unhandled rejection / sequencing race",
    },
};
|
|
241
|
+
/**
 * Build the CPGQL (Scala) script that is run against a CPG.
 *
 * The script imports the CPG and runs five detectors, each wrapped in its own
 * try/catch so that a language frontend lacking one construct cannot abort
 * the others. Every hit is printed as one tab-separated line:
 *   - ``AEGIS_SINK\t<name>\t<hasNonLiteralArg>\t<file>\t<line>\t<code>``
 *   - ``AEGIS_LOGIC\t<kind>\t<file>\t<line>\t<code>``
 *   - ``AEGIS_RACE\t<kind>\t<file>\t<line>\t<code>``
 * Newlines and tabs inside ``<code>`` are replaced by spaces in the script so
 * each hit stays on one line.
 *
 * @param {string} cpgPath  Path of the CPG file to import.
 * @returns {string} The script text, lines joined with ``\n``.
 */
function buildCpgqlScript(cpgPath) {
    // Forward slashes keep Windows paths free of backslash escapes inside the
    // Scala string literal; a double quote in the path is escaped so it cannot
    // terminate that literal early.
    const cpgFwd = cpgPath.replace(/\\/g, "/").replace(/"/g, '\\"');
    // Sort a copy: sorting SINK_NAMES itself would reorder the shared
    // module-level array for every other reader.
    const names = [...SINK_NAMES].sort().join("|");
    return [
        `importCpg("${cpgFwd}")`,
        // --- security sinks fed by non-literal (potentially tainted) data ---
        "try {",
        ` cpg.call.name("${names}").foreach { c =>`,
        " val nonLiteralArgs = c.argument.argumentIndexGt(0).isLiteral.size < c.argument.argumentIndexGt(0).size",
        ' val file = c.file.name.headOption.getOrElse("?")',
        ' val line = c.lineNumber.map(_.toString).getOrElse("0")',
        ' val code = c.code.replace("\\n"," ").replace("\\t"," ").take(200)',
        ' println(s"AEGIS_SINK\\t${c.name}\\t${nonLiteralArgs}\\t$file\\t$line\\t$code")',
        " }",
        "} catch { case _: Throwable => }",
        // --- swallowed exceptions: a catch block containing no statements ---
        "try {",
        ' cpg.controlStructure.controlStructureType("CATCH").foreach { cs =>',
        " if (cs.ast.isCall.size == 0 && cs.ast.isControlStructure.size <= 1) {",
        ' val file = cs.file.name.headOption.getOrElse("?")',
        ' val line = cs.lineNumber.map(_.toString).getOrElse("0")',
        ' val code = cs.code.replace("\\n"," ").replace("\\t"," ").take(120)',
        ' println(s"AEGIS_LOGIC\\tempty_catch\\t$file\\t$line\\t$code")',
        " }",
        " }",
        "} catch { case _: Throwable => }",
        // --- assignment used inside an if-condition (= instead of ==) ---
        "try {",
        ' cpg.controlStructure.controlStructureType("IF").condition.isCall.name("<operator>.assignment").foreach { a =>',
        ' val file = a.file.name.headOption.getOrElse("?")',
        ' val line = a.lineNumber.map(_.toString).getOrElse("0")',
        ' val code = a.code.replace("\\n"," ").replace("\\t"," ").take(120)',
        ' println(s"AEGIS_LOGIC\\tassign_in_condition\\t$file\\t$line\\t$code")',
        " }",
        "} catch { case _: Throwable => }",
        // --- RACE #1: Python asyncio.create_task whose AST parent is a BLOCK,
        //     i.e. a statement-position call that is neither assigned,
        //     returned, nor passed as an argument (fire-and-forget). ---
        "try {",
        ' cpg.call.name("create_task").foreach { c =>',
        ' if (c.astParent.label == "BLOCK") {',
        ' val file = c.file.name.headOption.getOrElse("?")',
        ' val line = c.lineNumber.map(_.toString).getOrElse("0")',
        ' val code = c.code.replace("\\n"," ").replace("\\t"," ").take(160)',
        ' println(s"AEGIS_RACE\\tfire_and_forget_task\\t$file\\t$line\\t$code")',
        " }",
        " }",
        "} catch { case _: Throwable => }",
        // --- RACE #2: JS/TS ``foo.then(handler)`` call whose AST parent is a
        //     BLOCK and whose own code text contains no ``.catch(`` — a cheap
        //     proxy for walking the promise chain in the CPG. ---
        "try {",
        ' cpg.call.code(".*\\\\.then\\\\(.*").foreach { t =>',
        ' val parentLabel = t.astParent.label',
        ' val lineCode = t.code',
        ' val hasCatchInChain = lineCode.contains(".catch(")',
        ' if (parentLabel == "BLOCK" && !hasCatchInChain) {',
        ' val file = t.file.name.headOption.getOrElse("?")',
        ' val line = t.lineNumber.map(_.toString).getOrElse("0")',
        ' val code = lineCode.replace("\\n"," ").replace("\\t"," ").take(160)',
        ' println(s"AEGIS_RACE\\tfloating_then\\t$file\\t$line\\t$code")',
        " }",
        " }",
        "} catch { case _: Throwable => }",
    ].join("\n");
}
|
|
315
|
+
// ── Output parsing ────────────────────────────────────────────────────────
|
|
316
|
+
/**
 * Turn the stdout of a Joern script run into a de-duplicated list of findings.
 *
 * Each line is trimmed and routed by its prefix (``AEGIS_SINK``,
 * ``AEGIS_LOGIC`` or ``AEGIS_RACE``, each followed by a tab) to the matching
 * line parser; every other line is ignored. All parsers share one ``Set`` of
 * de-duplication keys.
 *
 * @param {string} stdout       Raw stdout of the Joern run.
 * @param {string} projectRoot  Project root, passed through to the line parsers.
 * @returns {Array} Findings in the order their lines appeared.
 */
function parseJoernOutput(stdout, projectRoot) {
    const routes = [
        ["AEGIS_SINK\t", (text, root, keys) => parseSinkLine(text, root, keys)],
        ["AEGIS_LOGIC\t", (text, root, keys) => parseLogicLine(text, root, keys)],
        ["AEGIS_RACE\t", (text, root, keys) => parseRaceLine(text, root, keys)],
    ];
    const dedupKeys = new Set();
    const findings = [];
    for (const raw of stdout.split(/\r?\n/)) {
        const text = raw.trim();
        const route = routes.find(([prefix]) => text.startsWith(prefix));
        if (route === undefined) {
            continue;
        }
        const finding = route[1](text, projectRoot, dedupKeys);
        if (finding) {
            findings.push(finding);
        }
    }
    return findings;
}
|
|
340
|
+
/**
 * Express a file path relative to the project root, using forward slashes.
 *
 * Both arguments are resolved to absolute paths first. The file is treated as
 * being inside the project only when the root is a prefix that ends on a path
 * boundary: a bare ``startsWith`` would also match sibling directories such as
 * ``/a/proj2`` for root ``/a/proj`` and report a bogus relative path. Paths
 * outside the root, and inputs that cannot be resolved, are returned as given
 * with backslashes turned into forward slashes.
 *
 * @param {string} absPath      File path as reported by Joern.
 * @param {string} projectRoot  Project root directory.
 * @returns {string} Root-relative POSIX-style path, or the normalized input.
 */
function toRelPosix(absPath, projectRoot) {
    const toPosix = (p) => p.replace(/\\/g, "/");
    try {
        const root = resolvePath(projectRoot);
        const abs = resolvePath(absPath);
        if (abs.startsWith(root)) {
            // The character right after the root prefix must be a separator
            // (or nothing), unless the root itself already ends with one
            // (e.g. "/" or "C:\").
            const boundary = abs.charAt(root.length);
            const rootEndsWithSep = /[\\/]$/.test(root);
            if (boundary === "" || boundary === "/" || boundary === "\\" || rootEndsWithSep) {
                return toPosix(abs.slice(root.length).replace(/^[\\/]+/, ""));
            }
        }
        return toPosix(absPath);
    }
    catch {
        return toPosix(absPath);
    }
}
|
|
355
|
+
/**
 * Parse one ``AEGIS_SINK`` line into a finding.
 *
 * Expected fields (tab-separated): prefix, sink name, non-literal-argument
 * flag, file, line number, code. The line is dropped (``null``) when it has
 * fewer than six fields, the sink name is not in ``SINKS``, the flag is not
 * ``true``, the same file/line/CWE/name was already seen, or the finding
 * fails ``FindingSchema`` validation.
 *
 * @param {string} line         One trimmed output line.
 * @param {string} projectRoot  Project root used to relativize the file path.
 * @param {Set<string>} seen    De-duplication keys; a new key is added on first sight.
 * @returns {object | null} The validated finding, or ``null``.
 */
function parseSinkLine(line, projectRoot, seen) {
    const fields = line.split("\t");
    if (fields.length < 6) {
        return null;
    }
    const name = fields[1];
    const rule = SINKS[name];
    if (!rule) {
        return null;
    }
    // Only calls with at least one non-literal argument are reported.
    if (fields[2].trim().toLowerCase() !== "true") {
        return null;
    }
    const relFile = toRelPosix(fields[3], projectRoot);
    const lineNumber = parseInt(fields[4], 10);
    // The line property is omitted entirely when Joern reported no usable line.
    const lineProp = Number.isFinite(lineNumber) && lineNumber > 0 ? { line: lineNumber } : {};
    const key = `${relFile}::${lineProp.line ?? "?"}::${rule.cwe}::${name}`;
    if (seen.has(key)) {
        return null;
    }
    seen.add(key);
    const snippet = fields[5].trim();
    const ruleId = `joern.${name}`;
    const candidate = {
        id: makeFindingId({ engine: "joern", file: relFile, ...lineProp, rule_id: ruleId }),
        engine: "joern",
        file: relFile,
        ...lineProp,
        rule_id: ruleId,
        cwe: rule.cwe,
        severity: rule.severity,
        message: `${rule.title}: call to \`${name}\` with non-literal (potentially tainted) argument. ${snippet.slice(0, 300)}`,
        // CPG reachability is a signal, not proof; the critic refines it.
        confidence: 0.7,
        source: "dataflow",
        evidence: {
            snippet: snippet.slice(0, 400) || undefined,
        },
    };
    const result = FindingSchema.safeParse(candidate);
    return result.success ? result.data : null;
}
|
|
391
|
+
/**
 * Parse one ``AEGIS_LOGIC`` line into a finding.
 *
 * Expected fields (tab-separated): prefix, detector kind, file, line number,
 * code. The line is dropped (``null``) when it has fewer than five fields,
 * the kind is not in ``LOGIC``, the same file/line/CWE/kind was already seen,
 * or the finding fails ``FindingSchema`` validation.
 *
 * @param {string} line         One trimmed output line.
 * @param {string} projectRoot  Project root used to relativize the file path.
 * @param {Set<string>} seen    De-duplication keys; a new key is added on first sight.
 * @returns {object | null} The validated finding, or ``null``.
 */
function parseLogicLine(line, projectRoot, seen) {
    const fields = line.split("\t");
    if (fields.length < 5) {
        return null;
    }
    const kind = fields[1];
    const rule = LOGIC[kind];
    if (!rule) {
        return null;
    }
    const relFile = toRelPosix(fields[2], projectRoot);
    const lineNumber = parseInt(fields[3], 10);
    // The line property is omitted entirely when Joern reported no usable line.
    const lineProp = Number.isFinite(lineNumber) && lineNumber > 0 ? { line: lineNumber } : {};
    const key = `${relFile}::${lineProp.line ?? "?"}::${rule.cwe}::${kind}`;
    if (seen.has(key)) {
        return null;
    }
    seen.add(key);
    const snippet = fields[4].trim();
    const ruleId = `joern.${kind}`;
    const candidate = {
        id: makeFindingId({ engine: "joern", file: relFile, ...lineProp, rule_id: ruleId }),
        engine: "joern",
        file: relFile,
        ...lineProp,
        rule_id: ruleId,
        cwe: rule.cwe,
        severity: rule.severity,
        message: `${rule.title}. ${snippet.slice(0, 300)}`,
        confidence: 0.6,
        source: "dataflow",
        evidence: { snippet: snippet.slice(0, 400) || undefined },
    };
    const result = FindingSchema.safeParse(candidate);
    return result.success ? result.data : null;
}
|
|
423
|
+
/**
 * Parse one ``AEGIS_RACE`` line into a finding.
 *
 * Expected fields (tab-separated): prefix, detector kind, file, line number,
 * code. The line is dropped (``null``) when it has fewer than five fields,
 * the kind is not in ``RACES``, the same file/line/CWE/kind was already seen,
 * or the finding fails ``FindingSchema`` validation. Rule ids take the form
 * ``joern.race.<kind>``.
 *
 * @param {string} line         One trimmed output line.
 * @param {string} projectRoot  Project root used to relativize the file path.
 * @param {Set<string>} seen    De-duplication keys; a new key is added on first sight.
 * @returns {object | null} The validated finding, or ``null``.
 */
function parseRaceLine(line, projectRoot, seen) {
    const fields = line.split("\t");
    if (fields.length < 5) {
        return null;
    }
    const kind = fields[1];
    const rule = RACES[kind];
    if (!rule) {
        return null;
    }
    const relFile = toRelPosix(fields[2], projectRoot);
    const lineNumber = parseInt(fields[3], 10);
    // The line property is omitted entirely when Joern reported no usable line.
    const lineProp = Number.isFinite(lineNumber) && lineNumber > 0 ? { line: lineNumber } : {};
    const key = `${relFile}::${lineProp.line ?? "?"}::${rule.cwe}::${kind}`;
    if (seen.has(key)) {
        return null;
    }
    seen.add(key);
    const snippet = fields[4].trim();
    const ruleId = `joern.race.${kind}`;
    const candidate = {
        id: makeFindingId({ engine: "joern", file: relFile, ...lineProp, rule_id: ruleId }),
        engine: "joern",
        file: relFile,
        ...lineProp,
        rule_id: ruleId,
        cwe: rule.cwe,
        severity: rule.severity,
        // Race hits come from a heuristic CPGQL pattern, so they carry a lower
        // confidence than a sink hit and are left for the critic to confirm
        // or drop.
        confidence: 0.55,
        source: "dataflow",
        message: `${rule.title}. ${snippet.slice(0, 300)}`,
        evidence: { snippet: snippet.slice(0, 400) || undefined },
    };
    const result = FindingSchema.safeParse(candidate);
    return result.success ? result.data : null;
}
|
|
459
|
+
// ── Builder (one-shot) ────────────────────────────────────────────────────
|
|
460
|
+
/**
 * Decide which Joern frontends should be run for the project at ``root``.
 *
 * A Joern CPG covers a single language per build, so a multi-language project
 * (a TS app with embedded Python scripts, a backend repo with both Python and
 * Go, ...) needs one CPG per language with the findings merged afterwards.
 * Otherwise whichever frontend Joern's own heuristic picks scans one language
 * and the rest is silently dropped.
 *
 * Detection is deliberately cheap:
 *   - Python and JavaScript/TypeScript: a top-level marker file
 *     (``pyproject.toml`` / ``package.json``) or a matching source file found
 *     by ``hasMarkerOrFile``'s shallow scan, budgeted at 200 directory entries,
 *     so a TS app with a Python smoke fixture still gets both passes.
 *   - Go and Rust: the top-level marker only (``go.mod`` / ``Cargo.toml``).
 *
 * NOTE(review): ``buildJoernCpg`` forwards ``lang`` to ``joern-parse`` via
 * ``--language``; confirm these hint strings are identifiers the installed
 * Joern release accepts.
 *
 * @param {string} root - Project root directory.
 * @returns {{lang: string, suffix: string}[]} Ordered passes (python,
 *   javascript, go, rust). ``suffix`` is spliced into the per-language cache
 *   file names so artifacts do not clobber each other. When nothing is
 *   detected, a single ``{lang: "", suffix: ""}`` entry is returned so that
 *   Joern's auto-detection picks the frontend.
 */
function detectJoernLanguages(root) {
    // Every probe is evaluated, in this order, regardless of earlier results.
    const probes = [
        {
            lang: "python",
            suffix: ".py",
            present: () => hasMarkerOrFile(root, "pyproject.toml", /\.py$/, 200),
        },
        {
            lang: "javascript",
            suffix: ".js",
            present: () => hasMarkerOrFile(root, "package.json", /\.(?:ts|tsx|js|jsx|mjs|cjs)$/, 200),
        },
        {
            lang: "go",
            suffix: ".go",
            present: () => existsSync(join(root, "go.mod")),
        },
        {
            lang: "rust",
            suffix: ".rs",
            present: () => existsSync(join(root, "Cargo.toml")),
        },
    ];
    const detected = probes
        .filter((probe) => probe.present())
        .map(({ lang, suffix }) => ({ lang, suffix }));
    if (detected.length > 0) {
        return detected;
    }
    // No marker matched: single pass with no hint.
    return [{ lang: "", suffix: "" }];
}
|
|
497
|
+
/**
 * Report whether ``root`` looks like it contains code for one language.
 *
 * Returns true when either
 *   - ``root`` directly contains a file or directory named ``marker``, or
 *   - a regular file whose name matches ``ext`` is found by a shallow walk
 *     that reads ``root``, its child directories and its grandchild
 *     directories (three directory levels in total).
 *
 * Walk rules:
 *   - Entries named ``node_modules``, ``dist``, ``build``, ``venv`` or
 *     ``__pycache__``, and every entry whose name starts with ``.``, are never
 *     matched and never descended into.
 *   - Each directory entry visited during descent is charged against ``cap``
 *     (skipped entries included); once the budget is spent the walk gives up.
 *   - Within a directory, file names are tested against ``ext`` BEFORE any
 *     subdirectory is entered. A depth-first walk that charges a large
 *     subdirectory first can exhaust the budget and miss a source file sitting
 *     right next to it, making the answer depend on enumeration order. The
 *     name test runs over a listing that has already been read, so it adds no
 *     directory reads, and it only stats names that match.
 *   - Unreadable directories and entries that cannot be stat'ed are ignored.
 *
 * @param {string} root - Directory to inspect.
 * @param {string} marker - Name looked up directly under ``root``.
 * @param {RegExp} ext - Tested against bare entry names. Should not carry the
 *   ``g`` or ``y`` flag, since ``test`` is stateful for those.
 * @param {number} cap - Maximum number of directory entries charged.
 * @returns {boolean}
 */
function hasMarkerOrFile(root, marker, ext, cap) {
    if (existsSync(join(root, marker))) {
        return true;
    }
    const MAX_DEPTH = 2;
    // Known noise / heavyweight directories. Dot-prefixed names (.git, .venv,
    // .aegis, ...) are covered by the leading-dot rule in isSkipped.
    const SKIPPED_NAMES = new Set(["node_modules", "dist", "build", "venv", "__pycache__"]);
    const isSkipped = (name) => SKIPPED_NAMES.has(name) || name.startsWith(".");
    const statOrNull = (path) => {
        try {
            return statSync(path);
        }
        catch {
            // Broken symlink, permission error, entry removed mid-walk, ...
            return null;
        }
    };
    let charged = 0;
    function walk(dir, depth) {
        // With the budget already spent, nothing below could be charged, so
        // stop before paying for another directory read.
        if (depth > MAX_DEPTH || charged >= cap) {
            return false;
        }
        let names;
        try {
            names = readdirSync(dir);
        }
        catch {
            return false;
        }
        // Pass 1: a matching file directly in this directory wins immediately.
        for (const name of names) {
            if (isSkipped(name) || !ext.test(name)) {
                continue;
            }
            if (statOrNull(join(dir, name))?.isFile()) {
                return true;
            }
        }
        // Pass 2: budgeted descent into subdirectories.
        for (const name of names) {
            charged++;
            if (charged > cap) {
                return false;
            }
            if (isSkipped(name)) {
                continue;
            }
            const full = join(dir, name);
            if (statOrNull(full)?.isDirectory() && walk(full, depth + 1)) {
                return true;
            }
        }
        return false;
    }
    return walk(root, 0);
}
|
|
545
|
+
/**
 * Build the CPG(s) for a project, run the security+logic query against each,
 * and write the findings cache.
 *
 * One CPG is built per language returned by ``detectJoernLanguages``, because
 * a Joern CPG is single-language; findings from all passes are merged and
 * de-duplicated by finding id. A pass that fails (parse or query) is recorded
 * in the diagnostics and skipped so that the other languages still report.
 *
 * Never throws: every failure, including unexpected exceptions from the
 * filesystem or the child processes, is reported as ``{ok: false, reason}``.
 * If no pass succeeds the result is a failure and the cache files are left
 * untouched, so a broken toolchain cannot be cached as a clean scan.
 *
 * @param {string} projectRoot - Project to analyze.
 * @param {{force?: boolean, timeoutMs?: number}} [opts]
 *   ``force`` (default false) skips the cache check and always rebuilds.
 *   ``timeoutMs`` overrides the default timeout of each parse and each query
 *   child process.
 * @returns {Promise<{ok: boolean, cacheDir: string, findingsPath: string,
 *   findingsCount: number, durationMs: number, reason?: string}>}
 */
export async function buildJoernCpg(projectRoot, opts) {
    const t0 = Date.now();
    const dir = cacheDirFor(projectRoot);
    const findingsPath = joernFindingsPath(projectRoot);
    const infoPath = joernInfoPath(projectRoot);
    const force = opts?.force ?? false;
    // Single place that shapes the two possible results.
    const fail = (reason) => ({
        ok: false,
        cacheDir: dir,
        findingsPath,
        findingsCount: 0,
        durationMs: Date.now() - t0,
        reason,
    });
    const succeed = (findingsCount) => ({
        ok: true,
        cacheDir: dir,
        findingsPath,
        findingsCount,
        durationMs: Date.now() - t0,
    });
    try {
        const joernDir = findJoernDir();
        if (!joernDir) {
            return fail("joern not installed — set AEGIS_JOERN_DIR or extract joern-cli to ~/.aegis/joern/joern-cli/");
        }
        const jdk = findJoernJdk();
        if (!jdk) {
            return fail("no JDK 11..21 found (Joern's Scala scripting breaks on JDK 22+). Install Temurin/Corretto 21 or set AEGIS_JOERN_JDK.");
        }
        // Cache reuse: when both cache files exist and the recorded query
        // version matches the current one, the cached findings are returned.
        // NOTE: source-file modification times are NOT compared here, so edits
        // to the project do not invalidate the cache by themselves; callers
        // must pass ``force`` to rebuild after the sources change.
        if (!force && existsSync(infoPath) && existsSync(findingsPath)) {
            try {
                const cachedInfo = JSON.parse(readFileSync(infoPath, "utf8"));
                if (cachedInfo.query_version === QUERY_VERSION) {
                    // The findings file is newline-terminated JSONL, so the
                    // newline count equals the number of findings.
                    const count = (readFileSync(findingsPath, "utf8").match(/\n/g) ?? []).length;
                    log.info("joern cache hit", { dir, age_ms: Date.now() - cachedInfo.built_at, n: count });
                    return succeed(count);
                }
            }
            catch {
                // Unreadable or corrupt cache: fall through to rebuild.
            }
        }
        mkdirSync(dir, { recursive: true });
        const env = {
            JAVA_HOME: jdk,
            PATH: `${join(jdk, "bin")}${process.platform === "win32" ? ";" : ":"}${process.env.PATH ?? ""}`,
        };
        const joernReplBin = findJoernReplBin(joernDir);
        if (!joernReplBin) {
            return fail(`joern REPL binary not found in ${joernDir} (looked for joern, joern-cli; check install with joern --help)`);
        }
        const joernParseBin = join(joernDir, "bin", `joern-parse${joernBinSuffix()}`);
        if (!existsSync(joernParseBin)) {
            return fail(`joern-parse not found at ${joernParseBin}`);
        }
        // Per-language passes: Joern's CPG is single-language-per-build, so we
        // build N CPGs (one per detected language) and aggregate findings. This
        // is what catches a Python smoke fixture inside a TS app — otherwise
        // joern-parse's frontend heuristic picks one language and silently
        // skips the rest.
        const languages = detectJoernLanguages(projectRoot);
        log.info("joern: detected languages", { languages: languages.map((l) => l.lang || "auto") });
        const buildTimeoutMs = opts?.timeoutMs ?? DEFAULT_CPG_BUILD_TIMEOUT_MS;
        const queryTimeoutMs = opts?.timeoutMs ?? DEFAULT_QUERY_TIMEOUT_MS;
        const allFindings = [];
        const passDiagnostics = [];
        let totalParseMs = 0;
        let totalQueryMs = 0;
        for (const { lang, suffix: langSuffix } of languages) {
            const label = lang || "auto";
            const perLangCpg = join(dir, `cpg${langSuffix}.bin`);
            // Joern's Python path through joern-parse is broken on Windows (it
            // hardcodes `py2cpg.sh` instead of using pysrc2cpg(.bat) — verified
            // empirically 2026-05-25). Workaround: invoke pysrc2cpg directly,
            // which produces an equivalent CPG that joern --script can load.
            const directPyBin = lang === "python" && process.platform === "win32"
                ? join(joernDir, `pysrc2cpg${joernBinSuffix()}`)
                : null;
            let parseRes;
            if (directPyBin !== null && existsSync(directPyBin)) {
                log.info("joern: building CPG via pysrc2cpg (Windows workaround)", {
                    lang, cpg: perLangCpg,
                });
                parseRes = await spawnRun(directPyBin, {
                    args: [resolvePath(projectRoot), "--output", perLangCpg],
                    env,
                    timeoutMs: buildTimeoutMs,
                });
            }
            else {
                const parseArgs = [resolvePath(projectRoot), "--output", perLangCpg];
                if (lang) {
                    parseArgs.push("--language", lang);
                }
                log.info("joern: building CPG", { lang: label, cpg: perLangCpg });
                parseRes = await spawnRun(joernParseBin, {
                    args: parseArgs,
                    env,
                    timeoutMs: buildTimeoutMs,
                });
            }
            totalParseMs += parseRes.durationMs;
            if (parseRes.timedOut || parseRes.exitCode !== 0 || !existsSync(perLangCpg)) {
                // Don't abort the whole build — record the failure and continue
                // with the other languages. A broken Python frontend should not
                // prevent a working JS scan from emitting findings.
                log.warn("joern-parse failed for lang", {
                    lang: label,
                    rc: parseRes.exitCode,
                    stderr_tail: parseRes.stderr.slice(-300),
                });
                passDiagnostics.push({
                    lang: label,
                    ok: false,
                    parse_ms: parseRes.durationMs,
                    reason: `parse rc=${parseRes.exitCode}`,
                });
                continue;
            }
            // Write the per-language query script and run it.
            const queryPath = join(dir, `query${langSuffix}.sc`);
            writeFileSync(queryPath, buildCpgqlScript(perLangCpg), "utf8");
            const queryRes = await spawnRun(joernReplBin, {
                args: ["--script", queryPath],
                env,
                cwd: dir,
                timeoutMs: queryTimeoutMs,
                maxBufferBytes: 50 * 1024 * 1024,
            });
            totalQueryMs += queryRes.durationMs;
            if (queryRes.timedOut || queryRes.exitCode !== 0) {
                log.warn("joern --script failed for lang", {
                    lang: label,
                    rc: queryRes.exitCode,
                    stderr_tail: queryRes.stderr.slice(-300),
                });
                passDiagnostics.push({
                    lang: label,
                    ok: false,
                    query_ms: queryRes.durationMs,
                    reason: `query rc=${queryRes.exitCode}`,
                });
                continue;
            }
            const passFindings = parseJoernOutput(queryRes.stdout, projectRoot);
            // Plain loop rather than push(...spread): a very large result set
            // would otherwise exceed the engine's argument-count limit.
            for (const finding of passFindings) {
                allFindings.push(finding);
            }
            passDiagnostics.push({
                lang: label,
                ok: true,
                parse_ms: parseRes.durationMs,
                query_ms: queryRes.durationMs,
                findings: passFindings.length,
            });
        }
        // If every pass failed there is nothing trustworthy to cache. Writing
        // an empty findings file plus a fresh info file here would make later
        // calls hit the cache and report a clean scan, so report the failure
        // and leave any previous cache as it was.
        if (!passDiagnostics.some((d) => d.ok === true)) {
            const detail = passDiagnostics.map((d) => `${d.lang}: ${d.reason}`).join("; ");
            return fail(`all joern passes failed (${detail})`);
        }
        // Dedup across languages — when a file is picked up by multiple
        // frontends (rare but possible), the same Finding id would otherwise
        // appear twice. The first occurrence wins.
        const seenIds = new Set();
        const dedupedFindings = allFindings.filter((f) => {
            if (seenIds.has(f.id)) {
                return false;
            }
            seenIds.add(f.id);
            return true;
        });
        // Best-effort side file: ``cpg.bin.pointer`` holds the per-pass
        // diagnostics as JSON (it is not a CPG and not a symlink); the real
        // per-language CPGs stay at ``cpg<suffix>.bin``. Failure to write it
        // must not fail the build.
        try {
            writeFileSync(join(dir, "cpg.bin.pointer"), JSON.stringify(passDiagnostics, null, 2), "utf8");
        }
        catch {
            // ignore
        }
        // Newline-terminated JSONL; an empty result produces an empty file.
        writeFileSync(findingsPath, dedupedFindings.map((f) => `${JSON.stringify(f)}\n`).join(""), "utf8");
        const info = {
            built_at: Date.now(),
            query_version: QUERY_VERSION,
            joern_path: joernDir,
            jdk_path: jdk,
            n_findings: dedupedFindings.length,
            // Number of distinct files that have at least one finding.
            files_indexed: new Set(dedupedFindings.map((f) => f.file)).size,
        };
        writeFileSync(infoPath, JSON.stringify(info, null, 2), "utf8");
        log.info("joern: query done", {
            findings: dedupedFindings.length,
            parse_ms: totalParseMs,
            query_ms: totalQueryMs,
            total_ms: Date.now() - t0,
            passes: passDiagnostics,
        });
        return succeed(dedupedFindings.length);
    }
    catch (err) {
        // Honors the "never throws" contract for filesystem and spawn errors.
        const detail = err instanceof Error ? err.message : String(err);
        log.warn("joern build failed", { dir, error: detail });
        return fail(`joern build failed: ${detail}`);
    }
}
|
|
772
|
+
// Internals gathered under a single export so tests can reach the script
// builder, the output parser, the path helper, the rule tables and the query
// version without each being exported individually.
export const _testing = {
    buildCpgqlScript,
    parseJoernOutput,
    toRelPosix,
    SINKS,
    LOGIC,
    RACES,
    QUERY_VERSION,
};
|
|
782
|
+
//# sourceMappingURL=joern.js.map
|