jaku.sh 1.0.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +161 -18
- package/action.yml +32 -1
- package/package.json +2 -1
- package/src/agents/ai-agent.js +47 -1
- package/src/agents/api-agent.js +9 -0
- package/src/agents/logic-agent.js +158 -90
- package/src/agents/orchestrator.js +56 -1
- package/src/agents/security-agent.js +86 -54
- package/src/cli.js +68 -6
- package/src/core/ai/ai-endpoint-detector.js +28 -4
- package/src/core/ai/prompt-injector.js +34 -0
- package/src/core/api/api-key-auditor.js +1 -1
- package/src/core/api/cors-ws-tester.js +1 -1
- package/src/core/crawler.js +22 -1
- package/src/core/llm/augmentations.js +210 -0
- package/src/core/llm/llm-client.js +184 -0
- package/src/core/llm/providers/anthropic-provider.js +46 -0
- package/src/core/llm/providers/base-provider.js +44 -0
- package/src/core/llm/providers/null-provider.js +21 -0
- package/src/core/llm/providers/openai-provider.js +47 -0
- package/src/core/logic/access-boundary-tester.js +1 -1
- package/src/core/logic/business-rule-inferrer.js +50 -1
- package/src/core/security/sqli-prober.js +312 -43
- package/src/core/security/xss-scanner.js +26 -2
- package/src/reporting/report-generator.js +96 -9
- package/src/reporting/sarif-generator.js +81 -5
- package/src/utils/config.js +196 -2
- package/src/utils/finding.js +3 -0
- package/src/utils/logger.js +33 -0
- package/src/utils/param-discovery.js +93 -0
- package/src/utils/safety.js +44 -0
- package/src/utils/version.js +30 -0
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
2
|
import path from 'path';
|
|
3
|
+
import crypto from 'crypto';
|
|
4
|
+
import { getVersion } from '../utils/version.js';
|
|
3
5
|
|
|
4
6
|
/**
|
|
5
7
|
* SARIF Generator — Generates Static Analysis Results Interchange Format (SARIF) v2.1.0
|
|
@@ -89,6 +91,11 @@ export function generateSARIF(findings, meta = {}) {
|
|
|
89
91
|
rules.push(rule);
|
|
90
92
|
}
|
|
91
93
|
|
|
94
|
+
// Represent the affected surface as a proper absolute web URI rather
|
|
95
|
+
// than anchoring it to the source tree (no %SRCROOT%), so GitHub does
|
|
96
|
+
// not try to map DAST URLs to repository files.
|
|
97
|
+
const webUri = _toWebUri(finding.affected_surface || meta.target);
|
|
98
|
+
|
|
92
99
|
// Build result
|
|
93
100
|
const result = {
|
|
94
101
|
ruleId,
|
|
@@ -98,19 +105,31 @@ export function generateSARIF(findings, meta = {}) {
|
|
|
98
105
|
locations: [{
|
|
99
106
|
physicalLocation: {
|
|
100
107
|
artifactLocation: {
|
|
101
|
-
uri:
|
|
102
|
-
uriBaseId: '%SRCROOT%',
|
|
108
|
+
uri: webUri,
|
|
103
109
|
},
|
|
104
110
|
},
|
|
105
111
|
}],
|
|
112
|
+
// Stable per-result fingerprint so GitHub can track findings across
|
|
113
|
+
// runs (derived from module + normalized title + affected surface).
|
|
114
|
+
partialFingerprints: {
|
|
115
|
+
'jakuFindingHash/v1': _fingerprint(finding),
|
|
116
|
+
},
|
|
106
117
|
properties: {
|
|
107
118
|
severity: finding.severity,
|
|
108
119
|
module: finding.module,
|
|
109
120
|
status: finding.status || 'open',
|
|
110
121
|
timestamp: finding.timestamp || new Date().toISOString(),
|
|
122
|
+
affectedUrl: finding.affected_surface || meta.target || null,
|
|
111
123
|
},
|
|
112
124
|
};
|
|
113
125
|
|
|
126
|
+
// Describe the tested surface as a SARIF webRequest where we can, which
|
|
127
|
+
// signals to consumers that this is a dynamic (DAST) finding.
|
|
128
|
+
const webRequest = _toWebRequest(finding.affected_surface || meta.target);
|
|
129
|
+
if (webRequest) {
|
|
130
|
+
result.webRequest = webRequest;
|
|
131
|
+
}
|
|
132
|
+
|
|
114
133
|
if (finding.remediation) {
|
|
115
134
|
result.fixes = [{ description: { text: finding.remediation } }];
|
|
116
135
|
}
|
|
@@ -123,7 +142,7 @@ export function generateSARIF(findings, meta = {}) {
|
|
|
123
142
|
location: {
|
|
124
143
|
message: { text: typeof finding.evidence === 'string' ? finding.evidence : JSON.stringify(finding.evidence) },
|
|
125
144
|
physicalLocation: {
|
|
126
|
-
artifactLocation: { uri:
|
|
145
|
+
artifactLocation: { uri: webUri },
|
|
127
146
|
},
|
|
128
147
|
},
|
|
129
148
|
}],
|
|
@@ -141,8 +160,8 @@ export function generateSARIF(findings, meta = {}) {
|
|
|
141
160
|
tool: {
|
|
142
161
|
driver: {
|
|
143
162
|
name: 'JAKU',
|
|
144
|
-
version: meta.version ||
|
|
145
|
-
semanticVersion: meta.version ||
|
|
163
|
+
version: meta.version || getVersion(),
|
|
164
|
+
semanticVersion: meta.version || getVersion(),
|
|
146
165
|
informationUri: 'https://github.com/jaku-security',
|
|
147
166
|
rules,
|
|
148
167
|
},
|
|
@@ -168,6 +187,63 @@ export function writeSARIF(findings, outputDir, meta = {}) {
|
|
|
168
187
|
return sarifPath;
|
|
169
188
|
}
|
|
170
189
|
|
|
190
|
+
/**
 * Normalize an affected surface into a clean absolute web URI.
 *
 * Only http(s) and ws(s) URLs are returned as-is (after WHATWG normalization).
 * Anything else becomes a `urn:jaku:` logical identifier. This matters because
 * `new URL()` happily parses strings that are not web URLs at all — a Windows
 * path such as `C:\app\index.js` parses with scheme `c:`, and `localhost:3000/x`
 * parses with scheme `localhost:` — and those must not be reported as web URIs.
 *
 * @param {*} surface - Affected surface (usually a URL string); may be empty.
 * @returns {string} An absolute web URI, `urn:jaku:surface:<encoded>` for
 *   non-web surfaces, or `urn:jaku:unknown-surface` when nothing was given.
 */
function _toWebUri(surface) {
  if (!surface) return 'urn:jaku:unknown-surface';

  const raw = String(surface);
  try {
    const parsed = new URL(raw);
    if (['http:', 'https:', 'ws:', 'wss:'].includes(parsed.protocol)) {
      return parsed.toString();
    }
  } catch {
    // Not parseable as an absolute URL — fall through to the logical identifier.
  }

  // File path, descriptive surface, or non-web scheme: keep it as a logical
  // identifier so GitHub doesn't treat it as a source artifact.
  return `urn:jaku:surface:${encodeURIComponent(raw)}`;
}
|
|
205
|
+
|
|
206
|
+
/**
 * Build a minimal SARIF webRequest object for a tested URL, when possible.
 *
 * A request is only described for http(s)/ws(s) URLs. `new URL()` also accepts
 * non-web inputs (e.g. `C:\app\x.js` parses with scheme `c:`, `mailto:` links
 * parse fine), and emitting a webRequest for those would mislabel a non-web
 * surface as a dynamic HTTP finding.
 *
 * NOTE(review): `method` is always 'GET' because only the surface URL is
 * available here; the verb actually used by the probe is not passed in.
 *
 * @param {*} surface - Affected surface (usually a URL string); may be empty.
 * @returns {{protocol: string, target: string, method: string} | null}
 *   The webRequest descriptor, or null when the surface is not a web URL.
 */
function _toWebRequest(surface) {
  if (!surface) return null;

  let parsed;
  try {
    parsed = new URL(surface);
  } catch {
    return null;
  }

  if (!['http:', 'https:', 'ws:', 'wss:'].includes(parsed.protocol)) {
    return null;
  }

  return {
    // URL#protocol carries a trailing ':' — strip it ("https:" → "https").
    protocol: parsed.protocol.slice(0, -1),
    target: parsed.toString(),
    method: 'GET',
  };
}
|
|
222
|
+
|
|
223
|
+
/**
 * Derive a stable fingerprint for cross-run finding tracking.
 *
 * The hash basis is `module|title|surface`, all lower-cased:
 *  - module defaults to 'security';
 *  - title has whitespace runs collapsed and is trimmed;
 *  - surface is reduced to origin + pathname when it parses as a URL with a
 *    real origin, so transient query values don't churn the hash between runs.
 *
 * URLs with an opaque origin (`u.origin === 'null'`, e.g. `file:` URLs or a
 * Windows path that happens to parse as scheme `c:`) are kept verbatim;
 * otherwise every such surface would collapse to `null<path>` and unrelated
 * findings could share a fingerprint. Inputs are coerced with String() so a
 * non-string module/title/surface cannot throw.
 *
 * @param {{module?: *, title?: *, affected_surface?: *}} finding
 * @returns {string} Hex-encoded SHA-256 digest (64 characters).
 */
function _fingerprint(finding) {
  const module = String(finding.module || 'security').toLowerCase();
  const title = String(finding.title || '')
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .trim();

  let surface = String(finding.affected_surface || '');
  try {
    const u = new URL(surface);
    if (u.origin !== 'null') {
      surface = `${u.origin}${u.pathname}`;
    }
  } catch {
    // Not a URL — leave non-URL surfaces as-is.
  }

  const basis = `${module}|${title}|${surface.toLowerCase()}`;
  return crypto.createHash('sha256').update(basis).digest('hex');
}
|
|
246
|
+
|
|
171
247
|
/**
|
|
172
248
|
* Match a finding to a CWE based on title/description patterns.
|
|
173
249
|
*/
|
package/src/utils/config.js
CHANGED
|
@@ -1,6 +1,20 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
2
|
import path from 'path';
|
|
3
3
|
|
|
4
|
+
/**
|
|
5
|
+
* Safety mode tiers control how aggressive JAKU is allowed to be.
|
|
6
|
+
*
|
|
7
|
+
* passive — recon/observation + static analysis only. No probing requests
|
|
8
|
+
* that send attack payloads, no state-changing requests.
|
|
9
|
+
* safe-active — (default) active but non-destructive probing (XSS/SQLi probes,
|
|
10
|
+
* enumeration checks, etc.). Never issues state-changing/mutating
|
|
11
|
+
* requests. Destructive logic tests are skipped.
|
|
12
|
+
* aggressive — everything, including destructive/state-changing tests
|
|
13
|
+
* (race conditions, pricing/checkout mutation, etc.).
|
|
14
|
+
*/
|
|
15
|
+
export const SAFETY_MODES = ['passive', 'safe-active', 'aggressive'];
|
|
16
|
+
export const DEFAULT_SAFETY_MODE = 'safe-active';
|
|
17
|
+
|
|
4
18
|
const DEFAULTS = {
|
|
5
19
|
target_url: null,
|
|
6
20
|
credentials: [],
|
|
@@ -8,11 +22,24 @@ const DEFAULTS = {
|
|
|
8
22
|
severity_threshold: 'low',
|
|
9
23
|
halt_on_critical: false,
|
|
10
24
|
notify_webhook: null,
|
|
25
|
+
safety_mode: DEFAULT_SAFETY_MODE,
|
|
26
|
+
// LLM augmentation is OFF by default and strictly additive. The API key is
|
|
27
|
+
// NEVER stored here — it is read from the environment at runtime only.
|
|
28
|
+
llm: {
|
|
29
|
+
enabled: false,
|
|
30
|
+
provider: 'openai', // openai | anthropic
|
|
31
|
+
model: null, // null → provider default (cheap model)
|
|
32
|
+
max_tokens: 1024, // per-call output cap
|
|
33
|
+
max_calls: 50, // per-scan call budget
|
|
34
|
+
token_budget: 100000, // per-scan total token budget
|
|
35
|
+
timeout_seconds: 30,
|
|
36
|
+
consent: false, // must be true before any data egress
|
|
37
|
+
base_url: null, // optional override (self-hosted/proxy)
|
|
38
|
+
},
|
|
11
39
|
crawler: {
|
|
12
40
|
max_depth: 5,
|
|
13
41
|
max_pages: 50,
|
|
14
42
|
timeout: 30000,
|
|
15
|
-
respect_robots_txt: true,
|
|
16
43
|
concurrency: 4,
|
|
17
44
|
},
|
|
18
45
|
viewports: {
|
|
@@ -45,6 +72,164 @@ const SCAN_PROFILES = {
|
|
|
45
72
|
},
|
|
46
73
|
};
|
|
47
74
|
|
|
75
|
+
// ── Lightweight config schema (for validation) ──────────────
|
|
76
|
+
const KNOWN_TOP_LEVEL_KEYS = new Set([
|
|
77
|
+
'target_url', 'credentials', 'modules_enabled', 'severity_threshold',
|
|
78
|
+
'halt_on_critical', 'prod_safe', 'notify_webhook', 'safety_mode',
|
|
79
|
+
'crawler', 'viewports', 'auth', 'business_context', 'output_dir',
|
|
80
|
+
'llm', '_profile', '_authManager',
|
|
81
|
+
]);
|
|
82
|
+
const KNOWN_CRAWLER_KEYS = new Set(['max_depth', 'max_pages', 'timeout', 'concurrency']);
|
|
83
|
+
const KNOWN_LLM_KEYS = new Set([
|
|
84
|
+
'enabled', 'provider', 'model', 'max_tokens', 'max_calls',
|
|
85
|
+
'token_budget', 'timeout_seconds', 'consent', 'base_url',
|
|
86
|
+
]);
|
|
87
|
+
// Secret-bearing keys must NEVER live in the config file.
|
|
88
|
+
const LLM_SECRET_KEYS = new Set(['api_key', 'apiKey', 'key', 'openai_api_key', 'anthropic_api_key', 'token']);
|
|
89
|
+
const VALID_LLM_PROVIDERS = new Set(['openai', 'anthropic']);
|
|
90
|
+
const VALID_SEVERITIES = new Set(['critical', 'high', 'medium', 'low', 'info']);
|
|
91
|
+
|
|
92
|
+
// Keys that have been removed/deprecated. Mapped to a short reason so we can
|
|
93
|
+
// warn and drop them cleanly rather than silently honoring drifted config.
|
|
94
|
+
const DEPRECATED_KEYS = {
|
|
95
|
+
respect_robots: 'JAKU is a security scanner and intentionally does not honor robots.txt',
|
|
96
|
+
respect_robots_txt: 'JAKU is a security scanner and intentionally does not honor robots.txt',
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
/**
 * Validate a parsed jaku.config.json. Warns (does not throw) on unknown keys,
 * bad types, and deprecated keys, and returns a cleaned copy.
 *
 * The input is never mutated: the top level and the `crawler` / `llm`
 * sub-objects are shallow-copied before any key is removed.
 *
 * What is dropped from the returned copy:
 *  - `modules` (aliased to `modules_enabled` when that is not set);
 *  - deprecated keys (top level and under `crawler`);
 *  - an invalid `safety_mode`;
 *  - `crawler` / `llm` when they are not plain objects;
 *  - secret-bearing keys under `llm`;
 *  - non-boolean `llm.enabled` / `llm.consent`, so a truthy string such as
 *    "false" can never switch LLM egress on.
 * Other invalid or unknown values only produce a warning and are kept.
 *
 * @param {*} fileConfig - Parsed config file contents (expected: plain object).
 * @returns {object} Cleaned shallow copy of the config.
 */
export function validateConfig(fileConfig) {
  const warnings = [];
  const isPlainObject = (value) => typeof value === 'object' && value !== null && !Array.isArray(value);
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  // A JSON file can legally hold an array/string/number at the root; spreading
  // those would produce index keys, so treat them as an empty config.
  if (fileConfig != null && !isPlainObject(fileConfig)) {
    warnings.push('Config file should contain a JSON object — ignoring its contents.');
  }
  const cfg = isPlainObject(fileConfig) ? { ...fileConfig } : {};

  // Alias drift: README historically documented `modules`; canonical is `modules_enabled`.
  if (cfg.modules !== undefined && cfg.modules_enabled === undefined) {
    cfg.modules_enabled = cfg.modules;
    warnings.push('Config key "modules" is deprecated — use "modules_enabled". Aliased for now.');
  }
  delete cfg.modules;

  // Top-level deprecated keys (e.g. respect_robots).
  for (const key of Object.keys(DEPRECATED_KEYS)) {
    if (cfg[key] !== undefined) {
      warnings.push(`Config key "${key}" is no longer supported (${DEPRECATED_KEYS[key]}). Ignoring it.`);
      delete cfg[key];
    }
  }

  // Unknown top-level keys (`modules` has already been removed above).
  for (const key of Object.keys(cfg)) {
    if (!KNOWN_TOP_LEVEL_KEYS.has(key)) {
      warnings.push(`Unknown config key "${key}" — ignoring.`);
    }
  }

  // Type checks (light).
  if (cfg.severity_threshold !== undefined && !VALID_SEVERITIES.has(cfg.severity_threshold)) {
    warnings.push(`Invalid severity_threshold "${cfg.severity_threshold}" — expected one of ${[...VALID_SEVERITIES].join(', ')}.`);
  }
  if (cfg.safety_mode !== undefined && !SAFETY_MODES.includes(cfg.safety_mode)) {
    warnings.push(`Invalid safety_mode "${cfg.safety_mode}" — expected one of ${SAFETY_MODES.join(', ')}. Falling back to "${DEFAULT_SAFETY_MODE}".`);
    delete cfg.safety_mode;
  }
  if (cfg.halt_on_critical !== undefined && typeof cfg.halt_on_critical !== 'boolean') {
    warnings.push('Config key "halt_on_critical" should be a boolean.');
  }
  if (cfg.modules_enabled !== undefined && !Array.isArray(cfg.modules_enabled)) {
    warnings.push('Config key "modules_enabled" should be an array of module names.');
  }

  // Crawler sub-keys.
  if (cfg.crawler !== undefined) {
    if (!isPlainObject(cfg.crawler)) {
      warnings.push('Config key "crawler" should be an object.');
      // Drop it: a string/array here would otherwise be spread key-by-key
      // into the merged crawler settings.
      delete cfg.crawler;
    } else {
      // Copy before deleting so the caller's object is left untouched.
      cfg.crawler = { ...cfg.crawler };
      for (const key of Object.keys(cfg.crawler)) {
        if (hasOwn(DEPRECATED_KEYS, key)) {
          warnings.push(`Config key "crawler.${key}" is no longer supported (${DEPRECATED_KEYS[key]}). Ignoring it.`);
          delete cfg.crawler[key];
        } else if (!KNOWN_CRAWLER_KEYS.has(key)) {
          warnings.push(`Unknown crawler config key "crawler.${key}" — ignoring.`);
        }
      }
    }
  }

  // LLM sub-keys.
  if (cfg.llm !== undefined) {
    if (!isPlainObject(cfg.llm)) {
      warnings.push('Config key "llm" should be an object.');
      delete cfg.llm;
    } else {
      cfg.llm = { ...cfg.llm };
      // SECURITY: warn-and-drop any API key placed in the config file.
      for (const key of Object.keys(cfg.llm)) {
        if (LLM_SECRET_KEYS.has(key)) {
          warnings.push(`Config key "llm.${key}" is not allowed — API keys must come from the environment (OPENAI_API_KEY / ANTHROPIC_API_KEY), never the config file. Dropping it.`);
          delete cfg.llm[key];
        } else if (!KNOWN_LLM_KEYS.has(key)) {
          warnings.push(`Unknown llm config key "llm.${key}" — ignoring.`);
        }
      }
      // SECURITY: these two flags gate data egress. A non-boolean value such
      // as the string "false" is truthy, so drop it and let the default apply.
      if (cfg.llm.enabled !== undefined && typeof cfg.llm.enabled !== 'boolean') {
        warnings.push('Config key "llm.enabled" should be a boolean.');
        delete cfg.llm.enabled;
      }
      if (cfg.llm.consent !== undefined && typeof cfg.llm.consent !== 'boolean') {
        warnings.push('Config key "llm.consent" should be a boolean.');
        delete cfg.llm.consent;
      }
      if (cfg.llm.provider !== undefined && !VALID_LLM_PROVIDERS.has(cfg.llm.provider)) {
        warnings.push(`Invalid llm.provider "${cfg.llm.provider}" — expected one of ${[...VALID_LLM_PROVIDERS].join(', ')}.`);
      }
      for (const numKey of ['max_tokens', 'max_calls', 'token_budget', 'timeout_seconds']) {
        const value = cfg.llm[numKey];
        // Number.isFinite also rejects NaN/Infinity and non-number types.
        if (value !== undefined && (!Number.isFinite(value) || value <= 0)) {
          warnings.push(`Config key "llm.${numKey}" should be a positive number.`);
        }
      }
    }
  }

  for (const w of warnings) {
    console.warn(`⚠ JAKU config: ${w}`);
  }

  return cfg;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Resolve LLM settings. Precedence: CLI flags > config file > defaults.
 *
 * CLI flags can only switch `enabled` / `consent` on and override
 * `provider` / `model`; they never switch anything off.
 *
 * NOTE: the API key is NEVER part of this object — it is read from env at
 * runtime by LLMClient. Any stray secret keys are stripped for defense in depth.
 *
 * @param {object} cliOptions - Parsed CLI options (llm, llmProvider, llmModel, llmConsent).
 * @param {object} [fileLLM] - `llm` section from the config file, if any.
 * @returns {object} Merged LLM settings with secret-bearing keys removed.
 */
function resolveLLM(cliOptions, fileLLM) {
  const llm = { ...DEFAULTS.llm, ...(fileLLM || {}) };

  if (cliOptions.llm) llm.enabled = true;
  if (cliOptions.llmProvider) llm.provider = cliOptions.llmProvider;
  if (cliOptions.llmModel) llm.model = cliOptions.llmModel;
  if (cliOptions.llmConsent) llm.consent = true;

  // Defense in depth: never let a secret survive into the merged config.
  for (const key of Object.keys(llm)) {
    if (LLM_SECRET_KEYS.has(key)) delete llm[key];
  }
  return llm;
}

/**
 * Resolve the safety mode from CLI flags / file config / default.
 * Precedence: explicit CLI flag > config file > default (safe-active).
 *
 * Among the boolean CLI flags the check order is aggressive, passive,
 * safeActive; the generic `safety` option is consulted after them. An
 * unrecognised `safety` value is reported on stderr rather than silently
 * ignored, because a typo for "passive" would otherwise result in active
 * probing with no notice.
 *
 * @param {object} cliOptions - Parsed CLI options.
 * @param {string} [fileMode] - Safety mode taken from the config file.
 * @returns {string} One of SAFETY_MODES.
 */
function resolveSafetyMode(cliOptions, fileMode) {
  if (cliOptions.aggressive) return 'aggressive';
  if (cliOptions.passive) return 'passive';
  if (cliOptions.safeActive) return 'safe-active';

  if (cliOptions.safety) {
    if (SAFETY_MODES.includes(cliOptions.safety)) return cliOptions.safety;
    console.warn(`⚠ JAKU config: Invalid CLI safety mode "${cliOptions.safety}" — expected one of ${SAFETY_MODES.join(', ')}. Falling back to config file / default.`);
  }

  if (fileMode && SAFETY_MODES.includes(fileMode)) return fileMode;
  return DEFAULT_SAFETY_MODE;
}
|
|
232
|
+
|
|
48
233
|
export function loadConfig(cliOptions = {}) {
|
|
49
234
|
let fileConfig = {};
|
|
50
235
|
|
|
@@ -59,12 +244,16 @@ export function loadConfig(cliOptions = {}) {
|
|
|
59
244
|
}
|
|
60
245
|
}
|
|
61
246
|
|
|
247
|
+
// Validate + clean file config (warn on unknown/deprecated/bad-typed keys)
|
|
248
|
+
fileConfig = validateConfig(fileConfig);
|
|
249
|
+
|
|
62
250
|
// Merge: defaults < file config < CLI options
|
|
63
251
|
const config = {
|
|
64
252
|
...DEFAULTS,
|
|
65
253
|
...fileConfig,
|
|
66
254
|
crawler: { ...DEFAULTS.crawler, ...(fileConfig.crawler || {}) },
|
|
67
255
|
viewports: { ...DEFAULTS.viewports, ...(fileConfig.viewports || {}) },
|
|
256
|
+
llm: { ...DEFAULTS.llm, ...(fileConfig.llm || {}) },
|
|
68
257
|
};
|
|
69
258
|
|
|
70
259
|
// Apply scan profile (overrides default settings)
|
|
@@ -93,9 +282,14 @@ export function loadConfig(cliOptions = {}) {
|
|
|
93
282
|
if (cliOptions.maxPages) config.crawler.max_pages = parseInt(cliOptions.maxPages);
|
|
94
283
|
if (cliOptions.maxDepth) config.crawler.max_depth = parseInt(cliOptions.maxDepth);
|
|
95
284
|
|
|
285
|
+
// Resolve safety mode (CLI flag > file > default)
|
|
286
|
+
config.safety_mode = resolveSafetyMode(cliOptions, config.safety_mode);
|
|
287
|
+
|
|
288
|
+
// Resolve LLM settings (CLI flag > file > default). Key stays in env only.
|
|
289
|
+
config.llm = resolveLLM(cliOptions, config.llm);
|
|
290
|
+
|
|
96
291
|
return config;
|
|
97
292
|
}
|
|
98
293
|
|
|
99
294
|
export { SCAN_PROFILES };
|
|
100
295
|
export default loadConfig;
|
|
101
|
-
|
package/src/utils/finding.js
CHANGED
|
@@ -18,6 +18,7 @@ export function createFinding({
|
|
|
18
18
|
remediation = '',
|
|
19
19
|
references = [],
|
|
20
20
|
status = 'open',
|
|
21
|
+
source = null,
|
|
21
22
|
}) {
|
|
22
23
|
const prefix = module.toUpperCase();
|
|
23
24
|
const shortId = nanoid(6);
|
|
@@ -35,6 +36,8 @@ export function createFinding({
|
|
|
35
36
|
references,
|
|
36
37
|
status,
|
|
37
38
|
timestamp: new Date().toISOString(),
|
|
39
|
+
// Provenance: 'llm' marks AI-generated/augmented findings; null = deterministic.
|
|
40
|
+
...(source ? { source } : {}),
|
|
38
41
|
};
|
|
39
42
|
|
|
40
43
|
// Auto-tag with OWASP Top 10 classification
|
package/src/utils/logger.js
CHANGED
|
@@ -4,6 +4,38 @@ import fs from 'fs';
|
|
|
4
4
|
|
|
5
5
|
const LOG_DIR = path.join(process.cwd(), 'jaku-reports', 'logs');
|
|
6
6
|
|
|
7
|
+
// Patterns for secrets that must never reach any transport (file or console).
// Patterns with a capture group keep that group (the label/prefix) and redact
// only the value after it; patterns without one redact the whole match.
const SECRET_PATTERNS = [
  /\bsk-[A-Za-z0-9_-]{8,}\b/g, // OpenAI-style keys
  /\bsk-ant-[A-Za-z0-9_-]{8,}\b/g, // Anthropic-style keys
  /(Bearer\s+)[A-Za-z0-9._-]{8,}/gi, // Authorization: Bearer <token>
  /(x-api-key["']?\s*[:=]\s*["']?)[A-Za-z0-9._-]{8,}/gi, // x-api-key headers
  /((?:api[_-]?key|apikey|token)["']?\s*[:=]\s*["']?)[A-Za-z0-9._-]{8,}/gi,
];

/**
 * Replace any secret-looking substrings with [REDACTED].
 *
 * @param {*} value - Value to scrub. null/undefined are returned unchanged;
 *   non-strings are converted with String() first.
 * @returns {*} The scrubbed string (or the original null/undefined).
 */
export function redactSecrets(value) {
  if (value == null) return value;
  let str = typeof value === 'string' ? value : String(value);
  for (const re of SECRET_PATTERNS) {
    // For a pattern WITHOUT a capture group the replacer's second argument is
    // the numeric match offset, not a prefix. Only treat it as a prefix when
    // it is actually a string, otherwise the offset leaks into the output
    // (e.g. "key 4[REDACTED]").
    str = str.replace(re, (match, prefix) => (
      typeof prefix === 'string' ? `${prefix}[REDACTED]` : '[REDACTED]'
    ));
  }
  return str;
}
|
|
25
|
+
|
|
26
|
+
// Winston format that scrubs secrets from message + meta before transports.
// Only string values directly on the info object are scrubbed; `level` and
// `timestamp` are left alone, and `message` is handled on its own first.
const redactionFormat = winston.format((info) => {
  const skipped = ['level', 'message', 'timestamp'];

  if (typeof info.message === 'string') {
    info.message = redactSecrets(info.message);
  }

  Object.keys(info)
    .filter((field) => !skipped.includes(field))
    .forEach((field) => {
      const current = info[field];
      if (typeof current === 'string') {
        info[field] = redactSecrets(current);
      }
    });

  return info;
});
|
|
38
|
+
|
|
7
39
|
export function createLogger(options = {}) {
|
|
8
40
|
const { verbose = false, logDir = LOG_DIR } = options;
|
|
9
41
|
|
|
@@ -14,6 +46,7 @@ export function createLogger(options = {}) {
|
|
|
14
46
|
const logger = winston.createLogger({
|
|
15
47
|
level: verbose ? 'debug' : 'info',
|
|
16
48
|
format: winston.format.combine(
|
|
49
|
+
redactionFormat(),
|
|
17
50
|
winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }),
|
|
18
51
|
winston.format.errors({ stack: true }),
|
|
19
52
|
winston.format.json()
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parameter discovery helpers.
|
|
3
|
+
*
|
|
4
|
+
* Derives real, application-specific parameter names from a SurfaceInventory so
|
|
5
|
+
* injection scanners can test what the app actually uses instead of relying on
|
|
6
|
+
* a fixed guess-list. Sources:
|
|
7
|
+
* - form fields (per-page forms and the global forms list)
|
|
8
|
+
* - query-string params on discovered page URLs and links
|
|
9
|
+
* - query-string params on intercepted API endpoint URLs
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
 * Add every non-empty query-string parameter name found on `url` to `set`.
 * Non-string, empty, or unparseable (e.g. relative) URLs are ignored.
 *
 * @param {*} url - Candidate absolute URL string.
 * @param {Set<string>} set - Collector that receives the parameter names.
 */
function _addUrlParams(url, set) {
  const isUsable = typeof url === 'string' && Boolean(url);
  if (!isUsable) return;

  try {
    new URL(url).searchParams.forEach((_value, paramName) => {
      if (paramName) set.add(paramName);
    });
  } catch {
    // not a parseable URL — ignore
  }
}
|
|
23
|
+
|
|
24
|
+
/**
 * Add the name of each field on `form` to `set`, using the field's `name`
 * and falling back to its `id`. Fields with neither are skipped.
 *
 * @param {{fields?: Array<{name?: string, id?: string}>}} form - Form descriptor.
 * @param {Set<string>} set - Collector that receives the field names.
 */
function _addFormFields(form, set) {
  const fieldList = form?.fields || [];
  for (const entry of fieldList) {
    const label = entry?.name || entry?.id;
    if (!label) continue;
    set.add(label);
  }
}
|
|
30
|
+
|
|
31
|
+
/**
 * Gather every distinct candidate parameter name exposed by an inventory.
 *
 * Names come from, in order: each page's own URL, that page's links and forms,
 * then the inventory-wide forms list, then API endpoint URLs. Page and API
 * entries may be either objects with a `url` property or bare URL strings.
 *
 * @param {object} inventory - SurfaceInventory ({ pages, forms, apiEndpoints })
 * @returns {string[]} discovered parameter names, in first-seen order
 */
export function collectParamNames(inventory) {
  if (!inventory) return [];

  const discovered = new Set();
  const pageList = inventory.pages || [];
  const globalForms = inventory.forms || [];
  const endpointList = inventory.apiEndpoints || [];

  for (const entry of pageList) {
    _addUrlParams(entry?.url || entry, discovered);

    const pageLinks = entry?.links || [];
    for (const href of pageLinks) {
      _addUrlParams(href, discovered);
    }

    const pageForms = entry?.forms || [];
    for (const pageForm of pageForms) {
      _addFormFields(pageForm, discovered);
    }
  }

  for (const sharedForm of globalForms) {
    _addFormFields(sharedForm, discovered);
  }

  for (const endpoint of endpointList) {
    _addUrlParams(endpoint?.url || endpoint, discovered);
  }

  return Array.from(discovered);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Collect, per discovered page/link/api URL, the set of query params that
 * already appear on that exact URL. Useful for scanners that want to test the
 * parameters that a given endpoint genuinely accepts.
 *
 * Parameter names are de-duplicated per URL (first-seen order), so a URL such
 * as `?a=1&a=2` yields `['a']` rather than listing the same name twice. URLs
 * without query params, non-string entries, and unparseable (e.g. relative)
 * URLs are omitted from the map.
 *
 * @param {object} inventory - SurfaceInventory ({ pages, apiEndpoints })
 * @returns {Map<string, string[]>} url → param names present on that url
 */
export function collectUrlParamMap(inventory) {
  const map = new Map();
  if (!inventory) return map;

  const record = (url) => {
    if (!url || typeof url !== 'string') return;
    try {
      const u = new URL(url);
      // Set removes repeated keys while preserving insertion order.
      const keys = [...new Set(u.searchParams.keys())].filter(Boolean);
      if (keys.length > 0) map.set(url, keys);
    } catch {
      /* not a parseable absolute URL — ignore */
    }
  };

  for (const page of (inventory.pages || [])) {
    record(page?.url || page);
    for (const link of (page?.links || [])) record(link);
  }
  for (const api of (inventory.apiEndpoints || [])) {
    record(api?.url || api);
  }

  return map;
}
|
|
92
|
+
|
|
93
|
+
export default { collectParamNames, collectUrlParamMap };
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { DEFAULT_SAFETY_MODE, SAFETY_MODES } from './config.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Safety tier helpers.
|
|
5
|
+
*
|
|
6
|
+
* Tiers are ordered: passive < safe-active < aggressive.
|
|
7
|
+
* A test declares the MINIMUM tier it requires to run; it runs only when the
|
|
8
|
+
* active safety mode is at least that tier.
|
|
9
|
+
*
|
|
10
|
+
* passive — no attack/probing requests, no state-changing requests
|
|
11
|
+
* safe-active — active but non-destructive probing (default)
|
|
12
|
+
* aggressive — destructive/state-changing tests allowed
|
|
13
|
+
*/
|
|
14
|
+
const TIER_RANK = {
|
|
15
|
+
passive: 0,
|
|
16
|
+
'safe-active': 1,
|
|
17
|
+
aggressive: 2,
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
/**
 * Read the active safety mode off a config object.
 *
 * @param {object} [config] - Config that may carry a `safety_mode` property.
 * @returns {string} `config.safety_mode` when it is one of SAFETY_MODES,
 *   otherwise DEFAULT_SAFETY_MODE.
 */
export function getSafetyMode(config) {
  const requested = config?.safety_mode;
  if (SAFETY_MODES.includes(requested)) {
    return requested;
  }
  return DEFAULT_SAFETY_MODE;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Returns true if the active safety mode permits a test that requires
 * `requiredTier` (one of 'passive' | 'safe-active' | 'aggressive').
 *
 * When `requiredTier` is omitted (null/undefined) it defaults to
 * 'safe-active'. A value that is present but not a known tier (for example
 * the typo 'agressive') fails closed: it is treated as requiring
 * 'aggressive', so a mislabelled destructive test cannot run in a lower mode.
 *
 * @param {object} [config] - Config carrying `safety_mode`.
 * @param {string} [requiredTier] - Minimum tier the test needs.
 * @returns {boolean} Whether the test may run under the active mode.
 */
export function allows(config, requiredTier) {
  // Own-property lookup only, so names like 'constructor' never resolve
  // through the prototype chain.
  const rankOf = (tier) => (
    Object.prototype.hasOwnProperty.call(TIER_RANK, tier) ? TIER_RANK[tier] : undefined
  );

  const current = rankOf(getSafetyMode(config)) ?? TIER_RANK[DEFAULT_SAFETY_MODE];
  const required = requiredTier == null
    ? TIER_RANK['safe-active']
    : (rankOf(requiredTier) ?? TIER_RANK.aggressive);

  return current >= required;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Whether the active safety mode resolved from `config` is 'passive'.
 *
 * @param {object} [config] - Config carrying `safety_mode`.
 * @returns {boolean}
 */
export function isPassive(config) {
  const activeMode = getSafetyMode(config);
  return activeMode === 'passive';
}
|
|
39
|
+
|
|
40
|
+
/**
 * Whether the active safety mode resolved from `config` is 'aggressive'.
 *
 * @param {object} [config] - Config carrying `safety_mode`.
 * @returns {boolean}
 */
export function isAggressive(config) {
  const activeMode = getSafetyMode(config);
  return activeMode === 'aggressive';
}
|
|
43
|
+
|
|
44
|
+
export { SAFETY_MODES };
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
|
|
5
|
+
/**
 * Centralized version resolver.
 *
 * The version is read from the package.json two directories above this module
 * the first time it is requested and then served from a module-level cache, so
 * no other file needs to hardcode the version string. If the manifest cannot
 * be read or parsed, or has no `version`, the result is '0.0.0'.
 */
let _cachedVersion = null;

/**
 * @returns {string} The package version, or '0.0.0' when it cannot be resolved.
 */
export function getVersion() {
  if (_cachedVersion) return _cachedVersion;

  let resolved = '0.0.0';
  try {
    const moduleDir = path.dirname(fileURLToPath(import.meta.url));
    // src/utils/version.js → ../../package.json
    const manifestPath = path.join(moduleDir, '..', '..', 'package.json');
    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
    resolved = manifest.version || '0.0.0';
  } catch {
    // Unreadable or malformed manifest — keep the '0.0.0' fallback.
  }

  _cachedVersion = resolved;
  return _cachedVersion;
}
|
|
29
|
+
|
|
30
|
+
export default getVersion;
|