npm - jaku.sh - Versions diffs - 1.0.2 → 1.2.0 - Mend

jaku.sh 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/README.md +161 -18
package/action.yml +32 -1
package/package.json +2 -1
package/src/agents/ai-agent.js +47 -1
package/src/agents/api-agent.js +9 -0
package/src/agents/logic-agent.js +158 -90
package/src/agents/orchestrator.js +56 -1
package/src/agents/security-agent.js +86 -54
package/src/cli.js +68 -6
package/src/core/ai/ai-endpoint-detector.js +28 -4
package/src/core/ai/prompt-injector.js +34 -0
package/src/core/api/api-key-auditor.js +1 -1
package/src/core/api/cors-ws-tester.js +1 -1
package/src/core/crawler.js +22 -1
package/src/core/llm/augmentations.js +210 -0
package/src/core/llm/llm-client.js +184 -0
package/src/core/llm/providers/anthropic-provider.js +46 -0
package/src/core/llm/providers/base-provider.js +44 -0
package/src/core/llm/providers/null-provider.js +21 -0
package/src/core/llm/providers/openai-provider.js +47 -0
package/src/core/logic/access-boundary-tester.js +1 -1
package/src/core/logic/business-rule-inferrer.js +50 -1
package/src/core/security/sqli-prober.js +312 -43
package/src/core/security/xss-scanner.js +26 -2
package/src/reporting/report-generator.js +96 -9
package/src/reporting/sarif-generator.js +81 -5
package/src/utils/config.js +196 -2
package/src/utils/finding.js +3 -0
package/src/utils/logger.js +33 -0
package/src/utils/param-discovery.js +93 -0
package/src/utils/safety.js +44 -0
package/src/utils/version.js +30 -0

package/src/core/llm/llm-client.js ADDED Viewed

@@ -0,0 +1,184 @@
+import { isPassive } from '../../utils/safety.js';
+import { OpenAIProvider } from './providers/openai-provider.js';
+import { AnthropicProvider } from './providers/anthropic-provider.js';
+import { NullProvider } from './providers/null-provider.js';
+/**
+ * LLMClient — the single facade every consumer touches.
+ *
+ * Responsibilities:
+ *   - Provider selection (openai | anthropic | null)
+ *   - API key injection FROM ENV ONLY (never stored in config)
+ *   - Enforce enablement + consent + safety-mode gating (egress disabled in passive)
+ *   - Per-scan token + call budget, per-call timeout (AbortSignal.timeout)
+ *   - Retry/backoff on 429, circuit-breaker on connection failure
+ *   - Returns null whenever disabled/unavailable/over-budget so callers degrade
+ *
+ * Hard rule: ask() NEVER throws to callers and NEVER returns secrets. If the LLM
+ * is not usable for any reason, it returns null and the caller keeps its
+ * deterministic behavior.
+ */
+const PROVIDER_ENV = {
+    openai: 'OPENAI_API_KEY',
+    anthropic: 'ANTHROPIC_API_KEY',
+};
+const DEFAULT_MODELS = {
+    openai: 'gpt-4o-mini',
+    anthropic: 'claude-3-5-haiku-latest',
+};
+/**
+ * Resolve the runtime LLM state from config + env. The returned `apiKey` (if
+ * any) is for immediate provider construction only and is never persisted.
+ */
+function resolveLLMRuntime(config) {
+    const llm = (config && config.llm) || {};
+    const providerName = llm.provider || 'openai';
+    const model = llm.model || DEFAULT_MODELS[providerName] || null;
+    const envVar = PROVIDER_ENV[providerName];
+    const apiKey = envVar ? process.env[envVar] : null;
+    const base = { providerName, model, envVar };
+    if (!llm.enabled) {
+        return { ...base, enabled: false, reason: 'not enabled (set llm.enabled or pass --llm)' };
+    }
+    if (isPassive(config)) {
+        return { ...base, enabled: false, reason: 'disabled in passive safety mode (no third-party egress)' };
+    }
+    if (!PROVIDER_ENV[providerName]) {
+        return { ...base, enabled: false, reason: `unknown provider "${providerName}" (use openai|anthropic)` };
+    }
+    if (!llm.consent) {
+        return { ...base, enabled: false, reason: 'no consent (set llm.consent=true or pass --llm-consent)' };
+    }
+    if (!apiKey) {
+        return { ...base, enabled: false, reason: `no API key in env ${envVar}` };
+    }
+    return { ...base, enabled: true, reason: 'active', apiKey };
+}
+function createProvider(name, opts) {
+    if (name === 'openai') return new OpenAIProvider(opts);
+    if (name === 'anthropic') return new AnthropicProvider(opts);
+    return new NullProvider(opts);
+}
+function isConnectionError(err) {
+    const code = err?.cause?.code || err?.code;
+    return ['ECONNREFUSED', 'ENOTFOUND', 'EAI_AGAIN', 'ECONNRESET', 'UND_ERR_CONNECT_TIMEOUT'].includes(code);
+}
+export class LLMClient {
+    constructor(config, logger) {
+        this.config = config || {};
+        this.logger = logger || null;
+        const llm = this.config.llm || {};
+        this._maxCalls = Number.isFinite(llm.max_calls) ? llm.max_calls : 50;
+        this._perCallTokens = Number.isFinite(llm.max_tokens) ? llm.max_tokens : 1024;
+        this._tokenBudget = Number.isFinite(llm.token_budget) ? llm.token_budget : 100000;
+        this._timeoutMs = (Number.isFinite(llm.timeout_seconds) ? llm.timeout_seconds : 30) * 1000;
+        this._calls = 0;
+        this._tokensUsed = 0;
+        this._circuitOpen = false;
+        this._warnedBudget = false;
+        const runtime = resolveLLMRuntime(this.config);
+        this.enabled = runtime.enabled;
+        this.reason = runtime.reason;
+        this.providerName = runtime.providerName;
+        this.model = runtime.model;
+        if (this.enabled) {
+            this.provider = createProvider(runtime.providerName, {
+                apiKey: runtime.apiKey,
+                model: runtime.model,
+                baseUrl: llm.base_url || null,
+                logger,
+            });
+            // One-line consent/egress warning. Never logs the key.
+            this.logger?.warn?.(
+                `[LLM] Augmentation ENABLED via ${runtime.providerName}/${runtime.model}. ` +
+                `Minimal finding/target data may be sent to a third-party API. ` +
+                `Disable by removing --llm or setting llm.enabled=false.`
+            );
+        } else {
+            this.provider = new NullProvider({ logger });
+        }
+    }
+    /** True if a real provider is active. */
+    isEnabled() {
+        return this.enabled && !this._circuitOpen;
+    }
+    /** Human-readable one-line status (no secrets). */
+    static describe(config) {
+        const r = resolveLLMRuntime(config);
+        if (r.enabled) return `enabled (${r.providerName}/${r.model})`;
+        return `disabled — ${r.reason}`;
+    }
+    /** Per-scan usage snapshot (for logging, never includes keys). */
+    usage() {
+        return { calls: this._calls, tokensUsed: this._tokensUsed, circuitOpen: this._circuitOpen };
+    }
+    /**
+     * Ask the LLM. Returns the completion text, or null on any disablement /
+     * budget exhaustion / error. Never throws.
+     */
+    async ask({ system, prompt, maxTokens, temperature = 0 } = {}) {
+        if (!this.enabled || this._circuitOpen || !prompt) return null;
+        if (this._calls >= this._maxCalls || this._tokensUsed >= this._tokenBudget) {
+            if (!this._warnedBudget) {
+                this._warnedBudget = true;
+                this.logger?.debug?.('[LLM] budget exhausted — further augmentation skipped');
+            }
+            return null;
+        }
+        const outTokens = Math.min(maxTokens || this._perCallTokens, this._perCallTokens);
+        this._calls++;
+        let attempt = 0;
+        const maxAttempts = 2;
+        while (attempt <= maxAttempts) {
+            try {
+                const res = await this.provider.complete({
+                    system,
+                    prompt,
+                    maxTokens: outTokens,
+                    temperature,
+                    signal: AbortSignal.timeout(this._timeoutMs),
+                });
+                this._tokensUsed += res?.usage?.total_tokens || outTokens;
+                return res?.text ?? null;
+            } catch (err) {
+                if (err?.status === 429 && attempt < maxAttempts) {
+                    const backoff = 1000 * Math.pow(2, attempt);
+                    await new Promise(r => setTimeout(r, backoff));
+                    attempt++;
+                    continue;
+                }
+                if (isConnectionError(err)) {
+                    this._circuitOpen = true;
+                    this.logger?.warn?.('[LLM] connection failure — disabling LLM augmentation for the remainder of this scan');
+                    return null;
+                }
+                // Logger format redacts secrets; keep the message terse regardless.
+                this.logger?.debug?.(`[LLM] call failed (${err?.status || err?.name || 'error'}) — degrading to deterministic behavior`);
+                return null;
+            }
+        }
+        return null;
+    }
+}
+export { resolveLLMRuntime, PROVIDER_ENV, DEFAULT_MODELS };
+export default LLMClient;

package/src/core/llm/providers/anthropic-provider.js ADDED Viewed

@@ -0,0 +1,46 @@
+import { BaseLLMProvider } from './base-provider.js';
+/**
+ * AnthropicProvider — Adapter for the Anthropic Messages API.
+ * Uses the built-in global fetch (Node ≥20). No SDK dependency.
+ */
+export class AnthropicProvider extends BaseLLMProvider {
+    get name() { return 'anthropic'; }
+    async complete({ system, prompt, maxTokens = 1024, temperature = 0, signal } = {}) {
+        const base = (this.baseUrl || 'https://api.anthropic.com/v1').replace(/\/$/, '');
+        const url = `${base}/messages`;
+        const res = await fetch(url, {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json',
+                'x-api-key': this._apiKey,
+                'anthropic-version': '2023-06-01',
+            },
+            body: JSON.stringify({
+                model: this.model,
+                max_tokens: maxTokens,
+                temperature,
+                system: system || undefined,
+                messages: [{ role: 'user', content: prompt }],
+            }),
+            signal,
+        });
+        if (!res.ok) {
+            await res.text().catch(() => '');
+            throw this._httpError(res.status, res.statusText);
+        }
+        const json = await res.json();
+        // content is an array of blocks; concatenate any text blocks.
+        const text = Array.isArray(json?.content)
+            ? json.content.filter(b => b?.type === 'text').map(b => b.text).join('').trim() || null
+            : null;
+        const total = (json?.usage?.input_tokens || 0) + (json?.usage?.output_tokens || 0);
+        return { text, usage: { total_tokens: total } };
+    }
+}
+export default AnthropicProvider;

package/src/core/llm/providers/base-provider.js ADDED Viewed

@@ -0,0 +1,44 @@
+/**
+ * BaseLLMProvider — Abstract interface for LLM provider adapters.
+ *
+ * Mirrors the abstract-base pattern used by BaseAgent: subclasses MUST implement
+ * `name` and `complete()`. Providers are thin HTTP adapters built on the
+ * Node ≥20 global `fetch` — no third-party SDKs.
+ */
+export class BaseLLMProvider {
+    constructor({ apiKey, model, baseUrl, logger } = {}) {
+        if (new.target === BaseLLMProvider) {
+            throw new Error('BaseLLMProvider is abstract — extend it, do not instantiate directly.');
+        }
+        // The API key is held only in this adapter instance for the duration of
+        // the scan. It is NEVER written to config, logs, reports, or findings.
+        this._apiKey = apiKey || null;
+        this.model = model || null;
+        this.baseUrl = baseUrl || null;
+        this.logger = logger || null;
+    }
+    /** Provider display name (e.g. "openai"). Must be overridden. */
+    get name() { throw new Error('Provider must define a name'); }
+    /**
+     * Perform a single completion.
+     * @param {object} req
+     * @param {string} req.system   - system instruction
+     * @param {string} req.prompt   - user prompt
+     * @param {number} req.maxTokens - max output tokens
+     * @param {number} req.temperature
+     * @param {AbortSignal} req.signal
+     * @returns {Promise<{text: string|null, usage: {total_tokens: number}}>}
+     */
+    async complete(_req) {
+        throw new Error(`${this.name} must implement complete()`);
+    }
+    /** Build an Error carrying an HTTP status so the client can branch (e.g. 429). */
+    _httpError(status, label) {
+        return Object.assign(new Error(`${this.name} API error: ${label || status}`), { status });
+    }
+}
+export default BaseLLMProvider;

package/src/core/llm/providers/null-provider.js ADDED Viewed

@@ -0,0 +1,21 @@
+import { BaseLLMProvider } from './base-provider.js';
+/**
+ * NullProvider — No-op provider used when LLM augmentation is disabled or for
+ * tests. Always returns null text so every consumer degrades gracefully to its
+ * deterministic, non-LLM behavior.
+ */
+export class NullProvider extends BaseLLMProvider {
+    constructor(opts = {}) {
+        // Allow direct instantiation (it's the safe default).
+        super({ ...opts, apiKey: null });
+    }
+    get name() { return 'null'; }
+    async complete() {
+        return { text: null, usage: { total_tokens: 0 } };
+    }
+}
+export default NullProvider;

package/src/core/llm/providers/openai-provider.js ADDED Viewed

@@ -0,0 +1,47 @@
+import { BaseLLMProvider } from './base-provider.js';
+/**
+ * OpenAIProvider — Adapter for the OpenAI Chat Completions API.
+ * Uses the built-in global fetch (Node ≥20). No SDK dependency.
+ */
+export class OpenAIProvider extends BaseLLMProvider {
+    get name() { return 'openai'; }
+    async complete({ system, prompt, maxTokens = 1024, temperature = 0, signal } = {}) {
+        const base = (this.baseUrl || 'https://api.openai.com/v1').replace(/\/$/, '');
+        const url = `${base}/chat/completions`;
+        const messages = [];
+        if (system) messages.push({ role: 'system', content: system });
+        messages.push({ role: 'user', content: prompt });
+        const res = await fetch(url, {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json',
+                Authorization: `Bearer ${this._apiKey}`,
+            },
+            body: JSON.stringify({
+                model: this.model,
+                temperature,
+                max_tokens: maxTokens,
+                messages,
+            }),
+            signal,
+        });
+        if (!res.ok) {
+            // Drain body without surfacing it (it may echo request data); never log keys.
+            await res.text().catch(() => '');
+            throw this._httpError(res.status, res.statusText);
+        }
+        const json = await res.json();
+        const text = json?.choices?.[0]?.message?.content ?? null;
+        const total = json?.usage?.total_tokens
+            ?? ((json?.usage?.prompt_tokens || 0) + (json?.usage?.completion_tokens || 0));
+        return { text, usage: { total_tokens: total || 0 } };
+    }
+}
+export default OpenAIProvider;

package/src/core/logic/access-boundary-tester.js CHANGED Viewed

@@ -116,7 +116,7 @@ export class AccessBoundaryTester {
      */
     async _testIDOR(businessContext, surfaceInventory) {
         const findings = [];
-        const apis = surfaceInventory.apis || [];
+        const apis = surfaceInventory.apiEndpoints || surfaceInventory.apis || [];
         for (const api of apis) {
             const url = api.url || api;

package/src/core/logic/business-rule-inferrer.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { createFinding } from '../../utils/finding.js';
+import { inferBusinessDomains } from '../llm/augmentations.js';
 /**
  * BusinessRuleInferrer — Infers business rules from the surface inventory.
@@ -80,7 +81,9 @@ export class BusinessRuleInferrer {
         const pages = surfaceInventory.pages || [];
         const forms = surfaceInventory.forms || [];
-        const apis = surfaceInventory.apis || [];
+        // The crawler emits `apiEndpoints` (not `apis`); read the correct field
+        // so business-logic API surfaces are actually categorized.
+        const apis = surfaceInventory.apiEndpoints || surfaceInventory.apis || [];
         // 1. Categorize pages by domain
         for (const page of pages) {
@@ -156,6 +159,52 @@ export class BusinessRuleInferrer {
         return context;
     }
+    /**
+     * Phase 3 — Augment the regex-based domain inference with LLM-inferred
+     * domains and security-relevant invariants. STRICTLY ADDITIVE: the regex
+     * DOMAIN_PATTERNS result above is untouched; results are attached under
+     * `context.llmInsights` (source:'llm'). No-op when LLM is disabled.
+     *
+     * Data minimization: only URL paths + form field NAMES are sent (no values,
+     * no bodies, no secrets).
+     */
+    async augmentWithLLM(context, surfaceInventory, llmClient) {
+        if (!llmClient?.isEnabled?.()) return context;
+        try {
+            const pages = surfaceInventory.pages || [];
+            const apis = surfaceInventory.apiEndpoints || surfaceInventory.apis || [];
+            const forms = surfaceInventory.forms || [];
+            const toPath = (u) => {
+                try { return new URL(typeof u === 'string' ? u : (u.url || '')).pathname; }
+                catch { return typeof u === 'string' ? u : (u?.url || ''); }
+            };
+            const paths = [...new Set([
+                ...pages.map(p => toPath(p.url || p)),
+                ...apis.map(a => toPath(a.url || a)),
+            ])].filter(Boolean);
+            const formFields = [...new Set(
+                forms.flatMap(f => (f.fields || []).map(fl => (fl.name || fl.id || '').toLowerCase()))
+            )].filter(Boolean);
+            const result = await inferBusinessDomains(llmClient, { paths, formFields });
+            if (result && (result.domains.length || result.invariants.length)) {
+                context.llmInsights = {
+                    domains: result.domains,
+                    invariants: result.invariants,
+                    source: 'llm',
+                };
+                this.logger?.info?.(`Business Rule Inferrer (LLM): +${result.domains.length} domains, ${result.invariants.length} invariants`);
+            }
+        } catch (err) {
+            this.logger?.debug?.(`Business Rule Inferrer LLM augmentation skipped: ${err.message}`);
+        }
+        return context;
+    }
     /**
      * Detect multi-step flows (pages with step indicators in URLs).
      */