@kernlang/review 2.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/concept-rules/boundary-mutation.d.ts +13 -0
- package/dist/concept-rules/boundary-mutation.js +40 -0
- package/dist/concept-rules/boundary-mutation.js.map +1 -0
- package/dist/concept-rules/ignored-error.d.ts +13 -0
- package/dist/concept-rules/ignored-error.js +40 -0
- package/dist/concept-rules/ignored-error.js.map +1 -0
- package/dist/concept-rules/illegal-dependency.d.ts +13 -0
- package/dist/concept-rules/illegal-dependency.js +49 -0
- package/dist/concept-rules/illegal-dependency.js.map +1 -0
- package/dist/concept-rules/index.d.ts +15 -0
- package/dist/concept-rules/index.js +27 -0
- package/dist/concept-rules/index.js.map +1 -0
- package/dist/concept-rules/unguarded-effect.d.ts +13 -0
- package/dist/concept-rules/unguarded-effect.js +58 -0
- package/dist/concept-rules/unguarded-effect.js.map +1 -0
- package/dist/concept-rules/unrecovered-effect.d.ts +13 -0
- package/dist/concept-rules/unrecovered-effect.js +61 -0
- package/dist/concept-rules/unrecovered-effect.js.map +1 -0
- package/dist/confidence.d.ts +92 -0
- package/dist/confidence.js +263 -0
- package/dist/confidence.js.map +1 -0
- package/dist/differ.js +4 -2
- package/dist/differ.js.map +1 -1
- package/dist/external-tools.js +7 -3
- package/dist/external-tools.js.map +1 -1
- package/dist/file-role.d.ts +10 -0
- package/dist/file-role.js +80 -0
- package/dist/file-role.js.map +1 -0
- package/dist/graph.d.ts +11 -0
- package/dist/graph.js +152 -0
- package/dist/graph.js.map +1 -0
- package/dist/index.d.ts +46 -3
- package/dist/index.js +313 -27
- package/dist/index.js.map +1 -1
- package/dist/inferrer.js +123 -25
- package/dist/inferrer.js.map +1 -1
- package/dist/kern-lint.d.ts +18 -0
- package/dist/kern-lint.js +24 -0
- package/dist/kern-lint.js.map +1 -0
- package/dist/llm-bridge.d.ts +42 -0
- package/dist/llm-bridge.js +176 -0
- package/dist/llm-bridge.js.map +1 -0
- package/dist/llm-review.d.ts +8 -1
- package/dist/llm-review.js +20 -7
- package/dist/llm-review.js.map +1 -1
- package/dist/mappers/ts-concepts.d.ts +9 -0
- package/dist/mappers/ts-concepts.js +518 -0
- package/dist/mappers/ts-concepts.js.map +1 -0
- package/dist/quality-rules.d.ts +3 -3
- package/dist/quality-rules.js +3 -11
- package/dist/quality-rules.js.map +1 -1
- package/dist/reporter.d.ts +19 -3
- package/dist/reporter.js +232 -20
- package/dist/reporter.js.map +1 -1
- package/dist/rules/base.js +167 -15
- package/dist/rules/base.js.map +1 -1
- package/dist/rules/confidence.d.ts +37 -0
- package/dist/rules/confidence.js +159 -0
- package/dist/rules/confidence.js.map +1 -0
- package/dist/rules/dead-logic.d.ts +13 -0
- package/dist/rules/dead-logic.js +393 -0
- package/dist/rules/dead-logic.js.map +1 -0
- package/dist/rules/express.js +69 -2
- package/dist/rules/express.js.map +1 -1
- package/dist/rules/ground-layer.d.ts +23 -0
- package/dist/rules/ground-layer.js +132 -0
- package/dist/rules/ground-layer.js.map +1 -0
- package/dist/rules/index.d.ts +1 -1
- package/dist/rules/index.js +8 -2
- package/dist/rules/index.js.map +1 -1
- package/dist/rules/kern-source.d.ts +16 -0
- package/dist/rules/kern-source.js +726 -0
- package/dist/rules/kern-source.js.map +1 -0
- package/dist/rules/nextjs.js +38 -10
- package/dist/rules/nextjs.js.map +1 -1
- package/dist/rules/null-safety.d.ts +12 -0
- package/dist/rules/null-safety.js +123 -0
- package/dist/rules/null-safety.js.map +1 -0
- package/dist/rules/react.js +64 -1
- package/dist/rules/react.js.map +1 -1
- package/dist/rules/security-v2.d.ts +12 -0
- package/dist/rules/security-v2.js +415 -0
- package/dist/rules/security-v2.js.map +1 -0
- package/dist/rules/security-v3.d.ts +12 -0
- package/dist/rules/security-v3.js +397 -0
- package/dist/rules/security-v3.js.map +1 -0
- package/dist/rules/security-v4.d.ts +22 -0
- package/dist/rules/security-v4.js +688 -0
- package/dist/rules/security-v4.js.map +1 -0
- package/dist/rules/security.d.ts +12 -0
- package/dist/rules/security.js +286 -0
- package/dist/rules/security.js.map +1 -0
- package/dist/rules/utils.d.ts +7 -0
- package/dist/rules/utils.js +21 -0
- package/dist/rules/utils.js.map +1 -0
- package/dist/rules/vue.js +1 -1
- package/dist/rules/vue.js.map +1 -1
- package/dist/spec-checker.d.ts +83 -0
- package/dist/spec-checker.js +405 -0
- package/dist/spec-checker.js.map +1 -0
- package/dist/suppression/apply-suppression.d.ts +17 -0
- package/dist/suppression/apply-suppression.js +94 -0
- package/dist/suppression/apply-suppression.js.map +1 -0
- package/dist/suppression/index.d.ts +6 -0
- package/dist/suppression/index.js +6 -0
- package/dist/suppression/index.js.map +1 -0
- package/dist/suppression/parse-directives.d.ts +25 -0
- package/dist/suppression/parse-directives.js +161 -0
- package/dist/suppression/parse-directives.js.map +1 -0
- package/dist/suppression/types.d.ts +32 -0
- package/dist/suppression/types.js +5 -0
- package/dist/suppression/types.js.map +1 -0
- package/dist/taint.d.ts +115 -0
- package/dist/taint.js +1052 -0
- package/dist/taint.js.map +1 -0
- package/dist/types.d.ts +71 -0
- package/dist/types.js.map +1 -1
- package/package.json +7 -4
|
@@ -0,0 +1,688 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Security v4 rules — LLM prompt injection attack surface.
|
|
3
|
+
*
|
|
4
|
+
* Covers 10 attack vectors beyond basic prompt injection (v3):
|
|
5
|
+
* S14: indirect-prompt-injection — DB-stored data flows to LLM prompt
|
|
6
|
+
* S15: llm-output-execution — LLM output passed to eval/exec
|
|
7
|
+
* S16: system-prompt-leakage — system prompt exposed in responses
|
|
8
|
+
* S17: rag-poisoning — retrieval results flow unsanitized to prompt
|
|
9
|
+
* S18: tool-calling-manipulation — user input controls tool names/schemas
|
|
10
|
+
* S19: encoding-bypass — decoded content enters prompt unsanitized
|
|
11
|
+
* S20: delimiter-injection — user input with delimiters in prompt context
|
|
12
|
+
* S21: unsanitized-history — chat history pushed without sanitization
|
|
13
|
+
* S22: json-output-manipulation — JSON.parse on LLM output without schema
|
|
14
|
+
* S23: missing-output-validation — LLM output used without validation
|
|
15
|
+
*
|
|
16
|
+
* All AST-based. Always active regardless of target.
|
|
17
|
+
* OWASP LLM01-LLM09
|
|
18
|
+
*/
|
|
19
|
+
import { SyntaxKind } from 'ts-morph';
|
|
20
|
+
import { createFingerprint } from '../types.js';
|
|
21
|
+
function span(file, line, col = 1) {
|
|
22
|
+
return { file, startLine: line, startCol: col, endLine: line, endCol: col };
|
|
23
|
+
}
|
|
24
|
+
function finding(ruleId, severity, category, message, file, line, extra) {
|
|
25
|
+
return {
|
|
26
|
+
source: 'kern',
|
|
27
|
+
ruleId,
|
|
28
|
+
severity,
|
|
29
|
+
category,
|
|
30
|
+
message,
|
|
31
|
+
primarySpan: span(file, line),
|
|
32
|
+
fingerprint: createFingerprint(ruleId, line, 1),
|
|
33
|
+
...extra,
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
// ── Shared patterns ──────────────────────────────────────────────────
|
|
37
|
+
/** Escape a string for use inside a RegExp */
|
|
38
|
+
function escapeRegExp(s) {
|
|
39
|
+
return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
40
|
+
}
|
|
41
|
+
const DB_READ_PATTERNS = /\b(db\.query|findOne|findById|findMany|getItem|collection\.find|\.findUnique|\.findFirst)\b/;
|
|
42
|
+
const LLM_API_PATTERNS = /\bgenerateContent\b|\bchat\.completions\.create\b|\bcomplete\b|\bsendMessage\b|\bcreateCompletion\b|\bcreateChatCompletion\b/;
|
|
43
|
+
const LLM_RESPONSE_NAMES = /^(completion|llmResponse|llmResult|aiResponse|chatResponse|aiOutput|generatedText)$/i;
|
|
44
|
+
const PROMPT_CONTEXT = /prompt|system|instruction|context|template/i;
|
|
45
|
+
const SANITIZER_CALL = /\bsanitize\w*\s?\(|\bescape\w*\s?\(|\bclean\w*\s?\(|\bstripDelimiters\s?\(|\bcleanForPrompt\s?\(/;
|
|
46
|
+
const VALIDATION_CALL = /\.parse\s*\(|\.safeParse\s*\(|\.validate\s*\(|\.validateSync\s*\(/;
|
|
47
|
+
const RETRIEVAL_PATTERNS = /\bvectorStore\.search\b|\bvectorDb\.search\b|\bretrieve\b|\bsimilaritySearch\b|\bembedding\.query\b|\bindex\.query\b|\bsemantic[Ss]earch\b|\b\w+[Dd]b\.search\b|\b\w+[Ss]tore\.search\b/;
|
|
48
|
+
const EXEC_SINKS = /\beval\s*\(|\bnew\s+Function\s*\(|\bvm\.runInContext\s*\(|\bvm\.runInNewContext\s*\(|\bexec\s*\(|\bexecSync\s*\(/;
|
|
49
|
+
// ── Rule S14: indirect-prompt-injection ───────────────────────────────
|
|
50
|
+
// DB read results flow into LLM prompt construction without sanitization.
|
|
51
|
+
// CWE-77, OWASP LLM01
|
|
52
|
+
function indirectPromptInjection(ctx) {
|
|
53
|
+
const findings = [];
|
|
54
|
+
const fileText = ctx.sourceFile.getFullText();
|
|
55
|
+
if (!DB_READ_PATTERNS.test(fileText))
|
|
56
|
+
return findings;
|
|
57
|
+
if (!LLM_API_PATTERNS.test(fileText) && !PROMPT_CONTEXT.test(fileText))
|
|
58
|
+
return findings;
|
|
59
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
60
|
+
const callText = call.getExpression().getText();
|
|
61
|
+
if (!DB_READ_PATTERNS.test(callText))
|
|
62
|
+
continue;
|
|
63
|
+
// Find variable this DB result is assigned to
|
|
64
|
+
let varName = '';
|
|
65
|
+
let parent = call.getParent();
|
|
66
|
+
if (parent?.getKind() === SyntaxKind.AwaitExpression)
|
|
67
|
+
parent = parent.getParent();
|
|
68
|
+
if (parent?.getKind() === SyntaxKind.VariableDeclaration) {
|
|
69
|
+
varName = parent.getName();
|
|
70
|
+
}
|
|
71
|
+
if (!varName)
|
|
72
|
+
continue;
|
|
73
|
+
// Check if this variable appears in a prompt/LLM context
|
|
74
|
+
const fnBody = call.getFirstAncestorByKind(SyntaxKind.Block);
|
|
75
|
+
if (!fnBody)
|
|
76
|
+
continue;
|
|
77
|
+
const bodyText = fnBody.getText();
|
|
78
|
+
const ev = escapeRegExp(varName);
|
|
79
|
+
// Is it used in template interpolation or concat?
|
|
80
|
+
const usedInTemplate = new RegExp(`\\$\\{[^}]*\\b${ev}\\b`).test(bodyText);
|
|
81
|
+
const usedInConcat = new RegExp(`\\+\\s*${ev}\\b|${ev}\\b\\s*\\+`).test(bodyText);
|
|
82
|
+
if (!usedInTemplate && !usedInConcat)
|
|
83
|
+
continue;
|
|
84
|
+
// Is this in a prompt context?
|
|
85
|
+
if (!LLM_API_PATTERNS.test(bodyText) && !PROMPT_CONTEXT.test(bodyText))
|
|
86
|
+
continue;
|
|
87
|
+
// Is it sanitized? Check if every template interpolation wraps the var in sanitize
|
|
88
|
+
const sanitizeWraps = new RegExp(`\\$\\{[^}]*(?:sanitize\\w*|escape\\w*|cleanForPrompt)\\s*\\([^)]*\\b${ev}\\b`).test(bodyText);
|
|
89
|
+
const sanitizeBefore = new RegExp(`(?:sanitize\\w*|escape\\w*|cleanForPrompt)\\s*\\(\\s*${ev}`).test(bodyText);
|
|
90
|
+
if (sanitizeWraps || sanitizeBefore)
|
|
91
|
+
continue;
|
|
92
|
+
findings.push(finding('indirect-prompt-injection', 'warning', 'bug', `DB result '${varName}' from ${callText.substring(0, 40)} used in LLM prompt without sanitization — indirect injection risk`, ctx.filePath, call.getStartLineNumber(), { suggestion: 'Sanitize DB-sourced content before embedding in LLM prompts — stored data may contain injection payloads' }));
|
|
93
|
+
}
|
|
94
|
+
return findings;
|
|
95
|
+
}
|
|
96
|
+
// ── Rule S15: llm-output-execution ───────────────────────────────────
|
|
97
|
+
// LLM API response passed to eval(), new Function(), vm.runIn*(), exec().
|
|
98
|
+
// CWE-94, OWASP LLM02
|
|
99
|
+
function llmOutputExecution(ctx) {
|
|
100
|
+
const findings = [];
|
|
101
|
+
const fileText = ctx.sourceFile.getFullText();
|
|
102
|
+
if (!EXEC_SINKS.test(fileText))
|
|
103
|
+
return findings;
|
|
104
|
+
// Collect variables assigned from LLM API calls
|
|
105
|
+
const llmVars = new Set();
|
|
106
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
107
|
+
const callText = call.getExpression().getText();
|
|
108
|
+
if (!LLM_API_PATTERNS.test(callText))
|
|
109
|
+
continue;
|
|
110
|
+
let parent = call.getParent();
|
|
111
|
+
if (parent?.getKind() === SyntaxKind.AwaitExpression)
|
|
112
|
+
parent = parent.getParent();
|
|
113
|
+
if (parent?.getKind() === SyntaxKind.VariableDeclaration) {
|
|
114
|
+
llmVars.add(parent.getName());
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
// Only match LLM_RESPONSE_NAMES if file actually has LLM API calls (Fix 7)
|
|
118
|
+
if (llmVars.size > 0) {
|
|
119
|
+
for (const decl of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.VariableDeclaration)) {
|
|
120
|
+
if (LLM_RESPONSE_NAMES.test(decl.getName())) {
|
|
121
|
+
llmVars.add(decl.getName());
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
// Propagate: if const x = llmVar.something, x is also tainted
|
|
126
|
+
for (const decl of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.VariableDeclaration)) {
|
|
127
|
+
const init = decl.getInitializer()?.getText() || '';
|
|
128
|
+
for (const v of llmVars) {
|
|
129
|
+
if (new RegExp(`\\b${escapeRegExp(v)}\\b`).test(init) && !llmVars.has(decl.getName())) {
|
|
130
|
+
llmVars.add(decl.getName());
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
if (llmVars.size === 0)
|
|
135
|
+
return findings;
|
|
136
|
+
// Check call expressions that are exec sinks
|
|
137
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
138
|
+
const calleeText = call.getExpression().getText();
|
|
139
|
+
if (!EXEC_SINKS.test(calleeText + '('))
|
|
140
|
+
continue;
|
|
141
|
+
const argsText = call.getArguments().map(a => a.getText()).join(' ');
|
|
142
|
+
for (const v of llmVars) {
|
|
143
|
+
if (new RegExp(`\\b${v}\\b`).test(argsText)) {
|
|
144
|
+
findings.push(finding('llm-output-execution', 'error', 'bug', `LLM output '${v}' passed to ${calleeText}() — arbitrary code execution risk`, ctx.filePath, call.getStartLineNumber(), { suggestion: 'Never execute LLM output directly. Validate against an allowlist or use a sandboxed interpreter' }));
|
|
145
|
+
break;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
// Check new Function(llmVar)
|
|
150
|
+
for (const newExpr of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.NewExpression)) {
|
|
151
|
+
if (newExpr.getExpression().getText() !== 'Function')
|
|
152
|
+
continue;
|
|
153
|
+
const argsText = newExpr.getArguments().map(a => a.getText()).join(' ');
|
|
154
|
+
for (const v of llmVars) {
|
|
155
|
+
if (new RegExp(`\\b${v}\\b`).test(argsText)) {
|
|
156
|
+
findings.push(finding('llm-output-execution', 'error', 'bug', `LLM output '${v}' passed to new Function() — arbitrary code execution risk`, ctx.filePath, newExpr.getStartLineNumber(), { suggestion: 'Never execute LLM output. Use a sandboxed interpreter or validate against an allowlist' }));
|
|
157
|
+
break;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
return findings;
|
|
162
|
+
}
|
|
163
|
+
// ── Rule S16: system-prompt-leakage ──────────────────────────────────
|
|
164
|
+
// System prompt variables exposed in error responses or API responses.
|
|
165
|
+
// CWE-209, OWASP LLM07
|
|
166
|
+
const SYSTEM_PROMPT_VARS = /\b(systemPrompt|SYSTEM_PROMPT|system_prompt|sysPrompt|systemInstruction|SYSTEM_INSTRUCTION)\b/;
|
|
167
|
+
function systemPromptLeakage(ctx) {
|
|
168
|
+
const findings = [];
|
|
169
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
170
|
+
const callee = call.getExpression();
|
|
171
|
+
if (callee.getKind() !== SyntaxKind.PropertyAccessExpression)
|
|
172
|
+
continue;
|
|
173
|
+
const pa = callee;
|
|
174
|
+
const methodName = pa.getName();
|
|
175
|
+
// res.json(), res.send(), console.log()
|
|
176
|
+
const isResponseMethod = methodName === 'json' || methodName === 'send';
|
|
177
|
+
const isLogMethod = methodName === 'log' || methodName === 'error' || methodName === 'warn';
|
|
178
|
+
if (!isResponseMethod && !isLogMethod)
|
|
179
|
+
continue;
|
|
180
|
+
// Walk up to check if it's on a response object or console
|
|
181
|
+
let objText = pa.getExpression().getText();
|
|
182
|
+
if (objText.includes('.status(')) {
|
|
183
|
+
objText = objText.split('.')[0];
|
|
184
|
+
}
|
|
185
|
+
const isRes = /^res(ponse)?$/.test(objText);
|
|
186
|
+
const isConsole = objText === 'console';
|
|
187
|
+
if (!isRes && !isConsole)
|
|
188
|
+
continue;
|
|
189
|
+
const args = call.getArguments();
|
|
190
|
+
if (args.length === 0)
|
|
191
|
+
continue;
|
|
192
|
+
const argText = args.map(a => a.getText()).join(' ');
|
|
193
|
+
if (!SYSTEM_PROMPT_VARS.test(argText))
|
|
194
|
+
continue;
|
|
195
|
+
if (isRes) {
|
|
196
|
+
// Check if we're inside a catch block (error leak path)
|
|
197
|
+
let ancestor = call.getParent();
|
|
198
|
+
let inCatch = false;
|
|
199
|
+
while (ancestor) {
|
|
200
|
+
if (ancestor.getKind() === SyntaxKind.CatchClause) {
|
|
201
|
+
inCatch = true;
|
|
202
|
+
break;
|
|
203
|
+
}
|
|
204
|
+
ancestor = ancestor.getParent();
|
|
205
|
+
}
|
|
206
|
+
findings.push(finding('system-prompt-leakage', inCatch ? 'error' : 'warning', 'bug', `System prompt variable exposed in ${inCatch ? 'error ' : ''}response — leaks system instructions to client`, ctx.filePath, call.getStartLineNumber(), { suggestion: 'Never include system prompt content in API responses or error messages' }));
|
|
207
|
+
}
|
|
208
|
+
else {
|
|
209
|
+
// Console logging of system prompt — may be visible in client-side logs
|
|
210
|
+
findings.push(finding('system-prompt-leakage', 'info', 'bug', `System prompt variable logged via console.${methodName}() — may be visible in browser devtools`, ctx.filePath, call.getStartLineNumber(), { suggestion: 'Avoid logging system prompts; use server-side only logging if needed' }));
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
// Also check: system prompt vars interpolated in template literals that are returned
|
|
214
|
+
// (the caller may expose the prompt to clients)
|
|
215
|
+
for (const template of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.TemplateExpression)) {
|
|
216
|
+
const templateText = template.getText();
|
|
217
|
+
if (!SYSTEM_PROMPT_VARS.test(templateText))
|
|
218
|
+
continue;
|
|
219
|
+
// Is this template returned or assigned to a response?
|
|
220
|
+
const parent = template.getParent();
|
|
221
|
+
const isReturned = parent?.getKind() === SyntaxKind.ReturnStatement;
|
|
222
|
+
const isAssignedToResponse = parent?.getKind() === SyntaxKind.VariableDeclaration &&
|
|
223
|
+
/response|reply|output|result/i.test(parent.getName());
|
|
224
|
+
if (isReturned || isAssignedToResponse) {
|
|
225
|
+
findings.push(finding('system-prompt-leakage', 'warning', 'bug', `System prompt variable interpolated in returned value — may be exposed to callers`, ctx.filePath, template.getStartLineNumber(), { suggestion: 'Avoid including system prompt content in return values that may reach clients' }));
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
return findings;
|
|
229
|
+
}
|
|
230
|
+
// ── Rule S17: rag-poisoning ──────────────────────────────────────────
|
|
231
|
+
// Retrieval function outputs flow directly into prompt without sanitization.
|
|
232
|
+
// OWASP LLM01, LLM03
|
|
233
|
+
function ragPoisoning(ctx) {
|
|
234
|
+
const findings = [];
|
|
235
|
+
const fileText = ctx.sourceFile.getFullText();
|
|
236
|
+
if (!RETRIEVAL_PATTERNS.test(fileText))
|
|
237
|
+
return findings;
|
|
238
|
+
if (!LLM_API_PATTERNS.test(fileText) && !PROMPT_CONTEXT.test(fileText))
|
|
239
|
+
return findings;
|
|
240
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
241
|
+
const callText = call.getExpression().getText();
|
|
242
|
+
if (!RETRIEVAL_PATTERNS.test(callText))
|
|
243
|
+
continue;
|
|
244
|
+
let varName = '';
|
|
245
|
+
let parent = call.getParent();
|
|
246
|
+
if (parent?.getKind() === SyntaxKind.AwaitExpression)
|
|
247
|
+
parent = parent.getParent();
|
|
248
|
+
if (parent?.getKind() === SyntaxKind.VariableDeclaration) {
|
|
249
|
+
varName = parent.getName();
|
|
250
|
+
}
|
|
251
|
+
if (!varName)
|
|
252
|
+
continue;
|
|
253
|
+
const fnBody = call.getFirstAncestorByKind(SyntaxKind.Block);
|
|
254
|
+
if (!fnBody)
|
|
255
|
+
continue;
|
|
256
|
+
const bodyText = fnBody.getText();
|
|
257
|
+
// Collect derived variables: const context = docs.map(...).join(...)
|
|
258
|
+
const taintedVars = new Set([varName]);
|
|
259
|
+
for (const decl of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.VariableDeclaration)) {
|
|
260
|
+
const initText = decl.getInitializer()?.getText() || '';
|
|
261
|
+
for (const tv of taintedVars) {
|
|
262
|
+
if (new RegExp(`\\b${tv}\\b`).test(initText)) {
|
|
263
|
+
taintedVars.add(decl.getName());
|
|
264
|
+
break;
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
// Used in template or concat? (check all tainted vars)
|
|
269
|
+
let foundUsage = false;
|
|
270
|
+
for (const tv of taintedVars) {
|
|
271
|
+
const usedInTemplate = new RegExp(`\\$\\{[^}]*\\b${tv}\\b`).test(bodyText);
|
|
272
|
+
const usedInConcat = new RegExp(`\\+\\s*${tv}\\b|${tv}\\b\\s*\\+`).test(bodyText);
|
|
273
|
+
if (usedInTemplate || usedInConcat) {
|
|
274
|
+
foundUsage = true;
|
|
275
|
+
break;
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
if (!foundUsage)
|
|
279
|
+
continue;
|
|
280
|
+
// In prompt context?
|
|
281
|
+
if (!LLM_API_PATTERNS.test(bodyText) && !PROMPT_CONTEXT.test(bodyText))
|
|
282
|
+
continue;
|
|
283
|
+
// Sanitized?
|
|
284
|
+
let sanitized = false;
|
|
285
|
+
for (const tv of taintedVars) {
|
|
286
|
+
if (SANITIZER_CALL.test(bodyText) && new RegExp(`\\w+\\s*\\([^)]*\\b${tv}\\b`).test(bodyText)) {
|
|
287
|
+
sanitized = true;
|
|
288
|
+
break;
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
if (sanitized)
|
|
292
|
+
continue;
|
|
293
|
+
findings.push(finding('rag-poisoning', 'warning', 'bug', `Retrieval result '${varName}' from ${callText.substring(0, 40)} embedded in prompt without sanitization — RAG poisoning risk`, ctx.filePath, call.getStartLineNumber(), { suggestion: 'Sanitize retrieved content before embedding in prompts — indexed documents may contain injection payloads' }));
|
|
294
|
+
}
|
|
295
|
+
return findings;
|
|
296
|
+
}
|
|
297
|
+
// ── Rule S18: tool-calling-manipulation ──────────────────────────────
|
|
298
|
+
// User input influences function/tool names or schemas in LLM tool-use APIs.
|
|
299
|
+
// OWASP LLM01, LLM07
|
|
300
|
+
const USER_INPUT_PATTERNS = /req\.(body|query|params|headers)\b/;
|
|
301
|
+
const TOOL_API_PROPERTIES = /\b(tools|functions|function_call|tool_choice|function_name)\b/;
|
|
302
|
+
function toolCallingManipulation(ctx) {
|
|
303
|
+
const findings = [];
|
|
304
|
+
const fileText = ctx.sourceFile.getFullText();
|
|
305
|
+
const hasToolProps = TOOL_API_PROPERTIES.test(fileText);
|
|
306
|
+
const hasToolCalls = /tool_calls|toolCalls|function_calls/.test(fileText);
|
|
307
|
+
if (!hasToolProps && !hasToolCalls)
|
|
308
|
+
return findings;
|
|
309
|
+
// Find object literals that contain tool/function configuration (requires user input)
|
|
310
|
+
if (USER_INPUT_PATTERNS.test(fileText) && hasToolProps)
|
|
311
|
+
for (const objLit of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.ObjectLiteralExpression)) {
|
|
312
|
+
const objText = objLit.getText();
|
|
313
|
+
if (!TOOL_API_PROPERTIES.test(objText))
|
|
314
|
+
continue;
|
|
315
|
+
// Check if any property value references user input
|
|
316
|
+
for (const prop of objLit.getProperties()) {
|
|
317
|
+
if (prop.getKind() !== SyntaxKind.PropertyAssignment)
|
|
318
|
+
continue;
|
|
319
|
+
const pa = prop;
|
|
320
|
+
const propName = pa.getName();
|
|
321
|
+
if (!TOOL_API_PROPERTIES.test(propName))
|
|
322
|
+
continue;
|
|
323
|
+
const valueText = pa.getInitializer()?.getText() || '';
|
|
324
|
+
// Check direct user input OR aliased user input (const tool = req.body.tool → tool)
|
|
325
|
+
const isUserInput = USER_INPUT_PATTERNS.test(valueText) ||
|
|
326
|
+
/\b(userInput|input|query|message)\b/.test(valueText);
|
|
327
|
+
// Also check if value is a variable that was assigned from user input
|
|
328
|
+
let isAliasedInput = false;
|
|
329
|
+
if (!isUserInput) {
|
|
330
|
+
for (const decl of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.VariableDeclaration)) {
|
|
331
|
+
if (decl.getName() === valueText.trim()) {
|
|
332
|
+
const initText = decl.getInitializer()?.getText() || '';
|
|
333
|
+
if (USER_INPUT_PATTERNS.test(initText)) {
|
|
334
|
+
isAliasedInput = true;
|
|
335
|
+
break;
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
if (isUserInput || isAliasedInput) {
|
|
341
|
+
findings.push(finding('tool-calling-manipulation', 'error', 'bug', `User input controls '${propName}' in LLM tool configuration — attacker can invoke arbitrary tools`, ctx.filePath, pa.getStartLineNumber(), { suggestion: 'Never let user input control tool names or schemas. Use a fixed allowlist of tools' }));
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
// Check computed property access on tool arrays: tools[userInput]
|
|
346
|
+
for (const elem of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.ElementAccessExpression)) {
|
|
347
|
+
const objText = elem.getExpression().getText();
|
|
348
|
+
if (!/tool|function/i.test(objText))
|
|
349
|
+
continue;
|
|
350
|
+
const argText = elem.getArgumentExpression()?.getText() || '';
|
|
351
|
+
if (USER_INPUT_PATTERNS.test(argText) || /\b(userInput|input|query)\b/.test(argText)) {
|
|
352
|
+
findings.push(finding('tool-calling-manipulation', 'error', 'bug', `User input used as index into tool array '${objText}' — tool selection manipulation`, ctx.filePath, elem.getStartLineNumber(), { suggestion: 'Validate tool selection against a fixed allowlist' }));
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
// Check for executing LLM-returned tool_calls without validation:
|
|
356
|
+
// response.tool_calls → executeTool(call.name, ...) without allowlist check
|
|
357
|
+
for (const forOf of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.ForOfStatement)) {
|
|
358
|
+
const iterExpr = forOf.getExpression().getText();
|
|
359
|
+
if (!/tool_calls|toolCalls|function_calls/.test(iterExpr))
|
|
360
|
+
continue;
|
|
361
|
+
const forBody = forOf.getStatement().getText();
|
|
362
|
+
// Is there an execute/call pattern using the loop variable's .name?
|
|
363
|
+
if (/\b(execute|call|invoke|run|dispatch)\w*\s?\(/i.test(forBody) &&
|
|
364
|
+
/\.name\b|\.function\b/.test(forBody)) {
|
|
365
|
+
// Check if there's an allowlist/validation on the tool NAME before execution
|
|
366
|
+
if (!/allowlist\w*\.has\s?\(\s?\w+\.name|whitelist\w*\.has\s?\(\s?\w+\.name|allowed\w*\.includes\s?\(\s?\w+\.name|validTools\w*\.has\s?\(\s?\w+\.name/.test(forBody)) {
|
|
367
|
+
findings.push(finding('tool-calling-manipulation', 'error', 'bug', `LLM-returned tool calls executed without allowlist validation — attacker can invoke arbitrary tools via prompt injection`, ctx.filePath, forOf.getStartLineNumber(), { suggestion: 'Validate tool names against a fixed allowlist before executing LLM-returned tool_calls' }));
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
// Also check forEach on tool_calls
|
|
372
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
373
|
+
const calleeText = call.getExpression().getText();
|
|
374
|
+
if (!/tool_calls\.forEach|toolCalls\.forEach|function_calls\.forEach/.test(calleeText))
|
|
375
|
+
continue;
|
|
376
|
+
const argsText = call.getArguments().map(a => a.getText()).join(' ');
|
|
377
|
+
if (/\b(execute|call|invoke|run|dispatch)\w*\s?\(/i.test(argsText) &&
|
|
378
|
+
/\.name\b|\.function\b/.test(argsText)) {
|
|
379
|
+
if (!/allowlist\w*\.has\s?\(\s?\w+\.name|whitelist\w*\.has\s?\(\s?\w+\.name|allowed\w*\.includes\s?\(\s?\w+\.name|validTools\w*\.has\s?\(\s?\w+\.name/.test(argsText)) {
|
|
380
|
+
findings.push(finding('tool-calling-manipulation', 'error', 'bug', `LLM-returned tool calls executed without allowlist validation — attacker can invoke arbitrary tools via prompt injection`, ctx.filePath, call.getStartLineNumber(), { suggestion: 'Validate tool names against a fixed allowlist before executing LLM-returned tool_calls' }));
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
return findings;
|
|
385
|
+
}
|
|
386
|
+
// ── Rule S19: encoding-bypass ────────────────────────────────────────
|
|
387
|
+
// Decoded content (base64, hex, URI) enters prompt without re-sanitization.
|
|
388
|
+
// OWASP LLM01
|
|
389
|
+
const DECODE_PATTERNS = /\batob\s*\(|\bBuffer\.from\s*\([^)]*(['"]base64['"]|['"]hex['"])|\bdecodeURIComponent\s*\(|\bdecodeURI\s*\(/;
|
|
390
|
+
function encodingBypass(ctx) {
|
|
391
|
+
const findings = [];
|
|
392
|
+
const fileText = ctx.sourceFile.getFullText();
|
|
393
|
+
if (!DECODE_PATTERNS.test(fileText))
|
|
394
|
+
return findings;
|
|
395
|
+
if (!LLM_API_PATTERNS.test(fileText) && !PROMPT_CONTEXT.test(fileText))
|
|
396
|
+
return findings;
|
|
397
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
398
|
+
const callText = call.getText();
|
|
399
|
+
if (!DECODE_PATTERNS.test(callText))
|
|
400
|
+
continue;
|
|
401
|
+
// Find what variable it's assigned to
|
|
402
|
+
let varName = '';
|
|
403
|
+
let parent = call.getParent();
|
|
404
|
+
if (parent?.getKind() === SyntaxKind.VariableDeclaration) {
|
|
405
|
+
varName = parent.getName();
|
|
406
|
+
}
|
|
407
|
+
if (!varName)
|
|
408
|
+
continue;
|
|
409
|
+
const fnBody = call.getFirstAncestorByKind(SyntaxKind.Block);
|
|
410
|
+
if (!fnBody)
|
|
411
|
+
continue;
|
|
412
|
+
const bodyText = fnBody.getText();
|
|
413
|
+
// Used in prompt context?
|
|
414
|
+
const usedInTemplate = new RegExp(`\\$\\{[^}]*\\b${varName}\\b`).test(bodyText);
|
|
415
|
+
const usedInConcat = new RegExp(`\\+\\s*${varName}\\b|${varName}\\b\\s*\\+`).test(bodyText);
|
|
416
|
+
if (!usedInTemplate && !usedInConcat)
|
|
417
|
+
continue;
|
|
418
|
+
if (!LLM_API_PATTERNS.test(bodyText) && !PROMPT_CONTEXT.test(bodyText))
|
|
419
|
+
continue;
|
|
420
|
+
// Re-sanitized after decode?
|
|
421
|
+
if (new RegExp(`sanitize\\w*\\s*\\(\\s*${varName}|escape\\w*\\s*\\(\\s*${varName}`).test(bodyText))
|
|
422
|
+
continue;
|
|
423
|
+
const decodeFn = callText.includes('atob') ? 'atob()' :
|
|
424
|
+
callText.includes('Buffer.from') ? 'Buffer.from()' : 'decodeURIComponent()';
|
|
425
|
+
findings.push(finding('encoding-bypass', 'warning', 'bug', `Decoded content '${varName}' from ${decodeFn} used in prompt without re-sanitization — encoding bypass risk`, ctx.filePath, call.getStartLineNumber(), { suggestion: 'Sanitize decoded content before embedding in prompts — base64/hex encoding can bypass input filters' }));
|
|
426
|
+
}
|
|
427
|
+
return findings;
|
|
428
|
+
}
|
|
429
|
+
// ── Rule S20: delimiter-injection ────────────────────────────────────
|
|
430
|
+
// User input in prompt templates without stripping delimiters.
|
|
431
|
+
// OWASP LLM01
|
|
432
|
+
const DELIMITER_PATTERN = /`{3}|---|###|<\/?system>|<\/?instruction>|<\/?user>|<\/?assistant>|<\|system\|>|<\|user\|>|<\|end\|>|<\|assistant\|>|\[INST\]|\[\/INST\]/;
|
|
433
|
+
function delimiterInjection(ctx) {
|
|
434
|
+
const findings = [];
|
|
435
|
+
for (const template of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.TemplateExpression)) {
|
|
436
|
+
const templateText = template.getText();
|
|
437
|
+
// Is this a prompt template? Check for prompt-like content
|
|
438
|
+
if (!PROMPT_CONTEXT.test(templateText) && !DELIMITER_PATTERN.test(templateText))
|
|
439
|
+
continue;
|
|
440
|
+
// Does the template itself use delimiters to structure the prompt?
|
|
441
|
+
const usesDelimiters = DELIMITER_PATTERN.test(templateText);
|
|
442
|
+
if (!usesDelimiters)
|
|
443
|
+
continue;
|
|
444
|
+
// Must be in an LLM/prompt context — skip pure Markdown/frontmatter templates
|
|
445
|
+
const fnBody = template.getFirstAncestorByKind(SyntaxKind.Block);
|
|
446
|
+
if (fnBody) {
|
|
447
|
+
const fnText = fnBody.getText();
|
|
448
|
+
if (!LLM_API_PATTERNS.test(fnText) && !PROMPT_CONTEXT.test(fnText))
|
|
449
|
+
continue;
|
|
450
|
+
}
|
|
451
|
+
// Check if user input is interpolated without stripping delimiters
|
|
452
|
+
const spans = template.getTemplateSpans();
|
|
453
|
+
for (const sp of spans) {
|
|
454
|
+
const exprText = sp.getExpression().getText();
|
|
455
|
+
// Skip sanitized expressions
|
|
456
|
+
if (SANITIZER_CALL.test(exprText))
|
|
457
|
+
continue;
|
|
458
|
+
if (/stripDelimiters|escapeDelimiters|cleanDelimiters/.test(exprText))
|
|
459
|
+
continue;
|
|
460
|
+
// Is this user-controlled?
|
|
461
|
+
const isUserControlled = USER_INPUT_PATTERNS.test(exprText) ||
|
|
462
|
+
/^(question|userInput|userMessage|message|input|query|caption|instruction)\b/.test(exprText);
|
|
463
|
+
if (isUserControlled) {
|
|
464
|
+
findings.push(finding('delimiter-injection', 'warning', 'bug', `User input '${exprText.substring(0, 50)}' in delimiter-structured prompt without stripping — delimiter injection risk`, ctx.filePath, template.getStartLineNumber(), { suggestion: 'Strip or escape delimiters (```, ---, ###, XML tags) from user input before embedding in structured prompts' }));
|
|
465
|
+
break;
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
return findings;
|
|
470
|
+
}
|
|
471
|
+
// ── Rule S21: unsanitized-history ────────────────────────────────────
|
|
472
|
+
// Chat history arrays where user messages aren't sanitized before LLM API.
|
|
473
|
+
// OWASP LLM01
|
|
474
|
+
function unsanitizedHistory(ctx) {
|
|
475
|
+
const findings = [];
|
|
476
|
+
const fileText = ctx.sourceFile.getFullText();
|
|
477
|
+
// Pattern A: Spread unsanitized message arrays into LLM API calls
|
|
478
|
+
// e.g., llm.complete({ messages: [...userMessages] }) or messages: [system, ...rawHistory]
|
|
479
|
+
if (LLM_API_PATTERNS.test(fileText)) {
|
|
480
|
+
for (const spread of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.SpreadElement)) {
|
|
481
|
+
const spreadVar = spread.getExpression().getText();
|
|
482
|
+
// Is this spreading a messages/history-like variable?
|
|
483
|
+
if (!/message|history|conversation|chat|userMessage/i.test(spreadVar))
|
|
484
|
+
continue;
|
|
485
|
+
// Skip if sanitized
|
|
486
|
+
if (SANITIZER_CALL.test(spreadVar))
|
|
487
|
+
continue;
|
|
488
|
+
// Is this inside an array that feeds an LLM call?
|
|
489
|
+
const parentArray = spread.getFirstAncestorByKind(SyntaxKind.ArrayLiteralExpression);
|
|
490
|
+
if (!parentArray)
|
|
491
|
+
continue;
|
|
492
|
+
const fnBody = spread.getFirstAncestorByKind(SyntaxKind.Block);
|
|
493
|
+
if (!fnBody)
|
|
494
|
+
continue;
|
|
495
|
+
if (!LLM_API_PATTERNS.test(fnBody.getText()))
|
|
496
|
+
continue;
|
|
497
|
+
findings.push(finding('unsanitized-history', 'warning', 'bug', `Unsanitized message array '${spreadVar.substring(0, 40)}' spread into LLM API call — conversation injection risk`, ctx.filePath, spread.getStartLineNumber(), { suggestion: 'Sanitize user messages before spreading into conversation history sent to LLM APIs' }));
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
// Pattern B: messages.push({ role: "user", content: unsanitizedVar })
|
|
501
|
+
if (!(/\.push\s*\(/.test(fileText) && /role.*user|user.*role/.test(fileText)))
|
|
502
|
+
return findings;
|
|
503
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
504
|
+
const callee = call.getExpression();
|
|
505
|
+
if (callee.getKind() !== SyntaxKind.PropertyAccessExpression)
|
|
506
|
+
continue;
|
|
507
|
+
const pa = callee;
|
|
508
|
+
if (pa.getName() !== 'push')
|
|
509
|
+
continue;
|
|
510
|
+
// Is this pushing to a messages-like array?
|
|
511
|
+
const arrayName = pa.getExpression().getText();
|
|
512
|
+
if (!/message|history|conversation|chat/i.test(arrayName))
|
|
513
|
+
continue;
|
|
514
|
+
const args = call.getArguments();
|
|
515
|
+
if (args.length === 0)
|
|
516
|
+
continue;
|
|
517
|
+
const argText = args[0].getText();
|
|
518
|
+
// Does it contain role: "user" and a content field?
|
|
519
|
+
if (!/role\s*:\s*['"]user['"]/.test(argText))
|
|
520
|
+
continue;
|
|
521
|
+
if (!/content\s*:/.test(argText))
|
|
522
|
+
continue;
|
|
523
|
+
// Extract the content value
|
|
524
|
+
const contentMatch = argText.match(/content\s*:\s*([^,}]+)/);
|
|
525
|
+
if (!contentMatch)
|
|
526
|
+
continue;
|
|
527
|
+
const contentValue = contentMatch[1].trim();
|
|
528
|
+
// Is it sanitized?
|
|
529
|
+
if (SANITIZER_CALL.test(contentValue))
|
|
530
|
+
continue;
|
|
531
|
+
// Is it a literal string? (safe)
|
|
532
|
+
if (/^['"]/.test(contentValue))
|
|
533
|
+
continue;
|
|
534
|
+
// Check if there's a nearby LLM API call in the same function
|
|
535
|
+
const fnBody = call.getFirstAncestorByKind(SyntaxKind.Block);
|
|
536
|
+
if (!fnBody)
|
|
537
|
+
continue;
|
|
538
|
+
if (!LLM_API_PATTERNS.test(fnBody.getText()))
|
|
539
|
+
continue;
|
|
540
|
+
findings.push(finding('unsanitized-history', 'warning', 'bug', `Unsanitized content '${contentValue.substring(0, 40)}' pushed to chat history '${arrayName}' — conversation injection risk`, ctx.filePath, call.getStartLineNumber(), { suggestion: 'Sanitize user messages before adding to conversation history sent to LLM APIs' }));
|
|
541
|
+
}
|
|
542
|
+
return findings;
|
|
543
|
+
}
|
|
544
|
+
// ── Rule S22: json-output-manipulation ───────────────────────────────
|
|
545
|
+
// JSON.parse on LLM output without schema validation.
|
|
546
|
+
// OWASP LLM02
|
|
547
|
+
function jsonOutputManipulation(ctx) {
|
|
548
|
+
const findings = [];
|
|
549
|
+
const fileText = ctx.sourceFile.getFullText();
|
|
550
|
+
if (!/JSON\.parse/.test(fileText))
|
|
551
|
+
return findings;
|
|
552
|
+
// Collect LLM response variables
|
|
553
|
+
const llmVars = new Set();
|
|
554
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
555
|
+
const callText = call.getExpression().getText();
|
|
556
|
+
if (!LLM_API_PATTERNS.test(callText))
|
|
557
|
+
continue;
|
|
558
|
+
let parent = call.getParent();
|
|
559
|
+
if (parent?.getKind() === SyntaxKind.AwaitExpression)
|
|
560
|
+
parent = parent.getParent();
|
|
561
|
+
if (parent?.getKind() === SyntaxKind.VariableDeclaration) {
|
|
562
|
+
llmVars.add(parent.getName());
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
// Only match LLM_RESPONSE_NAMES if file actually has LLM API calls (Fix 7)
|
|
566
|
+
if (llmVars.size > 0) {
|
|
567
|
+
for (const decl of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.VariableDeclaration)) {
|
|
568
|
+
if (LLM_RESPONSE_NAMES.test(decl.getName()))
|
|
569
|
+
llmVars.add(decl.getName());
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
// Propagate: const raw = llmVar.text → raw is also tainted (Fix 6)
|
|
573
|
+
for (const decl of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.VariableDeclaration)) {
|
|
574
|
+
const init = decl.getInitializer()?.getText() || '';
|
|
575
|
+
for (const v of llmVars) {
|
|
576
|
+
if (new RegExp(`\\b${escapeRegExp(v)}\\b`).test(init) && !llmVars.has(decl.getName())) {
|
|
577
|
+
llmVars.add(decl.getName());
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
if (llmVars.size === 0)
|
|
582
|
+
return findings;
|
|
583
|
+
// Find JSON.parse calls on LLM output
|
|
584
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
585
|
+
const callee = call.getExpression().getText();
|
|
586
|
+
if (callee !== 'JSON.parse')
|
|
587
|
+
continue;
|
|
588
|
+
const args = call.getArguments();
|
|
589
|
+
if (args.length === 0)
|
|
590
|
+
continue;
|
|
591
|
+
const argText = args[0].getText();
|
|
592
|
+
// Does the argument reference an LLM output variable (including property access)?
|
|
593
|
+
let matchedVar = '';
|
|
594
|
+
for (const v of llmVars) {
|
|
595
|
+
if (new RegExp(`\\b${v}\\b`).test(argText)) {
|
|
596
|
+
matchedVar = v;
|
|
597
|
+
break;
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
// Also check if argument contains LLM-like names directly
|
|
601
|
+
if (!matchedVar && LLM_RESPONSE_NAMES.test(argText)) {
|
|
602
|
+
matchedVar = argText.substring(0, 30);
|
|
603
|
+
}
|
|
604
|
+
if (!matchedVar)
|
|
605
|
+
continue;
|
|
606
|
+
// Find what variable the parsed result is assigned to
|
|
607
|
+
let parsedVar = '';
|
|
608
|
+
let parent = call.getParent();
|
|
609
|
+
if (parent?.getKind() === SyntaxKind.VariableDeclaration) {
|
|
610
|
+
parsedVar = parent.getName();
|
|
611
|
+
}
|
|
612
|
+
// Check if schema validation follows (exclude JSON.parse itself from matches)
|
|
613
|
+
const fnBody = call.getFirstAncestorByKind(SyntaxKind.Block);
|
|
614
|
+
if (!fnBody)
|
|
615
|
+
continue;
|
|
616
|
+
const parseIdx = fnBody.getText().indexOf('JSON.parse');
|
|
617
|
+
const bodyAfterParse = fnBody.getText().slice(parseIdx);
|
|
618
|
+
// Remove the JSON.parse call itself to avoid false positive on .parse()
|
|
619
|
+
const bodyAfterParseCall = bodyAfterParse.replace(/JSON\.parse\s*\([^)]*\)/, '');
|
|
620
|
+
const hasValidation = VALIDATION_CALL.test(bodyAfterParseCall) &&
|
|
621
|
+
(parsedVar ? new RegExp(`\\b${parsedVar}\\b`).test(bodyAfterParseCall) : true);
|
|
622
|
+
if (hasValidation)
|
|
623
|
+
continue;
|
|
624
|
+
findings.push(finding('json-output-manipulation', 'warning', 'bug', `JSON.parse on LLM output '${matchedVar}' without schema validation — output may contain injected keys`, ctx.filePath, call.getStartLineNumber(), { suggestion: 'Validate parsed LLM output with zod/joi/.parse() before using in application logic' }));
|
|
625
|
+
}
|
|
626
|
+
return findings;
|
|
627
|
+
}
|
|
628
|
+
// ── Rule S23: missing-output-validation ──────────────────────────────
|
|
629
|
+
// LLM API response used directly in application logic without validation.
|
|
630
|
+
// OWASP LLM02
|
|
631
|
+
function missingOutputValidation(ctx) {
|
|
632
|
+
const findings = [];
|
|
633
|
+
// Collect LLM response variables and their declaration lines
|
|
634
|
+
const llmVars = new Map(); // varName → line
|
|
635
|
+
for (const call of ctx.sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression)) {
|
|
636
|
+
const callText = call.getExpression().getText();
|
|
637
|
+
if (!LLM_API_PATTERNS.test(callText))
|
|
638
|
+
continue;
|
|
639
|
+
let parent = call.getParent();
|
|
640
|
+
if (parent?.getKind() === SyntaxKind.AwaitExpression)
|
|
641
|
+
parent = parent.getParent();
|
|
642
|
+
if (parent?.getKind() === SyntaxKind.VariableDeclaration) {
|
|
643
|
+
const decl = parent;
|
|
644
|
+
llmVars.set(decl.getName(), decl.getStartLineNumber());
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
if (llmVars.size === 0)
|
|
648
|
+
return findings;
|
|
649
|
+
for (const [varName, declLine] of llmVars) {
|
|
650
|
+
// Check if this variable is validated before use
|
|
651
|
+
const fnBody = ctx.sourceFile.getDescendantsOfKind(SyntaxKind.Block).find(b => b.getStartLineNumber() <= declLine && b.getEndLineNumber() >= declLine);
|
|
652
|
+
if (!fnBody)
|
|
653
|
+
continue;
|
|
654
|
+
const bodyText = fnBody.getText();
|
|
655
|
+
const ev = escapeRegExp(varName);
|
|
656
|
+
// Is it used in control flow or as a return value?
|
|
657
|
+
const condMatch = new RegExp(`if\\s*\\([^)]*\\b${ev}\\b`).exec(bodyText);
|
|
658
|
+
const retMatch = new RegExp(`return\\s+[^;]*\\b${ev}\\b`).exec(bodyText);
|
|
659
|
+
const argMatch = new RegExp(`\\w+\\s*\\([^)]*\\b${ev}\\b[^)]*\\)`).exec(bodyText);
|
|
660
|
+
const usedInCondition = !!condMatch;
|
|
661
|
+
const usedInReturn = !!retMatch;
|
|
662
|
+
const usedAsArg = !!argMatch;
|
|
663
|
+
// Find earliest usage position
|
|
664
|
+
const usagePos = Math.min(condMatch?.index ?? Infinity, retMatch?.index ?? Infinity, argMatch?.index ?? Infinity);
|
|
665
|
+
// Is there validation BEFORE the first unsafe use?
|
|
666
|
+
const validationMatch = new RegExp(`\\w+Schema\\.parse\\s*\\(\\s*${ev}|validate\\w*\\s*\\(\\s*${ev}|\\.parse\\s*\\(\\s*${ev}\\s*\\)|\\.safeParse\\s*\\(\\s*${ev}\\s*\\)`).exec(bodyText);
|
|
667
|
+
if (validationMatch && validationMatch.index < usagePos)
|
|
668
|
+
continue;
|
|
669
|
+
if (!usedInCondition && !usedInReturn && !usedAsArg)
|
|
670
|
+
continue;
|
|
671
|
+
findings.push(finding('missing-output-validation', 'warning', 'bug', `LLM response '${varName}' used in application logic without validation`, ctx.filePath, declLine, { suggestion: 'Validate LLM output with a schema before using in conditionals, returns, or function arguments' }));
|
|
672
|
+
}
|
|
673
|
+
return findings;
|
|
674
|
+
}
|
|
675
|
+
// ── Exported Security v4 Rules ────────────────────────────────────────
|
|
676
|
+
export const securityV4Rules = [
|
|
677
|
+
indirectPromptInjection,
|
|
678
|
+
llmOutputExecution,
|
|
679
|
+
systemPromptLeakage,
|
|
680
|
+
ragPoisoning,
|
|
681
|
+
toolCallingManipulation,
|
|
682
|
+
encodingBypass,
|
|
683
|
+
delimiterInjection,
|
|
684
|
+
unsanitizedHistory,
|
|
685
|
+
jsonOutputManipulation,
|
|
686
|
+
missingOutputValidation,
|
|
687
|
+
];
|
|
688
|
+
//# sourceMappingURL=security-v4.js.map
|