@clear-capabilities/agentic-security-scanner 0.77.0 → 0.78.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/.agentic-security/findings.json +1907 -0
- package/bin/.agentic-security/last-scan.json +1907 -0
- package/bin/.agentic-security/last-scan.json.sig +1 -0
- package/bin/.agentic-security/scan-history.json +115 -0
- package/bin/.agentic-security/streak.json +20 -0
- package/bin/agentic-security.js +33 -2
- package/dist/178.index.js +1 -1
- package/dist/384.index.js +1 -1
- package/dist/637.index.js +1 -1
- package/dist/718.index.js +106 -0
- package/dist/824.index.js +126 -0
- package/dist/838.index.js +1 -1
- package/dist/agentic-security.mjs +32 -32
- package/dist/agentic-security.mjs.sha256 +1 -1
- package/package.json +3 -3
- package/src/.agentic-security/findings.json +82642 -0
- package/src/.agentic-security/last-scan.json +82642 -0
- package/src/.agentic-security/last-scan.json.sig +1 -0
- package/src/.agentic-security/scan-history.json +10054 -0
- package/src/.agentic-security/streak.json +21 -0
- package/src/dataflow/.agentic-security/findings.json +3515 -0
- package/src/dataflow/.agentic-security/last-scan.json +3515 -0
- package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
- package/src/dataflow/.agentic-security/scan-history.json +702 -0
- package/src/dataflow/.agentic-security/streak.json +22 -0
- package/src/dataflow/async-sequencing.js +16 -7
- package/src/dataflow/builtin-summaries.js +131 -0
- package/src/dataflow/catalog.js +107 -0
- package/src/dataflow/cross-repo.js +75 -1
- package/src/dataflow/engine.js +129 -0
- package/src/dataflow/implicit-flow.js +24 -6
- package/src/dataflow/stub-aware-filter.js +69 -11
- package/src/dataflow/summaries.js +28 -3
- package/src/engine-parallel.js +70 -0
- package/src/engine.js +165 -15
- package/src/ir/.agentic-security/findings.json +3777 -0
- package/src/ir/.agentic-security/last-scan.json +3777 -0
- package/src/ir/.agentic-security/last-scan.json.sig +1 -0
- package/src/ir/.agentic-security/scan-history.json +771 -0
- package/src/ir/.agentic-security/streak.json +21 -0
- package/src/ir/index.js +22 -1
- package/src/ir/parser-go.js +403 -0
- package/src/ir/parser-js.js +2 -0
- package/src/ir/parser-php.js +330 -0
- package/src/ir/parser-py.helper.py +137 -11
- package/src/ir/parser-rb.js +309 -0
- package/src/posture/.agentic-security/findings.json +51562 -0
- package/src/posture/.agentic-security/last-scan.json +51562 -0
- package/src/posture/.agentic-security/last-scan.json.sig +1 -0
- package/src/posture/.agentic-security/scan-history.json +650 -0
- package/src/posture/.agentic-security/streak.json +20 -0
- package/src/posture/calibration.js +14 -0
- package/src/posture/triage.js +13 -0
- package/src/report/.agentic-security/findings.json +80 -0
- package/src/report/.agentic-security/last-scan.json +80 -0
- package/src/report/.agentic-security/last-scan.json.sig +1 -0
- package/src/report/.agentic-security/scan-history.json +35 -0
- package/src/report/.agentic-security/streak.json +22 -0
- package/src/report/index.js +23 -2
- package/src/sast/.agentic-security/findings.json +5190 -0
- package/src/sast/.agentic-security/last-scan.json +5190 -0
- package/src/sast/.agentic-security/last-scan.json.sig +1 -0
- package/src/sast/.agentic-security/scan-history.json +408 -0
- package/src/sast/.agentic-security/streak.json +20 -0
- package/src/sast/cache-poisoning.js +77 -0
- package/src/sast/comparison-safety.js +73 -0
- package/src/sast/db-taint.js +54 -0
- package/src/sast/graphql.js +127 -0
- package/src/sast/llm-stored-prompt.js +57 -0
- package/src/sast/mutation-xss.js +43 -0
- package/src/sast/nosql-injection.js +5 -0
- package/src/sast/null-byte-injection.js +76 -0
- package/src/sast/redos-nfa.js +338 -0
- package/src/sast/sensitive-data-logging.js +73 -0
- package/src/sast/weak-password-hash.js +77 -0
- package/src/sast/weak-randomness.js +100 -0
- package/src/sca/.agentic-security/findings.json +1587 -0
- package/src/sca/.agentic-security/last-scan.json +1587 -0
- package/src/sca/.agentic-security/last-scan.json.sig +1 -0
- package/src/sca/.agentic-security/scan-history.json +36 -0
- package/src/sca/.agentic-security/streak.json +21 -0
- package/src/sca/llm-function-extract.js +107 -0
- package/src/sca/vendor-detect.js +91 -0
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
// PHP IR frontend.
|
|
2
|
+
//
|
|
3
|
+
// Regex-based, follows the parser-cs.js / parser-go.js pattern. Focused on
|
|
4
|
+
// PDO, mysqli, Laravel DB facade, and PHP superglobal taint surface.
|
|
5
|
+
//
|
|
6
|
+
// What we model:
|
|
7
|
+
// - function / method declarations
|
|
8
|
+
// - $var = expr assignments
|
|
9
|
+
// - function calls and method calls ($obj->method(args))
|
|
10
|
+
// - return
|
|
11
|
+
// - foreach as loop-header + assign
|
|
12
|
+
// - PHP superglobals ($_GET, $_POST, $_REQUEST, etc.) as ident sources
|
|
13
|
+
//
|
|
14
|
+
// What we do NOT model:
|
|
15
|
+
// - arrow functions (fn($x) => expr)
|
|
16
|
+
// - traits / interfaces
|
|
17
|
+
// - anonymous classes
|
|
18
|
+
// - for / do-while / switch / try control flow — only if/else, while and foreach get CFG nodes
|
|
19
|
+
|
|
20
|
+
import * as crypto from 'node:crypto';
|
|
21
|
+
|
|
22
|
+
// Matches a PHP function / method declaration up to and including the opening
// brace of its body. Capture groups: g1 = function name, g2 = raw parameter
// text. The `g` flag is required because parsePhpFile steers the scan through
// `lastIndex`.
//
// Besides a bare `function name(params) {` the pattern tolerates:
//   - public / private / protected / static / abstract / final modifiers
//   - by-reference returns:            function &name()
//   - nullable return types:           : ?User
//   - namespaced return types:         : \App\Http\Response
//   - union / intersection types:      : int|string, : A&B
// Closures (`function (...) {`) have no name and are deliberately not matched.
const FUNC_RE = new RegExp(
  '(?:^|[\\n;{}])\\s*' +
  '(?:(?:public|private|protected|static|abstract|final)\\s+)*' +
  'function\\s+' +
  '(?:&\\s*)?' +                                        // optional by-reference return
  '([A-Za-z_]\\w*)' +                                   // function name (g1)
  '\\s*\\(([^)]*)\\)' +                                 // params (g2)
  '(?:\\s*:\\s*(?:\\?\\s*)?[A-Za-z_\\\\][\\w\\\\]*' +   // optional return type …
  '(?:\\s*[|&]\\s*[A-Za-z_\\\\][\\w\\\\]*)*)?' +        // … with |/& alternatives
  '\\s*\\{', 'g');
|
|
30
|
+
|
|
31
|
+
/**
 * Split the text of a PHP block body into top-level statements.
 *
 * A statement ends at a `;` that is outside every string and bracket pair.
 * Brace blocks introduced by a control keyword (if / while / for / foreach /
 * switch / try / do) have no terminating `;`, so they are emitted as soon as
 * their closing `}` returns to depth 0 — unless an `else`, `elseif`, `catch`
 * or `finally` clause (or the `while` of a do-while) follows, in which case
 * the clause stays attached to the same statement.
 *
 * `//`, `#` and block comments are removed. `#[` is kept because it opens a
 * PHP 8 attribute, not a comment. Newlines inside or after comments are
 * preserved so callers that count `\n` keep reasonable line numbers.
 *
 * @param {string} body - source text between a block's braces.
 * @returns {string[]} trimmed, non-empty statements without the trailing `;`.
 */
function _splitStatements(body) {
  const BLOCK_HEAD = /^(?:if|while|for|foreach|switch|try|do)\b/;
  const BLOCK_TAIL = /^(?:else|elseif|catch|finally)\b/;
  const out = [];
  let buf = '';
  let depth = 0;
  let inStr = null;

  const flush = () => {
    const t = buf.trim();
    if (t) out.push(t);
    buf = '';
  };

  for (let i = 0; i < body.length; i++) {
    const c = body[i];
    const next = body[i + 1];

    if (inStr) {
      buf += c;
      // A backslash escapes the next character, whatever it is.
      if (c === '\\' && i + 1 < body.length) buf += body[++i];
      else if (c === inStr) inStr = null;
      continue;
    }
    if (c === '"' || c === '\'') { inStr = c; buf += c; continue; }

    // Line comments: skip up to (not including) the newline.
    if ((c === '/' && next === '/') || (c === '#' && next !== '[')) {
      while (i < body.length && body[i] !== '\n') i++;
      i--; // let the loop's i++ land on the newline so it is kept
      continue;
    }
    // Block comments: drop the text, keep its newlines.
    if (c === '/' && next === '*') {
      const close = body.indexOf('*/', i + 2);
      const stop = close < 0 ? body.length : close + 2;
      for (let j = i; j < stop; j++) if (body[j] === '\n') buf += '\n';
      i = stop - 1;
      continue;
    }

    if (c === ';' && depth === 0) { flush(); continue; }
    buf += c;

    if (c === '{' || c === '(' || c === '[') { depth++; continue; }
    if (c !== '}' && c !== ')' && c !== ']') continue;
    if (depth === 0) continue; // stray closer: never let depth go negative
    depth--;
    if (c !== '}' || depth !== 0) continue;

    // A brace just closed at top level: finish control-flow blocks here.
    const head = buf.trimStart();
    if (!BLOCK_HEAD.test(head)) continue;
    let j = i + 1;
    while (j < body.length && /\s/.test(body[j])) j++;
    const ahead = body.slice(j, j + 8);
    if (BLOCK_TAIL.test(ahead)) continue;
    if (/^do\b/.test(head) && /^while\b/.test(ahead)) continue;
    flush();
  }
  flush();
  return out;
}
|
|
64
|
+
|
|
65
|
+
// Index of the quote that closes the string literal opening at s[0], or -1
// when the literal is never closed.
function _stringEnd(s) {
  const quote = s[0];
  for (let i = 1; i < s.length; i++) {
    if (s[i] === '\\') { i++; continue; }
    if (s[i] === quote) return i;
  }
  return -1;
}

// Names of variables interpolated into a double-quoted PHP string
// (`$x`, `{$x}`, `${x}`); a `$` preceded by a backslash is skipped.
function _interpolatedVars(s) {
  const names = [];
  const re = /\$\{?([A-Za-z_]\w*)/g;
  let m;
  while ((m = re.exec(s)) !== null) {
    if (m.index > 0 && s[m.index - 1] === '\\') continue;
    names.push('$' + m[1]);
  }
  return names;
}

// True when the bracket opening at s[open] is closed by the last
// non-whitespace character of s, i.e. the call spans the whole expression.
function _closesAtEnd(s, open) {
  let depth = 0;
  let quote = null;
  for (let i = open; i < s.length; i++) {
    const c = s[i];
    if (quote) {
      if (c === '\\') i++;
      else if (c === quote) quote = null;
      continue;
    }
    if (c === '"' || c === '\'') { quote = c; continue; }
    if (c === '(' || c === '[' || c === '{') {
      depth++;
    } else if (c === ')' || c === ']' || c === '}') {
      depth--;
      if (depth === 0) return s.slice(i + 1).trim() === '';
      if (depth < 0) return false;
    }
  }
  return false;
}

/**
 * Lower a PHP expression (as text) to an IR expression node.
 *
 * Produces one of:
 *   { kind: 'literal', value }            string / number / true / false / null
 *   { kind: 'ident', name }               `$var` or a bare superglobal
 *   { kind: 'member', object, prop }      `$obj->prop`, `$_GET['k']`
 *   { kind: 'call', callee, args }        `f(..)`, `$o->m(..)`, `C::m(..)`;
 *                                         `->` and `::` become `.` in callee
 *   { kind: 'tpl', parts }                `.` concatenation, or a double-quoted
 *                                         string with interpolated variables
 *   { kind: 'unknown' }                   anything else
 *
 * Notes on the non-obvious cases:
 *   - A leading quote only yields a literal when the string spans the whole
 *     expression; `"a" . $b` is lowered as a concatenation so `$b` survives.
 *   - A call is only recognised when its opening parenthesis is closed by the
 *     final character; `foo($a) . bar($b)` is a concatenation of two calls.
 *   - Concatenation only recurses when splitting actually produced smaller
 *     pieces; otherwise the same text would be lowered again without end.
 *
 * @param {string} text - expression source text.
 * @returns {object} IR expression node.
 */
function _lowerExpr(text) {
  const s = String(text || '').trim();
  if (!s) return { kind: 'unknown' };

  const quoted = s[0] === '"' || s[0] === '\'';
  if (quoted) {
    const end = _stringEnd(s);
    if (end === -1 || end === s.length - 1) {
      const literal = { kind: 'literal', value: s };
      // Only double-quoted strings interpolate variables in PHP.
      const vars = s[0] === '"' ? _interpolatedVars(s) : [];
      if (vars.length === 0) return literal;
      return { kind: 'tpl', parts: [literal, ...vars.map((name) => ({ kind: 'ident', name }))] };
    }
    // Text follows the closing quote: fall through to concatenation.
  }
  if (/^\d/.test(s)) return { kind: 'literal', value: s };
  if (/^(true|false|null|NULL)\b/.test(s)) return { kind: 'literal', value: s };

  // Superglobals
  if (/^\$_(GET|POST|REQUEST|COOKIE|SERVER|FILES|SESSION|ENV)\b/.test(s)) {
    const parts = s.split(/[\[\]'"]+/).filter(Boolean);
    if (parts.length === 1) return { kind: 'ident', name: parts[0] };
    let cur = { kind: 'ident', name: parts[0] };
    for (let i = 1; i < parts.length; i++) {
      cur = { kind: 'member', object: cur, prop: parts[i] || '[]' };
    }
    return cur;
  }

  // Variable
  if (/^\$[A-Za-z_]\w*$/.test(s)) return { kind: 'ident', name: s };

  // Call: func(args), $obj->method(args) or ClassName::method(args)
  const call = s.match(/^(\$[\w]+(?:->[\w]+)*|[A-Za-z_][\w]*(?:::[\w]+)*)\s*\((.*)\)\s*$/s);
  if (call && _closesAtEnd(s, s.indexOf('(', call[1].length))) {
    const callee = call[1].replace(/->/g, '.').replace(/::/g, '.');
    const args = _splitTopLevelCommas(call[2]).map((arg) => _lowerExpr(arg));
    return { kind: 'call', callee, args };
  }

  // Concat with .
  if (s.includes('.') && /["'$]/.test(s)) {
    const pieces = _splitTopLevelDot(s);
    const unsplit = pieces.length === 1 && pieces[0] === s;
    if (!unsplit) {
      return { kind: 'tpl', parts: pieces.map((piece) => _lowerExpr(piece)) };
    }
  }

  // Member: $obj->prop
  if (/^\$[\w]+(?:->[\w]+)+$/.test(s)) {
    const parts = s.split('->');
    let cur = { kind: 'ident', name: parts[0] };
    for (let i = 1; i < parts.length; i++) cur = { kind: 'member', object: cur, prop: parts[i] };
    return cur;
  }

  // A string followed by an operator we do not model is still constant-led.
  return quoted ? { kind: 'literal', value: s } : { kind: 'unknown' };
}
|
|
109
|
+
|
|
110
|
+
/**
 * Split an argument list on commas that sit outside every string literal and
 * bracket pair. Each piece is trimmed; an empty piece between two commas is
 * kept (so positions line up), but an empty trailing piece is dropped.
 *
 * @param {string} s - text between a call's parentheses.
 * @returns {string[]} the individual argument texts.
 */
function _splitTopLevelCommas(s) {
  const pieces = [];
  let current = '';
  let nesting = 0;
  let quote = null;
  let pos = 0;

  while (pos < s.length) {
    const ch = s[pos];
    pos += 1;

    if (quote) {
      current += ch;
      if (ch === '\\') {
        // Copy the escaped character verbatim and step over it.
        current += s[pos] || '';
        pos += 1;
      } else if (ch === quote) {
        quote = null;
      }
      continue;
    }

    switch (ch) {
      case '"':
      case '\'':
        quote = ch;
        current += ch;
        continue;
      case '(':
      case '{':
      case '[':
        nesting += 1;
        break;
      case ')':
      case '}':
      case ']':
        nesting -= 1;
        break;
      case ',':
        if (nesting === 0) {
          pieces.push(current.trim());
          current = '';
          continue;
        }
        break;
      default:
        break;
    }
    current += ch;
  }

  const last = current.trim();
  if (last) pieces.push(last);
  return pieces;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Split a PHP concatenation on `.` operators that sit outside every string
 * literal and bracket pair. Pieces are trimmed; an empty trailing piece is
 * dropped, empty inner pieces are kept.
 *
 * @param {string} s - expression text.
 * @returns {string[]} the operand texts.
 */
function _splitTopLevelDot(s) {
  const segments = [];
  let pending = '';
  let level = 0;
  let openQuote = null;
  let escaped = false;

  for (let idx = 0; idx < s.length; idx += 1) {
    const ch = s.charAt(idx);

    if (escaped) {
      // Character right after a backslash inside a string: copy as-is.
      pending += ch;
      escaped = false;
      continue;
    }

    if (openQuote) {
      pending += ch;
      if (ch === '\\') {
        escaped = true;
      } else if (ch === openQuote) {
        openQuote = null;
      }
      continue;
    }

    if (ch === '"' || ch === '\'') {
      openQuote = ch;
      pending += ch;
      continue;
    }

    const opens = ch === '(' || ch === '{' || ch === '[';
    const closes = ch === ')' || ch === '}' || ch === ']';
    if (opens) level += 1;
    if (closes) level -= 1;

    const isSeparator = ch === '.' && level === 0;
    if (isSeparator) {
      segments.push(pending.trim());
      pending = '';
    } else {
      pending += ch;
    }
  }

  const rest = pending.trim();
  if (rest) segments.push(rest);
  return segments;
}
|
|
155
|
+
|
|
156
|
+
/**
 * Lower one PHP statement (without its trailing `;`) to an IR node.
 *
 * Recognised forms:
 *   return [expr]            -> { kind: 'return', line, value }
 *   throw expr               -> { kind: 'throw', line, value }
 *   $v = expr                -> { kind: 'assign', line, target, source }
 *   $v OP= expr              -> assign whose source is a 'tpl' of the old
 *                               target value and expr, so whatever the target
 *                               already held is not forgotten (`.=`, `+=`,
 *                               `??=`, ...)
 *   $v[...] = expr, $v[] =   -> same merge, with the base variable as target
 *   f(args) / $o->m(args) /
 *   C::m(args)               -> { kind: 'call', line, callee, args }
 *
 * `$a == $b` and `$a === $b` are comparisons, not assignments, and are not
 * lowered. Anything unrecognised yields null.
 *
 * @param {string} stmt - statement text.
 * @param {number} line - line number recorded on the node.
 * @returns {object|null} IR node, or null when the statement is not modelled.
 */
function _lowerStmt(stmt, line) {
  const s = stmt.trim();
  if (!s || s.startsWith('//') || s.startsWith('#')) return null;

  if (/^return\b/.test(s)) {
    const rest = s.replace(/^return\s*/, '').trim();
    return { kind: 'return', line, value: rest ? _lowerExpr(rest) : null };
  }
  if (/^throw\b/.test(s)) {
    return { kind: 'throw', line, value: _lowerExpr(s.replace(/^throw\s+/, '')) };
  }

  // Assignment: target, optional [subscripts], optional compound operator,
  // then a single `=` that is not the start of `==`.
  const assign = s.match(
    /^(\$\w+(?:->\w+)*)((?:\s*\[[^\]]*\])*)\s*(\?\?|\*\*|<<|>>|[-+*\/.%&|^])?=(?!=)\s*(.+)$/s);
  if (assign) {
    const [, target, subscript, op, rhs] = assign;
    // `=& $x` binds by reference; the referenced expression is what flows.
    const value = _lowerExpr(rhs.replace(/^&\s*/, ''));
    // Compound and element writes update the target rather than replace it.
    const keepsOld = Boolean(op) || Boolean(subscript);
    const source = keepsOld ? { kind: 'tpl', parts: [_lowerExpr(target), value] } : value;
    return { kind: 'assign', line, target, source };
  }

  // Statement-form call
  const call = s.match(/^(\$[\w]+(?:->[\w]+)*|[A-Za-z_][\w]*(?:::[\w]+)*)\s*\((.*)\)\s*$/s);
  if (call) {
    const callee = call[1].replace(/->/g, '.').replace(/::/g, '.');
    const args = _splitTopLevelCommas(call[2]).map((arg) => _lowerExpr(arg));
    return { kind: 'call', line, callee, args };
  }
  return null;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Find the end of the brace block that opens at `openBrace`.
 *
 * Braces inside string literals and inside comments (`//`, `#`, and block
 * comments) are ignored. Comments need explicit handling because an
 * apostrophe in one (e.g. `// don't`) would otherwise be read as the start
 * of a string and swallow the real closing brace. `#[` opens a PHP 8
 * attribute, not a comment, and is scanned as code.
 *
 * @param {string} src - full source text.
 * @param {number} openBrace - index of the block's opening `{`.
 * @returns {{body: string, end: number}|null} text between the braces and the
 *   index of the matching `}`, or null when the block is never closed.
 */
function _extractBody(src, openBrace) {
  let depth = 1;
  let inStr = null;
  for (let i = openBrace + 1; i < src.length; i++) {
    const c = src[i];
    if (inStr) {
      if (c === '\\') i++; // skip the escaped character
      else if (c === inStr) inStr = null;
      continue;
    }
    if (c === '"' || c === '\'') { inStr = c; continue; }

    const next = src[i + 1];
    if ((c === '/' && next === '/') || (c === '#' && next !== '[')) {
      const newline = src.indexOf('\n', i);
      if (newline < 0) return null; // comment runs to EOF: block never closes
      i = newline;
      continue;
    }
    if (c === '/' && next === '*') {
      const close = src.indexOf('*/', i + 2);
      if (close < 0) return null;
      i = close + 1;
      continue;
    }

    if (c === '{') {
      depth++;
    } else if (c === '}') {
      depth--;
      if (depth === 0) return { body: src.slice(openBrace + 1, i), end: i };
    }
  }
  return null;
}
|
|
201
|
+
|
|
202
|
+
/**
 * 1-based line number of offset `idx` in `src`: one plus the number of
 * newline characters strictly before `idx`. Offsets past the end count every
 * newline in the text.
 *
 * @param {string} src - source text.
 * @param {number} idx - character offset.
 * @returns {number} line number.
 */
function _lineAt(src, idx) {
  const limit = Math.min(idx, src.length);
  let newlines = 0;
  let at = src.indexOf('\n');
  while (at !== -1 && at < limit) {
    newlines += 1;
    at = src.indexOf('\n', at + 1);
  }
  return newlines + 1;
}
|
|
207
|
+
|
|
208
|
+
/**
 * Build the qualified id of a function: `<file>::<name>@<line>#<hash>`, where
 * `<hash>` is the first 8 hex digits of the SHA-256 of the function body.
 *
 * @param {string} file - file path.
 * @param {string} name - function name.
 * @param {number} line - declaration line.
 * @param {string} body - function body text that is hashed.
 * @returns {string} the qualified id.
 */
function _qid(file, name, line, body) {
  const hasher = crypto.createHash('sha256');
  hasher.update(body);
  const shortHash = hasher.digest('hex').substring(0, 8);
  const location = `${name}@${line}`;
  return `${file}::${location}#${shortHash}`;
}
|
|
212
|
+
|
|
213
|
+
// Counter behind CFG node ids; parsePhpFile sets it back to 0 for each file.
let _nid = 0;

/**
 * Hand out the next CFG node id: `pn1`, `pn2`, ...
 *
 * @returns {string} a fresh node id.
 */
function _nextId() {
  _nid += 1;
  return `pn${_nid}`;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Register `node` in the `nodes` table under a fresh id.
 * The node is given empty `succ` / `pred` arrays unless it already has them.
 *
 * @param {object} nodes - id -> node table, mutated in place.
 * @param {object} node - node to add, mutated in place.
 * @returns {string} the id the node was stored under.
 */
function _addNode(nodes, node) {
  const key = _nextId();
  node.succ = node.succ ? node.succ : [];
  node.pred = node.pred ? node.pred : [];
  nodes[key] = node;
  return key;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Add a CFG edge src -> dst: `dst` is appended to the source's `succ` list
 * and `src` to the destination's `pred` list, each at most once.
 * Does nothing when either id is missing from `nodes`.
 *
 * @param {object} nodes - id -> node table.
 * @param {string} src - id of the edge's source node.
 * @param {string} dst - id of the edge's destination node.
 */
function _linkNodes(nodes, src, dst) {
  const from = nodes[src];
  const to = nodes[dst];
  if (!(from && to)) return;

  const alreadySucc = from.succ.includes(dst);
  if (!alreadySucc) from.succ.push(dst);

  const alreadyPred = to.pred.includes(src);
  if (!alreadyPred) to.pred.push(src);
}
|
|
229
|
+
|
|
230
|
+
// Take an `if` statement apart by matching braces instead of by regex.
// A greedy regex capture of the then-body runs to the statement's last `}`,
// which folds `} else {` into the then-branch and loses the else branch.
// Returns { cond, thenBody, elseBody } or null when `s` is not a braced `if`
// that spans the whole statement. `elseif` / `else if` chains come back with
// the nested `if ...` text as elseBody, so the caller can lower it like any
// other block.
function _parseIfStmt(s) {
  const head = s.match(/^if\s*\((.+?)\)\s*\{/s);
  if (!head) return null;
  const thenBlock = _extractBody(s, head[0].length - 1);
  if (!thenBlock) return null;

  const cond = head[1];
  const thenBody = thenBlock.body;
  const rest = s.slice(thenBlock.end + 1).trim();
  if (!rest) return { cond, thenBody, elseBody: null };

  const chained = rest.match(/^(?:else\s*if|elseif)\b/);
  if (chained) return { cond, thenBody, elseBody: 'if' + rest.slice(chained[0].length) };

  const elseHead = rest.match(/^else\s*\{/);
  if (!elseHead) return null;
  const elseBlock = _extractBody(rest, elseHead[0].length - 1);
  if (!elseBlock || rest.slice(elseBlock.end + 1).trim()) return null;
  return { cond, thenBody, elseBody: elseBlock.body };
}

/**
 * Lower the statements of a block into CFG nodes chained after `prevId`.
 *
 *   if (c) {..} [else {..}]   -> 'if' node; both branches meet at a 'noop'
 *   if .. elseif .. else ..   -> the elseif is lowered as a nested `if` in
 *                                the else branch
 *   while (c) {..}            -> 'loop-header' with a back edge from the
 *                                body, plus a 'noop' exit
 *   foreach (e as [$k =>] $v) -> 'loop-header', then `$v = e`, then the body
 *                                (`&$v` by-reference values included)
 *   anything else             -> whatever _lowerStmt yields; statements it
 *                                returns null for add no node
 *
 * Line numbers are estimates: each statement advances the counter by the
 * number of newlines it contains plus one, and nested bodies start at
 * `line + 1`.
 *
 * @param {string} bodyText - source text of the block.
 * @param {object} nodes - id -> node table, mutated in place.
 * @param {string} prevId - id of the node the first statement follows.
 * @param {number} startLine - line assigned to the first statement.
 * @returns {string} id of the last node in the chain (`prevId` if none added).
 */
function _buildCfg(bodyText, nodes, prevId, startLine) {
  let prev = prevId;
  let line = startLine;

  for (const stmt of _splitStatements(bodyText)) {
    const s = stmt.trim();
    if (!s || s.startsWith('//') || s.startsWith('#')) { line++; continue; }
    const span = (s.match(/\n/g) || []).length + 1;

    const ifParts = _parseIfStmt(s);
    if (ifParts) {
      const ifNode = _addNode(nodes, { kind: 'if', cond: _lowerExpr(ifParts.cond), line });
      _linkNodes(nodes, prev, ifNode);
      const join = _addNode(nodes, { kind: 'noop', line });
      const thenTail = _buildCfg(ifParts.thenBody, nodes, ifNode, line + 1);
      _linkNodes(nodes, thenTail, join);
      if (ifParts.elseBody) {
        const elseTail = _buildCfg(ifParts.elseBody, nodes, ifNode, line + 1);
        _linkNodes(nodes, elseTail, join);
      } else {
        // No else branch: the condition may fall straight through.
        _linkNodes(nodes, ifNode, join);
      }
      prev = join;
      line += span;
      continue;
    }

    const whileMatch = s.match(/^while\s*\((.+?)\)\s*\{([\s\S]*)\}\s*$/s);
    if (whileMatch) {
      const header = _addNode(nodes, { kind: 'loop-header', line });
      _linkNodes(nodes, prev, header);
      const bodyTail = _buildCfg(whileMatch[2], nodes, header, line + 1);
      _linkNodes(nodes, bodyTail, header);
      const join = _addNode(nodes, { kind: 'noop', line });
      _linkNodes(nodes, header, join);
      prev = join;
      line += span;
      continue;
    }

    const foreachMatch = s.match(
      /^foreach\s*\((.+?)\s+as\s+(?:\$\w+\s*=>\s*)?(?:&\s*)?(\$\w+)\s*\)\s*\{([\s\S]*)\}\s*$/s);
    if (foreachMatch) {
      const header = _addNode(nodes, { kind: 'loop-header', line });
      _linkNodes(nodes, prev, header);
      const assignId = _addNode(nodes, {
        kind: 'assign', target: foreachMatch[2], source: _lowerExpr(foreachMatch[1]), line,
      });
      _linkNodes(nodes, header, assignId);
      const bodyTail = _buildCfg(foreachMatch[3], nodes, assignId, line + 1);
      _linkNodes(nodes, bodyTail, header);
      const join = _addNode(nodes, { kind: 'noop', line });
      _linkNodes(nodes, header, join);
      prev = join;
      line += span;
      continue;
    }

    const node = _lowerStmt(s, line);
    if (!node) { line++; continue; }
    const id = _addNode(nodes, node);
    _linkNodes(nodes, prev, id);
    prev = id;
    line += span;
  }
  return prev;
}
|
|
293
|
+
|
|
294
|
+
/**
 * Parse a PHP source file into the IR: one entry per named function or
 * method, each with its parameter names and a CFG of its body.
 *
 * Returns null when the input is not usable (missing file name, non-string
 * code, an extension other than .php / .phtml, more than 1,000,000
 * characters) or when no function is found.
 *
 * Line numbers: the regex match starts at the delimiter *before* the
 * declaration (a newline, `;`, `{` or `}`), possibly several blank lines
 * earlier. The function's line is therefore taken at the first letter of the
 * match, and the body's first statement line at the first non-blank
 * character after the opening brace.
 *
 * Scanning resumes after each extracted body, so functions nested inside
 * another function's body are not reported separately.
 *
 * @param {string} file - file path; also used in each function's qid.
 * @param {string} code - file contents.
 * @returns {{file: string, functions: object[], topLevel: null}|null}
 */
export function parsePhpFile(file, code) {
  if (!file || typeof code !== 'string') return null;
  if (!/\.(?:php|phtml)$/i.test(file)) return null;
  if (code.length > 1_000_000) return null;

  const functions = [];
  FUNC_RE.lastIndex = 0;
  _nid = 0;
  let m;
  while ((m = FUNC_RE.exec(code)) !== null) {
    const name = m[1];
    const params = (m[2] || '')
      .split(',')
      .map((p) => {
        const vm = p.match(/\$(\w+)/);
        return vm ? '$' + vm[1] : null;
      })
      .filter(Boolean);

    // The match always ends with the body's opening brace.
    const braceIdx = m.index + m[0].length - 1;
    const extracted = _extractBody(code, braceIdx);
    if (!extracted) continue;

    // First letter of the match = first modifier or the `function` keyword.
    const declOffset = Math.max(m[0].search(/[A-Za-z]/), 0);
    const startLine = _lineAt(code, m.index + declOffset);
    const firstCode = extracted.body.search(/\S/);
    const bodyLine = _lineAt(code, braceIdx + 1 + Math.max(firstCode, 0));

    const nodes = {};
    const entry = _addNode(nodes, { kind: 'entry', line: startLine });
    const exit = _addNode(nodes, { kind: 'exit', line: startLine });
    const tail = _buildCfg(extracted.body, nodes, entry, bodyLine);
    _linkNodes(nodes, tail, exit);
    functions.push({
      qid: _qid(file, name, startLine, extracted.body),
      name, line: startLine, params, file,
      cfg: { entry, exit, nodes },
    });
    FUNC_RE.lastIndex = extracted.end + 1;
  }
  return functions.length ? { file, functions, topLevel: null } : null;
}
|
|
@@ -166,8 +166,14 @@ def _lower_expr(node: ast.AST) -> dict[str, Any]:
|
|
|
166
166
|
if isinstance(node, ast.Starred):
|
|
167
167
|
return _lower_expr(node.value)
|
|
168
168
|
if isinstance(node, ast.NamedExpr):
|
|
169
|
-
# Walrus: `(x := expr)` —
|
|
170
|
-
return
|
|
169
|
+
# Walrus: `(x := expr)` — surface both the target binding and the value.
|
|
170
|
+
return {
|
|
171
|
+
"kind": "union",
|
|
172
|
+
"branches": [
|
|
173
|
+
{"kind": "ident", "name": node.target.id},
|
|
174
|
+
_lower_expr(node.value),
|
|
175
|
+
],
|
|
176
|
+
}
|
|
171
177
|
if isinstance(node, ast.UnaryOp):
|
|
172
178
|
return _lower_expr(node.operand)
|
|
173
179
|
if isinstance(node, ast.Await):
|
|
@@ -205,9 +211,9 @@ def _flatten_callee(node: ast.AST) -> Any:
|
|
|
205
211
|
return None
|
|
206
212
|
|
|
207
213
|
|
|
208
|
-
def _assign_target(node: ast.AST) ->
|
|
209
|
-
"""Return a single identifier
|
|
210
|
-
|
|
214
|
+
def _assign_target(node: ast.AST) -> "str | list[str] | None":
|
|
215
|
+
"""Return a single identifier, dotted-path string, or a list of targets
|
|
216
|
+
for destructuring assignments (Tuple/List unpacking)."""
|
|
211
217
|
if isinstance(node, ast.Name):
|
|
212
218
|
return node.id
|
|
213
219
|
if isinstance(node, ast.Attribute):
|
|
@@ -219,7 +225,40 @@ def _assign_target(node: ast.AST) -> Optional[str]:
|
|
|
219
225
|
if isinstance(cur, ast.Name):
|
|
220
226
|
parts.insert(0, cur.id)
|
|
221
227
|
return ".".join(parts)
|
|
222
|
-
|
|
228
|
+
if isinstance(node, (ast.Tuple, ast.List)):
|
|
229
|
+
targets = []
|
|
230
|
+
for elt in node.elts:
|
|
231
|
+
t = _assign_target(elt)
|
|
232
|
+
targets.append(t if isinstance(t, str) else None)
|
|
233
|
+
return targets
|
|
234
|
+
if isinstance(node, ast.Starred):
|
|
235
|
+
return _assign_target(node.value)
|
|
236
|
+
return None
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _lower_match_pattern(pattern: ast.AST, subject: dict) -> dict:
|
|
240
|
+
"""Lower a match-case pattern to an expression for the if-condition."""
|
|
241
|
+
if isinstance(pattern, ast.MatchValue):
|
|
242
|
+
return {"kind": "binary", "op": "Eq", "left": subject, "right": _lower_expr(pattern.value)}
|
|
243
|
+
if isinstance(pattern, ast.MatchSingleton):
|
|
244
|
+
return {"kind": "binary", "op": "Is", "left": subject, "right": {"kind": "literal", "value": pattern.value}}
|
|
245
|
+
if isinstance(pattern, ast.MatchAs):
|
|
246
|
+
if pattern.pattern is not None:
|
|
247
|
+
return _lower_match_pattern(pattern.pattern, subject)
|
|
248
|
+
return {"kind": "unknown"}
|
|
249
|
+
if isinstance(pattern, ast.MatchOr):
|
|
250
|
+
if pattern.patterns:
|
|
251
|
+
return _lower_match_pattern(pattern.patterns[0], subject)
|
|
252
|
+
return {"kind": "unknown"}
|
|
253
|
+
return {"kind": "unknown"}
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _match_pattern_capture(pattern: ast.AST) -> Optional[str]:
|
|
257
|
+
"""Extract the capture variable name from a match-case pattern, if any."""
|
|
258
|
+
if isinstance(pattern, ast.MatchAs):
|
|
259
|
+
return pattern.name
|
|
260
|
+
if isinstance(pattern, ast.MatchStar) and hasattr(pattern, "name"):
|
|
261
|
+
return pattern.name
|
|
223
262
|
return None
|
|
224
263
|
|
|
225
264
|
|
|
@@ -250,6 +289,28 @@ class CfgBuilder:
|
|
|
250
289
|
if src_id not in dn["pred"]:
|
|
251
290
|
dn["pred"].append(src_id)
|
|
252
291
|
|
|
292
|
+
@staticmethod
|
|
293
|
+
def _collect_walrus(node: ast.AST) -> list[ast.NamedExpr]:
|
|
294
|
+
"""Collect all NamedExpr (walrus) nodes from an expression tree."""
|
|
295
|
+
out: list[ast.NamedExpr] = []
|
|
296
|
+
for child in ast.walk(node):
|
|
297
|
+
if isinstance(child, ast.NamedExpr):
|
|
298
|
+
out.append(child)
|
|
299
|
+
return out
|
|
300
|
+
|
|
301
|
+
def _emit_walrus_assigns(self, expr_node: ast.AST, prev: str, line: int) -> str:
|
|
302
|
+
"""Emit assign nodes for any walrus operators in an expression."""
|
|
303
|
+
for w in self._collect_walrus(expr_node):
|
|
304
|
+
a = self._add({
|
|
305
|
+
"kind": "assign",
|
|
306
|
+
"target": w.target.id,
|
|
307
|
+
"source": _lower_expr(w.value),
|
|
308
|
+
"line": line,
|
|
309
|
+
})
|
|
310
|
+
self._link(prev, a)
|
|
311
|
+
prev = a
|
|
312
|
+
return prev
|
|
313
|
+
|
|
253
314
|
def lower(self, body: list[ast.stmt]) -> None:
|
|
254
315
|
tail = self.entry
|
|
255
316
|
tail = self._lower_block(body, tail)
|
|
@@ -274,6 +335,13 @@ class CfgBuilder:
|
|
|
274
335
|
+ [_lower_expr(kw.value) for kw in (stmt.value.keywords or [])],
|
|
275
336
|
"line": line,
|
|
276
337
|
})
|
|
338
|
+
elif isinstance(stmt.value, ast.NamedExpr):
|
|
339
|
+
cur = self._add({
|
|
340
|
+
"kind": "assign",
|
|
341
|
+
"target": stmt.value.target.id,
|
|
342
|
+
"source": _lower_expr(stmt.value.value),
|
|
343
|
+
"line": line,
|
|
344
|
+
})
|
|
277
345
|
else:
|
|
278
346
|
cur = self._add({"kind": "noop", "line": line})
|
|
279
347
|
self._link(prev, cur)
|
|
@@ -300,10 +368,46 @@ class CfgBuilder:
|
|
|
300
368
|
# ast.Assign: targets may be multi (a = b = c). We use the first.
|
|
301
369
|
tgt = _assign_target(stmt.targets[0]) if stmt.targets else None
|
|
302
370
|
src = _lower_expr(stmt.value)
|
|
371
|
+
if isinstance(tgt, list):
|
|
372
|
+
# Destructuring: a, b = expr → one assign per element.
|
|
373
|
+
rhs = src
|
|
374
|
+
tail = prev
|
|
375
|
+
for i, t in enumerate(tgt):
|
|
376
|
+
elem_src = {"kind": "member", "object": rhs, "prop": "[]"}
|
|
377
|
+
a = self._add({"kind": "assign", "target": t, "source": elem_src, "line": line})
|
|
378
|
+
self._link(tail, a)
|
|
379
|
+
tail = a
|
|
380
|
+
return tail
|
|
381
|
+
# Comprehension with filters at statement level: emit filter conditions.
|
|
382
|
+
rhs_node = stmt.value if isinstance(stmt, (ast.Assign, ast.AnnAssign)) else None
|
|
383
|
+
if rhs_node and isinstance(rhs_node, (ast.ListComp, ast.SetComp, ast.GeneratorExp, ast.DictComp)):
|
|
384
|
+
tail = prev
|
|
385
|
+
for gen in rhs_node.generators:
|
|
386
|
+
# Emit loop var assign from iter
|
|
387
|
+
loop_tgt = _assign_target(gen.target)
|
|
388
|
+
if loop_tgt and isinstance(loop_tgt, str):
|
|
389
|
+
la = self._add({
|
|
390
|
+
"kind": "assign", "target": loop_tgt,
|
|
391
|
+
"source": _lower_expr(gen.iter), "line": line,
|
|
392
|
+
})
|
|
393
|
+
self._link(tail, la)
|
|
394
|
+
tail = la
|
|
395
|
+
for if_clause in gen.ifs:
|
|
396
|
+
if_n = self._add({
|
|
397
|
+
"kind": "if",
|
|
398
|
+
"cond": _lower_expr(if_clause),
|
|
399
|
+
"line": line,
|
|
400
|
+
})
|
|
401
|
+
self._link(tail, if_n)
|
|
402
|
+
tail = if_n
|
|
403
|
+
cur = self._add({"kind": "assign", "target": tgt if isinstance(tgt, str) else None, "source": src, "line": line})
|
|
404
|
+
self._link(tail, cur)
|
|
405
|
+
return cur
|
|
303
406
|
cur = self._add({"kind": "assign", "target": tgt, "source": src, "line": line})
|
|
304
407
|
self._link(prev, cur)
|
|
305
408
|
return cur
|
|
306
409
|
if isinstance(stmt, ast.If):
|
|
410
|
+
prev = self._emit_walrus_assigns(stmt.test, prev, line)
|
|
307
411
|
if_node = self._add({
|
|
308
412
|
"kind": "if",
|
|
309
413
|
"cond": _lower_expr(stmt.test),
|
|
@@ -342,6 +446,7 @@ class CfgBuilder:
|
|
|
342
446
|
self._link(lh, join)
|
|
343
447
|
return join
|
|
344
448
|
if isinstance(stmt, (ast.While,)):
|
|
449
|
+
prev = self._emit_walrus_assigns(stmt.test, prev, line)
|
|
345
450
|
lh = self._add({"kind": "loop-header", "line": line})
|
|
346
451
|
self._link(prev, lh)
|
|
347
452
|
body_tail = self._lower_block(stmt.body, lh)
|
|
@@ -409,11 +514,32 @@ class CfgBuilder:
|
|
|
409
514
|
self._link(prev, cur)
|
|
410
515
|
return cur
|
|
411
516
|
if isinstance(stmt, ast.Match):
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
517
|
+
subject = _lower_expr(stmt.subject)
|
|
518
|
+
join = self._add({"kind": "noop", "line": line})
|
|
519
|
+
for case in stmt.cases:
|
|
520
|
+
case_line = getattr(case, "lineno", line) or line
|
|
521
|
+
pattern_expr = _lower_match_pattern(case.pattern, subject)
|
|
522
|
+
if_node = self._add({
|
|
523
|
+
"kind": "if",
|
|
524
|
+
"cond": pattern_expr,
|
|
525
|
+
"line": case_line,
|
|
526
|
+
})
|
|
527
|
+
self._link(prev, if_node)
|
|
528
|
+
# If pattern has a capture name, emit an assign for it.
|
|
529
|
+
capture = _match_pattern_capture(case.pattern)
|
|
530
|
+
if capture:
|
|
531
|
+
a = self._add({
|
|
532
|
+
"kind": "assign", "target": capture,
|
|
533
|
+
"source": subject, "line": case_line,
|
|
534
|
+
})
|
|
535
|
+
self._link(if_node, a)
|
|
536
|
+
body_prev = a
|
|
537
|
+
else:
|
|
538
|
+
body_prev = if_node
|
|
539
|
+
body_tail = self._lower_block(case.body, body_prev)
|
|
540
|
+
self._link(body_tail, join)
|
|
541
|
+
self._link(prev, join)
|
|
542
|
+
return join
|
|
417
543
|
# ast.Pass, ast.Break, ast.Continue, ast.Import, ast.ImportFrom,
|
|
418
544
|
# ast.Global, ast.Nonlocal, ast.Delete — all noops for taint.
|
|
419
545
|
cur = self._add({"kind": "noop", "line": line})
|