circle-ir 3.16.7 → 3.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analysis/html/html-attribute-security-pass.d.ts +22 -0
- package/dist/analysis/html/html-attribute-security-pass.js +269 -0
- package/dist/analysis/html/html-attribute-security-pass.js.map +1 -0
- package/dist/analysis/html/html-extractor.d.ts +69 -0
- package/dist/analysis/html/html-extractor.js +169 -0
- package/dist/analysis/html/html-extractor.js.map +1 -0
- package/dist/analysis/html/html-merge.d.ts +22 -0
- package/dist/analysis/html/html-merge.js +164 -0
- package/dist/analysis/html/html-merge.js.map +1 -0
- package/dist/analysis/html/index.d.ts +11 -0
- package/dist/analysis/html/index.js +12 -0
- package/dist/analysis/html/index.js.map +1 -0
- package/dist/analyzer.js +84 -0
- package/dist/analyzer.js.map +1 -1
- package/dist/browser/circle-ir.js +630 -0
- package/dist/core/circle-ir-core.cjs +2 -0
- package/dist/core/circle-ir-core.js +2 -0
- package/dist/core/parser.d.ts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/languages/index.d.ts +1 -1
- package/dist/languages/index.js +1 -1
- package/dist/languages/index.js.map +1 -1
- package/dist/languages/plugins/html.d.ts +33 -0
- package/dist/languages/plugins/html.js +84 -0
- package/dist/languages/plugins/html.js.map +1 -0
- package/dist/languages/plugins/index.d.ts +1 -0
- package/dist/languages/plugins/index.js +3 -0
- package/dist/languages/plugins/index.js.map +1 -1
- package/dist/languages/types.d.ts +1 -1
- package/dist/types/index.d.ts +1 -1
- package/dist/wasm/tree-sitter-html.wasm +0 -0
- package/package.json +3 -2
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML Attribute Security Pass
|
|
3
|
+
*
|
|
4
|
+
* Runs attribute-level security checks directly on the HTML AST.
|
|
5
|
+
* These rules do not require IR — they operate on element attributes.
|
|
6
|
+
*
|
|
7
|
+
* Rules:
|
|
8
|
+
* H1: html-missing-noopener (CWE-1022) — <a target="_blank"> without rel="noopener"
|
|
9
|
+
* H2: html-javascript-uri (CWE-79) — javascript: in href/src/action
|
|
10
|
+
* H3: html-missing-sandbox (CWE-1021) — <iframe> without sandbox
|
|
11
|
+
* H4: html-mixed-content (CWE-319) — http:// resource in script/link/img/iframe
|
|
12
|
+
* H5: html-missing-sri (CWE-353) — CDN script/stylesheet without integrity
|
|
13
|
+
* H6: html-autocomplete-sensitive (CWE-525) — sensitive input without autocomplete="off"
|
|
14
|
+
* H7: html-inline-event-handler (CWE-79) — inline on* handler (CSP incompatible)
|
|
15
|
+
* H8: html-form-action-javascript (CWE-79) — <form action="javascript:...">
|
|
16
|
+
*/
|
|
17
|
+
import type { Node as SyntaxNode } from 'web-tree-sitter';
|
|
18
|
+
import type { SastFinding } from '../../types/index.js';
|
|
19
|
+
/**
|
|
20
|
+
* Run all HTML attribute security checks.
|
|
21
|
+
*/
|
|
22
|
+
/**
 * Collects findings from the attribute rules H1–H8 for `rootNode` and every
 * node beneath it.
 *
 * @param rootNode - Root of the HTML syntax tree to inspect.
 * @param filePath - Path copied into each finding's `file` and `id`.
 * @returns Findings in traversal order; an empty array when nothing is flagged.
 */
export declare function runHtmlAttributeSecurityChecks(rootNode: SyntaxNode, filePath: string): SastFinding[];
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML Attribute Security Pass
|
|
3
|
+
*
|
|
4
|
+
* Runs attribute-level security checks directly on the HTML AST.
|
|
5
|
+
* These rules do not require IR — they operate on element attributes.
|
|
6
|
+
*
|
|
7
|
+
* Rules:
|
|
8
|
+
* H1: html-missing-noopener (CWE-1022) — <a target="_blank"> without rel="noopener"
|
|
9
|
+
* H2: html-javascript-uri (CWE-79) — javascript: in href/src/action
|
|
10
|
+
* H3: html-missing-sandbox (CWE-1021) — <iframe> without sandbox
|
|
11
|
+
* H4: html-mixed-content (CWE-319) — http:// resource in script/link/img/iframe
|
|
12
|
+
* H5: html-missing-sri (CWE-353) — CDN script/stylesheet without integrity
|
|
13
|
+
* H6: html-autocomplete-sensitive (CWE-525) — sensitive input without autocomplete="off"
|
|
14
|
+
* H7: html-inline-event-handler (CWE-79) — inline on* handler (CSP incompatible)
|
|
15
|
+
* H8: html-form-action-javascript (CWE-79) — <form action="javascript:...">
|
|
16
|
+
*/
|
|
17
|
+
import { getAttributeValue, getTagName, findChildByType } from './html-extractor.js';
|
|
18
|
+
/**
 * Entry point of the HTML attribute security pass.
 *
 * Visits `rootNode` and all of its descendants and gathers every finding the
 * attribute rules produce.
 *
 * @param rootNode - Root of the parsed HTML syntax tree.
 * @param filePath - Path recorded on each finding and embedded in finding ids.
 * @returns A fresh array of findings (empty when no rule fires).
 */
export function runHtmlAttributeSecurityChecks(rootNode, filePath) {
    const collected = [];
    walkForSecurityChecks(rootNode, filePath, collected);
    return collected;
}
|
|
26
|
+
/**
 * Pre-order traversal of the syntax tree.
 *
 * Runs the element checks on `node` when it is an element-like node, then
 * recurses into each non-null child in index order.
 *
 * @param node - Current syntax node.
 * @param filePath - Path recorded on each finding.
 * @param findings - Output array; findings are appended in place.
 */
function walkForSecurityChecks(node, filePath, findings) {
    // tree-sitter-html uses dedicated node types for <script> and <style>,
    // so they are listed next to the generic element types.
    switch (node.type) {
        case 'element':
        case 'self_closing_tag':
        case 'script_element':
        case 'style_element':
            checkElement(node, filePath, findings);
            break;
        default:
            break;
    }
    let index = 0;
    while (index < node.childCount) {
        const next = node.child(index);
        index += 1;
        if (!next) {
            continue;
        }
        walkForSecurityChecks(next, filePath, findings);
    }
}
|
|
39
|
+
/**
 * Applies every attribute rule relevant to one element-like node.
 *
 * Resolves the tag name and the node that carries the attributes (the node
 * itself for a `self_closing_tag`, otherwise its `start_tag` child), then runs
 * the rules in H1..H8 order so findings for one element are appended in that
 * order.
 *
 * @param node - Element-like syntax node to inspect.
 * @param filePath - Path recorded on each finding.
 * @param findings - Output array; findings are appended in place.
 */
function checkElement(node, filePath, findings) {
    const tagName = getTagName(node).toLowerCase();
    let tag = node;
    if (node.type !== 'self_closing_tag') {
        tag = findChildByType(node, 'start_tag');
    }
    // Without a start tag there are no attributes to inspect.
    if (!tag) {
        return;
    }
    const line = tag.startPosition.row + 1;
    // Keep snippets short: at most 120 characters of the tag text plus an ellipsis.
    const tagText = tag.text;
    let snippet = tagText;
    if (tagText.length > 120) {
        snippet = tagText.slice(0, 120) + '...';
    }
    const mixedContentTags = ['script', 'link', 'img', 'iframe', 'video', 'audio', 'source', 'object', 'embed'];

    // H1: Missing noopener on target="_blank" links
    if (tagName === 'a') {
        checkMissingNoopener(tag, filePath, line, snippet, findings);
    }

    // H2: javascript: URI in href/src/action (any element)
    checkJavascriptUri(tag, filePath, line, snippet, findings);

    // H3: Missing sandbox on iframe
    if (tagName === 'iframe') {
        checkMissingSandbox(tag, filePath, line, snippet, findings);
    }

    // H4: Mixed content (http:// resources)
    if (mixedContentTags.includes(tagName)) {
        checkMixedContent(tag, tagName, filePath, line, snippet, findings);
    }

    // H5: Missing SRI on CDN resources
    if (tagName === 'script' || tagName === 'link') {
        checkMissingSri(tag, tagName, filePath, line, snippet, findings);
    }

    // H6: Autocomplete on sensitive inputs
    if (tagName === 'input') {
        checkAutocompleteSensitive(tag, filePath, line, snippet, findings);
    }

    // H7: Inline event handlers (any element)
    checkInlineEventHandlers(tag, filePath, line, findings);

    // H8: Form action javascript:
    if (tagName === 'form') {
        checkFormActionJavascript(tag, filePath, line, snippet, findings);
    }
}
|
|
77
|
+
/**
 * H1: `<a target="_blank">` without `rel="noopener"` or `rel="noreferrer"`.
 *
 * The `target` keyword is matched ASCII case-insensitively (`_BLANK` opens a
 * new browsing context just like `_blank`), and `rel` is treated as a
 * whitespace-separated token list so only the exact tokens `noopener` /
 * `noreferrer` suppress the finding.
 *
 * @param tag - Start tag (or self-closing tag) node of the `<a>` element.
 * @param filePath - Path recorded on the finding.
 * @param line - 1-based line of the tag.
 * @param snippet - Possibly truncated tag text stored on the finding.
 * @param findings - Output array; a finding is appended when the rule fires.
 */
function checkMissingNoopener(tag, filePath, line, snippet, findings) {
    const target = getAttributeValue(tag, 'target');
    if (target === undefined || target.toLowerCase() !== '_blank')
        return;
    // A missing rel attribute yields [''] here, which contains neither token.
    const relTokens = (getAttributeValue(tag, 'rel') ?? '').toLowerCase().split(/\s+/);
    if (relTokens.includes('noopener') || relTokens.includes('noreferrer'))
        return;
    findings.push({
        id: `html-missing-noopener-${filePath}-${line}`,
        pass: 'html-missing-noopener',
        category: 'security',
        rule_id: 'html-missing-noopener',
        cwe: 'CWE-1022',
        severity: 'medium',
        level: 'warning',
        message: '<a target="_blank"> is missing rel="noopener" — may allow reverse tabnapping',
        file: filePath,
        line,
        snippet,
    });
}
|
|
99
|
+
/**
 * H2: `javascript:` URI in `href`, `src`, or `action`.
 *
 * Before testing the scheme, the value is normalized the way URL parsers do:
 * surrounding whitespace and leading C0 control characters are dropped and
 * embedded tab / newline characters are removed. Without this, values such as
 * `"java\tscript:alert(1)"` or `"\x01javascript:..."` slip past a plain
 * `trim().startsWith()` check.
 *
 * @param tag - Start tag (or self-closing tag) node to inspect.
 * @param filePath - Path recorded on the finding.
 * @param line - 1-based line of the tag.
 * @param snippet - Possibly truncated tag text stored on the finding.
 * @param findings - Output array; one finding is appended per offending attribute.
 */
function checkJavascriptUri(tag, filePath, line, snippet, findings) {
    for (const attr of ['href', 'src', 'action']) {
        const value = getAttributeValue(tag, attr);
        if (!value)
            continue;
        const normalized = value
            .trim()
            .replace(/^[\x00-\x20]+/, '') // leading C0 control or space
            .replace(/[\t\n\r]/g, '') // tab/newline anywhere in the value
            .toLowerCase();
        if (!normalized.startsWith('javascript:'))
            continue;
        findings.push({
            id: `html-javascript-uri-${filePath}-${line}-${attr}`,
            pass: 'html-javascript-uri',
            category: 'security',
            rule_id: 'html-javascript-uri',
            cwe: 'CWE-79',
            severity: 'high',
            level: 'error',
            message: `${attr}="javascript:..." is an XSS vector — use event listeners instead`,
            file: filePath,
            line,
            snippet,
        });
    }
}
|
|
120
|
+
/**
 * H3: `<iframe>` without a `sandbox` attribute.
 *
 * Any present `sandbox` attribute — including a bare or empty one — satisfies
 * the rule; only a completely absent attribute is reported.
 *
 * @param tag - Start tag (or self-closing tag) node of the `<iframe>`.
 * @param filePath - Path recorded on the finding.
 * @param line - 1-based line of the tag.
 * @param snippet - Possibly truncated tag text stored on the finding.
 * @param findings - Output array; a finding is appended when the rule fires.
 */
function checkMissingSandbox(tag, filePath, line, snippet, findings) {
    // getAttributeValue returns undefined only when the attribute is absent.
    const hasSandbox = getAttributeValue(tag, 'sandbox') !== undefined;
    if (!hasSandbox) {
        const ruleId = 'html-missing-sandbox';
        findings.push({
            id: `${ruleId}-${filePath}-${line}`,
            pass: ruleId,
            category: 'security',
            rule_id: ruleId,
            cwe: 'CWE-1021',
            severity: 'medium',
            level: 'warning',
            message: '<iframe> without sandbox attribute — embedded content has full privileges',
            file: filePath,
            line,
            snippet,
        });
    }
}
|
|
139
|
+
/**
 * H4: resource loaded over plain HTTP (mixed content).
 *
 * The URL attribute depends on the element: `href` for `<link>`, `data` for
 * `<object>` (which has no `src` attribute), and `src` for everything else.
 * The scheme is compared case-insensitively after trimming surrounding
 * whitespace, so `HTTP://…` and `" http://…"` are reported as well.
 *
 * @param tag - Start tag (or self-closing tag) node to inspect.
 * @param tagName - Lower-cased element name.
 * @param filePath - Path recorded on the finding.
 * @param line - 1-based line of the tag.
 * @param snippet - Possibly truncated tag text stored on the finding.
 * @param findings - Output array; a finding is appended when the rule fires.
 */
function checkMixedContent(tag, tagName, filePath, line, snippet, findings) {
    let attrName = 'src';
    if (tagName === 'link') {
        attrName = 'href';
    }
    else if (tagName === 'object') {
        attrName = 'data';
    }
    const value = getAttributeValue(tag, attrName);
    if (!value || !value.trim().toLowerCase().startsWith('http://'))
        return;
    findings.push({
        id: `html-mixed-content-${filePath}-${line}`,
        pass: 'html-mixed-content',
        category: 'security',
        rule_id: 'html-mixed-content',
        cwe: 'CWE-319',
        severity: 'medium',
        level: 'warning',
        message: `Loading resource over HTTP (${attrName}="${truncate(value, 60)}") — use HTTPS to prevent MITM`,
        file: filePath,
        line,
        snippet,
    });
}
|
|
159
|
+
/**
 * H5: external script or stylesheet without an `integrity` (SRI) attribute.
 *
 * A resource counts as external when its URL starts with `http://`,
 * `https://` or `//`; the comparison is case-insensitive and ignores
 * surrounding whitespace. For `<link>`, `rel` is parsed as a
 * whitespace-separated token list, so `rel="preload stylesheet"` and
 * `rel="alternate stylesheet"` are recognised as stylesheets.
 *
 * @param tag - Start tag (or self-closing tag) node to inspect.
 * @param tagName - Lower-cased element name (`script` or `link`).
 * @param filePath - Path recorded on the finding.
 * @param line - 1-based line of the tag.
 * @param snippet - Possibly truncated tag text stored on the finding.
 * @param findings - Output array; a finding is appended when the rule fires.
 */
function checkMissingSri(tag, tagName, filePath, line, snippet, findings) {
    // <script> points at its resource via src, <link> via href.
    const rawUrl = getAttributeValue(tag, tagName === 'script' ? 'src' : 'href');
    if (!rawUrl)
        return;
    const url = rawUrl.trim().toLowerCase();
    const isExternal = url.startsWith('http://') || url.startsWith('https://') || url.startsWith('//');
    if (!isExternal)
        return;
    // For <link>, only stylesheets are in scope.
    if (tagName === 'link') {
        const relTokens = (getAttributeValue(tag, 'rel') ?? '').toLowerCase().split(/\s+/);
        if (!relTokens.includes('stylesheet'))
            return;
    }
    // An empty integrity value offers no protection, so it is treated as missing.
    const integrity = getAttributeValue(tag, 'integrity');
    if (integrity)
        return;
    findings.push({
        id: `html-missing-sri-${filePath}-${line}`,
        pass: 'html-missing-sri',
        category: 'security',
        rule_id: 'html-missing-sri',
        cwe: 'CWE-353',
        severity: 'medium',
        level: 'warning',
        message: `External ${tagName === 'script' ? 'script' : 'stylesheet'} without integrity attribute — vulnerable to CDN compromise`,
        file: filePath,
        line,
        snippet,
    });
}
|
|
194
|
+
/**
 * H6: sensitive input without `autocomplete="off"` (or `"new-password"`).
 *
 * An input is considered sensitive when its `type` is `password` or when its
 * lower-cased `name` contains one of the known keywords as a separate word.
 * Any character other than `a-z` / `0-9` — including `_` and `-` — separates
 * words, so `user_ssn` and `cc-cvv` match; a plain `\b` boundary would miss
 * the underscore forms because `_` is a regex word character.
 *
 * @param tag - Start tag (or self-closing tag) node of the `<input>`.
 * @param filePath - Path recorded on the finding.
 * @param line - 1-based line of the tag.
 * @param snippet - Possibly truncated tag text stored on the finding.
 * @param findings - Output array; a finding is appended when the rule fires.
 */
function checkAutocompleteSensitive(tag, filePath, line, snippet, findings) {
    const type = getAttributeValue(tag, 'type')?.toLowerCase();
    const name = getAttributeValue(tag, 'name')?.toLowerCase() ?? '';
    const sensitiveName = /(?:^|[^a-z0-9])(?:ssn|social.?security|credit.?card|card.?number|cvv|cvc|ccv)(?:$|[^a-z0-9])/;
    const isSensitive = type === 'password' || sensitiveName.test(name);
    if (!isSensitive)
        return;
    const autocomplete = getAttributeValue(tag, 'autocomplete')?.toLowerCase();
    if (autocomplete === 'off' || autocomplete === 'new-password')
        return;
    findings.push({
        id: `html-autocomplete-sensitive-${filePath}-${line}`,
        pass: 'html-autocomplete-sensitive',
        category: 'security',
        rule_id: 'html-autocomplete-sensitive',
        cwe: 'CWE-525',
        severity: 'low',
        level: 'note',
        message: 'Sensitive input field without autocomplete="off" — browser may cache sensitive data',
        file: filePath,
        line,
        snippet,
    });
}
|
|
219
|
+
/**
 * H7: inline event handler attributes (`on*`).
 *
 * Emits one finding per attribute whose lower-cased name starts with `on` and
 * has at least one further character. Each finding reports the attribute's own
 * line and uses the attribute text as its snippet.
 *
 * @param tag - Start tag (or self-closing tag) node to inspect.
 * @param filePath - Path recorded on the finding.
 * @param line - Line of the tag; unused here because the attribute line is reported instead.
 * @param findings - Output array; findings are appended in attribute order.
 */
function checkInlineEventHandlers(tag, filePath, line, findings) {
    const ruleId = 'html-inline-event-handler';
    for (let idx = 0; idx < tag.childCount; idx += 1) {
        const attribute = tag.child(idx);
        if (attribute?.type !== 'attribute') {
            continue;
        }
        const nameNode = findChildByType(attribute, 'attribute_name');
        if (!nameNode) {
            continue;
        }
        const attrName = nameNode.text.toLowerCase();
        // A bare "on" is not an event handler.
        if (attrName.length <= 2 || !attrName.startsWith('on')) {
            continue;
        }
        const attrLine = attribute.startPosition.row + 1;
        findings.push({
            id: `${ruleId}-${filePath}-${attrLine}-${attrName}`,
            pass: ruleId,
            category: 'security',
            rule_id: ruleId,
            cwe: 'CWE-79',
            severity: 'low',
            level: 'note',
            message: `Inline ${attrName} handler — incompatible with strict Content Security Policy; use addEventListener() instead`,
            file: filePath,
            line: attrLine,
            snippet: attribute.text,
        });
    }
}
|
|
247
|
+
/**
 * H8: `<form action="javascript:...">`.
 *
 * The action value is normalized the way URL parsers do before the scheme is
 * tested: surrounding whitespace and leading C0 control characters are dropped
 * and embedded tab / newline characters are removed, so obfuscated forms such
 * as `"java\nscript:..."` are still reported.
 *
 * @param tag - Start tag node of the `<form>`.
 * @param filePath - Path recorded on the finding.
 * @param line - 1-based line of the tag.
 * @param snippet - Possibly truncated tag text stored on the finding.
 * @param findings - Output array; a finding is appended when the rule fires.
 */
function checkFormActionJavascript(tag, filePath, line, snippet, findings) {
    const action = getAttributeValue(tag, 'action');
    if (!action)
        return;
    const normalized = action
        .trim()
        .replace(/^[\x00-\x20]+/, '') // leading C0 control or space
        .replace(/[\t\n\r]/g, '') // tab/newline anywhere in the value
        .toLowerCase();
    if (!normalized.startsWith('javascript:'))
        return;
    findings.push({
        id: `html-form-action-javascript-${filePath}-${line}`,
        pass: 'html-form-action-javascript',
        category: 'security',
        rule_id: 'html-form-action-javascript',
        cwe: 'CWE-79',
        severity: 'high',
        level: 'error',
        message: '<form action="javascript:..."> is an XSS vector — use proper form submission',
        file: filePath,
        line,
        snippet,
    });
}
|
|
266
|
+
/**
 * Shortens `s` to at most `maxLen` characters, appending `...` when anything
 * was cut off. Strings that already fit are returned unchanged.
 *
 * @param s - Text to shorten.
 * @param maxLen - Maximum number of characters kept from `s`.
 * @returns The original string, or its first `maxLen` characters plus `...`.
 */
function truncate(s, maxLen) {
    if (s.length > maxLen) {
        return `${s.slice(0, maxLen)}...`;
    }
    return s;
}
|
|
269
|
+
//# sourceMappingURL=html-attribute-security-pass.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-attribute-security-pass.js","sourceRoot":"","sources":["../../../src/analysis/html/html-attribute-security-pass.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAIH,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAErF;;GAEG;AACH,MAAM,UAAU,8BAA8B,CAC5C,QAAoB,EACpB,QAAgB;IAEhB,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,qBAAqB,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACpD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,qBAAqB,CAC5B,IAAgB,EAChB,QAAgB,EAChB,QAAuB;IAEvB,oEAAoE;IACpE,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,CAAC,IAAI,KAAK,kBAAkB;QAC3D,IAAI,CAAC,IAAI,KAAK,gBAAgB,IAAI,IAAI,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QACpE,YAAY,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACzC,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,KAAK,EAAE,CAAC;YACV,qBAAqB,CAAC,KAAK,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,YAAY,CACnB,IAAgB,EAChB,QAAgB,EAChB,QAAuB;IAEvB,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;IAC/C,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,KAAK,kBAAkB;QAC1C,CAAC,CAAC,IAAI;QACN,CAAC,CAAC,eAAe,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IACvC,IAAI,CAAC,GAAG;QAAE,OAAO;IAEjB,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC;IACvC,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC;IAElF,gDAAgD;IAChD,IAAI,OAAO,KAAK,GAAG,EAAE,CAAC;QACpB,oBAAoB,CAAC,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;IAC/D,CAAC;IAED,yCAAyC;IACzC,kBAAkB,CAAC,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;IAE3D,gCAAgC;IAChC,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;QACzB,mBAAmB,CAAC,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;IAC9D,CAAC;IAED,wCAAwC;IACxC,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QACzG,iBAAiB,CAAC,GAAG,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,O
AAO,EAAE,QAAQ,CAAC,CAAC;IACrE,CAAC;IAED,mCAAmC;IACnC,IAAI,OAAO,KAAK,QAAQ,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;QAC/C,eAAe,CAAC,GAAG,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;IACnE,CAAC;IAED,uCAAuC;IACvC,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;QACxB,0BAA0B,CAAC,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;IACrE,CAAC;IAED,4BAA4B;IAC5B,wBAAwB,CAAC,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;IAExD,8BAA8B;IAC9B,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;QACvB,yBAAyB,CAAC,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;IACpE,CAAC;AACH,CAAC;AAED,yEAAyE;AACzE,SAAS,oBAAoB,CAC3B,GAAe,EACf,QAAgB,EAChB,IAAY,EACZ,OAAe,EACf,QAAuB;IAEvB,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAChD,IAAI,MAAM,KAAK,QAAQ;QAAE,OAAO;IAEhC,MAAM,GAAG,GAAG,iBAAiB,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAC/D,IAAI,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,YAAY,CAAC;QAAE,OAAO;IAEnE,QAAQ,CAAC,IAAI,CAAC;QACZ,EAAE,EAAE,yBAAyB,QAAQ,IAAI,IAAI,EAAE;QAC/C,IAAI,EAAE,uBAAuB;QAC7B,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE,uBAAuB;QAChC,GAAG,EAAE,UAAU;QACf,QAAQ,EAAE,QAAQ;QAClB,KAAK,EAAE,SAAS;QAChB,OAAO,EAAE,8EAA8E;QACvF,IAAI,EAAE,QAAQ;QACd,IAAI;QACJ,OAAO;KACR,CAAC,CAAC;AACL,CAAC;AAED,kDAAkD;AAClD,SAAS,kBAAkB,CACzB,GAAe,EACf,QAAgB,EAChB,IAAY,EACZ,OAAe,EACf,QAAuB;IAEvB,KAAK,MAAM,IAAI,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,QAAQ,CAAC,EAAE,CAAC;QAC7C,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAC3C,IAAI,KAAK,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;YAClE,QAAQ,CAAC,IAAI,CAAC;gBACZ,EAAE,EAAE,uBAAuB,QAAQ,IAAI,IAAI,IAAI,IAAI,EAAE;gBACrD,IAAI,EAAE,qBAAqB;gBAC3B,QAAQ,EAAE,UAAU;gBACpB,OAAO,EAAE,qBAAqB;gBAC9B,GAAG,EAAE,QAAQ;gBACb,QAAQ,EAAE,MAAM;gBAChB,KAAK,EAAE,OAAO;gBACd,OAAO,EAAE,GAAG,IAAI,kEAAkE;gBAClF,IAAI,EAAE,QAAQ;gBACd,IAAI;gBACJ,OAAO;aACR,CAAC,CAAC;QACL,CAAC;IACH,CAAC;AACH,CAAC;AAED,6CAA6C;AAC7C,SAAS,mBAAmB,CAC1B,GAAe,EACf,QAAgB,EAChB,IAAY,EACZ,OAAe,EACf,QAAuB;IAEvB,MAAM,OAAO,GAAG,iBAAiB,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;IAClD,IAAI,OAAO,KAAK,SAAS;QAAE,OAAO,CAAC
,sCAAsC;IAEzE,QAAQ,CAAC,IAAI,CAAC;QACZ,EAAE,EAAE,wBAAwB,QAAQ,IAAI,IAAI,EAAE;QAC9C,IAAI,EAAE,sBAAsB;QAC5B,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE,sBAAsB;QAC/B,GAAG,EAAE,UAAU;QACf,QAAQ,EAAE,QAAQ;QAClB,KAAK,EAAE,SAAS;QAChB,OAAO,EAAE,2EAA2E;QACpF,IAAI,EAAE,QAAQ;QACd,IAAI;QACJ,OAAO;KACR,CAAC,CAAC;AACL,CAAC;AAED,+CAA+C;AAC/C,SAAS,iBAAiB,CACxB,GAAe,EACf,OAAe,EACf,QAAgB,EAChB,IAAY,EACZ,OAAe,EACf,QAAuB;IAEvB,MAAM,QAAQ,GAAG,OAAO,KAAK,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC;IACrD,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAC/C,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO;IAEnD,QAAQ,CAAC,IAAI,CAAC;QACZ,EAAE,EAAE,sBAAsB,QAAQ,IAAI,IAAI,EAAE;QAC5C,IAAI,EAAE,oBAAoB;QAC1B,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE,oBAAoB;QAC7B,GAAG,EAAE,SAAS;QACd,QAAQ,EAAE,QAAQ;QAClB,KAAK,EAAE,SAAS;QAChB,OAAO,EAAE,+BAA+B,QAAQ,KAAK,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,gCAAgC;QACxG,IAAI,EAAE,QAAQ;QACd,IAAI;QACJ,OAAO;KACR,CAAC,CAAC;AACL,CAAC;AAED,iEAAiE;AACjE,SAAS,eAAe,CACtB,GAAe,EACf,OAAe,EACf,QAAgB,EAChB,IAAY,EACZ,OAAe,EACf,QAAuB;IAEvB,8BAA8B;IAC9B,MAAM,GAAG,GAAG,OAAO,KAAK,QAAQ;QAC9B,CAAC,CAAC,iBAAiB,CAAC,GAAG,EAAE,KAAK,CAAC;QAC/B,CAAC,CAAC,iBAAiB,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACnC,IAAI,CAAC,GAAG;QAAE,OAAO;IAEjB,uEAAuE;IACvE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO;IAE/F,oCAAoC;IACpC,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,iBAAiB,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,WAAW,EAAE,CAAC;QACzD,IAAI,GAAG,KAAK,YAAY;YAAE,OAAO;IACnC,CAAC;IAED,gCAAgC;IAChC,MAAM,SAAS,GAAG,iBAAiB,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;IACtD,IAAI,SAAS;QAAE,OAAO;IAEtB,QAAQ,CAAC,IAAI,CAAC;QACZ,EAAE,EAAE,oBAAoB,QAAQ,IAAI,IAAI,EAAE;QAC1C,IAAI,EAAE,kBAAkB;QACxB,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE,kBAAkB;QAC3B,GAAG,EAAE,SAAS;QACd,QAAQ,EAAE,QAAQ;QAClB,KAAK,EAAE,SAAS;QAChB,OAAO,EAAE,YAAY,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY,6DAA6D;QAChI,IAAI,EAAE,QAAQ;QACd,IAAI;QACJ,OAAO;KACR,CAAC,CAAC;AACL,CAAC;AAED,qDAAqD;AACrD,SAAS,0BAA0B,CACjC,
GAAe,EACf,QAAgB,EAChB,IAAY,EACZ,OAAe,EACf,QAAuB;IAEvB,MAAM,IAAI,GAAG,iBAAiB,CAAC,GAAG,EAAE,MAAM,CAAC,EAAE,WAAW,EAAE,CAAC;IAC3D,MAAM,IAAI,GAAG,iBAAiB,CAAC,GAAG,EAAE,MAAM,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAEjE,MAAM,WAAW,GACf,IAAI,KAAK,UAAU;QACnB,kEAAkE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEhF,IAAI,CAAC,WAAW;QAAE,OAAO;IAEzB,MAAM,YAAY,GAAG,iBAAiB,CAAC,GAAG,EAAE,cAAc,CAAC,EAAE,WAAW,EAAE,CAAC;IAC3E,IAAI,YAAY,KAAK,KAAK,IAAI,YAAY,KAAK,cAAc;QAAE,OAAO;IAEtE,QAAQ,CAAC,IAAI,CAAC;QACZ,EAAE,EAAE,+BAA+B,QAAQ,IAAI,IAAI,EAAE;QACrD,IAAI,EAAE,6BAA6B;QACnC,QAAQ,EAAE,UAAU;QACpB,OAAO,EAAE,6BAA6B;QACtC,GAAG,EAAE,SAAS;QACd,QAAQ,EAAE,KAAK;QACf,KAAK,EAAE,MAAM;QACb,OAAO,EAAE,qFAAqF;QAC9F,IAAI,EAAE,QAAQ;QACd,IAAI;QACJ,OAAO;KACR,CAAC,CAAC;AACL,CAAC;AAED,gDAAgD;AAChD,SAAS,wBAAwB,CAC/B,GAAe,EACf,QAAgB,EAChB,IAAY,EACZ,QAAuB;IAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW;YAAE,SAAS;QAEnD,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QAC1D,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;QAC7C,IAAI,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrD,MAAM,QAAQ,GAAG,KAAK,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC;YAC7C,QAAQ,CAAC,IAAI,CAAC;gBACZ,EAAE,EAAE,6BAA6B,QAAQ,IAAI,QAAQ,IAAI,QAAQ,EAAE;gBACnE,IAAI,EAAE,2BAA2B;gBACjC,QAAQ,EAAE,UAAU;gBACpB,OAAO,EAAE,2BAA2B;gBACpC,GAAG,EAAE,QAAQ;gBACb,QAAQ,EAAE,KAAK;gBACf,KAAK,EAAE,MAAM;gBACb,OAAO,EAAE,UAAU,QAAQ,6FAA6F;gBACxH,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,KAAK,CAAC,IAAI;aACpB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;AACH,CAAC;AAED,yCAAyC;AACzC,SAAS,yBAAyB,CAChC,GAAe,EACf,QAAgB,EAChB,IAAY,EACZ,OAAe,EACf,QAAuB;IAEvB,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO;IAE9E,QAAQ,CAAC,IAAI,CAAC;QACZ,EAAE,EAAE,+BAA+B,QAAQ,IAAI,IAAI,EAAE;QACrD,IAAI,EAAE,6BAA6B;QACnC,QAA
Q,EAAE,UAAU;QACpB,OAAO,EAAE,6BAA6B;QACtC,GAAG,EAAE,QAAQ;QACb,QAAQ,EAAE,MAAM;QAChB,KAAK,EAAE,OAAO;QACd,OAAO,EAAE,8EAA8E;QACvF,IAAI,EAAE,QAAQ;QACd,IAAI;QACJ,OAAO;KACR,CAAC,CAAC;AACL,CAAC;AAED,SAAS,QAAQ,CAAC,CAAS,EAAE,MAAc;IACzC,OAAO,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;AAC5D,CAAC"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML Content Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts JavaScript content from HTML files for analysis:
|
|
5
|
+
* - Inline <script> blocks with line offset tracking
|
|
6
|
+
* - Inline event handler attributes (onclick, onerror, etc.)
|
|
7
|
+
* - External script src references (informational)
|
|
8
|
+
*/
|
|
9
|
+
import type { Node as SyntaxNode } from 'web-tree-sitter';
|
|
10
|
+
/**
 * One `<script>` element found in an HTML file.
 */
export interface HtmlScriptBlock {
    /** Distinguishes inline code from a `src=` reference to an external file. */
    kind: 'inline' | 'external-src';
    /** Raw JS source between the `<script>` tags. */
    code: string;
    /** 1-based line offset of the first line of JS within the HTML file. */
    lineOffset: number;
    /** URL from the `src` attribute when `kind === 'external-src'` (informational only). */
    src?: string;
    /** Value of the script's type/lang attribute, when present. */
    scriptType?: string;
}
|
|
25
|
+
/**
 * One inline event handler attribute (`on*`) found on an HTML element.
 */
export interface HtmlEventHandler {
    /** Attribute name, e.g. "onclick", "onerror". */
    eventName: string;
    /** Tag name of the element carrying the attribute, e.g. "img", "div". */
    element: string;
    /** JS expression taken from the attribute value. */
    code: string;
    /** 1-based line of the attribute in the HTML file. */
    line: number;
}
|
|
38
|
+
/**
 * Everything `extractHtmlContent` pulls out of one HTML syntax tree.
 */
export interface HtmlExtractionResult {
    /** `<script>` elements, inline or external, in traversal order. */
    scriptBlocks: HtmlScriptBlock[];
    /** Inline `on*` handler attributes, in traversal order. */
    eventHandlers: HtmlEventHandler[];
}
|
|
45
|
+
/**
|
|
46
|
+
* Extract JavaScript content from an HTML AST.
|
|
47
|
+
*
|
|
48
|
+
* Walks the tree-sitter-html AST to find:
|
|
49
|
+
* 1. <script> elements — extracts inline code or notes external src
|
|
50
|
+
* 2. Elements with on* event handler attributes
|
|
51
|
+
*/
|
|
52
|
+
/**
 * Walks `rootNode` and its descendants, collecting `<script>` blocks and
 * inline event handler attributes.
 *
 * @param rootNode - Root of the HTML syntax tree.
 * @returns The collected script blocks and event handlers.
 */
export declare function extractHtmlContent(rootNode: SyntaxNode): HtmlExtractionResult;
|
|
53
|
+
/**
|
|
54
|
+
* Get the tag name from an element or self-closing tag node.
|
|
55
|
+
*/
|
|
56
|
+
/**
 * Reads the `tag_name` text from a `self_closing_tag` node, or from the
 * `start_tag` child of any other node.
 *
 * @returns The tag name as written in the source, or `'unknown'` when no tag name is found.
 */
declare function getTagName(node: SyntaxNode): string;
|
|
57
|
+
/**
|
|
58
|
+
* Get the value of a named attribute from a start_tag or self_closing_tag.
|
|
59
|
+
*/
|
|
60
|
+
/**
 * Looks up the first attribute of `tag` whose lower-cased name equals `name`.
 *
 * @param tag - Tag node carrying the attributes; `null` yields `undefined`.
 * @param name - Attribute name to find; compared against the lower-cased attribute name.
 * @returns The value with surrounding quotes removed, `''` for an attribute
 *          without a value, or `undefined` when the attribute is absent.
 */
declare function getAttributeValue(tag: SyntaxNode | null, name: string): string | undefined;
|
|
61
|
+
/**
|
|
62
|
+
* Find the first child node of a given type.
|
|
63
|
+
*/
|
|
64
|
+
/**
 * Scans the direct children of `node` in index order.
 *
 * @returns The first child whose `type` equals `type`, or `null` when none matches.
 */
declare function findChildByType(node: SyntaxNode, type: string): SyntaxNode | null;
|
|
65
|
+
/**
|
|
66
|
+
* Strip surrounding quotes from an attribute value.
|
|
67
|
+
*/
|
|
68
|
+
/**
 * Removes one pair of matching double or single quotes that wraps `text`.
 *
 * @returns `text` without its first and last character when both are the same
 *          quote character; otherwise `text` unchanged.
 */
declare function stripQuotes(text: string): string;
|
|
69
|
+
export { getAttributeValue, getTagName, findChildByType, stripQuotes };
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML Content Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts JavaScript content from HTML files for analysis:
|
|
5
|
+
* - Inline <script> blocks with line offset tracking
|
|
6
|
+
* - Inline event handler attributes (onclick, onerror, etc.)
|
|
7
|
+
* - External script src references (informational)
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Known inline event handler attribute names (lower-case).
 *
 * Only attributes listed here have their JavaScript extracted for analysis.
 * The list covers mouse, pointer, keyboard, focus, form, drag-and-drop,
 * clipboard, touch, animation/transition, media and window lifecycle
 * handlers, so that handler code is not silently skipped.
 */
const EVENT_HANDLER_ATTRS = new Set([
    // Mouse / wheel
    'onclick', 'ondblclick', 'onauxclick', 'onmousedown', 'onmouseup', 'onmouseover',
    'onmousemove', 'onmouseout', 'onmouseenter', 'onmouseleave', 'onwheel',
    // Pointer
    'onpointerdown', 'onpointerup', 'onpointermove', 'onpointerover', 'onpointerout',
    'onpointerenter', 'onpointerleave', 'onpointercancel',
    // Keyboard
    'onkeydown', 'onkeyup', 'onkeypress',
    // Focus / form
    'onfocus', 'onblur', 'onfocusin', 'onfocusout',
    'onchange', 'oninput', 'onbeforeinput', 'oninvalid', 'onselect', 'onsubmit', 'onreset',
    // Loading / document
    'onload', 'onerror', 'onabort', 'onresize', 'onscroll', 'ontoggle',
    // Window lifecycle (valid on <body>)
    'onunload', 'onbeforeunload', 'onhashchange', 'onpopstate', 'onpageshow', 'onpagehide',
    'onmessage', 'onstorage',
    // Context menu, drag-and-drop, clipboard
    'oncontextmenu', 'ondrag', 'ondragstart', 'ondragend', 'ondragenter', 'ondragleave',
    'ondragover', 'ondrop', 'oncopy', 'onpaste', 'oncut',
    // Touch
    'ontouchstart', 'ontouchend', 'ontouchmove', 'ontouchcancel',
    // Animation / transition
    'onanimationend', 'onanimationstart', 'onanimationiteration', 'ontransitionend',
    // Media
    'onloadstart', 'onloadeddata', 'onloadedmetadata', 'oncanplay', 'onplay', 'onpause', 'onended',
]);
|
|
20
|
+
/**
 * Extract JavaScript content from an HTML AST.
 *
 * Traverses the tree-sitter-html tree starting at `rootNode` and gathers:
 *   1. `<script>` elements — inline code, or a note of the external `src`
 *   2. inline `on*` event handler attributes on elements
 *
 * @param rootNode - Root of the HTML syntax tree.
 * @returns An object holding the collected `scriptBlocks` and `eventHandlers`.
 */
export function extractHtmlContent(rootNode) {
    const result = { scriptBlocks: [], eventHandlers: [] };
    walkNode(rootNode, result.scriptBlocks, result.eventHandlers);
    return result;
}
|
|
33
|
+
/**
 * Pre-order traversal used by `extractHtmlContent`.
 *
 * `script_element` nodes contribute a script block; `element` and
 * `self_closing_tag` nodes are scanned for event handler attributes. Every
 * non-null child is then visited in index order.
 *
 * @param node - Current syntax node.
 * @param scriptBlocks - Output array for script blocks.
 * @param eventHandlers - Output array for event handlers.
 */
function walkNode(node, scriptBlocks, eventHandlers) {
    switch (node.type) {
        case 'script_element':
            extractScriptBlock(node, scriptBlocks);
            break;
        case 'element':
        case 'self_closing_tag':
            // Event handler attributes can sit on any element or self-closing tag.
            extractEventHandlers(node, eventHandlers);
            break;
        default:
            break;
    }
    let index = 0;
    while (index < node.childCount) {
        const next = node.child(index);
        index += 1;
        if (next) {
            walkNode(next, scriptBlocks, eventHandlers);
        }
    }
}
|
|
48
|
+
/**
 * Records one `<script>` element.
 *
 * A non-empty `src` attribute produces an `external-src` entry with empty
 * code. Otherwise a `raw_text` child with non-blank content produces an
 * `inline` entry. Scripts with neither are ignored.
 *
 * @param scriptNode - The `script_element` node.
 * @param scriptBlocks - Output array; at most one entry is appended.
 */
function extractScriptBlock(scriptNode, scriptBlocks) {
    const startTag = scriptNode.childForFieldName('start_tag') ?? findChildByType(scriptNode, 'start_tag');
    // Evaluated lazily so attributes are only read when an entry is emitted.
    const readScriptType = () => getAttributeValue(startTag, 'type') ?? getAttributeValue(startTag, 'lang');

    // External script: the src attribute wins over any inline content.
    const src = getAttributeValue(startTag, 'src');
    if (src) {
        scriptBlocks.push({
            code: '',
            lineOffset: scriptNode.startPosition.row + 1,
            kind: 'external-src',
            src,
            scriptType: readScriptType(),
        });
        return;
    }

    // Inline script: content lives in the raw_text child.
    const rawText = findChildByType(scriptNode, 'raw_text');
    if (!rawText || !rawText.text.trim()) {
        return;
    }
    scriptBlocks.push({
        code: rawText.text,
        lineOffset: rawText.startPosition.row + 1,
        kind: 'inline',
        scriptType: readScriptType(),
    });
}
|
|
76
|
+
/**
 * Collects the inline event handler attributes of one element.
 *
 * Only attributes whose lower-cased name is in `EVENT_HANDLER_ATTRS` and that
 * carry a non-empty value (after quote stripping) are recorded.
 *
 * @param elementNode - An `element` or `self_closing_tag` node.
 * @param eventHandlers - Output array; entries are appended in attribute order.
 */
function extractEventHandlers(elementNode, eventHandlers) {
    const element = getTagName(elementNode);
    // Attributes hang off the start tag, or off the self-closing tag itself.
    let tag = elementNode;
    if (elementNode.type !== 'self_closing_tag') {
        tag = findChildByType(elementNode, 'start_tag');
    }
    if (!tag) {
        return;
    }
    for (let idx = 0; idx < tag.childCount; idx += 1) {
        const attribute = tag.child(idx);
        if (attribute?.type !== 'attribute') {
            continue;
        }
        const nameNode = findChildByType(attribute, 'attribute_name');
        if (!nameNode) {
            continue;
        }
        const eventName = nameNode.text.toLowerCase();
        if (!EVENT_HANDLER_ATTRS.has(eventName)) {
            continue;
        }
        const valueNode = findChildByType(attribute, 'quoted_attribute_value') ?? findChildByType(attribute, 'attribute_value');
        if (!valueNode) {
            continue;
        }
        const code = stripQuotes(valueNode.text);
        if (!code) {
            continue;
        }
        eventHandlers.push({
            code,
            eventName,
            line: attribute.startPosition.row + 1,
            element,
        });
    }
}
|
|
113
|
+
/**
 * Resolves the tag name of an element-like node.
 *
 * A `self_closing_tag` holds its `tag_name` directly; every other node is
 * expected to hold it inside a `start_tag` child.
 *
 * @param node - Element-like syntax node.
 * @returns The tag name text, or `'unknown'` when it cannot be located.
 */
function getTagName(node) {
    const holder = node.type === 'self_closing_tag'
        ? node
        : findChildByType(node, 'start_tag');
    if (!holder) {
        return 'unknown';
    }
    return findChildByType(holder, 'tag_name')?.text ?? 'unknown';
}
|
|
128
|
+
/**
 * Get the value of a named attribute from a start_tag or self_closing_tag.
 *
 * Attribute names are matched case-insensitively: both the attribute name in
 * the tree and the requested `name` are lower-cased before comparison, so a
 * lookup for 'Type' finds `type="module"` as well as `TYPE="module"`.
 * The first matching attribute wins.
 *
 * @param tag - The tag node to inspect; may be null or undefined.
 * @param name - The attribute name to look up, in any letter case.
 * @returns The attribute value with surrounding quotes stripped, '' when the
 *   attribute is present without a value, or undefined when `tag` is missing
 *   or has no such attribute.
 */
function getAttributeValue(tag, name) {
    if (!tag)
        return undefined;
    // Normalise once, outside the loop. Non-string names are left untouched so
    // they simply never match, as before.
    const wanted = typeof name === 'string' ? name.toLowerCase() : name;
    for (let i = 0; i < tag.childCount; i++) {
        const child = tag.child(i);
        if (!child || child.type !== 'attribute')
            continue;
        const nameNode = findChildByType(child, 'attribute_name');
        if (nameNode?.text.toLowerCase() === wanted) {
            // Prefer the quoted form of the value; fall back to a bare value.
            const valueNode = findChildByType(child, 'quoted_attribute_value') ?? findChildByType(child, 'attribute_value');
            return valueNode ? stripQuotes(valueNode.text) : '';
        }
    }
    return undefined;
}
|
|
146
|
+
/**
 * Return the first direct child of `node` whose `type` equals `type`.
 *
 * @param node - Node exposing `childCount` and `child(index)`.
 * @param type - Node type to look for.
 * @returns The first matching child, or null when there is none.
 */
function findChildByType(node, type) {
    let index = 0;
    while (index < node.childCount) {
        const candidate = node.child(index);
        index += 1;
        if (candidate?.type === type) {
            return candidate;
        }
    }
    return null;
}
|
|
157
|
+
/**
 * Remove one pair of matching surrounding quotes from an attribute value.
 *
 * @param text - Raw attribute value text, possibly wrapped in "..." or '...'.
 * @returns The text without its first and last character when it both starts
 *   and ends with the same quote character; otherwise the text unchanged.
 */
function stripQuotes(text) {
    for (const quote of ['"', "'"]) {
        if (text.startsWith(quote) && text.endsWith(quote)) {
            return text.slice(1, -1);
        }
    }
    return text;
}
|
|
167
|
+
// Export the node helper functions for use by the security pass
|
|
168
|
+
export { getAttributeValue, getTagName, findChildByType, stripQuotes };
|
|
169
|
+
//# sourceMappingURL=html-extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-extractor.js","sourceRoot":"","sources":["../../../src/analysis/html/html-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AA0CH,iDAAiD;AACjD,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,SAAS,EAAE,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,aAAa;IAClE,aAAa,EAAE,YAAY,EAAE,cAAc,EAAE,cAAc;IAC3D,WAAW,EAAE,SAAS,EAAE,YAAY;IACpC,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,UAAU,EAAE,SAAS;IACjE,QAAQ,EAAE,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,UAAU;IACtD,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO;IACjE,cAAc,EAAE,YAAY,EAAE,aAAa;IAC3C,gBAAgB,EAAE,kBAAkB,EAAE,iBAAiB;CACxD,CAAC,CAAC;AAEH;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAAC,QAAoB;IACrD,MAAM,YAAY,GAAsB,EAAE,CAAC;IAC3C,MAAM,aAAa,GAAuB,EAAE,CAAC;IAE7C,QAAQ,CAAC,QAAQ,EAAE,YAAY,EAAE,aAAa,CAAC,CAAC;IAEhD,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,CAAC;AACzC,CAAC;AAED,SAAS,QAAQ,CACf,IAAgB,EAChB,YAA+B,EAC/B,aAAiC;IAEjC,IAAI,IAAI,CAAC,IAAI,KAAK,gBAAgB,EAAE,CAAC;QACnC,kBAAkB,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;IACzC,CAAC;IAED,wEAAwE;IACxE,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;QAChE,oBAAoB,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;IAC5C,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,KAAK,EAAE,YAAY,EAAE,aAAa,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CACzB,UAAsB,EACtB,YAA+B;IAE/B,MAAM,QAAQ,GAAG,UAAU,CAAC,iBAAiB,CAAC,WAAW,CAAC,IAAI,eAAe,CAAC,UAAU,EAAE,WAAW,CAAC,CAAC;IAEvG,4CAA4C;IAC5C,MAAM,GAAG,GAAG,iBAAiB,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IAC/C,IAAI,GAAG,EAAE,CAAC;QACR,YAAY,CAAC,IAAI,CAAC;YAChB,IAAI,EAAE,EAAE;YACR,UAAU,EAAE,UAAU,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC;YAC5C,IAAI,EAAE,cAAc;YACpB,GAAG;YACH,UAAU,EAAE,iBAAiB,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,iBAAiB,CAAC,QAAQ,EAAE,MAAM,CAAC;SACvF,CAAC,CAAC;QACH,OAAO;IACT,CAAC;IAED,kDAAkD;IAClD,MAAM,OAAO,GAAG,eAAe,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;IACxD,IAAI,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QACnC,YAAY,CAAC,IAAI,CAAC;YAChB,IAAI,
EAAE,OAAO,CAAC,IAAI;YAClB,UAAU,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC;YACzC,IAAI,EAAE,QAAQ;YACd,UAAU,EAAE,iBAAiB,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,iBAAiB,CAAC,QAAQ,EAAE,MAAM,CAAC;SACvF,CAAC,CAAC;IACL,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAC3B,WAAuB,EACvB,aAAiC;IAEjC,6BAA6B;IAC7B,MAAM,OAAO,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC;IAExC,sDAAsD;IACtD,MAAM,GAAG,GAAG,WAAW,CAAC,IAAI,KAAK,kBAAkB;QACjD,CAAC,CAAC,WAAW;QACb,CAAC,CAAC,eAAe,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC;IAC9C,IAAI,CAAC,GAAG;QAAE,OAAO;IAEjB,gDAAgD;IAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW;YAAE,SAAS;QAEnD,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QAC1D,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;QAC7C,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEjD,MAAM,SAAS,GAAG,eAAe,CAAC,KAAK,EAAE,wBAAwB,CAAC,IAAI,eAAe,CAAC,KAAK,EAAE,iBAAiB,CAAC,CAAC;QAChH,IAAI,CAAC,SAAS;YAAE,SAAS;QAEzB,MAAM,IAAI,GAAG,WAAW,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACzC,IAAI,IAAI,EAAE,CAAC;YACT,aAAa,CAAC,IAAI,CAAC;gBACjB,IAAI;gBACJ,SAAS,EAAE,QAAQ;gBACnB,IAAI,EAAE,KAAK,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC;gBACjC,OAAO,EAAE,OAAO;aACjB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,IAAgB;IAClC,IAAI,IAAI,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;QACrC,MAAM,WAAW,GAAG,eAAe,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;QACtD,OAAO,WAAW,EAAE,IAAI,IAAI,SAAS,CAAC;IACxC,CAAC;IAED,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IACpD,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,WAAW,GAAG,eAAe,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAC1D,OAAO,WAAW,EAAE,IAAI,IAAI,SAAS,CAAC;IACxC,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,GAAsB,EAAE,IAAY;IAC7D,IAAI,CAAC,GAAG;QAAE,OAAO,SAAS,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,WAAW;YAAE,SAAS;QAEn
D,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QAC1D,IAAI,QAAQ,EAAE,IAAI,CAAC,WAAW,EAAE,KAAK,IAAI,EAAE,CAAC;YAC1C,MAAM,SAAS,GAAG,eAAe,CAAC,KAAK,EAAE,wBAAwB,CAAC,IAAI,eAAe,CAAC,KAAK,EAAE,iBAAiB,CAAC,CAAC;YAChH,OAAO,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACtD,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,IAAgB,EAAE,IAAY;IACrD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,KAAK,EAAE,IAAI,KAAK,IAAI;YAAE,OAAO,KAAK,CAAC;IACzC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,IAAY;IAC/B,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC5C,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;QACjD,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAC3B,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,sEAAsE;AACtE,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,eAAe,EAAE,WAAW,EAAE,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML Result Merger
|
|
3
|
+
*
|
|
4
|
+
* Merges multiple CircleIR results (one per script block) and attribute-level
|
|
5
|
+
* security findings into a single CircleIR for the HTML file.
|
|
6
|
+
*
|
|
7
|
+
* Key operation: adjusts all line numbers by (lineOffset - 1) for each script block
|
|
8
|
+
* and normalizes file paths to the HTML file path.
|
|
9
|
+
*/
|
|
10
|
+
import type { CircleIR, Meta, SastFinding } from '../../types/index.js';
|
|
11
|
+
/** Analysis result for one script block, paired with its line offset in the HTML file. */
export interface ScriptBlockResult {
    /** CircleIR produced for this script block. */
    ir: CircleIR;
    /** Line offset of the block; merged line numbers are adjusted by (lineOffset - 1). */
    lineOffset: number;
}
|
|
15
|
+
/**
 * Combine the per-script-block results and the attribute-level findings of
 * one HTML file into a single CircleIR.
 *
 * @param htmlMeta - Meta for the HTML file itself.
 * @param scriptResults - CircleIR result of each script block, with its line offset.
 * @param attributeFindings - SastFindings from the attribute-level security checks.
 * @returns The merged CircleIR for the HTML file.
 */
export declare function mergeHtmlResults(
    htmlMeta: Meta,
    scriptResults: ScriptBlockResult[],
    attributeFindings: SastFinding[]
): CircleIR;
|