@clear-capabilities/agentic-security-scanner 0.78.0 → 0.80.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/.agentic-security/findings.json +16 -16
- package/bin/.agentic-security/last-scan.json +16 -16
- package/bin/.agentic-security/last-scan.json.sig +1 -1
- package/bin/.agentic-security/scan-history.json +51 -0
- package/bin/.agentic-security/streak.json +5 -5
- package/bin/agentic-security.js +22 -7
- package/dist/178.index.js +1 -1
- package/dist/333.index.js +283 -0
- package/dist/384.index.js +1 -1
- package/dist/476.index.js +5 -5
- package/dist/637.index.js +1 -1
- package/dist/700.index.js +138 -0
- package/dist/718.index.js +53 -0
- package/dist/838.index.js +1 -1
- package/dist/985.index.js +95 -1
- package/dist/agentic-security.mjs +83 -83
- package/dist/agentic-security.mjs.sha256 +1 -1
- package/package.json +6 -4
- package/src/.agentic-security/findings.json +29799 -7803
- package/src/.agentic-security/last-scan.json +29799 -7803
- package/src/.agentic-security/last-scan.json.sig +1 -1
- package/src/.agentic-security/scan-history.json +5119 -2611
- package/src/.agentic-security/streak.json +6 -6
- package/src/dataflow/.agentic-security/findings.json +2879 -308
- package/src/dataflow/.agentic-security/last-scan.json +2879 -308
- package/src/dataflow/.agentic-security/last-scan.json.sig +1 -1
- package/src/dataflow/.agentic-security/scan-history.json +68 -520
- package/src/dataflow/.agentic-security/streak.json +6 -7
- package/src/dataflow/cross-service-taint.js +201 -0
- package/src/dataflow/engine.js +52 -8
- package/src/dataflow/formal-verify.js +204 -0
- package/src/dataflow/ifds-precise.js +222 -0
- package/src/dataflow/k2-summary-cache.js +153 -0
- package/src/dataflow/lib-taint-summaries.js +198 -0
- package/src/dataflow/privacy-taint.js +205 -0
- package/src/dataflow/smt-feasibility.js +189 -0
- package/src/engine.js +890 -132
- package/src/integrations/index.js +2 -1
- package/src/ir/.agentic-security/findings.json +240 -6
- package/src/ir/.agentic-security/last-scan.json +240 -6
- package/src/ir/.agentic-security/last-scan.json.sig +1 -1
- package/src/ir/.agentic-security/scan-history.json +16 -594
- package/src/ir/.agentic-security/streak.json +8 -9
- package/src/ir/callgraph.js +27 -7
- package/src/ir/cpp-preprocessor.js +142 -0
- package/src/ir/csharp-ir.js +604 -0
- package/src/ir/universal-ir.js +403 -0
- package/src/llm-validator/index.js +7 -5
- package/src/mcp/.agentic-security/findings.json +8632 -0
- package/src/mcp/.agentic-security/last-scan.json +8632 -0
- package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
- package/src/mcp/.agentic-security/scan-history.json +143 -0
- package/src/mcp/.agentic-security/streak.json +20 -0
- package/src/mcp/audit.js +5 -0
- package/src/mcp/tools.js +90 -1
- package/src/posture/.agentic-security/findings.json +16809 -4367
- package/src/posture/.agentic-security/last-scan.json +16809 -4367
- package/src/posture/.agentic-security/last-scan.json.sig +1 -1
- package/src/posture/.agentic-security/scan-history.json +6689 -177
- package/src/posture/.agentic-security/streak.json +8 -7
- package/src/posture/api-contract.js +193 -0
- package/src/posture/attack-taxonomy.js +227 -0
- package/src/posture/calibration-drift.js +2 -1
- package/src/posture/calibration.js +3 -2
- package/src/posture/compliance-policy.js +218 -0
- package/src/posture/composite-risk.js +122 -0
- package/src/posture/csharp-analysis.js +330 -0
- package/src/posture/exploit-bundle.js +210 -0
- package/src/posture/federated-learning.js +172 -0
- package/src/posture/fix-history.js +8 -2
- package/src/posture/license-attributions.js +94 -0
- package/src/posture/license-graph.js +238 -0
- package/src/posture/pqc-migration-plan.js +158 -0
- package/src/posture/profile.js +4 -5
- package/src/posture/reachability-filter.js +33 -2
- package/src/posture/realtime-cve-monitor.js +214 -0
- package/src/posture/rule-overrides.js +2 -3
- package/src/posture/rule-pack-signing.js +2 -3
- package/src/posture/rule-synthesis.js +5 -6
- package/src/posture/runtime-correlation.js +174 -0
- package/src/posture/sbom-diff.js +171 -0
- package/src/posture/sca-policy.js +235 -0
- package/src/posture/sca-upgrade.js +259 -0
- package/src/posture/security-trend.js +4 -7
- package/src/posture/state-dir.js +124 -0
- package/src/posture/streak.js +3 -0
- package/src/posture/suppressions.js +5 -8
- package/src/posture/threat-model-auto.js +268 -0
- package/src/posture/triage-learning.js +170 -0
- package/src/posture/triage.js +29 -6
- package/src/posture/validator-metrics.js +3 -6
- package/src/sast/.agentic-security/findings.json +996 -32
- package/src/sast/.agentic-security/last-scan.json +996 -32
- package/src/sast/.agentic-security/last-scan.json.sig +1 -1
- package/src/sast/.agentic-security/scan-history.json +565 -32
- package/src/sast/.agentic-security/streak.json +10 -8
- package/src/sast/_secret-entropy.js +145 -0
- package/src/sast/cloud-iam.js +312 -0
- package/src/sast/cpp.js +138 -4
- package/src/sast/crypto-protocol.js +388 -0
- package/src/sast/csharp-tokenizer.js +392 -0
- package/src/sast/csharp.js +924 -138
- package/src/sast/dapp-frontend.js +200 -0
- package/src/sast/db-taint.js +24 -0
- package/src/sast/k8s-admission.js +271 -0
- package/src/sast/llm-app.js +272 -0
- package/src/sast/ml-supply-chain.js +259 -0
- package/src/sast/mobile.js +224 -0
- package/src/sast/post-quantum-crypto.js +348 -0
- package/src/sast/rust.js +26 -0
- package/src/sast/web3-advanced.js +375 -0
- package/src/sca/.agentic-security/findings.json +6044 -171
- package/src/sca/.agentic-security/last-scan.json +6044 -171
- package/src/sca/.agentic-security/last-scan.json.sig +1 -1
- package/src/sca/.agentic-security/scan-history.json +83 -6
- package/src/sca/.agentic-security/streak.json +9 -9
- package/src/sca/CLAUDE.md +161 -0
- package/src/sca/binary-metadata.js +146 -0
- package/src/sca/py-package-functions.js +118 -0
- package/src/sca/sigstore-verify.js +215 -0
- package/src/sca/vendor-detect.js +53 -0
- package/src/report/.agentic-security/findings.json +0 -80
- package/src/report/.agentic-security/last-scan.json +0 -80
- package/src/report/.agentic-security/last-scan.json.sig +0 -1
- package/src/report/.agentic-security/scan-history.json +0 -35
- package/src/report/.agentic-security/streak.json +0 -22
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
// C# tokenizer — produces a token stream that respects C# string/comment
|
|
2
|
+
// semantics, so downstream detectors don't have to dodge string literals and
|
|
3
|
+
// commented-out code the way regex-on-raw-text does.
|
|
4
|
+
//
|
|
5
|
+
// Token shape:
|
|
6
|
+
// { kind, value, line, col, start, end }
|
|
7
|
+
//
|
|
8
|
+
// kinds:
|
|
9
|
+
// 'ident' | identifier (letters, digits, _; can begin with '@' to escape keyword)
|
|
10
|
+
// 'kw' | keyword (subset; only those that affect detector logic)
|
|
11
|
+
// 'num' | numeric literal (int, float, hex, binary, with suffix)
|
|
12
|
+
// 'string' | regular "..." string literal (escape sequences are kept raw, e.g. \n stays backslash + n)
|
|
13
|
+
// 'verbatim' | @"..." verbatim literal (no escape processing)
|
|
14
|
+
// 'interp' | $"..." or $@"..." interpolated string; value is the
|
|
15
|
+
// raw source between the quotes; the embedded {expr} holes
|
|
16
|
+
// are kept as nested tokens via the token's parts field (a list of
|
|
17
|
+
// {kind:'lit',text} | {kind:'expr',tokens} entries).
|
|
18
|
+
// 'char' | '.' character literal
|
|
19
|
+
// 'op' | punctuation / operator (=, ==, +, +=, &&, ?., null!, etc.)
|
|
20
|
+
// 'attr-open' | '[' that begins an attribute (heuristic — see below)
|
|
21
|
+
// 'attr-close' | ']' that closes an attribute
|
|
22
|
+
// 'lbrace' | '{'
|
|
23
|
+
// 'rbrace' | '}'
|
|
24
|
+
// 'lparen' | '('
|
|
25
|
+
// 'rparen' | ')'
|
|
26
|
+
// 'lbracket' | '[' (non-attribute)
|
|
27
|
+
// 'rbracket' | ']' (non-attribute)
|
|
28
|
+
// 'comma' | ','
|
|
29
|
+
// 'semi' | ';'
|
|
30
|
+
// 'dot' | '.'
|
|
31
|
+
// 'arrow' | '=>'
|
|
32
|
+
// 'eof' | end of input
|
|
33
|
+
//
|
|
34
|
+
// We do NOT produce 'comment' tokens — comments are stripped entirely. Line
|
|
35
|
+
// numbers stay correct because we count newlines we consume.
|
|
36
|
+
//
|
|
37
|
+
// The tokenizer is conservative: when it can't determine intent (e.g. is
|
|
38
|
+
// `[` an attribute start or an indexer?), it labels with the heuristic that
|
|
39
|
+
// best serves security detectors and never panics.
|
|
40
|
+
|
|
41
|
+
// Words that readIdentifier labels as 'kw' instead of 'ident'. Anything not
// listed here is reported as a plain identifier.
const KEYWORDS = new Set(
  `
  abstract as async await base bool break byte case
  catch char checked class const continue decimal default
  delegate do double else enum event explicit extern
  false finally fixed float for foreach goto if
  implicit in int interface internal is lock long
  namespace new null object operator out override params
  partial private protected public readonly ref return
  sbyte sealed short sizeof stackalloc static string
  struct switch this throw true try typeof uint
  ulong unchecked unsafe ushort using var virtual void
  volatile when where while yield
  `.trim().split(/\s+/),
);
|
|
54
|
+
|
|
55
|
+
// Operator table for readOperator, which returns the FIRST entry that matches
// at the cursor. Entries must therefore stay ordered longest-first: an entry
// that is a prefix of a later one would shadow it.
// '::' is the namespace alias qualifier (`global::System`); without it the
// qualifier was emitted as two separate ':' operators.
const OPS = [
  '<<=', '>>=', '??=', '...',
  '==', '!=', '<=', '>=', '&&', '||', '<<', '>>',
  '++', '--', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '=>', '??', '?.', '?[', '->', '::',
  '+', '-', '*', '/', '%', '&', '|', '^', '~', '!', '=', '<', '>', '?', ':',
];
|
|
62
|
+
|
|
63
|
+
// Character-class predicates used by the tokenizer. Each takes a single
// UTF-16 code unit (what Reader.peek returns) or undefined at end of input.
//
// The regexes are anchored and guarded by a typeof check because
// RegExp.prototype.test coerces its argument: test(undefined) would match the
// letters of the string "undefined" and report true at end of input.
//
// Identifiers accept Unicode letters as well as ASCII, so names such as
// `café` are not split mid-word. Every character accepted as a start is also
// accepted as a continuation; readIdentifier relies on that to make progress.
const IDENT_START_RE = /^[\p{L}\p{Nl}_]$/u;
const IDENT_PART_RE = /^[\p{L}\p{Nl}\p{Nd}\p{Mn}\p{Mc}\p{Pc}]$/u;

// True when `c` may begin an identifier (letter, letter-number or underscore).
function isIdentStart(c) { return typeof c === 'string' && IDENT_START_RE.test(c); }

// True when `c` may continue an identifier (start chars plus digits,
// combining marks and connector punctuation).
function isIdentCont(c) { return typeof c === 'string' && IDENT_PART_RE.test(c); }

// True for the ASCII digits 0-9 only; false for undefined.
function isDigit(c) { return c >= '0' && c <= '9'; }
|
|
66
|
+
|
|
67
|
+
// Cursor over the source text. Tracks the absolute offset (`i`) together with
// a 1-based line/column pair that is updated on every consumed character.
class Reader {
  constructor(src) {
    this.src = src;
    this.i = 0;
    this.line = 1;
    this.col = 1;
  }

  // True once the cursor has moved past the last character.
  eof() {
    return this.i >= this.src.length;
  }

  // Character `off` positions ahead of the cursor; undefined when out of range.
  peek(off = 0) {
    return this.src[this.i + off];
  }

  // Whether the text at the cursor begins with `s`.
  startsWith(s) {
    return this.src.startsWith(s, this.i);
  }

  // Consume up to `n` characters, stopping early at end of input. A consumed
  // '\n' moves to column 1 of the next line; anything else bumps the column.
  advance(n = 1) {
    const stop = Math.min(this.i + n, this.src.length);
    while (this.i < stop) {
      const consumed = this.src[this.i];
      this.i += 1;
      if (consumed === '\n') {
        this.line += 1;
        this.col = 1;
      } else {
        this.col += 1;
      }
    }
  }

  // Raw substring of the source between two absolute offsets.
  slice(from, to) {
    return this.src.slice(from, to);
  }
}
|
|
85
|
+
|
|
86
|
+
// Advance `r` past everything that produces no token: spaces, tabs, CR/LF,
// `//` line comments, `/* */` block comments and preprocessor directive
// lines. Returns with the cursor on the first significant character, or at
// end of input. An unterminated block comment runs to end of input.
//
// A '#' starts a directive when it is the first non-whitespace character on
// its line. The original check required column 1 exactly, so an indented
// `#if` / `#region` / `#error can't ...` was tokenized as code (and a stray
// quote inside it could open a bogus literal).
function skipWhitespaceAndComments(r) {
  const skipToLineEnd = () => {
    while (!r.eof() && r.peek() !== '\n') r.advance();
  };

  // True when only spaces/tabs/CR sit between the start of the line and the cursor.
  const isFirstOnLine = () => {
    if (r.col === 1) return true;
    const lineStart = r.src.lastIndexOf('\n', r.i - 1) + 1;
    for (let k = lineStart; k < r.i; k++) {
      const ch = r.src[k];
      if (ch !== ' ' && ch !== '\t' && ch !== '\r') return false;
    }
    return true;
  };

  while (!r.eof()) {
    const c = r.peek();
    if (c === ' ' || c === '\t' || c === '\n' || c === '\r') { r.advance(); continue; }
    // Line comment: the newline itself is left for the whitespace branch.
    if (c === '/' && r.peek(1) === '/') {
      skipToLineEnd();
      continue;
    }
    // Block comment
    if (c === '/' && r.peek(1) === '*') {
      r.advance(2);
      while (!r.eof() && !(r.peek() === '*' && r.peek(1) === '/')) r.advance();
      if (!r.eof()) r.advance(2);
      continue;
    }
    // Preprocessor directives — skip the entire line. We don't model them.
    if (c === '#' && isFirstOnLine()) {
      skipToLineEnd();
      continue;
    }
    return;
  }
}
|
|
110
|
+
|
|
111
|
+
// Read a "..." (kind 'string') or @"..." (kind 'verbatim') literal. The
// cursor must be on the opening double quote; for verbatim literals the
// caller has already consumed the '@'.
//
// Returns { kind, value, line, col, start, end }. `value` is the text between
// the quotes: regular strings keep escape sequences raw (`\n` stays as two
// characters), verbatim strings have `""` collapsed to a single `"`.
//
// Malformed input never throws: a regular string stops at the first newline
// (left unconsumed) and any literal stops at end of input.
function readStringLiteral(r, startLine, startCol, kind = 'string') {
  const isVerbatim = kind === 'verbatim';
  // Fold the already-consumed '@' back into the span so `start` agrees with
  // startLine/startCol, as it does for every other token kind.
  const start = isVerbatim && r.i > 0 && r.src[r.i - 1] === '@' ? r.i - 1 : r.i;
  r.advance(); // opening "
  const value = [];
  while (!r.eof()) {
    const c = r.peek();
    if (isVerbatim) {
      if (c === '"' && r.peek(1) === '"') { value.push('"'); r.advance(2); continue; }
      if (c === '"') { r.advance(); break; }
      value.push(c); r.advance();
      continue;
    }
    if (c === '\\') {
      const next = r.peek(1) || '';
      if (next === 'u' || next === 'U') {
        // \uXXXX / \UXXXXXXXX: take only the hex digits actually present so a
        // short escape such as "\u12" cannot swallow the closing quote.
        const maxDigits = next === 'u' ? 4 : 8;
        let digits = 0;
        while (digits < maxDigits && /^[0-9A-Fa-f]$/.test(r.peek(2 + digits) || '')) digits++;
        value.push(r.slice(r.i, r.i + 2 + digits));
        r.advance(2 + digits);
      } else {
        // Any other escape is two characters: the backslash and what follows.
        value.push(c + next);
        r.advance(2);
      }
      continue;
    }
    if (c === '"') { r.advance(); break; }
    if (c === '\n') { /* malformed; stop */ break; }
    value.push(c); r.advance();
  }
  return { kind, value: value.join(''), line: startLine, col: startCol, start, end: r.i };
}
|
|
143
|
+
|
|
144
|
+
// Read an interpolated string ($"..." or $@"...") with the cursor on '$'.
//
// Returns null — with the reader restored to where it started — when the '$'
// is not followed by a string opener. Otherwise returns
//   { kind: 'interp', value, verbatim, parts, line, col, start, end }
// where `value` is the raw source between the quotes and `parts` alternates
//   { kind: 'lit',  text }          literal text ({{ }} and, for verbatim, ""
//                                   are collapsed to a single character)
//   { kind: 'expr', text, tokens }  an interpolation hole, tokenized in
//                                   embedded mode.
//
// Changes over the previous version: `}}` is collapsed like `{{`, the
// documented `value` field is populated, and char literals inside a hole
// (`'}'`, `'{'`, `'"'`) no longer disturb brace matching.
function readInterpolatedString(r, startLine, startCol) {
  const start = r.i;
  r.advance(); // $
  const isVerbatim = r.peek() === '@';
  if (isVerbatim) r.advance();
  if (r.peek() !== '"') {
    // not actually an interpolated string after all; back up to '$' as op
    r.i = start; r.line = startLine; r.col = startCol;
    return null;
  }
  r.advance(); // opening "
  const contentStart = r.i;
  let contentEnd = -1; // offset of the closing quote, once seen
  const parts = [];
  let buf = [];
  const flushLit = () => {
    if (buf.length) { parts.push({ kind: 'lit', text: buf.join('') }); buf = []; }
  };

  // Length of a well-formed char literal starting at the cursor ('x', '\n',
  // '\x1F', '\u0041', ...), or 0 when the quote does not open one. Kept
  // strict so quotes inside format specifiers (`{d:hh':'mm}`) at worst fall
  // back to being skipped one character at a time.
  const charLiteralLength = () => {
    const first = r.peek(1);
    if (first === undefined) return 0;
    if (first !== '\\') return first !== "'" && r.peek(2) === "'" ? 3 : 0;
    // Escaped form: peek(2) is the escape letter; any further characters
    // before the closing quote must be hex digits (at most 8 of them).
    for (let k = 3; k <= 11; k++) {
      const ch = r.peek(k);
      if (ch === "'") return k + 1;
      if (ch === undefined || !/^[0-9A-Fa-f]$/.test(ch)) return 0;
    }
    return 0;
  };

  while (!r.eof()) {
    const c = r.peek();
    if (c === '"') {
      // Verbatim: "" → literal "
      if (isVerbatim && r.peek(1) === '"') { buf.push('"'); r.advance(2); continue; }
      contentEnd = r.i;
      r.advance();
      break;
    }
    if (c === '{') {
      if (r.peek(1) === '{') { buf.push('{'); r.advance(2); continue; }
      flushLit();
      r.advance();
      // Read raw text until the matching '}', respecting nested braces.
      let depth = 1;
      const exprStart = r.i;
      while (!r.eof() && depth > 0) {
        const ch = r.peek();
        if (ch === '{') { depth++; r.advance(); continue; }
        if (ch === '}') { depth--; if (depth === 0) break; r.advance(); continue; }
        // Nested string / char literals may contain braces that must not be
        // counted as expression braces.
        if (ch === '"') { skipInlineString(r); continue; }
        if (ch === "'") { r.advance(charLiteralLength() || 1); continue; }
        r.advance();
      }
      const exprText = r.slice(exprStart, r.i);
      if (!r.eof()) r.advance(); // consume '}'
      // Tokenize the embedded expression so detectors can see identifier
      // references like `userInput` inside `$"…{userInput}…"`.
      const tokens = tokenize(exprText, { embedded: true });
      parts.push({ kind: 'expr', text: exprText, tokens });
      continue;
    }
    // `}}` in literal text is the escape for a single '}'.
    if (c === '}' && r.peek(1) === '}') { buf.push('}'); r.advance(2); continue; }
    if (!isVerbatim && c === '\\') {
      const next = r.peek(1) || '';
      buf.push(c + next); r.advance(2); continue;
    }
    buf.push(c); r.advance();
  }
  flushLit();
  if (contentEnd < 0) contentEnd = r.i; // unterminated: everything up to EOF
  return {
    kind: 'interp',
    value: r.slice(contentStart, contentEnd),
    verbatim: isVerbatim,
    parts,
    line: startLine,
    col: startCol,
    start,
    end: r.i,
  };
}
|
|
199
|
+
|
|
200
|
+
// Step over a double-quoted string that sits inside an interpolation hole so
// the braces it contains are not counted by the caller. Does nothing unless
// the cursor is on a '"'. A backslash consumes itself plus the following
// character; an unterminated string runs to end of input.
function skipInlineString(r) {
  if (r.peek() !== '"') return;
  r.advance(); // opening quote
  for (;;) {
    if (r.eof()) return;
    const ch = r.peek();
    if (ch === '"') {
      r.advance(); // closing quote
      return;
    }
    r.advance(ch === '\\' ? 2 : 1);
  }
}
|
|
212
|
+
|
|
213
|
+
// Read a '...' character literal with the cursor on the opening quote.
// Returns { kind: 'char', value, line, col, start, end }; `value` is the raw
// text between the quotes with escape sequences left as written.
//
// A char literal cannot span lines, so an unterminated one stops at the end
// of the line (newline left unconsumed) instead of running on to the next
// apostrophe somewhere later in the file and hiding the code in between.
function readCharLiteral(r, startLine, startCol) {
  const start = r.i;
  r.advance(); // '
  let v = '';
  while (!r.eof()) {
    const c = r.peek();
    if (c === "'" || c === '\n' || c === '\r') break;
    if (c === '\\') {
      // Keep the backslash together with the character it escapes.
      v += r.slice(r.i, r.i + 2);
      r.advance(2);
      continue;
    }
    v += c;
    r.advance();
  }
  if (r.peek() === "'") r.advance(); // closing quote, when present
  return { kind: 'char', value: v, line: startLine, col: startCol, start, end: r.i };
}
|
|
224
|
+
|
|
225
|
+
// Read a numeric literal starting at the cursor and return
// { kind: 'num', value, line, col, start, end } where `value` is the exact
// source text. Recognises 0x… hex, 0b… binary and decimal forms (optional
// fraction and exponent), `_` digit separators, and any trailing run of the
// suffix letters u/l/f/m/d in either case.
function readNumber(r, startLine, startCol) {
  const start = r.i;
  const consumeWhile = (accept) => {
    while (!r.eof() && accept(r.peek())) r.advance();
  };
  const isDecimalChar = (ch) => isDigit(ch) || ch === '_';
  // Radix marker is only meaningful directly after a leading '0'.
  const radixMark = r.peek() === '0' ? r.peek(1) : undefined;

  if (radixMark === 'x' || radixMark === 'X') {
    r.advance(2);
    consumeWhile((ch) => /[0-9A-Fa-f_]/.test(ch));
  } else if (radixMark === 'b' || radixMark === 'B') {
    r.advance(2);
    consumeWhile((ch) => /[01_]/.test(ch));
  } else {
    consumeWhile(isDecimalChar);
    // A '.' belongs to the number only when a digit follows, so member access
    // such as `5.ToString()` is left alone.
    if (r.peek() === '.' && isDigit(r.peek(1))) {
      r.advance();
      consumeWhile(isDecimalChar);
    }
    const expMark = r.peek();
    if (expMark === 'e' || expMark === 'E') {
      r.advance();
      const sign = r.peek();
      if (sign === '+' || sign === '-') r.advance();
      consumeWhile((ch) => isDigit(ch));
    }
  }

  // Numeric suffixes
  consumeWhile((ch) => /[uUlLfFmMdD]/.test(ch));
  const end = r.i;
  return { kind: 'num', value: r.slice(start, end), line: startLine, col: startCol, start, end };
}
|
|
250
|
+
|
|
251
|
+
// Read an identifier or keyword starting at the cursor.
//
// With `allowAt` set, a leading '@' (the C# keyword-escape prefix) is
// consumed as part of the token: the result is always kind 'ident', carries
// `at: true`, and `value` excludes the '@'. Otherwise the word is reported as
// 'kw' when it is in KEYWORDS and 'ident' when it is not. `start`/`end` cover
// the full source span, including any '@'.
function readIdentifier(r, startLine, startCol, allowAt = false) {
  const start = r.i;
  const escaped = allowAt && r.peek() === '@';
  if (escaped) r.advance();

  const nameStart = r.i;
  while (!r.eof() && isIdentCont(r.peek())) r.advance();
  const end = r.i;
  const name = r.slice(nameStart, end);

  if (escaped) {
    // @class etc — never a keyword, even when the bare word would be.
    return { kind: 'ident', value: name, at: true, line: startLine, col: startCol, start, end };
  }
  const kind = KEYWORDS.has(name) ? 'kw' : 'ident';
  return { kind, value: name, line: startLine, col: startCol, start, end };
}
|
|
262
|
+
|
|
263
|
+
// Read one operator token at the cursor. The first OPS entry that matches
// the upcoming text wins (the table is ordered longest-first). When nothing
// matches, the single character under the cursor is emitted as an 'op' so
// the tokenizer always makes progress.
function readOperator(r, startLine, startCol) {
  const start = r.i;
  const matched = OPS.find((candidate) => r.startsWith(candidate));
  const value = matched === undefined ? r.peek() : matched;
  r.advance(matched === undefined ? 1 : matched.length);
  return { kind: 'op', value, line: startLine, col: startCol, start, end: r.i };
}
|
|
277
|
+
|
|
278
|
+
// Heuristic attribute detection, based solely on the token that precedes the
// `[`. An opening bracket is treated as an attribute start when it is the
// first token, when it follows structural punctuation (`{`, `}`, `;`, or the
// close of another attribute), or when it follows one of the listed modifier
// keywords. After anything else — identifiers, `)`, `new`, operators — it is
// treated as an indexer / array bracket.
const ATTRIBUTE_PRECEDING_KINDS = new Set(['lbrace', 'rbrace', 'semi', 'attr-close']);
const ATTRIBUTE_PRECEDING_MODIFIERS = new Set([
  'public', 'private', 'protected', 'internal', 'static', 'override',
  'virtual', 'abstract', 'sealed', 'partial', 'async',
]);

// `r` is accepted for signature compatibility; the decision does not read it.
function looksLikeAttributeStart(r, prevToken) {
  if (!prevToken) return true;
  const { kind, value } = prevToken;
  if (kind === 'kw') return ATTRIBUTE_PRECEDING_MODIFIERS.has(value);
  return ATTRIBUTE_PRECEDING_KINDS.has(kind);
}
|
|
290
|
+
|
|
291
|
+
// Single-character structural tokens and the kind each one maps to.
const PUNCT_KINDS = new Map([
  ['{', 'lbrace'], ['}', 'rbrace'], ['(', 'lparen'], [')', 'rparen'],
  [',', 'comma'], [';', 'semi'], ['.', 'dot'],
]);

// Tokenize C# source text.
//
// Returns [] for a non-string or empty `src`; otherwise a token array that
// always ends with an 'eof' token. Whitespace, comments and preprocessor
// lines produce no tokens. `opts.embedded` is set when tokenizing the inside
// of an interpolation hole, where `[` is never an attribute.
//
// Fixes over the previous version:
//  * `]` is paired with its own `[` via a stack. The old backwards scan
//    stopped at the first inner bracket, so `[A(new[] {1})]` closed as
//    'rbracket' and the following attribute was mislabelled; the scan was
//    also quadratic in attribute-heavy files.
//  * `.5` is a single 'num' token and `..` (range) a single 'op', instead of
//    stray dot / op tokens.
//  * A lone '@' at end of input is an 'op', not an empty identifier.
//  * `opts === null` no longer throws.
export function tokenize(src, opts = {}) {
  if (typeof src !== 'string' || !src.length) return [];
  const embedded = Boolean(opts && opts.embedded);
  const r = new Reader(src);
  const out = [];
  let prevToken = null;
  // One entry per currently open '[': true when it was labelled 'attr-open'.
  const openBrackets = [];

  const emit = (tok) => { out.push(tok); prevToken = tok; };
  // Consume `len` characters and emit them as a token of the given kind.
  const emitFixed = (kind, value, line, col) => {
    r.advance(value.length);
    emit({ kind, value, line, col, start: r.i - value.length, end: r.i });
  };

  while (!r.eof()) {
    skipWhitespaceAndComments(r);
    if (r.eof()) break;
    const c = r.peek();
    const next = r.peek(1);
    const startLine = r.line;
    const startCol = r.col;

    // Verbatim @"..." or @ident
    if (c === '@') {
      if (next === '"') {
        r.advance(); // consume @
        emit(readStringLiteral(r, startLine, startCol, 'verbatim'));
        continue;
      }
      // Guard against end of input before asking whether an identifier follows.
      if (next !== undefined && isIdentStart(next)) {
        emit(readIdentifier(r, startLine, startCol, true));
        continue;
      }
    }

    // Interpolated $"..." or $@"..."
    if (c === '$' && (next === '"' || (next === '@' && r.peek(2) === '"'))) {
      const tok = readInterpolatedString(r, startLine, startCol);
      if (tok) { emit(tok); continue; }
    }

    if (c === '"') { emit(readStringLiteral(r, startLine, startCol, 'string')); continue; }
    if (c === "'") { emit(readCharLiteral(r, startLine, startCol)); continue; }

    // Range operator. Must be recognised before leading-dot numbers so that
    // `1..5` is num, '..', num rather than num, '.', '.5'.
    if (c === '.' && next === '.') { emitFixed('op', '..', startLine, startCol); continue; }

    if (isDigit(c) || (c === '.' && isDigit(next))) {
      emit(readNumber(r, startLine, startCol));
      continue;
    }
    if (isIdentStart(c)) {
      emit(readIdentifier(r, startLine, startCol));
      continue;
    }

    // Structural punctuation
    const punctKind = PUNCT_KINDS.get(c);
    if (punctKind !== undefined) { emitFixed(punctKind, c, startLine, startCol); continue; }

    if (c === '[') {
      const isAttr = !embedded && looksLikeAttributeStart(r, prevToken);
      openBrackets.push(isAttr);
      emitFixed(isAttr ? 'attr-open' : 'lbracket', '[', startLine, startCol);
      continue;
    }
    if (c === ']') {
      // Close whatever the matching '[' opened; an unbalanced ']' is a plain bracket.
      const closesAttr = openBrackets.length > 0 && openBrackets.pop();
      emitFixed(closesAttr ? 'attr-close' : 'rbracket', ']', startLine, startCol);
      continue;
    }

    // Operators (after structural punctuation so '?.', '=>' work)
    const tok = readOperator(r, startLine, startCol);
    if (tok.value === '=>') tok.kind = 'arrow';
    emit(tok);
  }

  out.push({ kind: 'eof', line: r.line, col: r.col, start: r.i, end: r.i });
  return out;
}
|
|
377
|
+
|
|
378
|
+
// Utility: gather the names of every 'ident' token in a token list, in
// source order, descending into the expression holes of interpolated-string
// tokens. Null/undefined lists and falsy entries are ignored. Keywords are
// not included.
export function identsIn(tokens) {
  const collected = [];
  const visit = (list) => {
    for (const tok of list || []) {
      if (!tok) continue;
      switch (tok.kind) {
        case 'ident':
          collected.push(tok.value);
          break;
        case 'interp':
          for (const part of tok.parts || []) {
            if (part.kind === 'expr') visit(part.tokens);
          }
          break;
        default:
          break;
      }
    }
  };
  visit(tokens);
  return collected;
}
|