@blundergoat/gruff-ts 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/CONTRIBUTING.md +87 -0
- package/LICENSE +21 -0
- package/README.md +303 -0
- package/SECURITY.md +45 -0
- package/bin/gruff-ts +25 -0
- package/docs/CONFIGURATION.md +220 -0
- package/docs/RELEASING.md +103 -0
- package/docs/REPORTS_AND_CI.md +156 -0
- package/fixtures/sample.ts +21 -0
- package/package.json +56 -0
- package/scripts/bump-version.sh +145 -0
- package/scripts/check.sh +4 -0
- package/scripts/npm-publish.sh +258 -0
- package/scripts/preflight-checks.sh +357 -0
- package/scripts/start-dev.sh +8 -0
- package/scripts/test-performance.sh +695 -0
- package/src/analyser.ts +461 -0
- package/src/baseline.ts +90 -0
- package/src/blocks.ts +687 -0
- package/src/class-rules.ts +326 -0
- package/src/cli-program.ts +326 -0
- package/src/cli.ts +19 -0
- package/src/comment-rules.ts +605 -0
- package/src/comment-scanner.ts +357 -0
- package/src/config.ts +622 -0
- package/src/constants.ts +4 -0
- package/src/context-doc-rules.ts +241 -0
- package/src/dashboard.ts +114 -0
- package/src/dead-code-rules.ts +183 -0
- package/src/discovery.ts +508 -0
- package/src/doc-rules.ts +368 -0
- package/src/findings-helpers.ts +108 -0
- package/src/findings.ts +45 -0
- package/src/fixture-purpose-rules.ts +334 -0
- package/src/fixtures/rule-catalogue-security-doctrine.ts +132 -0
- package/src/github-actions-rules.ts +413 -0
- package/src/line-rules.ts +538 -0
- package/src/naming-pushers.ts +191 -0
- package/src/project-config-rules.ts +555 -0
- package/src/project-rules.ts +545 -0
- package/src/report-renderers.ts +691 -0
- package/src/rule-list.ts +179 -0
- package/src/rules.ts +135 -0
- package/src/safety-rules.ts +355 -0
- package/src/scoring.ts +74 -0
- package/src/security-flow-rules.ts +112 -0
- package/src/sensitive-data-rules.ts +288 -0
- package/src/source-text.ts +722 -0
- package/src/test-block-rules.ts +347 -0
- package/src/test-fixtures.ts +621 -0
- package/src/text-scans.ts +193 -0
- package/src/types.ts +113 -0
- package/tsconfig.json +15 -0
|
@@ -0,0 +1,722 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provides lightweight source text scanners that separate executable TypeScript
|
|
3
|
+
* from comments, strings, and regex bodies before rule matching runs.
|
|
4
|
+
*/
|
|
5
|
+
import type { RunDiagnostic } from "./types.ts";
|
|
6
|
+
|
|
7
|
+
// The narrow slice of source-file metadata that `parseDiagnostics` reads. Declaring it here keeps
// the delimiter check independent of the full project `SourceFile` type.
interface DiagnosticSourceFile {
  // Path copied into `filePath` of any diagnostic emitted for this file.
  displayPath: string;
  // When false, `parseDiagnostics` returns immediately without scanning.
  isScript: boolean;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Heuristic delimiter sanity check for TypeScript/JavaScript text. It is not a parser: the only
 * condition it reports is a closing `}`, `)`, or `]` that appears before any matching opener,
 * because that kind of mismatch can be pinned to a specific line.
 *
 * @param file - Source metadata; non-script inputs are skipped and `displayPath` labels the diagnostic.
 * @param source - Raw file text, scanned one line at a time (split on `\n` or `\r\n`).
 * @returns An empty array, or a single `parse-error` diagnostic for the first 1-based line on
 * which a delimiter count dropped below zero.
 */
function parseDiagnostics(file: DiagnosticSourceFile, source: string): RunDiagnostic[] {
  if (!file.isScript) {
    return [];
  }
  const ctx: DelimiterScanContext = {
    scan: defaultDelimiterScanState(),
    counts: { braces: 0, parentheses: 0, brackets: 0 },
  };
  // `findIndex` stops at the first offending line, so later lines are never scanned.
  const offendingIndex = source.split(/\r?\n/).findIndex((lineText) => {
    scanDelimiterLine(lineText, ctx);
    return hasNegativeDelimiterCount(ctx.counts);
  });
  // Intentional: there is no end-of-file imbalance check (removed in M38 false-positive triage).
  // The scanner is a regex-vs-division heuristic, not a real parser, and drifts on valid
  // TypeScript containing nested template literals, regex literals with parens in character
  // classes, and similar constructs. tsc owns syntax validation; this check only catches obvious
  // local mismatches (negative counts).
  return offendingIndex === -1 ? [] : [parseErrorDiagnostic(file, offendingIndex + 1)];
}
|
|
43
|
+
|
|
44
|
+
// Running opener-minus-closer totals for `{}`, `()`, and `[]`. `parseDiagnostics` reports a
// `parse-error` on the line where any total first drops below zero (a closer with no opener).
// A non-zero total at end of file is intentionally not reported by `parseDiagnostics`.
interface DelimiterCounts {
  // `{` minus `}` seen so far in executable code.
  braces: number;
  // `(` minus `)` seen so far in executable code.
  parentheses: number;
  // `[` minus `]` seen so far in executable code.
  brackets: number;
}
|
|
51
|
+
|
|
52
|
+
// Mutable lexer state shared by the delimiter-scanner handlers.
interface DelimiterScanState {
  // Quote character of the string/template literal currently open, or undefined in code.
  quote: string | undefined;
  // True when the previous character inside a literal was an unescaped backslash.
  isEscaped: boolean;
  // True between `/*` and the matching `*/`.
  isInBlockComment: boolean;
  // True while inside a regex literal body.
  isInRegex: boolean;
  // True while inside `[...]` within a regex, where `/` does not close the literal.
  isInRegexCharClass: boolean;
  // True when the previous regex character was an unescaped backslash.
  isRegexEscaped: boolean;
  // Last non-whitespace executable character; needed because `/` may start a regex or be a
  // division depending on what came before it.
  previousCode: string;
  // Brace count recorded at each `${`, so the matching `}` can re-enter template-literal mode.
  // Without it, nested templates such as `\`\${x.map(n => \`\${n}\`)}\`` leave quote mode early
  // and code-level braces get counted inside the string body.
  templateInterpolationStack: number[];
}
|
|
67
|
+
|
|
68
|
+
// Pairs the lexer state with the running delimiter totals so per-character handlers receive a
// single argument and can mutate both.
interface DelimiterScanContext {
  // Where the scanner currently is: code, comment, string, or regex.
  scan: DelimiterScanState;
  // Opener/closer totals accumulated so far.
  counts: DelimiterCounts;
}
|
|
73
|
+
|
|
74
|
+
// Result of scanning one character in the delimiter scanner.
interface ScanStep {
  // Extra characters for `scanDelimiterLine` to jump over (e.g. the `/` of a closing `*/`).
  skip: number;
  // True when the rest of the line must be ignored because a `//` line comment started.
  shouldStopLine: boolean;
}
|
|
80
|
+
|
|
81
|
+
// Builds the initial scanner state: every flag cleared, no open quote, and an empty
// interpolation stack. `previousCode` starts as "" so that a `/` appearing before any other code
// is treated as a regex opener by `isRegexLiteralStart`.
function defaultDelimiterScanState(): DelimiterScanState {
  const initialState: DelimiterScanState = {
    quote: undefined,
    isEscaped: false,
    isInBlockComment: false,
    isInRegex: false,
    isInRegexCharClass: false,
    isRegexEscaped: false,
    previousCode: "",
    // A new array per call, so separate scans never share a stack.
    templateInterpolationStack: [],
  };
  return initialState;
}
|
|
95
|
+
|
|
96
|
+
// Walks one line, mutating `ctx.counts` and `ctx.scan` in place. A `shouldStopLine` step is the
// only way out before the end of the line; comment, string, and regex bodies just advance the
// offset without counting.
//
// After the walk, state that cannot legally cross a line break is reset so one misread character
// cannot swallow the following lines:
// - a pending escape is cleared, because the character it escaped was the line break itself;
// - an open `'` or `"` literal is closed unless the line ended in a backslash (line continuation);
// - regex state is cleared, because a regex literal cannot contain a line terminator.
// Template-literal and block-comment state deliberately survive, since both may span lines.
function scanDelimiterLine(line: string, ctx: DelimiterScanContext): void {
  for (let offset = 0; offset < line.length; offset += 1) {
    const step = scanDelimiterCharacter(line, offset, ctx);
    offset += step.skip;
    if (step.shouldStopLine) {
      break;
    }
  }

  const scan = ctx.scan;
  const isLineContinuation = scan.quote !== undefined && scan.isEscaped;
  scan.isEscaped = false;
  if (scan.quote !== "`" && !isLineContinuation) {
    scan.quote = undefined;
  }
  scan.isInRegex = false;
  scan.isInRegexCharClass = false;
  scan.isRegexEscaped = false;
}
|
|
107
|
+
|
|
108
|
+
// Routes one character to the handler for the scanner's current mode. Precedence is
// block comment, then string/template, then regex, then plain code; the order matters because
// text inside a block comment must never be re-read as a quote or regex opener.
function scanDelimiterCharacter(line: string, offset: number, ctx: DelimiterScanContext): ScanStep {
  const { scan } = ctx;
  const current = line[offset] ?? "";
  // "" when `offset` is the last character of the line.
  const lookahead = line[offset + 1] ?? "";

  if (scan.isInBlockComment) {
    return scanBlockCommentDelimiter(current, lookahead, scan);
  }
  if (scan.quote) {
    return scanQuotedDelimiter(current, lookahead, ctx);
  }
  if (!scan.isInRegex) {
    return scanCodeDelimiter(line, offset, current, lookahead, ctx);
  }
  // The regex handler only mutates state; it never skips characters or stops the line.
  scanRegexDelimiter(current, scan);
  return continueScan();
}
|
|
125
|
+
|
|
126
|
+
// Handles one character inside a `/* ... */` comment. Only the closing `*/` has any effect: it
// leaves block-comment mode and asks the caller to skip the `/`. Comment text never touches the
// delimiter counts.
function scanBlockCommentDelimiter(character: string, next: string, scan: DelimiterScanState): ScanStep {
  const closesComment = character === "*" && next === "/";
  if (!closesComment) {
    return continueScan();
  }
  scan.isInBlockComment = false;
  return skipNextCharacter();
}
|
|
135
|
+
|
|
136
|
+
// Handles one character inside a string or template literal.
// - A backslash arms `isEscaped`, so the following character (even a closing quote) is literal.
// - In a template literal, `${` opens an interpolation: the current brace count is pushed onto
//   `templateInterpolationStack`, quote mode is left, and the `{` is counted, so that the
//   matching `}` can later re-enter template mode.
// - The matching quote character closes the literal and is recorded as `previousCode`. Without
//   that, a `/` after a string (e.g. `"12" / 2`) would be judged against whatever preceded the
//   string, such as `=`, and be misread as a regex opener.
function scanQuotedDelimiter(character: string, next: string, ctx: DelimiterScanContext): ScanStep {
  const scan = ctx.scan;
  if (scan.isEscaped) {
    scan.isEscaped = false;
    return continueScan();
  }
  if (character === "\\") {
    scan.isEscaped = true;
    return continueScan();
  }
  if (scan.quote === "`" && character === "$" && next === "{") {
    scan.templateInterpolationStack.push(ctx.counts.braces);
    scan.quote = undefined;
    ctx.counts.braces += 1;
    scan.previousCode = "{";
    // Skip the `{`, which has already been counted above.
    return skipNextCharacter();
  }
  if (character === scan.quote) {
    scan.quote = undefined;
    scan.previousCode = character;
  }
  return continueScan();
}
|
|
163
|
+
|
|
164
|
+
// Handles one character inside a regex literal. A `[...]` character class may legally contain
// `/`, so the closing slash is honoured only outside a class; otherwise `/[/]/` would end early.
// On close, `previousCode` becomes the placeholder "x" so that a following `/` is read as division.
function scanRegexDelimiter(character: string, scan: DelimiterScanState): void {
  if (scan.isRegexEscaped) {
    // Whatever follows a backslash is literal; just consume it.
    scan.isRegexEscaped = false;
    return;
  }
  switch (character) {
    case "\\":
      scan.isRegexEscaped = true;
      return;
    case "[":
      scan.isInRegexCharClass = true;
      return;
    case "]":
      scan.isInRegexCharClass = false;
      return;
    case "/":
      if (!scan.isInRegexCharClass) {
        scan.isInRegex = false;
        scan.previousCode = "x";
      }
      return;
    default:
      return;
  }
}
|
|
180
|
+
|
|
181
|
+
// Handles one character of executable code by trying each recogniser in turn: line comment,
// block comment, quote, regex, then plain code. The first recogniser that returns a step wins.
// Both comment openers are tried before the regex recogniser so the `/` that starts `//` or `/*`
// is never offered to it.
function scanCodeDelimiter(line: string, offset: number, character: string, next: string, ctx: DelimiterScanContext): ScanStep {
  const lineComment = scanLineCommentStart(character, next);
  if (lineComment != null) {
    return lineComment;
  }
  const blockComment = scanBlockCommentStart(character, next, ctx.scan);
  if (blockComment != null) {
    return blockComment;
  }
  const quote = scanQuoteStart(character, ctx.scan);
  if (quote != null) {
    return quote;
  }
  const regex = scanRegexStart(line, offset, character, ctx.scan);
  if (regex != null) {
    return regex;
  }
  return scanPlainCodeDelimiter(character, ctx);
}
|
|
192
|
+
|
|
193
|
+
// Recognises `//`. Everything to its right is comment text, so the caller is told to stop
// scanning this line. Returns undefined for any other character pair.
function scanLineCommentStart(character: string, next: string): ScanStep | undefined {
  return character === "/" && next === "/" ? stopLineScan() : undefined;
}
|
|
200
|
+
|
|
201
|
+
// Recognises `/*`: switches the scanner into block-comment mode and asks the caller to skip the
// `*` so it is not evaluated again. Returns undefined for any other character pair.
function scanBlockCommentStart(character: string, next: string, scan: DelimiterScanState): ScanStep | undefined {
  const opensComment = character === "/" && next === "*";
  if (!opensComment) {
    return undefined;
  }
  scan.isInBlockComment = true;
  return skipNextCharacter();
}
|
|
209
|
+
|
|
210
|
+
// Recognises an opening quote and records which kind it was, so the literal closes only on the
// same character. The quote is also stored as `previousCode` (as `maskQuoteStart` does): nothing
// inside the literal updates `previousCode`, so without this a `/` following the string would be
// judged against whatever preceded the string and could be misread as a regex opener.
// Returns undefined when `character` is not a quote.
function scanQuoteStart(character: string, scan: DelimiterScanState): ScanStep | undefined {
  if (isQuote(character)) {
    scan.quote = character;
    scan.previousCode = character;
    return continueScan();
  }
  return undefined;
}
|
|
218
|
+
|
|
219
|
+
// Decides whether a `/` opens a regex literal or is a division. `isRegexLiteralStart` makes the
// call from the previous code character and the text of the current line before the slash. A
// wrong "regex" answer on `a / b / c` would absorb everything up to the next slash. On a match
// the regex flags are reset for the new literal; otherwise undefined is returned.
function scanRegexStart(line: string, offset: number, character: string, scan: DelimiterScanState): ScanStep | undefined {
  if (character !== "/") {
    return undefined;
  }
  const textBeforeSlash = line.slice(0, offset);
  if (!isRegexLiteralStart(scan.previousCode, textBeforeSlash)) {
    return undefined;
  }
  scan.isInRegex = true;
  scan.isInRegexCharClass = false;
  scan.isRegexEscaped = false;
  return continueScan();
}
|
|
231
|
+
|
|
232
|
+
// Handles an ordinary code character: updates the delimiter totals, then either
// - re-enters template-literal mode, when a `}` brings the brace count back to the value saved
//   at the innermost open `${`, or
// - remembers the character as `previousCode` for the next regex-vs-division decision.
// Whitespace is never stored as `previousCode`; it would corrupt that decision.
function scanPlainCodeDelimiter(character: string, ctx: DelimiterScanContext): ScanStep {
  const { scan, counts } = ctx;
  countDelimiter(character, counts);

  const stack = scan.templateInterpolationStack;
  if (character === "}" && stack.length > 0) {
    const depthAtInterpolationStart = stack[stack.length - 1] ?? -1;
    if (counts.braces === depthAtInterpolationStart) {
      stack.pop();
      scan.quote = "`";
      scan.previousCode = "`";
      return continueScan();
    }
  }

  const isWhitespace = character.trim() === "";
  if (!isWhitespace) {
    scan.previousCode = character;
  }
  return continueScan();
}
|
|
249
|
+
|
|
250
|
+
// Adds one to the matching total for an opener and subtracts one for a closer. Every other
// character is a no-op, so callers can invoke this for each character without a guard.
function countDelimiter(character: string, counts: DelimiterCounts): void {
  switch (character) {
    case "{":
      counts.braces += 1;
      break;
    case "}":
      counts.braces -= 1;
      break;
    case "(":
      counts.parentheses += 1;
      break;
    case ")":
      counts.parentheses -= 1;
      break;
    case "[":
      counts.brackets += 1;
      break;
    case "]":
      counts.brackets -= 1;
      break;
    default:
      break;
  }
}
|
|
267
|
+
|
|
268
|
+
// True when any of the three totals is below zero, i.e. a closer has run ahead of its opener.
// `parseDiagnostics` checks this after every line so the report lands on the line where the
// mismatch happened.
function hasNegativeDelimiterCount(counts: DelimiterCounts): boolean {
  const totals = [counts.braces, counts.parentheses, counts.brackets];
  return totals.some((total) => total < 0);
}
|
|
273
|
+
|
|
274
|
+
// True when any total is non-zero. After a whole file that means some opener was never closed
// (or a closer had no opener). `parseDiagnostics` does not call this: its end-of-file check was
// deliberately removed. NOTE(review): no caller is visible in this section of the file; confirm
// whether it is still used elsewhere before removing it.
function hasUnbalancedDelimiterCount(counts: DelimiterCounts): boolean {
  const isBalanced = counts.braces === 0 && counts.parentheses === 0 && counts.brackets === 0;
  return !isBalanced;
}
|
|
278
|
+
|
|
279
|
+
/*
 * Builds the `parse-error` diagnostic for `file` at the given 1-based `line`. The failure is
 * returned as data rather than thrown, so a broken file in the scan tree is surfaced in the
 * report instead of aborting the run or being dropped silently. Per the original note, the CLI
 * exit contract (see `exitFor`, defined outside this section) forces a non-zero exit whenever
 * diagnostics fire.
 */
function parseErrorDiagnostic(file: DiagnosticSourceFile, line: number): RunDiagnostic {
  const diagnostic: RunDiagnostic = {
    diagnosticType: "parse-error",
    message: "Unbalanced TypeScript delimiters detected.",
    filePath: file.displayPath,
    line: line,
  };
  return diagnostic;
}
|
|
292
|
+
|
|
293
|
+
// Regex-vs-division heuristic for a `/`. It is treated as a regex opener when nothing precedes
// it, when the previous code character is one of `( [ { = , : ! & | ? ;`, or when the text before
// it ends with the word `return`. Anything else is division. The list is intentionally short: a
// false "regex" answer would stretch a regex across real code and break delimiter balance.
function isRegexLiteralStart(previousCode: string, beforeSlash: string): boolean {
  if (previousCode === "") {
    return true;
  }
  if ("([{=,:!&|?;".includes(previousCode)) {
    return true;
  }
  return /\breturn$/.test(beforeSlash.trimEnd());
}
|
|
299
|
+
|
|
300
|
+
// Returns a copy of `source` with comments, string bodies, and regex bodies blanked out, as
// decided per character by `maskNonCodeCharacter`. The output is meant to stay the same length
// as the input, so regex-driven rules can match on code only and still report raw-source line
// numbers and offsets.
function maskNonCode(source: string): string {
  const state = defaultMaskState();
  const pieces: string[] = [];
  let index = 0;
  while (index < source.length) {
    const { text, skip } = maskNonCodeCharacter(source, index, state);
    pieces.push(text);
    // `skip` covers the second character of a two-character token that `text` already represents.
    index += skip + 1;
  }
  return pieces.join("");
}
|
|
312
|
+
|
|
313
|
+
// Blanks out only the bodies of backtick template literals. Unlike `maskNonCode`, single- and
// double-quoted string bodies are kept, so syntax-pattern rules (e.g. import edges) still see the
// specifier on real `import ... from "..."` lines while fixture content embedded in template
// literals is ignored.
function maskTemplateLiteralBodies(source: string): string {
  const state = templateMaskState();
  const pieces: string[] = [];
  let index = 0;
  while (index < source.length) {
    const current = source[index] ?? "";
    // "" at the end of the source.
    const lookahead = source[index + 1] ?? "";
    const step = templateMaskCharacter(current, lookahead, state);
    pieces.push(step.text);
    index += step.skip + 1;
  }
  return pieces.join("");
}
|
|
329
|
+
|
|
330
|
+
// Mutable lexer state for `maskTemplateLiteralBodies`. It is smaller than `MaskState` because
// that pass only needs to know which quote or comment the current character sits inside.
interface TemplateMaskState {
  // Quote character of the literal currently open, or undefined when not inside one.
  quote: '"' | "'" | "`" | undefined;
  // True when the previous character inside a literal was an unescaped backslash.
  isEscaped: boolean;
  // True from `//` until the next newline.
  isLineComment: boolean;
  // True between `/*` and the matching `*/`.
  isBlockComment: boolean;
}
|
|
338
|
+
|
|
339
|
+
// Creates the starting state for one template-mask pass: in code, with no open quote or comment.
function templateMaskState(): TemplateMaskState {
  const initialState: TemplateMaskState = {
    quote: undefined,
    isEscaped: false,
    isLineComment: false,
    isBlockComment: false,
  };
  return initialState;
}
|
|
343
|
+
|
|
344
|
+
// Routes one character for the template-mask pass. A newline is handled first, then the current
// mode decides: line comment (pass through), block comment, quoted literal, or code. There is no
// regex mode here; the original note gives the reason that regex bodies cannot start an import.
function templateMaskCharacter(character: string, next: string, state: TemplateMaskState): MaskStep {
  if (character === "\n") {
    return templateMaskNewline(state);
  }
  if (state.isLineComment) {
    // Line-comment text is kept verbatim.
    return { text: character, skip: 0 };
  }
  if (state.isBlockComment) {
    return templateMaskBlockComment(character, next, state);
  }
  return state.quote
    ? templateMaskQuotedCharacter(character, state)
    : templateMaskCodeCharacter(character, next, state);
}
|
|
353
|
+
|
|
354
|
+
// Handles a newline in the template-mask pass. Line-comment state and any `'`/`"` quote end
// here; an open template literal survives because backtick strings may span lines. A pending
// backslash escape is also cleared: the newline itself was the escaped character, so the first
// character of the next line (for example a closing backtick) must not be treated as escaped.
// The newline is emitted verbatim.
function templateMaskNewline(state: TemplateMaskState): MaskStep {
  state.isLineComment = false;
  state.isEscaped = false;
  if (state.quote !== "`") {
    state.quote = undefined;
  }
  return { text: "\n", skip: 0 };
}
|
|
362
|
+
|
|
363
|
+
// Handles one character inside a block comment for the template-mask pass. Comment text is kept
// as-is; the closing `*/` is emitted as a pair and ends block-comment mode.
function templateMaskBlockComment(character: string, next: string, state: TemplateMaskState): MaskStep {
  const closesComment = character === "*" && next === "/";
  if (!closesComment) {
    return { text: character, skip: 0 };
  }
  state.isBlockComment = false;
  return { text: "*/", skip: 1 };
}
|
|
371
|
+
|
|
372
|
+
// Handles one character inside a quoted literal for the template-mask pass. Template-literal
// body characters become a space; single- and double-quoted bodies pass through unchanged. The
// closing quote is always emitted as itself.
function templateMaskQuotedCharacter(character: string, state: TemplateMaskState): MaskStep {
  // Decided before any state change, so the closing backtick's own branch is unaffected.
  const bodyText = state.quote === "`" ? " " : character;

  if (state.isEscaped || character === "\\") {
    // Either consume the escaped character or arm the escape for the next one.
    state.isEscaped = !state.isEscaped;
    return { text: bodyText, skip: 0 };
  }
  if (character !== state.quote) {
    return { text: bodyText, skip: 0 };
  }
  state.quote = undefined;
  return { text: character, skip: 0 };
}
|
|
389
|
+
|
|
390
|
+
// Handles one code character for the template-mask pass. It detects the two comment openers
// (emitted as a pair) and the three quote characters (which open a literal); every code
// character is emitted unchanged.
function templateMaskCodeCharacter(character: string, next: string, state: TemplateMaskState): MaskStep {
  if (character === "/") {
    if (next === "/") {
      state.isLineComment = true;
      return { text: "//", skip: 1 };
    }
    if (next === "*") {
      state.isBlockComment = true;
      return { text: "/*", skip: 1 };
    }
  }
  if (character === '"' || character === "'" || character === "`") {
    state.quote = character;
  }
  return { text: character, skip: 0 };
}
|
|
406
|
+
|
|
407
|
+
// Mutable lexer state for the `maskNonCode` pass. It mirrors `DelimiterScanState` because both
// passes solve the same code-vs-literal problem, but this one walks the whole file as a single
// string (newlines included) rather than line by line.
interface MaskState {
  // Quote character of the literal currently open, or undefined when not inside one.
  quote: string | undefined;
  // True when the previous character inside a literal was an unescaped backslash.
  isEscaped: boolean;
  // True from `//` until the next newline.
  isLineComment: boolean;
  // True between `/*` and the matching `*/`.
  isBlockComment: boolean;
  // True while inside a regex literal body.
  isRegex: boolean;
  // True while inside `[...]` within a regex, where `/` does not close the literal.
  isRegexCharClass: boolean;
  // True when the previous regex character was an unescaped backslash.
  isRegexEscaped: boolean;
  // Last significant character seen, used for the regex-vs-division decision.
  previousCode: string;
  // Open-brace depth inside a template `${...}` expression; 0 when not inside one.
  templateInterpolationDepth: number;
}
|
|
420
|
+
|
|
421
|
+
// Result of masking one position in the source.
interface MaskStep {
  // Output for this step: a space for a masked character, the original character for code, or a
  // two-character string when a two-character token such as `//` is handled in one step.
  text: string;
  // Number of additional source characters this step consumed, so the paired character is not
  // scanned again.
  skip: number;
}
|
|
427
|
+
|
|
428
|
+
// Builds the starting state for a `maskNonCode` pass: every flag cleared, no open quote, and
// zero interpolation depth, so the first character is treated as code.
function defaultMaskState(): MaskState {
  const initialState: MaskState = {
    quote: undefined,
    isEscaped: false,
    isLineComment: false,
    isBlockComment: false,
    isRegex: false,
    isRegexCharClass: false,
    isRegexEscaped: false,
    previousCode: "",
    templateInterpolationDepth: 0,
  };
  return initialState;
}
|
|
442
|
+
|
|
443
|
+
// Routes the character at `index` for the `maskNonCode` pass. Newlines are handled before
// anything else, because line comments (and ordinary quotes) must end at the line break while
// template literals continue. After that the current mode decides: line comment, block comment,
// quoted literal, regex, or code.
function maskNonCodeCharacter(source: string, index: number, state: MaskState): MaskStep {
  const current = source[index] ?? "";
  // "" at the end of the source.
  const lookahead = source[index + 1] ?? "";

  if (current === "\n") {
    return maskNewline(state);
  }
  if (state.isLineComment) {
    return maskSingleCharacter();
  }
  if (state.isBlockComment) {
    return maskBlockComment(current, lookahead, state);
  }
  if (state.quote) {
    return maskQuotedCharacter(current, lookahead, state);
  }
  return state.isRegex
    ? maskRegexCharacter(current, state)
    : maskCodeCharacter(source, index, current, lookahead, state);
}
|
|
465
|
+
|
|
466
|
+
// Handles a newline in the `maskNonCode` pass. Line-comment, `'`/`"` quote, and regex state all
// end here; template-literal state is kept because backtick strings legitimately span lines. A
// pending backslash escape is cleared as well: the newline itself was the escaped character, so
// the first character of the next line (for example a closing backtick) must not be swallowed
// as escaped. The newline is emitted verbatim to keep offsets and line numbers aligned.
function maskNewline(state: MaskState): MaskStep {
  state.isLineComment = false;
  state.isEscaped = false;
  if (state.quote !== "`") {
    state.quote = undefined;
  }
  state.isRegex = false;
  state.isRegexCharClass = false;
  state.isRegexEscaped = false;
  return { text: "\n", skip: 0 };
}
|
|
478
|
+
|
|
479
|
+
// Masks one character inside a block comment. The closing `*/` is handled in a single step that
// consumes both characters, so it must emit exactly two spaces; emitting one would shift every
// later offset and break the 1:1 mapping between masked text and source. The width is spelled
// with `repeat` so it cannot be lost to whitespace normalisation.
function maskBlockComment(character: string, next: string, state: MaskState): MaskStep {
  if (character === "*" && next === "/") {
    state.isBlockComment = false;
    return { text: " ".repeat(2), skip: 1 };
  }
  return maskSingleCharacter();
}
|
|
488
|
+
|
|
489
|
+
// Masks one character inside a string or template literal.
// - Body characters (including backslashes and escaped characters) become a space.
// - In a template literal, `${` is emitted as-is and switches back to code mode with the
//   interpolation depth raised, so the embedded expression stays visible.
// - The closing quote is emitted unchanged, so downstream rules can still see the string
//   boundary, and is recorded as `previousCode`.
function maskQuotedCharacter(character: string, next: string, state: MaskState): MaskStep {
  if (state.isEscaped || character === "\\") {
    // Either consume the escaped character or arm the escape for the next one.
    state.isEscaped = !state.isEscaped;
    return maskSingleCharacter();
  }

  const opensInterpolation = state.quote === "`" && character === "$" && next === "{";
  if (opensInterpolation) {
    state.quote = undefined;
    state.templateInterpolationDepth += 1;
    state.previousCode = "{";
    return { text: "${", skip: 1 };
  }

  if (character !== state.quote) {
    return maskSingleCharacter();
  }
  state.previousCode = character;
  state.quote = undefined;
  return { text: character, skip: 0 };
}
|
|
513
|
+
|
|
514
|
+
// Masks one character inside a regex literal. Every body character becomes a space; only the
// closing `/` is emitted, and it is recorded as `previousCode`. While inside a `[...]` character
// class the closing-slash check is suspended, so `/[/]/` does not end at the inner slash.
function maskRegexCharacter(character: string, state: MaskState): MaskStep {
  if (state.isRegexEscaped) {
    // Whatever follows a backslash is literal; just consume it.
    state.isRegexEscaped = false;
    return maskSingleCharacter();
  }
  switch (character) {
    case "\\":
      state.isRegexEscaped = true;
      break;
    case "[":
      state.isRegexCharClass = true;
      break;
    case "]":
      state.isRegexCharClass = false;
      break;
    case "/":
      if (!state.isRegexCharClass) {
        state.isRegex = false;
        state.previousCode = character;
        return { text: character, skip: 0 };
      }
      break;
    default:
      break;
  }
  return maskSingleCharacter();
}
|
|
540
|
+
|
|
541
|
+
// Handles one code character for the `maskNonCode` pass by trying each recogniser in turn; the
// first one that returns a step wins. Interpolation braces come first. Comment openers are tried
// before the regex recogniser so `/*` and `//` are never misread as a regex start. Regex is
// tried before quote, which is safe because no quote begins with `/`.
function maskCodeCharacter(source: string, index: number, character: string, next: string, state: MaskState): MaskStep {
  const interpolationBrace = maskTemplateInterpolationBrace(character, state);
  if (interpolationBrace != null) {
    return interpolationBrace;
  }
  const lineComment = maskLineCommentStart(character, next, state);
  if (lineComment != null) {
    return lineComment;
  }
  const blockComment = maskBlockCommentStart(character, next, state);
  if (blockComment != null) {
    return blockComment;
  }
  const regex = maskRegexStart(source, index, character, state);
  if (regex != null) {
    return regex;
  }
  const quote = maskQuoteStart(character, state);
  if (quote != null) {
    return quote;
  }
  return maskPlainCodeCharacter(character, state);
}
|
|
553
|
+
|
|
554
|
+
// Tracks braces while inside a template `${...}` expression, where they are executable code and
// must stay visible. `{` raises the depth and `}` lowers it; when the depth returns to zero the
// interpolation is over and backtick masking resumes. Returns undefined when not inside an
// interpolation or when the character is not a brace.
// NOTE(review): the depth is a single counter, not a stack as in `DelimiterScanState`. A
// template nested inside an interpolation (`${ `a${b}` }`) shares the counter with its parent,
// so the inner `}` looks like it cannot restore the inner template - confirm and consider
// moving to a stack.
function maskTemplateInterpolationBrace(character: string, state: MaskState): MaskStep | undefined {
  const isBrace = character === "{" || character === "}";
  if (state.templateInterpolationDepth === 0 || !isBrace) {
    return undefined;
  }
  state.templateInterpolationDepth += character === "{" ? 1 : -1;
  state.previousCode = character;
  if (character === "}" && state.templateInterpolationDepth === 0) {
    state.quote = "`";
  }
  return { text: character, skip: 0 };
}
|
|
575
|
+
|
|
576
|
+
// Detects `//` and arms line-comment mode; the rest of the comment is masked character by
// character via the `state.isLineComment` branch in `maskNonCodeCharacter`. This step consumes
// both slashes, so it must emit exactly two spaces; emitting one would shift every later offset
// and break the 1:1 mapping between masked text and source. Returns undefined for any other
// character pair.
function maskLineCommentStart(character: string, next: string, state: MaskState): MaskStep | undefined {
  if (character === "/" && next === "/") {
    state.isLineComment = true;
    return { text: " ".repeat(2), skip: 1 };
  }
  return undefined;
}
|
|
585
|
+
|
|
586
|
+
// Detects `/*`, masks the two opener characters, and switches to block-comment mode so the
// following characters are routed to `maskBlockComment`. This step consumes both characters, so
// it must emit exactly two spaces to keep masked offsets aligned with the source. Returns
// undefined for any other character pair.
function maskBlockCommentStart(character: string, next: string, state: MaskState): MaskStep | undefined {
  if (character === "/" && next === "*") {
    state.isBlockComment = true;
    return { text: " ".repeat(2), skip: 1 };
  }
  return undefined;
}
|
|
594
|
+
|
|
595
|
+
// Decides whether a `/` opens a regex literal, using the same `isRegexLiteralStart` heuristic as
// the delimiter scanner. The scanner passes the current line's prefix; this pass works on the
// whole source, so it passes up to 80 characters before the slash as context for the
// `return /pattern/` check. On a match the opening slash is emitted and recorded as
// `previousCode`; otherwise undefined is returned.
function maskRegexStart(source: string, index: number, character: string, state: MaskState): MaskStep | undefined {
  if (character !== "/") {
    return undefined;
  }
  const windowStart = Math.max(0, index - 80);
  const textBeforeSlash = source.slice(windowStart, index);
  if (!isRegexLiteralStart(state.previousCode, textBeforeSlash)) {
    return undefined;
  }
  state.isRegex = true;
  state.previousCode = character;
  return { text: character, skip: 0 };
}
|
|
605
|
+
|
|
606
|
+
// Starts quoted-literal mode when the character is one of the delimiters accepted by `isQuote`.
// The opening delimiter passes through unchanged so downstream rules can still see where a
// string begins; the characters of the body are masked afterwards by `maskQuotedCharacter`.
// Returns `undefined` for any non-quote character.
function maskQuoteStart(character: string, state: MaskState): MaskStep | undefined {
  if (!isQuote(character)) {
    return undefined;
  }
  // Remember which delimiter opened the literal, and record it as the latest code character.
  state.quote = character;
  state.previousCode = character;
  return { text: character, skip: 0 };
}
|
|
616
|
+
|
|
617
|
+
// Ordinary code characters are copied into the masked output untouched. `previousCode` is only
// refreshed for non-whitespace characters, because the regex-start heuristic needs the last real
// token rather than the blank that happens to precede a slash.
function maskPlainCodeCharacter(character: string, state: MaskState): MaskStep {
  const passthrough: MaskStep = { text: character, skip: 0 };
  if (!isNonWhitespaceCharacter(character)) {
    return passthrough;
  }
  state.previousCode = character;
  return passthrough;
}
|
|
625
|
+
|
|
626
|
+
// Replacement step for a single masked character: exactly one space and no extra skipping, so the
// masked text stays the same length as the source and offsets keep mapping 1:1.
function maskSingleCharacter(): MaskStep {
  const blank: MaskStep = { text: " ", skip: 0 };
  return blank;
}
|
|
630
|
+
|
|
631
|
+
// Lightweight, single-line mask for rules that do not need full lexer state. String bodies are
// removed and everything from an unquoted `//` onwards is cut off; block comments are not tracked.
// Reach for `maskNonCode` instead whenever state has to carry over from one line to the next.
// Unlike `maskNonCode`, dropped characters are not replaced, so the result can be shorter than `line`.
function codeLineForMatching(line: string): string {
  const state: CodeLineState = { quote: undefined, isEscaped: false };
  const kept: string[] = [];
  let index = 0;
  while (index < line.length) {
    const { text, shouldStopLine } = codeLineCharacter(line, index, state);
    kept.push(text);
    if (shouldStopLine) {
      // A line comment opened: nothing after this point counts as code.
      break;
    }
    index += 1;
  }
  return kept.join("");
}
|
|
646
|
+
|
|
647
|
+
// Minimal lexer state for the line-local scan: only what is required to know that the scan is
// inside a quoted string and should drop its body.
interface CodeLineState {
  // Delimiter of the string currently being scanned, or `undefined` outside a string.
  quote: string | undefined;
  // True right after a backslash inside a string; the following character is dropped as an escape.
  isEscaped: boolean;
}
|
|
652
|
+
|
|
653
|
+
// Result of scanning one character in the line-local mask.
interface CodeLineStep {
  // Text to append to the output: the original character for code, empty when it is dropped.
  text: string;
  // Set once a line comment opens, telling the caller to stop scanning the line.
  shouldStopLine: boolean;
}
|
|
659
|
+
|
|
660
|
+
// Scans the character at `index`. Quote delimiters are kept so callers can see literal
// boundaries, string bodies are delegated to `quotedCodeLineCharacter` (which drops them), and a
// `//` found outside a string ends the line. An index past the end reads as the empty string.
function codeLineCharacter(line: string, index: number, state: CodeLineState): CodeLineStep {
  const character = line[index] ?? "";

  // Inside a string everything, including a `//`, is handled by the quoted-character scanner.
  if (state.quote) {
    return quotedCodeLineCharacter(character, state);
  }

  const next = line[index + 1] ?? "";
  if (character === "/" && next === "/") {
    return { text: "", shouldStopLine: true };
  }

  // An opening delimiter is emitted like any other code character, but also starts string mode.
  if (isQuote(character)) {
    state.quote = character;
  }
  return { text: character, shouldStopLine: false };
}
|
|
677
|
+
|
|
678
|
+
// Handles one character while inside a string. Body characters are discarded so that text such as
// `"// not a comment"` cannot truncate the line, while the closing delimiter is emitted so the
// caller still sees a balanced literal.
function quotedCodeLineCharacter(character: string, state: CodeLineState): CodeLineStep {
  const dropped: CodeLineStep = { text: "", shouldStopLine: false };

  // The character following a backslash is consumed as an escape, even if it is the delimiter.
  if (state.isEscaped) {
    state.isEscaped = false;
    return dropped;
  }
  if (character === "\\") {
    state.isEscaped = true;
    return dropped;
  }
  if (character !== state.quote) {
    return dropped;
  }

  // Matching delimiter: leave string mode and keep the closing quote in the output.
  state.quote = undefined;
  return { text: character, shouldStopLine: false };
}
|
|
695
|
+
|
|
696
|
+
// The default scan step: no extra characters skipped and the line keeps being scanned. Kept in
// one place so every caller produces the same shape.
function continueScan(): ScanStep {
  const step: ScanStep = { skip: 0, shouldStopLine: false };
  return step;
}
|
|
700
|
+
|
|
701
|
+
// Scan step that skips one additional character, used to consume the second half of a
// two-character token such as `*/`. The line keeps being scanned.
function skipNextCharacter(): ScanStep {
  const step: ScanStep = { skip: 1, shouldStopLine: false };
  return step;
}
|
|
705
|
+
|
|
706
|
+
// Scan step returned when a `//` line comment begins: nothing extra is skipped and the caller is
// told to stop scanning the current line.
function stopLineScan(): ScanStep {
  const step: ScanStep = { skip: 0, shouldStopLine: true };
  return step;
}
|
|
710
|
+
|
|
711
|
+
// True for the three literal delimiters every lexer in this module recognises: double quote,
// single quote, and the template-literal backtick.
function isQuote(character: string): boolean {
  const delimiters = ["\"", "'", "`"];
  return delimiters.includes(character);
}
|
|
715
|
+
|
|
716
|
+
// Used by `maskPlainCodeCharacter` to decide whether a character should overwrite `previousCode`.
// Whitespace must not, otherwise the regex-vs-division heuristic would be fed the wrong token.
// Returns false for the empty string and for every character JavaScript treats as white space or
// a line terminator - not only space, tab, CR and LF, but also vertical tab, form feed,
// no-break space, U+2028/U+2029 and the byte-order mark.
function isNonWhitespaceCharacter(character: string): boolean {
  // `trim` strips exactly the ECMAScript WhiteSpace and LineTerminator sets, so a character that
  // trims down to nothing is either empty or whitespace.
  return character.trim() !== "";
}
|
|
721
|
+
|
|
722
|
+
export { codeLineForMatching, maskNonCode, maskTemplateLiteralBodies, parseDiagnostics };
|