@zzzen/pyright-internal 1.2.0-dev.20260422 → 1.2.0-dev.20260426
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyzer/sourceFile.js +53 -23
- package/dist/analyzer/sourceFile.js.map +1 -1
- package/dist/parser/characterStream.js +26 -2
- package/dist/parser/characterStream.js.map +1 -1
- package/dist/parser/parser.d.ts +2 -0
- package/dist/parser/parser.js +11 -7
- package/dist/parser/parser.js.map +1 -1
- package/dist/parser/tokenizer.d.ts +6 -2
- package/dist/parser/tokenizer.js +602 -206
- package/dist/parser/tokenizer.js.map +1 -1
- package/dist/parser/tokenizerTypes.js +115 -39
- package/dist/parser/tokenizerTypes.js.map +1 -1
- package/dist/tests/benchmarks/parserBenchmark.test.d.ts +1 -0
- package/dist/tests/benchmarks/parserBenchmark.test.js +220 -0
- package/dist/tests/benchmarks/parserBenchmark.test.js.map +1 -0
- package/dist/tests/benchmarks/tokenizerBenchmark.test.d.ts +1 -0
- package/dist/tests/benchmarks/tokenizerBenchmark.test.js +236 -0
- package/dist/tests/benchmarks/tokenizerBenchmark.test.js.map +1 -0
- package/dist/tests/tokenizer.test.js +17 -2
- package/dist/tests/tokenizer.test.js.map +1 -1
- package/package.json +5 -4
package/dist/parser/tokenizer.js
CHANGED
|
@@ -60,6 +60,53 @@ const _keywords = new Map([
|
|
|
60
60
|
['True', 33 /* KeywordType.True */],
|
|
61
61
|
]);
|
|
62
62
|
const _softKeywords = new Set(['match', 'case', 'type']);
|
|
63
|
+
// Keyword fast-reject lookup, indexed by an ASCII character code (0-127).
// An entry is true when at least one key of _keywords begins with that
// character, so an identifier starting with any other character can be
// rejected without consulting the _keywords Map.
const _keywordFirstCharTable = (() => {
    const startsSomeKeyword = new Array(128).fill(false);
    const asciiFirstCodes = [..._keywords.keys()]
        .map((keyword) => keyword.charCodeAt(0))
        .filter((firstCode) => firstCode < 128);
    for (const firstCode of asciiFirstCodes) {
        startsSomeKeyword[firstCode] = true;
    }
    return startsSomeKeyword;
})();
|
|
76
|
+
// Length bounds checked before any keyword comparison is attempted.
const _keywordMinLen = 2;
const _keywordMaxLen = 9; // __debug__
// Keyword candidates bucketed by the ASCII code of their first character.
// Each bucket holds { text, type } records so that a lookup can compare the
// candidates against the source text in place rather than slicing out a
// temporary substring first. Slots with no keyword stay undefined.
const _keywordEntriesByFirstChar = (() => {
    const buckets = new Array(128);
    _keywords.forEach((type, text) => {
        const bucketIndex = text.charCodeAt(0);
        // Written as a negated "<" so a NaN code (empty key) is skipped too.
        if (!(bucketIndex < 128)) {
            return;
        }
        if (buckets[bucketIndex] === undefined) {
            buckets[bucketIndex] = [];
        }
        buckets[bucketIndex].push({ text, type });
    });
    return buckets;
})();
|
|
91
|
+
// Looks up the keyword type for the slice text[start, start + length).
// Returns the type stored in _keywords for an exact match, or undefined when
// the slice is not a keyword. The length bounds and first-character tables
// are consulted first so that most identifiers are rejected cheaply.
function getKeywordTypeFromTextSlice(text, start, length) {
    if (length < _keywordMinLen || _keywordMaxLen < length) {
        return undefined;
    }
    const leadCode = text.charCodeAt(start);
    const bucket = leadCode < 128 && _keywordFirstCharTable[leadCode]
        ? _keywordEntriesByFirstChar[leadCode]
        : undefined;
    if (!bucket) {
        return undefined;
    }
    // Compare in place: same length plus a prefix match at `start` means the
    // slice equals the keyword.
    const hit = bucket.find((entry) => entry.text.length === length && text.startsWith(entry.text, start));
    return hit === undefined ? undefined : hit.type;
}
|
|
63
110
|
const _operatorInfo = {
|
|
64
111
|
[0 /* OperatorType.Add */]: 1 /* OperatorFlags.Unary */ | 2 /* OperatorFlags.Binary */,
|
|
65
112
|
[1 /* OperatorType.AddEqual */]: 4 /* OperatorFlags.Assignment */,
|
|
@@ -104,16 +151,331 @@ const _operatorInfo = {
|
|
|
104
151
|
[41 /* OperatorType.In */]: 2 /* OperatorFlags.Binary */,
|
|
105
152
|
[42 /* OperatorType.NotIn */]: 2 /* OperatorFlags.Binary */,
|
|
106
153
|
};
|
|
154
|
+
// Sentinel stored in the operator tables for "no operator at this slot".
const _unsetSingleCharOperatorType = -1;
// Operator type for a lone operator character, indexed by ASCII code.
// Every slot that is not listed below holds _unsetSingleCharOperatorType.
const _singleCharOperatorTypeTable = (() => {
    const operatorByChar = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    const assignments = [
        [61 /* Char.Equal */, 2 /* OperatorType.Assign */],
        [43 /* Char.Plus */, 0 /* OperatorType.Add */],
        [45 /* Char.Hyphen */, 33 /* OperatorType.Subtract */],
        [42 /* Char.Asterisk */, 26 /* OperatorType.Multiply */],
        [47 /* Char.Slash */, 10 /* OperatorType.Divide */],
        [38 /* Char.Ampersand */, 3 /* OperatorType.BitwiseAnd */],
        [124 /* Char.Bar */, 6 /* OperatorType.BitwiseOr */],
        [94 /* Char.Caret */, 8 /* OperatorType.BitwiseXor */],
        [37 /* Char.Percent */, 24 /* OperatorType.Mod */],
        [126 /* Char.Tilde */, 5 /* OperatorType.BitwiseInvert */],
        [64 /* Char.At */, 22 /* OperatorType.MatrixMultiply */],
        [60 /* Char.Less */, 20 /* OperatorType.LessThan */],
        [62 /* Char.Greater */, 15 /* OperatorType.GreaterThan */],
    ];
    for (const [charCode, operatorType] of assignments) {
        operatorByChar[charCode] = operatorType;
    }
    return operatorByChar;
})();
|
|
173
|
+
// Operator type for "<char>=" compound forms, indexed by the ASCII code of
// the leading character. Slots not listed below hold
// _unsetSingleCharOperatorType.
const _singleCharEqualOperatorTypeTable = (() => {
    const operatorByChar = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    const assignments = [
        [43 /* Char.Plus */, 1 /* OperatorType.AddEqual */],
        [45 /* Char.Hyphen */, 34 /* OperatorType.SubtractEqual */],
        [42 /* Char.Asterisk */, 27 /* OperatorType.MultiplyEqual */],
        [47 /* Char.Slash */, 11 /* OperatorType.DivideEqual */],
        [38 /* Char.Ampersand */, 4 /* OperatorType.BitwiseAndEqual */],
        [124 /* Char.Bar */, 7 /* OperatorType.BitwiseOrEqual */],
        [94 /* Char.Caret */, 9 /* OperatorType.BitwiseXorEqual */],
        [37 /* Char.Percent */, 25 /* OperatorType.ModEqual */],
        [64 /* Char.At */, 23 /* OperatorType.MatrixMultiplyEqual */],
    ];
    for (const [charCode, operatorType] of assignments) {
        operatorByChar[charCode] = operatorType;
    }
    return operatorByChar;
})();
|
|
187
|
+
// Packs two character codes into a single numeric key, (char1 << 8) | char2,
// for use with the two-character operator/token Maps.
//
// The packing is only collision-free when both codes fit in 8 bits. A wider
// code would spill into the other operand's bits: for example '<' (0x3C)
// followed by U+013D would otherwise produce 0x3D3D, the key of "==".
// Pairs containing such a code can never be a two-character operator, so they
// map to -1, a value that no 8-bit pair can produce. Keys for 8-bit pairs are
// unchanged.
function getTwoCharKey(char1, char2) {
    if (((char1 | char2) & ~0xff) !== 0) {
        return -1;
    }
    return (char1 << 8) | char2;
}
|
|
190
|
+
// Two-character operator and special-token lookups, keyed by getTwoCharKey().
// Maps are used instead of dense typed arrays because only a handful of the
// possible key values are ever populated.
const _twoCharOperatorTypeMap = new Map(
    [
        [61 /* Char.Equal */, 61 /* Char.Equal */, 12 /* OperatorType.Equals */],
        [33 /* Char.ExclamationMark */, 61 /* Char.Equal */, 28 /* OperatorType.NotEquals */],
        [60 /* Char.Less */, 61 /* Char.Equal */, 21 /* OperatorType.LessThanOrEqual */],
        [62 /* Char.Greater */, 61 /* Char.Equal */, 16 /* OperatorType.GreaterThanOrEqual */],
        [60 /* Char.Less */, 62 /* Char.Greater */, 19 /* OperatorType.LessOrGreaterThan */],
    ].map(([first, second, operatorType]) => [getTwoCharKey(first, second), operatorType])
);
const _twoCharSpecialTokenTypeMap = new Map();
_twoCharSpecialTokenTypeMap.set(getTwoCharKey(45 /* Char.Hyphen */, 62 /* Char.Greater */), 21 /* TokenType.Arrow */);
|
|
202
|
+
// Operator type for a doubled operator character ("**", "//", "<<", ">>"),
// indexed by the ASCII code of that character. Slots not listed below hold
// _unsetSingleCharOperatorType.
const _repeatedCharOperatorTypeTable = (() => {
    const operatorByChar = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    const assignments = [
        [42 /* Char.Asterisk */, 29 /* OperatorType.Power */],
        [47 /* Char.Slash */, 13 /* OperatorType.FloorDivide */],
        [60 /* Char.Less */, 17 /* OperatorType.LeftShift */],
        [62 /* Char.Greater */, 31 /* OperatorType.RightShift */],
    ];
    for (const [charCode, operatorType] of assignments) {
        operatorByChar[charCode] = operatorType;
    }
    return operatorByChar;
})();
|
|
211
|
+
// Operator type for a doubled operator character followed by '='
// ("**=", "//=", "<<=", ">>="), indexed by the ASCII code of the repeated
// character. Slots not listed below hold _unsetSingleCharOperatorType.
const _repeatedCharEqualOperatorTypeTable = (() => {
    const operatorByChar = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    const assignments = [
        [42 /* Char.Asterisk */, 30 /* OperatorType.PowerEqual */],
        [47 /* Char.Slash */, 14 /* OperatorType.FloorDivideEqual */],
        [60 /* Char.Less */, 18 /* OperatorType.LeftShiftEqual */],
        [62 /* Char.Greater */, 32 /* OperatorType.RightShiftEqual */],
    ];
    for (const [charCode, operatorType] of assignments) {
        operatorByChar[charCode] = operatorType;
    }
    return operatorByChar;
})();
|
|
107
220
|
const _byteOrderMarker = 0xfeff;
|
|
108
221
|
const defaultTabSize = 8;
|
|
109
|
-
|
|
110
|
-
//
|
|
111
|
-
//
|
|
112
|
-
//
|
|
113
|
-
|
|
114
|
-
const
|
|
115
|
-
|
|
116
|
-
|
|
222
|
+
// String-start lookup, indexed by ASCII character code (0-127). An entry is
// true only for the two quote characters and for the prefix letters
// f/r/b/u/t in either case. Consulting this table first lets the tokenizer
// skip _getStringPrefixLength() for characters that cannot begin a string
// literal.
const _canStartString = (() => {
    const starterCodes = new Set([
        39 /* Char.SingleQuote */,
        34 /* Char.DoubleQuote */,
        102 /* Char.f */,
        70 /* Char.F */,
        114 /* Char.r */,
        82 /* Char.R */,
        98 /* Char.b */,
        66 /* Char.B */,
        117 /* Char.u */,
        85 /* Char.U */,
        116 /* Char.t */,
        84 /* Char.T */,
    ]);
    return Array.from({ length: 128 }, (_unused, code) => starterCodes.has(code));
})();
|
|
235
|
+
// ASCII identifier-continue lookup, indexed by character code (0-127).
// Each entry records whether isIdentifierChar accepts that code. The table
// is computed once at module load so the identifier scanning loop can use an
// array read instead of a function call for ASCII input; codes >= 128 are
// not covered and must go through the generic path.
const _asciiIdentifierContinue = (() => Array.from({ length: 128 }, (_unused, code) => ((0, characters_1.isIdentifierChar)(code) ? true : false)))();
|
|
249
|
+
// ASCII identifier-start lookup, indexed by character code (0-127). Each
// entry records whether isIdentifierStartChar accepts that code; computed
// once at module load.
const _asciiIdentifierStart = (() => Array.from({ length: 128 }, (_unused, code) => ((0, characters_1.isIdentifierStartChar)(code) ? true : false)))();
|
|
258
|
+
// Returns a copy of text[start, end) assembled one character at a time.
//
// The character-by-character concatenation is deliberate. Per the original
// author's notes, the aim is to obtain a string that does not keep the
// (potentially large) source text alive the way an engine-level sliced
// string can, and to do so more cheaply than a Buffer round trip for typical
// identifier lengths. Do not "simplify" this to text.slice().
// Positions outside the string contribute nothing (charAt yields '').
function detachSubstring(text, start, end) {
    let detached = '';
    let index = start;
    while (index < end) {
        detached += text.charAt(index);
        index += 1;
    }
    return detached;
}
|
|
271
|
+
// Returns text[start, end) with every underscore removed, without first
// materializing the range as an intermediate substring.
//
// When the range contains no underscore the result is a plain slice. The
// range is clamped to the end of `text` so that both paths agree when `end`
// overshoots: previously the underscore path indexed past the string and
// appended the literal "undefined" for each out-of-range position, while the
// slice path silently clamped.
function removeUnderscoresFromRange(text, start, end) {
    const limit = Math.min(end, text.length);
    let firstUnderscoreIndex = -1;
    for (let i = start; i < limit; i++) {
        if (text.charCodeAt(i) === 95 /* Char.Underscore */) {
            firstUnderscoreIndex = i;
            break;
        }
    }
    if (firstUnderscoreIndex < 0) {
        return text.slice(start, limit);
    }
    // Everything before the first underscore can be copied in one go; only
    // the tail needs per-character filtering.
    let result = text.slice(start, firstUnderscoreIndex);
    for (let i = firstUnderscoreIndex + 1; i < limit; i++) {
        if (text.charCodeAt(i) !== 95 /* Char.Underscore */) {
            result += text[i];
        }
    }
    return result;
}
|
|
292
|
+
// Hand-written stand-in for magicsRegEx = /\\\s*$/.
// Reports whether the range [start, end) of `text` ends with a backslash,
// optionally followed by trailing blanks. Only space, tab and form feed are
// treated as blanks here; any other character ends the backward scan.
function endsWithBackslashContinuation(text, start, end) {
    for (let index = end - 1; index >= start; index--) {
        const code = text.charCodeAt(index);
        const isTrailingBlank = code === 32 /* Char.Space */ || code === 9 /* Char.Tab */ || code === 12 /* Char.FormFeed */;
        if (!isTrailingBlank) {
            // First non-blank character from the right decides the answer.
            return code === 92 /* Char.Backslash */;
        }
    }
    // Empty range, or nothing but blanks.
    return false;
}
|
|
309
|
+
// Parses a bracketed rule list. `pos` must be the index of the opening '['.
//
// Returns { content, newPos } where `content` is the text between the
// brackets and `newPos` is the index just past the closing ']'. Returns
// undefined when no ']' is found before `rangeEnd`, or when a character
// outside the permitted set appears first. Permitted characters are ASCII
// letters, digits, underscore, hyphen, comma, space and tab, plus ':' when
// `allowColon` is truthy (used for tool-namespaced codes in "type: ignore").
function parseIgnoreBracketContent(text, pos, rangeEnd, allowColon) {
    const contentStart = pos + 1; // step over '['
    for (let cursor = contentStart; cursor < rangeEnd; cursor++) {
        const code = text.charCodeAt(cursor);
        if (code === 93 /* Char.CloseBracket */) {
            return { content: text.slice(contentStart, cursor), newPos: cursor + 1 };
        }
        const isWordChar = (code >= 97 /* Char.a */ && code <= 122 /* Char.z */) ||
            (code >= 65 /* Char.A */ && code <= 90 /* Char.Z */) ||
            (code >= 48 /* Char._0 */ && code <= 57 /* Char._9 */) ||
            code === 95 /* Char.Underscore */;
        const isSeparator = code === 45 /* Char.Hyphen */ ||
            code === 44 /* Char.Comma */ ||
            code === 32 /* Char.Space */ ||
            code === 9 /* Char.Tab */;
        const isNamespaceColon = allowColon && code === 58 /* Char.Colon */;
        if (!(isWordChar || isSeparator || isNamespaceColon)) {
            // Invalid character before any closing bracket: malformed.
            return undefined;
        }
    }
    // Ran out of range without seeing ']'.
    return undefined;
}
|
|
340
|
+
// Hand-written stand-in for typeIgnoreCommentRegEx / pyrightIgnoreCommentRegEx.
//
// Searches text[rangeStart, rangeEnd) for "<directive>: ignore", optionally
// followed by a bracketed rule list, where `directive` is 'type' or 'pyright'.
// A candidate only counts when:
//   - it is preceded (ignoring spaces/tabs) by '#' or by the start of the range;
//   - the directive is immediately followed by ':' and then, after optional
//     spaces/tabs, the word "ignore";
//   - "ignore" is followed by the end of the range, a space/tab, or '['; and
//   - any rule list that follows is well formed (see parseIgnoreBracketContent).
// Brackets may contain ':' only for the 'type' directive.
//
// Returns { fullMatch, prefix, bracketContent, index } for the first valid
// candidate, where `index` is the absolute offset in `text` at which the
// match (including its prefix) begins and `bracketContent` is undefined when
// there is no rule list. Returns undefined when nothing matches.
//
// The scan is bounded by rangeEnd on purpose: an unbounded indexOf could run
// far past the current comment when the keyword only occurs later in the file.
function matchIgnoreDirective(text, rangeStart, rangeEnd, directive) {
    const ignoreWord = 'ignore';
    const colonAllowedInRules = directive === 'type';
    const directiveHead = directive.charCodeAt(0);
    const lastCandidateStart = rangeEnd - directive.length;
    const isBlankAt = (index) => {
        const code = text.charCodeAt(index);
        return code === 32 /* Char.Space */ || code === 9 /* Char.Tab */;
    };
    // True when literal[from..] appears in `text` beginning at index + from.
    const hasLiteralAt = (literal, index, from) => {
        for (let k = from; k < literal.length; k++) {
            if (text.charCodeAt(index + k) !== literal.charCodeAt(k)) {
                return false;
            }
        }
        return true;
    };
    for (let directiveAt = rangeStart; directiveAt <= lastCandidateStart; directiveAt++) {
        // Locate the next occurrence of the directive keyword.
        if (text.charCodeAt(directiveAt) !== directiveHead || !hasLiteralAt(directive, directiveAt, 1)) {
            continue;
        }
        // Anchor: walking back over blanks must reach '#' or the range start.
        let back = directiveAt - 1;
        while (back >= rangeStart && isBlankAt(back)) {
            back--;
        }
        let matchStart;
        if (back < rangeStart) {
            matchStart = rangeStart;
        }
        else if (text.charCodeAt(back) === 35 /* Char.Hash */) {
            matchStart = back;
        }
        else {
            continue;
        }
        // ':' must follow the directive directly.
        let cursor = directiveAt + directive.length;
        if (cursor >= rangeEnd || text.charCodeAt(cursor) !== 58 /* Char.Colon */) {
            continue;
        }
        cursor++;
        while (cursor < rangeEnd && isBlankAt(cursor)) {
            cursor++;
        }
        // Then the word "ignore", fully inside the range.
        if (cursor + ignoreWord.length > rangeEnd || !hasLiteralAt(ignoreWord, cursor, 0)) {
            continue;
        }
        cursor += ignoreWord.length;
        let bracketContent;
        if (cursor < rangeEnd) {
            const afterIgnore = text.charCodeAt(cursor);
            const blankFollows = afterIgnore === 32 /* Char.Space */ || afterIgnore === 9 /* Char.Tab */;
            if (!blankFollows && afterIgnore !== 91 /* Char.OpenBracket */) {
                // e.g. "ignored" -- neither blank nor bracket follows.
                continue;
            }
            // Skip blanks before an optional rule list (no-op when '[' is
            // directly adjacent to "ignore").
            while (cursor < rangeEnd && isBlankAt(cursor)) {
                cursor++;
            }
            if (cursor < rangeEnd && text.charCodeAt(cursor) === 91 /* Char.OpenBracket */) {
                const parsed = parseIgnoreBracketContent(text, cursor, rangeEnd, colonAllowedInRules);
                if (parsed === undefined) {
                    continue;
                }
                bracketContent = parsed.content;
                cursor = parsed.newPos;
            }
        }
        return {
            fullMatch: text.slice(matchStart, cursor),
            prefix: text.slice(matchStart, directiveAt),
            bracketContent,
            index: matchStart,
        };
    }
    return undefined;
}
|
|
117
479
|
var MagicsKind;
|
|
118
480
|
(function (MagicsKind) {
|
|
119
481
|
MagicsKind[MagicsKind["None"] = 0] = "None";
|
|
@@ -128,6 +490,10 @@ class Tokenizer {
|
|
|
128
490
|
this._parenDepth = 0;
|
|
129
491
|
this._lineRanges = [];
|
|
130
492
|
this._indentAmounts = [];
|
|
493
|
+
// Cached answer to "are there any non-trivial tokens yet?" Once true it
|
|
494
|
+
// stays true, so the O(n) scan in _handleComment only runs while the token
|
|
495
|
+
// stream consists purely of NewLine / Indent tokens.
|
|
496
|
+
this._hasTokenBeforeIgnoreAll = false;
|
|
131
497
|
this._typeIgnoreLines = new Map();
|
|
132
498
|
this._pyrightIgnoreLines = new Map();
|
|
133
499
|
this._fStringStack = [];
|
|
@@ -150,10 +516,7 @@ class Tokenizer {
|
|
|
150
516
|
this._doubleQuoteCount = 0;
|
|
151
517
|
// Assume Jupyter notebook tokenization rules?
|
|
152
518
|
this._useNotebookMode = false;
|
|
153
|
-
|
|
154
|
-
// per-identifier allocations while still ensuring we don't retain substrings
|
|
155
|
-
// that reference the original source text.
|
|
156
|
-
this._identifierInternedStrings = new Map();
|
|
519
|
+
this._identifierCache = new Array(Tokenizer._identifierCacheSize);
|
|
157
520
|
}
|
|
158
521
|
tokenize(text, start, length, initialParenDepth = 0, useNotebookMode = false) {
|
|
159
522
|
if (start === undefined) {
|
|
@@ -179,7 +542,8 @@ class Tokenizer {
|
|
|
179
542
|
this._lineRanges = [];
|
|
180
543
|
this._indentAmounts = [];
|
|
181
544
|
this._useNotebookMode = useNotebookMode;
|
|
182
|
-
|
|
545
|
+
// Clear per-source identifier intern cache.
|
|
546
|
+
this._identifierCache.fill(undefined);
|
|
183
547
|
const end = start + length;
|
|
184
548
|
if (start === 0) {
|
|
185
549
|
this._readIndentationAfterNewLine();
|
|
@@ -315,19 +679,22 @@ class Tokenizer {
|
|
|
315
679
|
// tokens onto the token list. Returns true if the caller should advance
|
|
316
680
|
// to the next character.
|
|
317
681
|
_handleCharacter() {
|
|
318
|
-
// f-strings, b-strings, etc
|
|
319
|
-
const
|
|
320
|
-
if (
|
|
321
|
-
|
|
322
|
-
if (stringPrefixLength
|
|
323
|
-
stringPrefix =
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
this.
|
|
330
|
-
|
|
682
|
+
// f-strings, b-strings, etc — only check if current char can start a string
|
|
683
|
+
const currentChar = this._cs.currentChar;
|
|
684
|
+
if (currentChar < 128 && _canStartString[currentChar]) {
|
|
685
|
+
const stringPrefixLength = this._getStringPrefixLength();
|
|
686
|
+
if (stringPrefixLength >= 0) {
|
|
687
|
+
let stringPrefix = '';
|
|
688
|
+
if (stringPrefixLength > 0) {
|
|
689
|
+
stringPrefix = this._cs.getText().slice(this._cs.position, this._cs.position + stringPrefixLength);
|
|
690
|
+
// Indeed a string
|
|
691
|
+
this._cs.advance(stringPrefixLength);
|
|
692
|
+
}
|
|
693
|
+
const quoteTypeFlags = this._getQuoteTypeFlags(stringPrefix);
|
|
694
|
+
if (quoteTypeFlags !== 0 /* StringTokenFlags.None */) {
|
|
695
|
+
this._handleString(quoteTypeFlags, stringPrefixLength);
|
|
696
|
+
return true;
|
|
697
|
+
}
|
|
331
698
|
}
|
|
332
699
|
}
|
|
333
700
|
if (this._cs.currentChar === 35 /* Char.Hash */) {
|
|
@@ -657,48 +1024,105 @@ class Tokenizer {
|
|
|
657
1024
|
}
|
|
658
1025
|
}
|
|
659
1026
|
_tryIdentifier() {
|
|
660
|
-
const
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
1027
|
+
const cs = this._cs;
|
|
1028
|
+
const text = cs.getText();
|
|
1029
|
+
const textLen = text.length;
|
|
1030
|
+
const start = cs.position;
|
|
1031
|
+
// Fast path for ASCII identifier start. Avoids the function call and
|
|
1032
|
+
// surrogate logic for the common case (Python source is overwhelmingly
|
|
1033
|
+
// ASCII identifiers).
|
|
1034
|
+
const firstChar = cs.currentChar;
|
|
1035
|
+
let pos = start;
|
|
1036
|
+
if (firstChar < 128) {
|
|
1037
|
+
if (!_asciiIdentifierStart[firstChar]) {
|
|
1038
|
+
// Not an identifier start and not a surrogate candidate.
|
|
1039
|
+
return false;
|
|
1040
|
+
}
|
|
1041
|
+
pos++;
|
|
1042
|
+
// Tight loop: advance while we're still in ASCII identifier chars.
|
|
1043
|
+
while (pos < textLen) {
|
|
1044
|
+
const ch = text.charCodeAt(pos);
|
|
1045
|
+
if (ch < 128 && _asciiIdentifierContinue[ch]) {
|
|
1046
|
+
pos++;
|
|
668
1047
|
}
|
|
669
1048
|
else {
|
|
670
1049
|
break;
|
|
671
1050
|
}
|
|
672
1051
|
}
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
1052
|
+
// If we hit a non-ASCII char, fall back to the generic loop to
|
|
1053
|
+
// handle possible unicode identifier continue / surrogate pairs.
|
|
1054
|
+
if (pos < textLen && text.charCodeAt(pos) >= 128) {
|
|
1055
|
+
cs.advance(pos - start);
|
|
1056
|
+
this._swallowNonAsciiIdentifierChars();
|
|
1057
|
+
pos = cs.position;
|
|
1058
|
+
}
|
|
1059
|
+
else {
|
|
1060
|
+
cs.advance(pos - start);
|
|
1061
|
+
}
|
|
678
1062
|
}
|
|
679
|
-
else
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
1063
|
+
else {
|
|
1064
|
+
// Non-ASCII start: use the generic path (supports surrogates).
|
|
1065
|
+
if ((0, characters_1.isIdentifierStartChar)(firstChar)) {
|
|
1066
|
+
cs.moveNext();
|
|
1067
|
+
}
|
|
1068
|
+
else if ((0, characters_1.isIdentifierStartChar)(firstChar, cs.nextChar)) {
|
|
1069
|
+
cs.moveNext();
|
|
1070
|
+
cs.moveNext();
|
|
1071
|
+
}
|
|
1072
|
+
else {
|
|
1073
|
+
return false;
|
|
1074
|
+
}
|
|
1075
|
+
this._swallowNonAsciiIdentifierChars();
|
|
1076
|
+
pos = cs.position;
|
|
683
1077
|
}
|
|
684
|
-
if (
|
|
685
|
-
const
|
|
686
|
-
const
|
|
1078
|
+
if (pos > start) {
|
|
1079
|
+
const end = pos;
|
|
1080
|
+
const length = end - start;
|
|
1081
|
+
const keywordType = getKeywordTypeFromTextSlice(text, start, length);
|
|
687
1082
|
if (keywordType !== undefined) {
|
|
688
|
-
this._tokens.push(tokenizerTypes_1.KeywordToken.create(start,
|
|
1083
|
+
this._tokens.push(tokenizerTypes_1.KeywordToken.create(start, length, keywordType, this._getComments()));
|
|
689
1084
|
}
|
|
690
1085
|
else {
|
|
691
|
-
const
|
|
692
|
-
this._tokens.push(tokenizerTypes_1.IdentifierToken.create(start,
|
|
1086
|
+
const value = this._internIdentifier(text, start, end, length);
|
|
1087
|
+
this._tokens.push(tokenizerTypes_1.IdentifierToken.create(start, length, value, this._getComments()));
|
|
693
1088
|
}
|
|
694
1089
|
return true;
|
|
695
1090
|
}
|
|
696
1091
|
return false;
|
|
697
1092
|
}
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
1093
|
+
// Per-tokenize identifier intern cache. Direct-mapped, so collisions
|
|
1094
|
+
// simply replace the slot. Common identifiers (self, cls, True, None,
|
|
1095
|
+
// str, int, dict, etc.) get deduplicated to a single string object,
|
|
1096
|
+
// avoiding repeated detachSubstring allocations for the same name.
|
|
1097
|
+
_internIdentifier(text, start, end, length) {
|
|
1098
|
+
const firstChar = text.charCodeAt(start);
|
|
1099
|
+
const lastChar = text.charCodeAt(end - 1);
|
|
1100
|
+
// Hash mixes length, first and last char; multiplier values chosen
|
|
1101
|
+
// to spread hits for common short identifiers across the table.
|
|
1102
|
+
const hash = (firstChar * 31 + lastChar * 7 + length) & Tokenizer._identifierCacheMask;
|
|
1103
|
+
const cached = this._identifierCache[hash];
|
|
1104
|
+
if (cached !== undefined && cached.length === length && text.startsWith(cached, start)) {
|
|
1105
|
+
return cached;
|
|
1106
|
+
}
|
|
1107
|
+
const value = detachSubstring(text, start, end);
|
|
1108
|
+
this._identifierCache[hash] = value;
|
|
1109
|
+
return value;
|
|
1110
|
+
}
|
|
1111
|
+
// Generic identifier-continue loop that handles unicode + surrogate pairs.
|
|
1112
|
+
// Falls back to this when the fast ASCII loop encounters a non-ASCII char.
|
|
1113
|
+
_swallowNonAsciiIdentifierChars() {
|
|
1114
|
+
while (true) {
|
|
1115
|
+
if ((0, characters_1.isIdentifierChar)(this._cs.currentChar)) {
|
|
1116
|
+
this._cs.moveNext();
|
|
1117
|
+
}
|
|
1118
|
+
else if ((0, characters_1.isIdentifierChar)(this._cs.currentChar, this._cs.nextChar)) {
|
|
1119
|
+
this._cs.moveNext();
|
|
1120
|
+
this._cs.moveNext();
|
|
1121
|
+
}
|
|
1122
|
+
else {
|
|
1123
|
+
break;
|
|
1124
|
+
}
|
|
1125
|
+
}
|
|
702
1126
|
}
|
|
703
1127
|
_isPossibleNumber() {
|
|
704
1128
|
if ((0, characters_1.isDecimal)(this._cs.currentChar)) {
|
|
@@ -743,8 +1167,9 @@ class Tokenizer {
|
|
|
743
1167
|
radix = 8;
|
|
744
1168
|
}
|
|
745
1169
|
if (radix > 0) {
|
|
746
|
-
const
|
|
747
|
-
const
|
|
1170
|
+
const end = this._cs.position;
|
|
1171
|
+
const text = this._cs.getText();
|
|
1172
|
+
const simpleIntText = removeUnderscoresFromRange(text, start, end);
|
|
748
1173
|
let intValue = parseInt(simpleIntText.slice(leadingChars), radix);
|
|
749
1174
|
if (!isNaN(intValue)) {
|
|
750
1175
|
const bigIntValue = BigInt(simpleIntText);
|
|
@@ -753,7 +1178,7 @@ class Tokenizer {
|
|
|
753
1178
|
intValue > Number.MAX_SAFE_INTEGER) {
|
|
754
1179
|
intValue = bigIntValue;
|
|
755
1180
|
}
|
|
756
|
-
this._tokens.push(tokenizerTypes_1.NumberToken.create(start,
|
|
1181
|
+
this._tokens.push(tokenizerTypes_1.NumberToken.create(start, end - start, intValue, true, false, this._getComments()));
|
|
757
1182
|
return true;
|
|
758
1183
|
}
|
|
759
1184
|
}
|
|
@@ -787,11 +1212,13 @@ class Tokenizer {
|
|
|
787
1212
|
(this._cs.currentChar < 49 /* Char._1 */ || this._cs.currentChar > 57 /* Char._9 */);
|
|
788
1213
|
}
|
|
789
1214
|
if (isDecimalInteger) {
|
|
790
|
-
|
|
791
|
-
const
|
|
1215
|
+
const textEnd = this._cs.position;
|
|
1216
|
+
const sourceText = this._cs.getText();
|
|
1217
|
+
const simpleIntText = removeUnderscoresFromRange(sourceText, start, textEnd);
|
|
792
1218
|
let intValue = parseInt(simpleIntText, 10);
|
|
793
1219
|
if (!isNaN(intValue)) {
|
|
794
1220
|
let isImaginary = false;
|
|
1221
|
+
let tokenLength = textEnd - start;
|
|
795
1222
|
const bigIntValue = BigInt(simpleIntText);
|
|
796
1223
|
if (!isFinite(intValue) ||
|
|
797
1224
|
bigIntValue < Number.MIN_SAFE_INTEGER ||
|
|
@@ -800,10 +1227,10 @@ class Tokenizer {
|
|
|
800
1227
|
}
|
|
801
1228
|
if (this._cs.currentChar === 106 /* Char.j */ || this._cs.currentChar === 74 /* Char.J */) {
|
|
802
1229
|
isImaginary = true;
|
|
803
|
-
text += String.fromCharCode(this._cs.currentChar);
|
|
804
1230
|
this._cs.moveNext();
|
|
1231
|
+
tokenLength += 1;
|
|
805
1232
|
}
|
|
806
|
-
this._tokens.push(tokenizerTypes_1.NumberToken.create(start,
|
|
1233
|
+
this._tokens.push(tokenizerTypes_1.NumberToken.create(start, tokenLength, intValue, true, isImaginary, this._getComments()));
|
|
807
1234
|
return true;
|
|
808
1235
|
}
|
|
809
1236
|
}
|
|
@@ -812,16 +1239,18 @@ class Tokenizer {
|
|
|
812
1239
|
if (mightBeFloatingPoint ||
|
|
813
1240
|
(this._cs.currentChar === 46 /* Char.Period */ && this._cs.nextChar >= 48 /* Char._0 */ && this._cs.nextChar <= 57 /* Char._9 */)) {
|
|
814
1241
|
if (this._skipFloatingPointCandidate()) {
|
|
815
|
-
|
|
816
|
-
const
|
|
1242
|
+
const floatEnd = this._cs.position;
|
|
1243
|
+
const floatText = removeUnderscoresFromRange(this._cs.getText(), start, floatEnd);
|
|
1244
|
+
const value = parseFloat(floatText);
|
|
817
1245
|
if (!isNaN(value)) {
|
|
818
1246
|
let isImaginary = false;
|
|
1247
|
+
let tokenLength = floatEnd - start;
|
|
819
1248
|
if (this._cs.currentChar === 106 /* Char.j */ || this._cs.currentChar === 74 /* Char.J */) {
|
|
820
1249
|
isImaginary = true;
|
|
821
|
-
text += String.fromCharCode(this._cs.currentChar);
|
|
822
1250
|
this._cs.moveNext();
|
|
1251
|
+
tokenLength += 1;
|
|
823
1252
|
}
|
|
824
|
-
this._tokens.push(tokenizerTypes_1.NumberToken.create(start,
|
|
1253
|
+
this._tokens.push(tokenizerTypes_1.NumberToken.create(start, tokenLength, value, false, isImaginary, this._getComments()));
|
|
825
1254
|
return true;
|
|
826
1255
|
}
|
|
827
1256
|
}
|
|
@@ -830,122 +1259,63 @@ class Tokenizer {
|
|
|
830
1259
|
return false;
|
|
831
1260
|
}
|
|
832
1261
|
_tryOperator() {
|
|
1262
|
+
const currentChar = this._cs.currentChar;
|
|
833
1263
|
let length = 0;
|
|
834
1264
|
const nextChar = this._cs.nextChar;
|
|
835
1265
|
let operatorType;
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
break;
|
|
861
|
-
}
|
|
862
|
-
length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
|
|
863
|
-
operatorType = length === 2 ? 12 /* OperatorType.Equals */ : 2 /* OperatorType.Assign */;
|
|
864
|
-
break;
|
|
865
|
-
case 33 /* Char.ExclamationMark */:
|
|
866
|
-
if (nextChar !== 61 /* Char.Equal */) {
|
|
867
|
-
if (this._activeFString) {
|
|
868
|
-
// Handle the conversion separator (!) within an f-string.
|
|
869
|
-
this._tokens.push(tokenizerTypes_1.Token.create(23 /* TokenType.ExclamationMark */, this._cs.position, 1, this._getComments()));
|
|
870
|
-
this._cs.advance(1);
|
|
871
|
-
return true;
|
|
872
|
-
}
|
|
873
|
-
return false;
|
|
874
|
-
}
|
|
875
|
-
length = 2;
|
|
876
|
-
operatorType = 28 /* OperatorType.NotEquals */;
|
|
877
|
-
break;
|
|
878
|
-
case 37 /* Char.Percent */:
|
|
879
|
-
length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
|
|
880
|
-
operatorType = length === 2 ? 25 /* OperatorType.ModEqual */ : 24 /* OperatorType.Mod */;
|
|
881
|
-
break;
|
|
882
|
-
case 126 /* Char.Tilde */:
|
|
883
|
-
length = 1;
|
|
884
|
-
operatorType = 5 /* OperatorType.BitwiseInvert */;
|
|
885
|
-
break;
|
|
886
|
-
case 45 /* Char.Hyphen */:
|
|
887
|
-
if (nextChar === 62 /* Char.Greater */) {
|
|
888
|
-
this._tokens.push(tokenizerTypes_1.Token.create(21 /* TokenType.Arrow */, this._cs.position, 2, this._getComments()));
|
|
889
|
-
this._cs.advance(2);
|
|
1266
|
+
if (currentChar < 128 && nextChar < 128) {
|
|
1267
|
+
const twoCharKey = (currentChar << 8) | nextChar;
|
|
1268
|
+
const specialTokenType = _twoCharSpecialTokenTypeMap.get(twoCharKey);
|
|
1269
|
+
if (specialTokenType !== undefined) {
|
|
1270
|
+
this._tokens.push(tokenizerTypes_1.Token.create(specialTokenType, this._cs.position, 2, this._getComments()));
|
|
1271
|
+
this._cs.advance(2);
|
|
1272
|
+
return true;
|
|
1273
|
+
}
|
|
1274
|
+
const twoCharOperatorType = _twoCharOperatorTypeMap.get(twoCharKey);
|
|
1275
|
+
if (twoCharOperatorType !== undefined) {
|
|
1276
|
+
this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, 2, twoCharOperatorType, this._getComments()));
|
|
1277
|
+
this._cs.advance(2);
|
|
1278
|
+
return true;
|
|
1279
|
+
}
|
|
1280
|
+
if (currentChar === nextChar) {
|
|
1281
|
+
const repeatedOperatorType = _repeatedCharOperatorTypeTable[currentChar];
|
|
1282
|
+
if (repeatedOperatorType !== _unsetSingleCharOperatorType) {
|
|
1283
|
+
const hasTrailingEqual = this._cs.lookAhead(2) === 61 /* Char.Equal */;
|
|
1284
|
+
const repeatedLength = hasTrailingEqual ? 3 : 2;
|
|
1285
|
+
const operatorType = hasTrailingEqual
|
|
1286
|
+
? _repeatedCharEqualOperatorTypeTable[currentChar]
|
|
1287
|
+
: repeatedOperatorType;
|
|
1288
|
+
this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, repeatedLength, operatorType, this._getComments()));
|
|
1289
|
+
this._cs.advance(repeatedLength);
|
|
890
1290
|
return true;
|
|
891
1291
|
}
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
}
|
|
900
|
-
else {
|
|
901
|
-
length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
|
|
902
|
-
operatorType = length === 2 ? 27 /* OperatorType.MultiplyEqual */ : 26 /* OperatorType.Multiply */;
|
|
903
|
-
}
|
|
904
|
-
break;
|
|
905
|
-
case 47 /* Char.Slash */:
|
|
906
|
-
if (nextChar === 47 /* Char.Slash */) {
|
|
907
|
-
length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
|
|
908
|
-
operatorType = length === 3 ? 14 /* OperatorType.FloorDivideEqual */ : 13 /* OperatorType.FloorDivide */;
|
|
909
|
-
}
|
|
910
|
-
else {
|
|
911
|
-
length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
|
|
912
|
-
operatorType = length === 2 ? 11 /* OperatorType.DivideEqual */ : 10 /* OperatorType.Divide */;
|
|
913
|
-
}
|
|
914
|
-
break;
|
|
915
|
-
case 60 /* Char.Less */:
|
|
916
|
-
if (nextChar === 60 /* Char.Less */) {
|
|
917
|
-
length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
|
|
918
|
-
operatorType = length === 3 ? 18 /* OperatorType.LeftShiftEqual */ : 17 /* OperatorType.LeftShift */;
|
|
919
|
-
}
|
|
920
|
-
else if (nextChar === 62 /* Char.Greater */) {
|
|
1292
|
+
}
|
|
1293
|
+
}
|
|
1294
|
+
if (currentChar < 128) {
|
|
1295
|
+
const singleCharOperatorType = _singleCharOperatorTypeTable[currentChar];
|
|
1296
|
+
if (singleCharOperatorType !== _unsetSingleCharOperatorType) {
|
|
1297
|
+
const equalOperatorType = _singleCharEqualOperatorTypeTable[currentChar];
|
|
1298
|
+
if (nextChar === 61 /* Char.Equal */ && equalOperatorType !== _unsetSingleCharOperatorType) {
|
|
921
1299
|
length = 2;
|
|
922
|
-
operatorType =
|
|
1300
|
+
operatorType = equalOperatorType;
|
|
923
1301
|
}
|
|
924
1302
|
else {
|
|
925
|
-
length =
|
|
926
|
-
operatorType =
|
|
927
|
-
}
|
|
928
|
-
break;
|
|
929
|
-
case 62 /* Char.Greater */:
|
|
930
|
-
if (nextChar === 62 /* Char.Greater */) {
|
|
931
|
-
length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
|
|
932
|
-
operatorType = length === 3 ? 32 /* OperatorType.RightShiftEqual */ : 31 /* OperatorType.RightShift */;
|
|
933
|
-
}
|
|
934
|
-
else {
|
|
935
|
-
length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
|
|
936
|
-
operatorType = length === 2 ? 16 /* OperatorType.GreaterThanOrEqual */ : 15 /* OperatorType.GreaterThan */;
|
|
1303
|
+
length = 1;
|
|
1304
|
+
operatorType = singleCharOperatorType;
|
|
937
1305
|
}
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
break;
|
|
943
|
-
default:
|
|
944
|
-
return false;
|
|
1306
|
+
this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, length, operatorType, this._getComments()));
|
|
1307
|
+
this._cs.advance(length);
|
|
1308
|
+
return true;
|
|
1309
|
+
}
|
|
945
1310
|
}
|
|
946
|
-
|
|
947
|
-
this.
|
|
948
|
-
|
|
1311
|
+
// `!=` is handled by the 2-char fast path above.
|
|
1312
|
+
if (currentChar === 33 /* Char.ExclamationMark */ && this._activeFString) {
|
|
1313
|
+
// Handle the conversion separator (!) within an f-string.
|
|
1314
|
+
this._tokens.push(tokenizerTypes_1.Token.create(23 /* TokenType.ExclamationMark */, this._cs.position, 1, this._getComments()));
|
|
1315
|
+
this._cs.advance(1);
|
|
1316
|
+
return true;
|
|
1317
|
+
}
|
|
1318
|
+
return false;
|
|
949
1319
|
}
|
|
950
1320
|
_handleInvalid() {
|
|
951
1321
|
const start = this._cs.position;
|
|
@@ -994,16 +1364,15 @@ class Tokenizer {
|
|
|
994
1364
|
}
|
|
995
1365
|
_handleIPythonMagics(type) {
|
|
996
1366
|
const start = this._cs.position + 1;
|
|
1367
|
+
const sourceText = this._cs.getText();
|
|
997
1368
|
let begin = start;
|
|
998
1369
|
while (true) {
|
|
999
1370
|
this._cs.skipToEol();
|
|
1000
1371
|
if (type === 1 /* CommentType.IPythonMagic */ || type === 2 /* CommentType.IPythonShellEscape */) {
|
|
1001
|
-
const length = this._cs.position - begin;
|
|
1002
|
-
const value = this._cs.getText().slice(begin, begin + length);
|
|
1003
1372
|
// is it multiline magics?
|
|
1004
1373
|
// %magic command \
|
|
1005
1374
|
// next arguments
|
|
1006
|
-
if (!
|
|
1375
|
+
if (!endsWithBackslashContinuation(sourceText, begin, this._cs.position)) {
|
|
1007
1376
|
break;
|
|
1008
1377
|
}
|
|
1009
1378
|
}
|
|
@@ -1014,55 +1383,74 @@ class Tokenizer {
|
|
|
1014
1383
|
}
|
|
1015
1384
|
}
|
|
1016
1385
|
const length = this._cs.position - start;
|
|
1017
|
-
const comment = tokenizerTypes_1.Comment.create(start, length,
|
|
1386
|
+
const comment = tokenizerTypes_1.Comment.create(start, length, sourceText.slice(start, start + length), type);
|
|
1018
1387
|
this._addComments(comment);
|
|
1019
1388
|
}
|
|
1020
1389
|
_handleComment() {
|
|
1021
1390
|
const start = this._cs.position + 1;
|
|
1022
1391
|
this._cs.skipToEol();
|
|
1023
1392
|
const length = this._cs.position - start;
|
|
1024
|
-
const
|
|
1025
|
-
const
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1393
|
+
const sourceText = this._cs.getText();
|
|
1394
|
+
const end = start + length;
|
|
1395
|
+
// Fast pre-filter: any ignore directive must contain the substring 'ignore'.
|
|
1396
|
+
// indexOf is a highly-optimized native call and lets us skip the full
|
|
1397
|
+
// directive scan for the vast majority of comments (which are free-form text).
|
|
1398
|
+
const ignoreIdx = sourceText.indexOf('ignore', start);
|
|
1399
|
+
if (ignoreIdx >= 0 && ignoreIdx < end) {
|
|
1400
|
+
const typeIgnoreMatch = matchIgnoreDirective(sourceText, start, end, 'type');
|
|
1401
|
+
if (typeIgnoreMatch) {
|
|
1402
|
+
const commentStart = typeIgnoreMatch.index;
|
|
1403
|
+
const textRange = {
|
|
1404
|
+
start: commentStart + typeIgnoreMatch.prefix.length,
|
|
1405
|
+
length: typeIgnoreMatch.fullMatch.length - typeIgnoreMatch.prefix.length,
|
|
1406
|
+
};
|
|
1407
|
+
const ignoreComment = {
|
|
1408
|
+
range: textRange,
|
|
1409
|
+
rulesList: this._getIgnoreCommentRulesList(commentStart, typeIgnoreMatch),
|
|
1410
|
+
};
|
|
1411
|
+
let isIgnoreAll = false;
|
|
1412
|
+
if (!this._hasTokenBeforeIgnoreAll) {
|
|
1413
|
+
// Are there any tokens other than NewLine / Indent yet?
|
|
1414
|
+
const hasOther = this._tokens.some((t) => t && t.type !== 2 /* TokenType.NewLine */ && t.type !== 3 /* TokenType.Indent */);
|
|
1415
|
+
if (hasOther) {
|
|
1416
|
+
this._hasTokenBeforeIgnoreAll = true;
|
|
1417
|
+
}
|
|
1418
|
+
else {
|
|
1419
|
+
isIgnoreAll = true;
|
|
1420
|
+
}
|
|
1421
|
+
}
|
|
1422
|
+
if (isIgnoreAll) {
|
|
1423
|
+
this._typeIgnoreAll = ignoreComment;
|
|
1424
|
+
}
|
|
1425
|
+
else {
|
|
1426
|
+
this._typeIgnoreLines.set(this._lineRanges.length, ignoreComment);
|
|
1427
|
+
}
|
|
1038
1428
|
}
|
|
1039
|
-
|
|
1040
|
-
|
|
1429
|
+
const pyrightIgnoreMatch = matchIgnoreDirective(sourceText, start, end, 'pyright');
|
|
1430
|
+
if (pyrightIgnoreMatch) {
|
|
1431
|
+
const commentStart = pyrightIgnoreMatch.index;
|
|
1432
|
+
const textRange = {
|
|
1433
|
+
start: commentStart + pyrightIgnoreMatch.prefix.length,
|
|
1434
|
+
length: pyrightIgnoreMatch.fullMatch.length - pyrightIgnoreMatch.prefix.length,
|
|
1435
|
+
};
|
|
1436
|
+
const ignoreComment = {
|
|
1437
|
+
range: textRange,
|
|
1438
|
+
rulesList: this._getIgnoreCommentRulesList(commentStart, pyrightIgnoreMatch),
|
|
1439
|
+
};
|
|
1440
|
+
this._pyrightIgnoreLines.set(this._lineRanges.length, ignoreComment);
|
|
1041
1441
|
}
|
|
1042
1442
|
}
|
|
1043
|
-
const
|
|
1044
|
-
if (pyrightIgnoreRegexMatch) {
|
|
1045
|
-
const commentStart = start + (pyrightIgnoreRegexMatch.index ?? 0);
|
|
1046
|
-
const textRange = {
|
|
1047
|
-
start: commentStart + pyrightIgnoreRegexMatch[1].length,
|
|
1048
|
-
length: pyrightIgnoreRegexMatch[0].length - pyrightIgnoreRegexMatch[1].length,
|
|
1049
|
-
};
|
|
1050
|
-
const ignoreComment = {
|
|
1051
|
-
range: textRange,
|
|
1052
|
-
rulesList: this._getIgnoreCommentRulesList(commentStart, pyrightIgnoreRegexMatch),
|
|
1053
|
-
};
|
|
1054
|
-
this._pyrightIgnoreLines.set(this._lineRanges.length, ignoreComment);
|
|
1055
|
-
}
|
|
1443
|
+
const comment = tokenizerTypes_1.Comment.create(start, length, sourceText.slice(start, end));
|
|
1056
1444
|
this._addComments(comment);
|
|
1057
1445
|
}
|
|
1058
1446
|
// Extracts the individual rules within a "type: ignore [x, y, z]" comment.
|
|
1059
1447
|
_getIgnoreCommentRulesList(start, match) {
|
|
1060
|
-
if (match.
|
|
1448
|
+
if (match.bracketContent === undefined) {
|
|
1061
1449
|
return undefined;
|
|
1062
1450
|
}
|
|
1063
|
-
const splitElements = match
|
|
1451
|
+
const splitElements = match.bracketContent.split(',');
|
|
1064
1452
|
const commentRules = [];
|
|
1065
|
-
let currentOffset = start + match
|
|
1453
|
+
let currentOffset = start + match.fullMatch.indexOf('[') + 1;
|
|
1066
1454
|
for (const element of splitElements) {
|
|
1067
1455
|
const frontTrimmed = element.trimStart();
|
|
1068
1456
|
currentOffset += element.length - frontTrimmed.length;
|
|
@@ -1417,4 +1805,12 @@ class Tokenizer {
|
|
|
1417
1805
|
}
|
|
1418
1806
|
}
|
|
1419
1807
|
exports.Tokenizer = Tokenizer;
|
|
1808
|
+
// Direct-mapped identifier intern cache. Indexed by a cheap hash of
|
|
1809
|
+
// (firstChar, lastChar, length). On a hit (slot defined and string
|
|
1810
|
+
// equals the current source range), reuse the cached string instead of
|
|
1811
|
+
// re-allocating via detachSubstring. Collisions simply overwrite the
|
|
1812
|
+
// slot — no chaining, O(1) lookup, no Map overhead. Sized as a power of
|
|
1813
|
+
// two so the mask is a single AND.
|
|
1814
|
+
Tokenizer._identifierCacheSize = 2048;
|
|
1815
|
+
Tokenizer._identifierCacheMask = Tokenizer._identifierCacheSize - 1;
|
|
1420
1816
|
//# sourceMappingURL=tokenizer.js.map
|