@zzzen/pyright-internal 1.2.0-dev.20260422 → 1.2.0-dev.20260426

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -60,6 +60,53 @@ const _keywords = new Map([
60
60
  ['True', 33 /* KeywordType.True */],
61
61
  ]);
62
62
  const _softKeywords = new Set(['match', 'case', 'type']);
63
// Keyword pre-filter tables.
//
// _keywordFirstCharTable[code] is true when at least one entry of _keywords
// begins with the ASCII char code `code`. Together with the length bounds
// below it lets the tokenizer discard most identifiers without consulting
// the _keywords Map.
const _keywordFirstCharTable = (() => {
    const startsSomeKeyword = Array.from({ length: 128 }, () => false);
    _keywords.forEach((_type, keyword) => {
        const leadCode = keyword.charCodeAt(0);
        if (leadCode < 128) {
            startsSomeKeyword[leadCode] = true;
        }
    });
    return startsSomeKeyword;
})();
// Shortest and longest keyword lengths accepted by the keyword lookup.
const _keywordMinLen = 2;
const _keywordMaxLen = 9; // __debug__
// _keywordEntriesByFirstChar[code] lists the { text, type } keyword entries
// whose first char code is `code`; slots with no keyword stay unset. Callers
// can compare each candidate against the source text in place instead of
// allocating a substring just to probe the Map.
const _keywordEntriesByFirstChar = (() => {
    const buckets = new Array(128);
    _keywords.forEach((type, text) => {
        const leadCode = text.charCodeAt(0);
        if (!(leadCode < 128)) {
            return;
        }
        if (buckets[leadCode] === undefined) {
            buckets[leadCode] = [];
        }
        buckets[leadCode].push({ text, type });
    });
    return buckets;
})();
91
/**
 * Looks up the keyword type for the slice text[start, start + length)
 * without materializing the slice.
 *
 * @param {string} text Source text being tokenized.
 * @param {number} start Index of the first char of the candidate word.
 * @param {number} length Number of UTF-16 code units in the candidate word.
 * @returns The keyword type stored in the keyword tables, or undefined when
 *     the slice is not a keyword.
 */
function getKeywordTypeFromTextSlice(text, start, length) {
    // Cheap rejections first: wrong length, then a first char no keyword uses.
    if (length < _keywordMinLen || length > _keywordMaxLen) {
        return undefined;
    }
    const leadCode = text.charCodeAt(start);
    const mayStartKeyword = leadCode < 128 && _keywordFirstCharTable[leadCode];
    if (!mayStartKeyword) {
        return undefined;
    }
    // Compare in place against every keyword sharing this first char; the
    // length check guarantees startsWith() is an exact match, not a prefix.
    const hit = _keywordEntriesByFirstChar[leadCode]?.find(
        (entry) => entry.text.length === length && text.startsWith(entry.text, start)
    );
    return hit?.type;
}
63
110
  const _operatorInfo = {
64
111
  [0 /* OperatorType.Add */]: 1 /* OperatorFlags.Unary */ | 2 /* OperatorFlags.Binary */,
65
112
  [1 /* OperatorType.AddEqual */]: 4 /* OperatorFlags.Assignment */,
@@ -104,16 +151,331 @@ const _operatorInfo = {
104
151
  [41 /* OperatorType.In */]: 2 /* OperatorFlags.Binary */,
105
152
  [42 /* OperatorType.NotIn */]: 2 /* OperatorFlags.Binary */,
106
153
  };
154
// Sentinel stored in the operator lookup tables for char codes that do not
// map to any operator.
const _unsetSingleCharOperatorType = -1;
// Operator type for a one-character operator, indexed by ASCII char code.
const _singleCharOperatorTypeTable = (() => {
    const assignments = [
        [61 /* Char.Equal */, 2 /* OperatorType.Assign */],
        [43 /* Char.Plus */, 0 /* OperatorType.Add */],
        [45 /* Char.Hyphen */, 33 /* OperatorType.Subtract */],
        [42 /* Char.Asterisk */, 26 /* OperatorType.Multiply */],
        [47 /* Char.Slash */, 10 /* OperatorType.Divide */],
        [38 /* Char.Ampersand */, 3 /* OperatorType.BitwiseAnd */],
        [124 /* Char.Bar */, 6 /* OperatorType.BitwiseOr */],
        [94 /* Char.Caret */, 8 /* OperatorType.BitwiseXor */],
        [37 /* Char.Percent */, 24 /* OperatorType.Mod */],
        [126 /* Char.Tilde */, 5 /* OperatorType.BitwiseInvert */],
        [64 /* Char.At */, 22 /* OperatorType.MatrixMultiply */],
        [60 /* Char.Less */, 20 /* OperatorType.LessThan */],
        [62 /* Char.Greater */, 15 /* OperatorType.GreaterThan */],
    ];
    const table = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    for (const [charCode, operatorType] of assignments) {
        table[charCode] = operatorType;
    }
    return table;
})();
// Operator type for "<char>=" augmented-assignment operators, indexed by the
// ASCII char code of the character that precedes the '='.
const _singleCharEqualOperatorTypeTable = (() => {
    const assignments = [
        [43 /* Char.Plus */, 1 /* OperatorType.AddEqual */],
        [45 /* Char.Hyphen */, 34 /* OperatorType.SubtractEqual */],
        [42 /* Char.Asterisk */, 27 /* OperatorType.MultiplyEqual */],
        [47 /* Char.Slash */, 11 /* OperatorType.DivideEqual */],
        [38 /* Char.Ampersand */, 4 /* OperatorType.BitwiseAndEqual */],
        [124 /* Char.Bar */, 7 /* OperatorType.BitwiseOrEqual */],
        [94 /* Char.Caret */, 9 /* OperatorType.BitwiseXorEqual */],
        [37 /* Char.Percent */, 25 /* OperatorType.ModEqual */],
        [64 /* Char.At */, 23 /* OperatorType.MatrixMultiplyEqual */],
    ];
    const table = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    for (const [charCode, operatorType] of assignments) {
        table[charCode] = operatorType;
    }
    return table;
})();
187
/**
 * Packs two UTF-16 code units into a single numeric Map key.
 *
 * Each code unit gets its own 16-bit field, so distinct (char1, char2) pairs
 * always produce distinct keys. Packing with an 8-bit shift would let a
 * second char >= 256 bleed into the first char's bits (for example
 * ('=', U+3D3D) would produce the same key as ('=', '=')).
 *
 * @param {number} char1 Code unit of the first character (0..0xFFFF).
 * @param {number} char2 Code unit of the second character (0..0xFFFF).
 * @returns {number} A non-negative integer key unique to the pair.
 */
function getTwoCharKey(char1, char2) {
    // Multiplication instead of `<< 16` keeps the key non-negative even when
    // char1 >= 0x8000.
    return char1 * 0x10000 + char2;
}
// Two-char operator/token lookups are Maps keyed by getTwoCharKey(). With
// only 5+1 entries a Map is far smaller than dense typed-array tables
// covering every possible key.
const _twoCharOperatorTypeMap = new Map([
    [getTwoCharKey(61 /* Char.Equal */, 61 /* Char.Equal */), 12 /* OperatorType.Equals */],
    [getTwoCharKey(33 /* Char.ExclamationMark */, 61 /* Char.Equal */), 28 /* OperatorType.NotEquals */],
    [getTwoCharKey(60 /* Char.Less */, 61 /* Char.Equal */), 21 /* OperatorType.LessThanOrEqual */],
    [getTwoCharKey(62 /* Char.Greater */, 61 /* Char.Equal */), 16 /* OperatorType.GreaterThanOrEqual */],
    [getTwoCharKey(60 /* Char.Less */, 62 /* Char.Greater */), 19 /* OperatorType.LessOrGreaterThan */],
]);
// Two-char sequences that produce a non-operator token ("->").
const _twoCharSpecialTokenTypeMap = new Map([
    [getTwoCharKey(45 /* Char.Hyphen */, 62 /* Char.Greater */), 21 /* TokenType.Arrow */],
]);
202
// Operator type for an operator made of the same character twice
// ("**", "//", "<<", ">>"), indexed by that character's ASCII code.
// Unmapped codes hold _unsetSingleCharOperatorType.
const _repeatedCharOperatorTypeTable = (() => {
    const assignments = [
        [42 /* Char.Asterisk */, 29 /* OperatorType.Power */],
        [47 /* Char.Slash */, 13 /* OperatorType.FloorDivide */],
        [60 /* Char.Less */, 17 /* OperatorType.LeftShift */],
        [62 /* Char.Greater */, 31 /* OperatorType.RightShift */],
    ];
    const table = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    for (const [charCode, operatorType] of assignments) {
        table[charCode] = operatorType;
    }
    return table;
})();
// Same as above for the augmented-assignment forms
// ("**=", "//=", "<<=", ">>="), indexed by the repeated character's code.
const _repeatedCharEqualOperatorTypeTable = (() => {
    const assignments = [
        [42 /* Char.Asterisk */, 30 /* OperatorType.PowerEqual */],
        [47 /* Char.Slash */, 14 /* OperatorType.FloorDivideEqual */],
        [60 /* Char.Less */, 18 /* OperatorType.LeftShiftEqual */],
        [62 /* Char.Greater */, 32 /* OperatorType.RightShiftEqual */],
    ];
    const table = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    for (const [charCode, operatorType] of assignments) {
        table[charCode] = operatorType;
    }
    return table;
})();
107
220
  const _byteOrderMarker = 0xfeff;
108
221
  const defaultTabSize = 8;
109
- const magicsRegEx = /\\\s*$/;
110
- // The character class for type: ignore rule codes includes ':' so that
111
- // tool-namespaced codes such as "ty:unresolved-reference" are accepted.
112
- // pyright: ignore uses the original class since tool-namespaced codes
113
- // are not expected there.
114
- const typeIgnoreCommentRegEx = /((^|#)\s*)type:\s*ignore(\s*\[([\s\w:,-]*)\]|\s|$)/;
115
- const pyrightIgnoreCommentRegEx = /((^|#)\s*)pyright:\s*ignore(\s*\[([\s\w-,]*)\]|\s|$)/;
116
- const underscoreRegEx = /_/g;
222
// Fast-reject table for string literals, indexed by ASCII char code.
// An entry is true only for the two quote characters and for the string
// prefix letters f/r/b/u/t in either case. Consulting it first means
// _getStringPrefixLength() is only called for characters that could begin a
// string literal.
const _canStartString = (() => {
    const starterChars = '\'"fFrRbBuUtT';
    const table = new Array(128).fill(false);
    for (let i = 0; i < starterChars.length; i++) {
        table[starterChars.charCodeAt(i)] = true;
    }
    return table;
})();
235
// ASCII identifier classification tables, indexed by char code < 128.
// Both are precomputed at module load from the characters module so the
// identifier scanner can classify ASCII chars with an array read instead of
// a function call; non-ASCII chars still go through the generic helpers.
//
// true when the char may appear inside an identifier.
const _asciiIdentifierContinue = Array.from({ length: 128 }, (_unused, code) =>
    Boolean((0, characters_1.isIdentifierChar)(code))
);
// true when the char may begin an identifier.
const _asciiIdentifierStart = Array.from({ length: 128 }, (_unused, code) =>
    Boolean((0, characters_1.isIdentifierStartChar)(code))
);
258
/**
 * Copies text[start, end) into a new string built one character at a time.
 *
 * The copy is assembled from charAt() results rather than taken with
 * slice()/substring(); the intent is that the result does not keep the
 * (potentially large) source text alive the way an engine-level sliced
 * string can, and that it avoids a round trip through Buffer.
 *
 * @param {string} text Source text.
 * @param {number} start Index of the first code unit to copy.
 * @param {number} end Index one past the last code unit to copy.
 * @returns {string} The copied range; empty when start >= end.
 */
function detachSubstring(text, start, end) {
    let copy = '';
    let cursor = start;
    while (cursor < end) {
        copy += text.charAt(cursor);
        cursor += 1;
    }
    return copy;
}
271
/**
 * Returns text[start, end) with every underscore removed, without first
 * creating an intermediate substring of the whole range.
 *
 * The range is copied in underscore-free runs via slice(), so an `end` that
 * lies past the end of `text` is clamped on every path (indexing the string
 * directly would append the literal "undefined" for out-of-range positions).
 *
 * @param {string} text Source text.
 * @param {number} start Index of the first code unit of the range.
 * @param {number} end Index one past the last code unit of the range.
 * @returns {string} The range contents minus '_' characters.
 */
function removeUnderscoresFromRange(text, start, end) {
    const limit = Math.min(end, text.length);
    let cleaned = '';
    let runStart = start;
    for (let i = start; i < limit; i++) {
        if (text.charCodeAt(i) === 95 /* Char.Underscore */) {
            // Flush the run that precedes this underscore and skip over it.
            cleaned += text.slice(runStart, i);
            runStart = i + 1;
        }
    }
    // With no underscore in range this is just text.slice(start, end).
    return cleaned + text.slice(runStart, end);
}
292
/**
 * Hand-written stand-in for magicsRegEx = /\\\s*$/ that works on a range of
 * `text` instead of a substring.
 *
 * Reports whether the last character of text[start, end) that is not a
 * space, tab or form feed is a backslash. Only those three characters are
 * treated as trailing whitespace here.
 *
 * @param {string} text Source text.
 * @param {number} start Index of the first code unit of the range.
 * @param {number} end Index one past the last code unit of the range.
 * @returns {boolean} true when the range ends with '\' plus optional blanks.
 */
function endsWithBackslashContinuation(text, start, end) {
    // Walk backward; the first non-blank character decides the answer.
    for (let idx = end - 1; idx >= start; idx--) {
        const code = text.charCodeAt(idx);
        const isTrailingBlank = code === 32 /* Char.Space */ || code === 9 /* Char.Tab */ || code === 12 /* Char.FormFeed */;
        if (!isTrailingBlank) {
            return code === 92 /* Char.Backslash */;
        }
    }
    // Empty range, or nothing but blanks.
    return false;
}
309
/**
 * Parses a bracketed rule list such as "[rule-a, rule-b]".
 *
 * @param {string} text Source text.
 * @param {number} pos Index of the opening '[' (the caller must ensure this).
 * @param {number} rangeEnd Index one past the last code unit that may be read.
 * @param {boolean} allowColon Whether ':' is accepted inside the brackets
 *     (used for tool-namespaced codes in "type: ignore").
 * @returns {{content: string, newPos: number} | undefined} The text between
 *     the brackets and the index just past ']', or undefined when the list is
 *     unclosed within the range or contains a character other than
 *     [A-Za-z0-9_], '-', ',', space, tab (and ':' when allowed).
 */
function parseIgnoreBracketContent(text, pos, rangeEnd, allowColon) {
    const contentStart = pos + 1; // first char after '['
    for (let cursor = contentStart; cursor < rangeEnd; cursor++) {
        const code = text.charCodeAt(cursor);
        if (code === 93 /* Char.CloseBracket */) {
            return { content: text.slice(contentStart, cursor), newPos: cursor + 1 };
        }
        const isLetter = (code >= 97 /* Char.a */ && code <= 122 /* Char.z */) ||
            (code >= 65 /* Char.A */ && code <= 90 /* Char.Z */);
        const isDigit = code >= 48 /* Char._0 */ && code <= 57 /* Char._9 */;
        const isSeparator = code === 95 /* Char.Underscore */ ||
            code === 45 /* Char.Hyphen */ ||
            code === 44 /* Char.Comma */ ||
            code === 32 /* Char.Space */ ||
            code === 9 /* Char.Tab */;
        const isAllowedColon = allowColon && code === 58 /* Char.Colon */;
        if (!(isLetter || isDigit || isSeparator || isAllowedColon)) {
            // Invalid character before any closing bracket.
            return undefined;
        }
    }
    // Ran out of range without seeing ']'.
    return undefined;
}
340
/**
 * Hand-written stand-in for typeIgnoreCommentRegEx / pyrightIgnoreCommentRegEx:
 *
 *     ((^|#)\s*)<directive>:\s*ignore(\s*\[rules\]|\s|$)
 *
 * Scans text[rangeStart, rangeEnd) for "<directive>: ignore" optionally
 * followed by a bracketed rule list. The directive must be preceded (after
 * optional spaces/tabs) by '#' or by the start of the range. For
 * directive === 'type' the rule list may contain ':' (tool-namespaced codes
 * such as "ty:rule-name"); for any other directive it may not.
 *
 * After "ignore" the match requires end-of-range, a space/tab, or a
 * well-formed "[...]". When blanks follow "ignore" but the bracket after them
 * is malformed, the comment still matches as a blanket ignore (mirroring the
 * regular expression's `\s` alternative); without a separating blank a
 * malformed bracket is not a match.
 *
 * @param {string} text Source text.
 * @param {number} rangeStart First index that may be read.
 * @param {number} rangeEnd Index one past the last code unit that may be read.
 * @param {string} directive Directive keyword, 'type' or 'pyright'.
 * @returns {{fullMatch: string, prefix: string, bracketContent: (string|undefined), index: number} | undefined}
 *     `prefix` is the '#'/whitespace run before the directive, `fullMatch`
 *     starts at the prefix, `bracketContent` is the rule list text without
 *     brackets (undefined for a blanket ignore) and `index` is the absolute
 *     offset of `fullMatch` within `text`.
 */
function matchIgnoreDirective(text, rangeStart, rangeEnd, directive) {
    const allowColonInBracket = directive === 'type';
    const ignoreStr = 'ignore';
    const firstCharCode = directive.charCodeAt(0);
    // Last index at which the directive keyword still fits inside the range.
    const scanLimit = rangeEnd - directive.length;
    const isBlank = (code) => code === 32 /* Char.Space */ || code === 9 /* Char.Tab */;
    const skipBlanks = (from) => {
        let cursor = from;
        while (cursor < rangeEnd && isBlank(text.charCodeAt(cursor))) {
            cursor++;
        }
        return cursor;
    };
    let searchFrom = rangeStart;
    while (searchFrom <= scanLimit) {
        // Bounded scan for the directive keyword. String.prototype.indexOf
        // has no end bound and could run far past rangeEnd when the keyword
        // is missing from this comment, which would make comment-heavy files
        // quadratic.
        let directiveIdx = -1;
        for (let i = searchFrom; i <= scanLimit; i++) {
            if (text.charCodeAt(i) === firstCharCode && text.startsWith(directive, i)) {
                directiveIdx = i;
                break;
            }
        }
        if (directiveIdx < 0) {
            return undefined;
        }
        // Any failure below resumes the search just past this occurrence.
        searchFrom = directiveIdx + 1;
        // Anchor: walking back over blanks must reach '#' or the range start.
        let anchor = directiveIdx - 1;
        while (anchor >= rangeStart && isBlank(text.charCodeAt(anchor))) {
            anchor--;
        }
        let prefixStart;
        if (anchor < rangeStart) {
            prefixStart = rangeStart;
        }
        else if (text.charCodeAt(anchor) === 35 /* Char.Hash */) {
            prefixStart = anchor;
        }
        else {
            continue;
        }
        // "<directive>:" then optional blanks then "ignore".
        let pos = directiveIdx + directive.length;
        if (pos >= rangeEnd || text.charCodeAt(pos) !== 58 /* Char.Colon */) {
            continue;
        }
        pos = skipBlanks(pos + 1);
        if (pos + ignoreStr.length > rangeEnd || !text.startsWith(ignoreStr, pos)) {
            continue;
        }
        pos += ignoreStr.length;
        // After "ignore": end-of-range, blank(s), or '['.
        let bracketContent;
        if (pos < rangeEnd) {
            const afterIgnore = text.charCodeAt(pos);
            const blankFollows = isBlank(afterIgnore);
            if (!blankFollows && afterIgnore !== 91 /* Char.OpenBracket */) {
                // e.g. "ignored" or "ignore-all": not a directive.
                continue;
            }
            const bracketPos = blankFollows ? skipBlanks(pos) : pos;
            if (bracketPos < rangeEnd && text.charCodeAt(bracketPos) === 91 /* Char.OpenBracket */) {
                const parsed = parseIgnoreBracketContent(text, bracketPos, rangeEnd, allowColonInBracket);
                if (parsed !== undefined) {
                    bracketContent = parsed.content;
                    pos = parsed.newPos;
                }
                else if (blankFollows) {
                    // Malformed rule list after "ignore <blank>": the blank
                    // alone completes the directive, so this is a blanket
                    // ignore that ends before the '['.
                    pos = bracketPos;
                }
                else {
                    // "ignore[" with a malformed list and no separating blank.
                    continue;
                }
            }
            else {
                // Blanks but no rule list.
                pos = bracketPos;
            }
        }
        return {
            fullMatch: text.slice(prefixStart, pos),
            prefix: text.slice(prefixStart, directiveIdx),
            bracketContent,
            index: prefixStart,
        };
    }
    return undefined;
}
117
479
  var MagicsKind;
118
480
  (function (MagicsKind) {
119
481
  MagicsKind[MagicsKind["None"] = 0] = "None";
@@ -128,6 +490,10 @@ class Tokenizer {
128
490
  this._parenDepth = 0;
129
491
  this._lineRanges = [];
130
492
  this._indentAmounts = [];
493
+ // Cached answer to "are there any non-trivial tokens yet?" Once true it
494
+ // stays true, so the O(n) scan in _handleComment only runs while the token
495
+ // stream consists purely of NewLine / Indent tokens.
496
+ this._hasTokenBeforeIgnoreAll = false;
131
497
  this._typeIgnoreLines = new Map();
132
498
  this._pyrightIgnoreLines = new Map();
133
499
  this._fStringStack = [];
@@ -150,10 +516,7 @@ class Tokenizer {
150
516
  this._doubleQuoteCount = 0;
151
517
  // Assume Jupyter notebook tokenization rules?
152
518
  this._useNotebookMode = false;
153
- // Intern identifier strings within a single tokenization pass. This reduces
154
- // per-identifier allocations while still ensuring we don't retain substrings
155
- // that reference the original source text.
156
- this._identifierInternedStrings = new Map();
519
+ this._identifierCache = new Array(Tokenizer._identifierCacheSize);
157
520
  }
158
521
  tokenize(text, start, length, initialParenDepth = 0, useNotebookMode = false) {
159
522
  if (start === undefined) {
@@ -179,7 +542,8 @@ class Tokenizer {
179
542
  this._lineRanges = [];
180
543
  this._indentAmounts = [];
181
544
  this._useNotebookMode = useNotebookMode;
182
- this._identifierInternedStrings.clear();
545
+ // Clear per-source identifier intern cache.
546
+ this._identifierCache.fill(undefined);
183
547
  const end = start + length;
184
548
  if (start === 0) {
185
549
  this._readIndentationAfterNewLine();
@@ -315,19 +679,22 @@ class Tokenizer {
315
679
  // tokens onto the token list. Returns true if the caller should advance
316
680
  // to the next character.
317
681
  _handleCharacter() {
318
- // f-strings, b-strings, etc
319
- const stringPrefixLength = this._getStringPrefixLength();
320
- if (stringPrefixLength >= 0) {
321
- let stringPrefix = '';
322
- if (stringPrefixLength > 0) {
323
- stringPrefix = this._cs.getText().slice(this._cs.position, this._cs.position + stringPrefixLength);
324
- // Indeed a string
325
- this._cs.advance(stringPrefixLength);
326
- }
327
- const quoteTypeFlags = this._getQuoteTypeFlags(stringPrefix);
328
- if (quoteTypeFlags !== 0 /* StringTokenFlags.None */) {
329
- this._handleString(quoteTypeFlags, stringPrefixLength);
330
- return true;
682
+ // f-strings, b-strings, etc — only check if current char can start a string
683
+ const currentChar = this._cs.currentChar;
684
+ if (currentChar < 128 && _canStartString[currentChar]) {
685
+ const stringPrefixLength = this._getStringPrefixLength();
686
+ if (stringPrefixLength >= 0) {
687
+ let stringPrefix = '';
688
+ if (stringPrefixLength > 0) {
689
+ stringPrefix = this._cs.getText().slice(this._cs.position, this._cs.position + stringPrefixLength);
690
+ // Indeed a string
691
+ this._cs.advance(stringPrefixLength);
692
+ }
693
+ const quoteTypeFlags = this._getQuoteTypeFlags(stringPrefix);
694
+ if (quoteTypeFlags !== 0 /* StringTokenFlags.None */) {
695
+ this._handleString(quoteTypeFlags, stringPrefixLength);
696
+ return true;
697
+ }
331
698
  }
332
699
  }
333
700
  if (this._cs.currentChar === 35 /* Char.Hash */) {
@@ -657,48 +1024,105 @@ class Tokenizer {
657
1024
  }
658
1025
  }
659
1026
  _tryIdentifier() {
660
- const swallowRemainingChars = () => {
661
- while (true) {
662
- if ((0, characters_1.isIdentifierChar)(this._cs.currentChar)) {
663
- this._cs.moveNext();
664
- }
665
- else if ((0, characters_1.isIdentifierChar)(this._cs.currentChar, this._cs.nextChar)) {
666
- this._cs.moveNext();
667
- this._cs.moveNext();
1027
+ const cs = this._cs;
1028
+ const text = cs.getText();
1029
+ const textLen = text.length;
1030
+ const start = cs.position;
1031
+ // Fast path for ASCII identifier start. Avoids the function call and
1032
+ // surrogate logic for the common case (Python source is overwhelmingly
1033
+ // ASCII identifiers).
1034
+ const firstChar = cs.currentChar;
1035
+ let pos = start;
1036
+ if (firstChar < 128) {
1037
+ if (!_asciiIdentifierStart[firstChar]) {
1038
+ // Not an identifier start and not a surrogate candidate.
1039
+ return false;
1040
+ }
1041
+ pos++;
1042
+ // Tight loop: advance while we're still in ASCII identifier chars.
1043
+ while (pos < textLen) {
1044
+ const ch = text.charCodeAt(pos);
1045
+ if (ch < 128 && _asciiIdentifierContinue[ch]) {
1046
+ pos++;
668
1047
  }
669
1048
  else {
670
1049
  break;
671
1050
  }
672
1051
  }
673
- };
674
- const start = this._cs.position;
675
- if ((0, characters_1.isIdentifierStartChar)(this._cs.currentChar)) {
676
- this._cs.moveNext();
677
- swallowRemainingChars();
1052
+ // If we hit a non-ASCII char, fall back to the generic loop to
1053
+ // handle possible unicode identifier continue / surrogate pairs.
1054
+ if (pos < textLen && text.charCodeAt(pos) >= 128) {
1055
+ cs.advance(pos - start);
1056
+ this._swallowNonAsciiIdentifierChars();
1057
+ pos = cs.position;
1058
+ }
1059
+ else {
1060
+ cs.advance(pos - start);
1061
+ }
678
1062
  }
679
- else if ((0, characters_1.isIdentifierStartChar)(this._cs.currentChar, this._cs.nextChar)) {
680
- this._cs.moveNext();
681
- this._cs.moveNext();
682
- swallowRemainingChars();
1063
+ else {
1064
+ // Non-ASCII start: use the generic path (supports surrogates).
1065
+ if ((0, characters_1.isIdentifierStartChar)(firstChar)) {
1066
+ cs.moveNext();
1067
+ }
1068
+ else if ((0, characters_1.isIdentifierStartChar)(firstChar, cs.nextChar)) {
1069
+ cs.moveNext();
1070
+ cs.moveNext();
1071
+ }
1072
+ else {
1073
+ return false;
1074
+ }
1075
+ this._swallowNonAsciiIdentifierChars();
1076
+ pos = cs.position;
683
1077
  }
684
- if (this._cs.position > start) {
685
- const value = this._cs.getText().slice(start, this._cs.position);
686
- const keywordType = _keywords.get(value);
1078
+ if (pos > start) {
1079
+ const end = pos;
1080
+ const length = end - start;
1081
+ const keywordType = getKeywordTypeFromTextSlice(text, start, length);
687
1082
  if (keywordType !== undefined) {
688
- this._tokens.push(tokenizerTypes_1.KeywordToken.create(start, this._cs.position - start, keywordType, this._getComments()));
1083
+ this._tokens.push(tokenizerTypes_1.KeywordToken.create(start, length, keywordType, this._getComments()));
689
1084
  }
690
1085
  else {
691
- const internedValue = this._identifierInternedStrings.get(value) ?? this._internIdentifierString(value);
692
- this._tokens.push(tokenizerTypes_1.IdentifierToken.create(start, this._cs.position - start, internedValue, this._getComments()));
1086
+ const value = this._internIdentifier(text, start, end, length);
1087
+ this._tokens.push(tokenizerTypes_1.IdentifierToken.create(start, length, value, this._getComments()));
693
1088
  }
694
1089
  return true;
695
1090
  }
696
1091
  return false;
697
1092
  }
698
- _internIdentifierString(value) {
699
- const clonedValue = (0, core_1.cloneStr)(value);
700
- this._identifierInternedStrings.set(clonedValue, clonedValue);
701
- return clonedValue;
1093
+ // Per-tokenize identifier intern cache. Direct-mapped, so collisions
1094
+ // simply replace the slot. Common identifiers (self, cls, True, None,
1095
+ // str, int, dict, etc.) get deduplicated to a single string object,
1096
+ // avoiding repeated detachSubstring allocations for the same name.
1097
+ _internIdentifier(text, start, end, length) {
1098
+ const firstChar = text.charCodeAt(start);
1099
+ const lastChar = text.charCodeAt(end - 1);
1100
+ // Hash mixes length, first and last char; multiplier values chosen
1101
+ // to spread hits for common short identifiers across the table.
1102
+ const hash = (firstChar * 31 + lastChar * 7 + length) & Tokenizer._identifierCacheMask;
1103
+ const cached = this._identifierCache[hash];
1104
+ if (cached !== undefined && cached.length === length && text.startsWith(cached, start)) {
1105
+ return cached;
1106
+ }
1107
+ const value = detachSubstring(text, start, end);
1108
+ this._identifierCache[hash] = value;
1109
+ return value;
1110
+ }
1111
+ // Generic identifier-continue loop that handles unicode + surrogate pairs.
1112
+ // Falls back to this when the fast ASCII loop encounters a non-ASCII char.
1113
+ _swallowNonAsciiIdentifierChars() {
1114
+ while (true) {
1115
+ if ((0, characters_1.isIdentifierChar)(this._cs.currentChar)) {
1116
+ this._cs.moveNext();
1117
+ }
1118
+ else if ((0, characters_1.isIdentifierChar)(this._cs.currentChar, this._cs.nextChar)) {
1119
+ this._cs.moveNext();
1120
+ this._cs.moveNext();
1121
+ }
1122
+ else {
1123
+ break;
1124
+ }
1125
+ }
702
1126
  }
703
1127
  _isPossibleNumber() {
704
1128
  if ((0, characters_1.isDecimal)(this._cs.currentChar)) {
@@ -743,8 +1167,9 @@ class Tokenizer {
743
1167
  radix = 8;
744
1168
  }
745
1169
  if (radix > 0) {
746
- const text = this._cs.getText().slice(start, this._cs.position);
747
- const simpleIntText = text.replace(underscoreRegEx, '');
1170
+ const end = this._cs.position;
1171
+ const text = this._cs.getText();
1172
+ const simpleIntText = removeUnderscoresFromRange(text, start, end);
748
1173
  let intValue = parseInt(simpleIntText.slice(leadingChars), radix);
749
1174
  if (!isNaN(intValue)) {
750
1175
  const bigIntValue = BigInt(simpleIntText);
@@ -753,7 +1178,7 @@ class Tokenizer {
753
1178
  intValue > Number.MAX_SAFE_INTEGER) {
754
1179
  intValue = bigIntValue;
755
1180
  }
756
- this._tokens.push(tokenizerTypes_1.NumberToken.create(start, text.length, intValue, true, false, this._getComments()));
1181
+ this._tokens.push(tokenizerTypes_1.NumberToken.create(start, end - start, intValue, true, false, this._getComments()));
757
1182
  return true;
758
1183
  }
759
1184
  }
@@ -787,11 +1212,13 @@ class Tokenizer {
787
1212
  (this._cs.currentChar < 49 /* Char._1 */ || this._cs.currentChar > 57 /* Char._9 */);
788
1213
  }
789
1214
  if (isDecimalInteger) {
790
- let text = this._cs.getText().slice(start, this._cs.position);
791
- const simpleIntText = text.replace(underscoreRegEx, '');
1215
+ const textEnd = this._cs.position;
1216
+ const sourceText = this._cs.getText();
1217
+ const simpleIntText = removeUnderscoresFromRange(sourceText, start, textEnd);
792
1218
  let intValue = parseInt(simpleIntText, 10);
793
1219
  if (!isNaN(intValue)) {
794
1220
  let isImaginary = false;
1221
+ let tokenLength = textEnd - start;
795
1222
  const bigIntValue = BigInt(simpleIntText);
796
1223
  if (!isFinite(intValue) ||
797
1224
  bigIntValue < Number.MIN_SAFE_INTEGER ||
@@ -800,10 +1227,10 @@ class Tokenizer {
800
1227
  }
801
1228
  if (this._cs.currentChar === 106 /* Char.j */ || this._cs.currentChar === 74 /* Char.J */) {
802
1229
  isImaginary = true;
803
- text += String.fromCharCode(this._cs.currentChar);
804
1230
  this._cs.moveNext();
1231
+ tokenLength += 1;
805
1232
  }
806
- this._tokens.push(tokenizerTypes_1.NumberToken.create(start, text.length, intValue, true, isImaginary, this._getComments()));
1233
+ this._tokens.push(tokenizerTypes_1.NumberToken.create(start, tokenLength, intValue, true, isImaginary, this._getComments()));
807
1234
  return true;
808
1235
  }
809
1236
  }
@@ -812,16 +1239,18 @@ class Tokenizer {
812
1239
  if (mightBeFloatingPoint ||
813
1240
  (this._cs.currentChar === 46 /* Char.Period */ && this._cs.nextChar >= 48 /* Char._0 */ && this._cs.nextChar <= 57 /* Char._9 */)) {
814
1241
  if (this._skipFloatingPointCandidate()) {
815
- let text = this._cs.getText().slice(start, this._cs.position);
816
- const value = parseFloat(text.replace(underscoreRegEx, ''));
1242
+ const floatEnd = this._cs.position;
1243
+ const floatText = removeUnderscoresFromRange(this._cs.getText(), start, floatEnd);
1244
+ const value = parseFloat(floatText);
817
1245
  if (!isNaN(value)) {
818
1246
  let isImaginary = false;
1247
+ let tokenLength = floatEnd - start;
819
1248
  if (this._cs.currentChar === 106 /* Char.j */ || this._cs.currentChar === 74 /* Char.J */) {
820
1249
  isImaginary = true;
821
- text += String.fromCharCode(this._cs.currentChar);
822
1250
  this._cs.moveNext();
1251
+ tokenLength += 1;
823
1252
  }
824
- this._tokens.push(tokenizerTypes_1.NumberToken.create(start, this._cs.position - start, value, false, isImaginary, this._getComments()));
1253
+ this._tokens.push(tokenizerTypes_1.NumberToken.create(start, tokenLength, value, false, isImaginary, this._getComments()));
825
1254
  return true;
826
1255
  }
827
1256
  }
@@ -830,122 +1259,63 @@ class Tokenizer {
830
1259
  return false;
831
1260
  }
832
1261
  _tryOperator() {
1262
+ const currentChar = this._cs.currentChar;
833
1263
  let length = 0;
834
1264
  const nextChar = this._cs.nextChar;
835
1265
  let operatorType;
836
- switch (this._cs.currentChar) {
837
- case 43 /* Char.Plus */:
838
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
839
- operatorType = length === 2 ? 1 /* OperatorType.AddEqual */ : 0 /* OperatorType.Add */;
840
- break;
841
- case 38 /* Char.Ampersand */:
842
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
843
- operatorType = length === 2 ? 4 /* OperatorType.BitwiseAndEqual */ : 3 /* OperatorType.BitwiseAnd */;
844
- break;
845
- case 124 /* Char.Bar */:
846
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
847
- operatorType = length === 2 ? 7 /* OperatorType.BitwiseOrEqual */ : 6 /* OperatorType.BitwiseOr */;
848
- break;
849
- case 94 /* Char.Caret */:
850
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
851
- operatorType = length === 2 ? 9 /* OperatorType.BitwiseXorEqual */ : 8 /* OperatorType.BitwiseXor */;
852
- break;
853
- case 61 /* Char.Equal */:
854
- if (this._activeFString?.activeReplacementField &&
855
- this._activeFString?.activeReplacementField.parenDepth === this._parenDepth &&
856
- !this._activeFString.activeReplacementField.inFormatSpecifier &&
857
- nextChar !== 61 /* Char.Equal */) {
858
- length = 1;
859
- operatorType = 2 /* OperatorType.Assign */;
860
- break;
861
- }
862
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
863
- operatorType = length === 2 ? 12 /* OperatorType.Equals */ : 2 /* OperatorType.Assign */;
864
- break;
865
- case 33 /* Char.ExclamationMark */:
866
- if (nextChar !== 61 /* Char.Equal */) {
867
- if (this._activeFString) {
868
- // Handle the conversion separator (!) within an f-string.
869
- this._tokens.push(tokenizerTypes_1.Token.create(23 /* TokenType.ExclamationMark */, this._cs.position, 1, this._getComments()));
870
- this._cs.advance(1);
871
- return true;
872
- }
873
- return false;
874
- }
875
- length = 2;
876
- operatorType = 28 /* OperatorType.NotEquals */;
877
- break;
878
- case 37 /* Char.Percent */:
879
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
880
- operatorType = length === 2 ? 25 /* OperatorType.ModEqual */ : 24 /* OperatorType.Mod */;
881
- break;
882
- case 126 /* Char.Tilde */:
883
- length = 1;
884
- operatorType = 5 /* OperatorType.BitwiseInvert */;
885
- break;
886
- case 45 /* Char.Hyphen */:
887
- if (nextChar === 62 /* Char.Greater */) {
888
- this._tokens.push(tokenizerTypes_1.Token.create(21 /* TokenType.Arrow */, this._cs.position, 2, this._getComments()));
889
- this._cs.advance(2);
1266
+ if (currentChar < 128 && nextChar < 128) {
1267
+ const twoCharKey = (currentChar << 8) | nextChar;
1268
+ const specialTokenType = _twoCharSpecialTokenTypeMap.get(twoCharKey);
1269
+ if (specialTokenType !== undefined) {
1270
+ this._tokens.push(tokenizerTypes_1.Token.create(specialTokenType, this._cs.position, 2, this._getComments()));
1271
+ this._cs.advance(2);
1272
+ return true;
1273
+ }
1274
+ const twoCharOperatorType = _twoCharOperatorTypeMap.get(twoCharKey);
1275
+ if (twoCharOperatorType !== undefined) {
1276
+ this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, 2, twoCharOperatorType, this._getComments()));
1277
+ this._cs.advance(2);
1278
+ return true;
1279
+ }
1280
+ if (currentChar === nextChar) {
1281
+ const repeatedOperatorType = _repeatedCharOperatorTypeTable[currentChar];
1282
+ if (repeatedOperatorType !== _unsetSingleCharOperatorType) {
1283
+ const hasTrailingEqual = this._cs.lookAhead(2) === 61 /* Char.Equal */;
1284
+ const repeatedLength = hasTrailingEqual ? 3 : 2;
1285
+ const operatorType = hasTrailingEqual
1286
+ ? _repeatedCharEqualOperatorTypeTable[currentChar]
1287
+ : repeatedOperatorType;
1288
+ this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, repeatedLength, operatorType, this._getComments()));
1289
+ this._cs.advance(repeatedLength);
890
1290
  return true;
891
1291
  }
892
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
893
- operatorType = length === 2 ? 34 /* OperatorType.SubtractEqual */ : 33 /* OperatorType.Subtract */;
894
- break;
895
- case 42 /* Char.Asterisk */:
896
- if (nextChar === 42 /* Char.Asterisk */) {
897
- length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
898
- operatorType = length === 3 ? 30 /* OperatorType.PowerEqual */ : 29 /* OperatorType.Power */;
899
- }
900
- else {
901
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
902
- operatorType = length === 2 ? 27 /* OperatorType.MultiplyEqual */ : 26 /* OperatorType.Multiply */;
903
- }
904
- break;
905
- case 47 /* Char.Slash */:
906
- if (nextChar === 47 /* Char.Slash */) {
907
- length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
908
- operatorType = length === 3 ? 14 /* OperatorType.FloorDivideEqual */ : 13 /* OperatorType.FloorDivide */;
909
- }
910
- else {
911
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
912
- operatorType = length === 2 ? 11 /* OperatorType.DivideEqual */ : 10 /* OperatorType.Divide */;
913
- }
914
- break;
915
- case 60 /* Char.Less */:
916
- if (nextChar === 60 /* Char.Less */) {
917
- length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
918
- operatorType = length === 3 ? 18 /* OperatorType.LeftShiftEqual */ : 17 /* OperatorType.LeftShift */;
919
- }
920
- else if (nextChar === 62 /* Char.Greater */) {
1292
+ }
1293
+ }
1294
+ if (currentChar < 128) {
1295
+ const singleCharOperatorType = _singleCharOperatorTypeTable[currentChar];
1296
+ if (singleCharOperatorType !== _unsetSingleCharOperatorType) {
1297
+ const equalOperatorType = _singleCharEqualOperatorTypeTable[currentChar];
1298
+ if (nextChar === 61 /* Char.Equal */ && equalOperatorType !== _unsetSingleCharOperatorType) {
921
1299
  length = 2;
922
- operatorType = 19 /* OperatorType.LessOrGreaterThan */;
1300
+ operatorType = equalOperatorType;
923
1301
  }
924
1302
  else {
925
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
926
- operatorType = length === 2 ? 21 /* OperatorType.LessThanOrEqual */ : 20 /* OperatorType.LessThan */;
927
- }
928
- break;
929
- case 62 /* Char.Greater */:
930
- if (nextChar === 62 /* Char.Greater */) {
931
- length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
932
- operatorType = length === 3 ? 32 /* OperatorType.RightShiftEqual */ : 31 /* OperatorType.RightShift */;
933
- }
934
- else {
935
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
936
- operatorType = length === 2 ? 16 /* OperatorType.GreaterThanOrEqual */ : 15 /* OperatorType.GreaterThan */;
1303
+ length = 1;
1304
+ operatorType = singleCharOperatorType;
937
1305
  }
938
- break;
939
- case 64 /* Char.At */:
940
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
941
- operatorType = length === 2 ? 23 /* OperatorType.MatrixMultiplyEqual */ : 22 /* OperatorType.MatrixMultiply */;
942
- break;
943
- default:
944
- return false;
1306
+ this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, length, operatorType, this._getComments()));
1307
+ this._cs.advance(length);
1308
+ return true;
1309
+ }
945
1310
  }
946
- this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, length, operatorType, this._getComments()));
947
- this._cs.advance(length);
948
- return length > 0;
1311
+ // `!=` is handled by the 2-char fast path above.
1312
+ if (currentChar === 33 /* Char.ExclamationMark */ && this._activeFString) {
1313
+ // Handle the conversion separator (!) within an f-string.
1314
+ this._tokens.push(tokenizerTypes_1.Token.create(23 /* TokenType.ExclamationMark */, this._cs.position, 1, this._getComments()));
1315
+ this._cs.advance(1);
1316
+ return true;
1317
+ }
1318
+ return false;
949
1319
  }
950
1320
  _handleInvalid() {
951
1321
  const start = this._cs.position;
@@ -994,16 +1364,15 @@ class Tokenizer {
994
1364
  }
995
1365
  _handleIPythonMagics(type) {
996
1366
  const start = this._cs.position + 1;
1367
+ const sourceText = this._cs.getText();
997
1368
  let begin = start;
998
1369
  while (true) {
999
1370
  this._cs.skipToEol();
1000
1371
  if (type === 1 /* CommentType.IPythonMagic */ || type === 2 /* CommentType.IPythonShellEscape */) {
1001
- const length = this._cs.position - begin;
1002
- const value = this._cs.getText().slice(begin, begin + length);
1003
1372
  // is it multiline magics?
1004
1373
  // %magic command \
1005
1374
  // next arguments
1006
- if (!value.match(magicsRegEx)) {
1375
+ if (!endsWithBackslashContinuation(sourceText, begin, this._cs.position)) {
1007
1376
  break;
1008
1377
  }
1009
1378
  }
@@ -1014,55 +1383,74 @@ class Tokenizer {
1014
1383
  }
1015
1384
  }
1016
1385
  const length = this._cs.position - start;
1017
- const comment = tokenizerTypes_1.Comment.create(start, length, this._cs.getText().slice(start, start + length), type);
1386
+ const comment = tokenizerTypes_1.Comment.create(start, length, sourceText.slice(start, start + length), type);
1018
1387
  this._addComments(comment);
1019
1388
  }
1020
1389
  _handleComment() {
1021
1390
  const start = this._cs.position + 1;
1022
1391
  this._cs.skipToEol();
1023
1392
  const length = this._cs.position - start;
1024
- const comment = tokenizerTypes_1.Comment.create(start, length, this._cs.getText().slice(start, start + length));
1025
- const typeIgnoreRegexMatch = comment.value.match(typeIgnoreCommentRegEx);
1026
- if (typeIgnoreRegexMatch) {
1027
- const commentStart = start + (typeIgnoreRegexMatch.index ?? 0);
1028
- const textRange = {
1029
- start: commentStart + typeIgnoreRegexMatch[1].length,
1030
- length: typeIgnoreRegexMatch[0].length - typeIgnoreRegexMatch[1].length,
1031
- };
1032
- const ignoreComment = {
1033
- range: textRange,
1034
- rulesList: this._getIgnoreCommentRulesList(commentStart, typeIgnoreRegexMatch),
1035
- };
1036
- if (this._tokens.findIndex((t) => t.type !== 2 /* TokenType.NewLine */ && t && t.type !== 3 /* TokenType.Indent */) < 0) {
1037
- this._typeIgnoreAll = ignoreComment;
1393
+ const sourceText = this._cs.getText();
1394
+ const end = start + length;
1395
+ // Fast pre-filter: any ignore directive must contain the substring 'ignore'.
1396
+ // indexOf is a highly-optimized native call and lets us skip the full
1397
+ // directive scan for the vast majority of comments (which are free-form text).
1398
+ const ignoreIdx = sourceText.indexOf('ignore', start);
1399
+ if (ignoreIdx >= 0 && ignoreIdx < end) {
1400
+ const typeIgnoreMatch = matchIgnoreDirective(sourceText, start, end, 'type');
1401
+ if (typeIgnoreMatch) {
1402
+ const commentStart = typeIgnoreMatch.index;
1403
+ const textRange = {
1404
+ start: commentStart + typeIgnoreMatch.prefix.length,
1405
+ length: typeIgnoreMatch.fullMatch.length - typeIgnoreMatch.prefix.length,
1406
+ };
1407
+ const ignoreComment = {
1408
+ range: textRange,
1409
+ rulesList: this._getIgnoreCommentRulesList(commentStart, typeIgnoreMatch),
1410
+ };
1411
+ let isIgnoreAll = false;
1412
+ if (!this._hasTokenBeforeIgnoreAll) {
1413
+ // Are there any tokens other than NewLine / Indent yet?
1414
+ const hasOther = this._tokens.some((t) => t && t.type !== 2 /* TokenType.NewLine */ && t.type !== 3 /* TokenType.Indent */);
1415
+ if (hasOther) {
1416
+ this._hasTokenBeforeIgnoreAll = true;
1417
+ }
1418
+ else {
1419
+ isIgnoreAll = true;
1420
+ }
1421
+ }
1422
+ if (isIgnoreAll) {
1423
+ this._typeIgnoreAll = ignoreComment;
1424
+ }
1425
+ else {
1426
+ this._typeIgnoreLines.set(this._lineRanges.length, ignoreComment);
1427
+ }
1038
1428
  }
1039
- else {
1040
- this._typeIgnoreLines.set(this._lineRanges.length, ignoreComment);
1429
+ const pyrightIgnoreMatch = matchIgnoreDirective(sourceText, start, end, 'pyright');
1430
+ if (pyrightIgnoreMatch) {
1431
+ const commentStart = pyrightIgnoreMatch.index;
1432
+ const textRange = {
1433
+ start: commentStart + pyrightIgnoreMatch.prefix.length,
1434
+ length: pyrightIgnoreMatch.fullMatch.length - pyrightIgnoreMatch.prefix.length,
1435
+ };
1436
+ const ignoreComment = {
1437
+ range: textRange,
1438
+ rulesList: this._getIgnoreCommentRulesList(commentStart, pyrightIgnoreMatch),
1439
+ };
1440
+ this._pyrightIgnoreLines.set(this._lineRanges.length, ignoreComment);
1041
1441
  }
1042
1442
  }
1043
- const pyrightIgnoreRegexMatch = comment.value.match(pyrightIgnoreCommentRegEx);
1044
- if (pyrightIgnoreRegexMatch) {
1045
- const commentStart = start + (pyrightIgnoreRegexMatch.index ?? 0);
1046
- const textRange = {
1047
- start: commentStart + pyrightIgnoreRegexMatch[1].length,
1048
- length: pyrightIgnoreRegexMatch[0].length - pyrightIgnoreRegexMatch[1].length,
1049
- };
1050
- const ignoreComment = {
1051
- range: textRange,
1052
- rulesList: this._getIgnoreCommentRulesList(commentStart, pyrightIgnoreRegexMatch),
1053
- };
1054
- this._pyrightIgnoreLines.set(this._lineRanges.length, ignoreComment);
1055
- }
1443
+ const comment = tokenizerTypes_1.Comment.create(start, length, sourceText.slice(start, end));
1056
1444
  this._addComments(comment);
1057
1445
  }
1058
1446
  // Extracts the individual rules within a "type: ignore [x, y, z]" comment.
1059
1447
  _getIgnoreCommentRulesList(start, match) {
1060
- if (match.length < 5 || match[4] === undefined) {
1448
+ if (match.bracketContent === undefined) {
1061
1449
  return undefined;
1062
1450
  }
1063
- const splitElements = match[4].split(',');
1451
+ const splitElements = match.bracketContent.split(',');
1064
1452
  const commentRules = [];
1065
- let currentOffset = start + match[0].indexOf('[') + 1;
1453
+ let currentOffset = start + match.fullMatch.indexOf('[') + 1;
1066
1454
  for (const element of splitElements) {
1067
1455
  const frontTrimmed = element.trimStart();
1068
1456
  currentOffset += element.length - frontTrimmed.length;
@@ -1417,4 +1805,12 @@ class Tokenizer {
1417
1805
  }
1418
1806
  }
1419
1807
  exports.Tokenizer = Tokenizer;
1808
+ // Direct-mapped identifier intern cache. Indexed by a cheap hash of
1809
+ // (firstChar, lastChar, length). On a hit (slot defined and string
1810
+ // equals the current source range), reuse the cached string instead of
1811
+ // re-allocating via detachSubstring. Collisions simply overwrite the
1812
+ // slot — no chaining, O(1) lookup, no Map overhead. Sized as a power of
1813
+ // two so the mask is a single AND.
1814
+ Tokenizer._identifierCacheSize = 2048;
1815
+ Tokenizer._identifierCacheMask = Tokenizer._identifierCacheSize - 1;
1420
1816
  //# sourceMappingURL=tokenizer.js.map