@zzzen/pyright-internal 1.2.0-dev.20260222 → 1.2.0-dev.20260426

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. package/dist/analyzer/backgroundAnalysisProgram.d.ts +1 -1
  2. package/dist/analyzer/backgroundAnalysisProgram.js +9 -6
  3. package/dist/analyzer/backgroundAnalysisProgram.js.map +1 -1
  4. package/dist/analyzer/binder.d.ts +10 -1
  5. package/dist/analyzer/binder.js +258 -41
  6. package/dist/analyzer/binder.js.map +1 -1
  7. package/dist/analyzer/cellChainIndex.d.ts +34 -0
  8. package/dist/analyzer/cellChainIndex.js +126 -0
  9. package/dist/analyzer/cellChainIndex.js.map +1 -0
  10. package/dist/analyzer/checker.js +0 -1
  11. package/dist/analyzer/checker.js.map +1 -1
  12. package/dist/analyzer/codeFlowEngine.js +16 -1
  13. package/dist/analyzer/codeFlowEngine.js.map +1 -1
  14. package/dist/analyzer/constructors.js +9 -2
  15. package/dist/analyzer/constructors.js.map +1 -1
  16. package/dist/analyzer/importResolver.d.ts +3 -21
  17. package/dist/analyzer/importResolver.js +42 -316
  18. package/dist/analyzer/importResolver.js.map +1 -1
  19. package/dist/analyzer/importResolverFileSystem.d.ts +3 -0
  20. package/dist/analyzer/importResolverFileSystem.js +160 -0
  21. package/dist/analyzer/importResolverFileSystem.js.map +1 -0
  22. package/dist/analyzer/importResolverTypes.d.ts +24 -0
  23. package/dist/analyzer/importResolverTypes.js +8 -0
  24. package/dist/analyzer/importResolverTypes.js.map +1 -0
  25. package/dist/analyzer/importStatementUtils.js +9 -0
  26. package/dist/analyzer/importStatementUtils.js.map +1 -1
  27. package/dist/analyzer/parseTreeUtils.d.ts +1 -0
  28. package/dist/analyzer/parseTreeUtils.js +24 -7
  29. package/dist/analyzer/parseTreeUtils.js.map +1 -1
  30. package/dist/analyzer/patternMatching.js +14 -2
  31. package/dist/analyzer/patternMatching.js.map +1 -1
  32. package/dist/analyzer/program.d.ts +5 -3
  33. package/dist/analyzer/program.js +40 -51
  34. package/dist/analyzer/program.js.map +1 -1
  35. package/dist/analyzer/pythonPathUtils.d.ts +1 -1
  36. package/dist/analyzer/pythonPathUtils.js +3 -1
  37. package/dist/analyzer/pythonPathUtils.js.map +1 -1
  38. package/dist/analyzer/scope.d.ts +10 -1
  39. package/dist/analyzer/scope.js +14 -1
  40. package/dist/analyzer/scope.js.map +1 -1
  41. package/dist/analyzer/service.d.ts +11 -6
  42. package/dist/analyzer/service.js +82 -83
  43. package/dist/analyzer/service.js.map +1 -1
  44. package/dist/analyzer/sourceEnumerator.d.ts +3 -0
  45. package/dist/analyzer/sourceEnumerator.js +27 -1
  46. package/dist/analyzer/sourceEnumerator.js.map +1 -1
  47. package/dist/analyzer/sourceFile.d.ts +2 -1
  48. package/dist/analyzer/sourceFile.js +55 -25
  49. package/dist/analyzer/sourceFile.js.map +1 -1
  50. package/dist/analyzer/sourceFileInfo.d.ts +4 -0
  51. package/dist/analyzer/sourceFileInfo.js +9 -0
  52. package/dist/analyzer/sourceFileInfo.js.map +1 -1
  53. package/dist/analyzer/sourceMapper.d.ts +1 -0
  54. package/dist/analyzer/sourceMapper.js +44 -0
  55. package/dist/analyzer/sourceMapper.js.map +1 -1
  56. package/dist/analyzer/tuples.js +3 -1
  57. package/dist/analyzer/tuples.js.map +1 -1
  58. package/dist/analyzer/typeEvaluator.js +102 -51
  59. package/dist/analyzer/typeEvaluator.js.map +1 -1
  60. package/dist/analyzer/typeEvaluatorTypes.d.ts +2 -0
  61. package/dist/analyzer/typeEvaluatorTypes.js +4 -0
  62. package/dist/analyzer/typeEvaluatorTypes.js.map +1 -1
  63. package/dist/analyzer/typeGuards.js +28 -6
  64. package/dist/analyzer/typeGuards.js.map +1 -1
  65. package/dist/analyzer/typeStubWriter.d.ts +15 -50
  66. package/dist/analyzer/typeStubWriter.js +91 -9
  67. package/dist/analyzer/typeStubWriter.js.map +1 -1
  68. package/dist/analyzer/typeUtils.d.ts +1 -0
  69. package/dist/analyzer/typeUtils.js +27 -3
  70. package/dist/analyzer/typeUtils.js.map +1 -1
  71. package/dist/analyzer/typeshedInfoProvider.d.ts +2 -0
  72. package/dist/analyzer/typeshedInfoProvider.js +232 -0
  73. package/dist/analyzer/typeshedInfoProvider.js.map +1 -0
  74. package/dist/backgroundAnalysisBase.d.ts +3 -3
  75. package/dist/backgroundAnalysisBase.js +12 -9
  76. package/dist/backgroundAnalysisBase.js.map +1 -1
  77. package/dist/commands/createTypeStub.d.ts +16 -8
  78. package/dist/commands/createTypeStub.js +58 -36
  79. package/dist/commands/createTypeStub.js.map +1 -1
  80. package/dist/common/cancellationUtils.d.ts +7 -0
  81. package/dist/common/cancellationUtils.js +34 -0
  82. package/dist/common/cancellationUtils.js.map +1 -1
  83. package/dist/common/collectionUtils.d.ts +3 -3
  84. package/dist/common/collectionUtils.js.map +1 -1
  85. package/dist/common/core.d.ts +1 -1
  86. package/dist/common/core.js.map +1 -1
  87. package/dist/common/crypto.js +11 -0
  88. package/dist/common/crypto.js.map +1 -1
  89. package/dist/common/extensibility.d.ts +7 -0
  90. package/dist/common/extensibility.js.map +1 -1
  91. package/dist/common/fullAccessHost.js +4 -2
  92. package/dist/common/fullAccessHost.js.map +1 -1
  93. package/dist/common/pathUtils.d.ts +4 -1
  94. package/dist/common/pathUtils.js.map +1 -1
  95. package/dist/common/realFileSystem.js +42 -3
  96. package/dist/common/realFileSystem.js.map +1 -1
  97. package/dist/common/serviceKeys.d.ts +3 -0
  98. package/dist/common/serviceKeys.js +2 -0
  99. package/dist/common/serviceKeys.js.map +1 -1
  100. package/dist/common/serviceProviderExtensions.js.map +1 -1
  101. package/dist/common/uri/uriUtils.d.ts +4 -0
  102. package/dist/common/uri/uriUtils.js +19 -3
  103. package/dist/common/uri/uriUtils.js.map +1 -1
  104. package/dist/common/workspaceEditUtils.js +0 -2
  105. package/dist/common/workspaceEditUtils.js.map +1 -1
  106. package/dist/languageServerBase.d.ts +1 -0
  107. package/dist/languageServerBase.js +6 -0
  108. package/dist/languageServerBase.js.map +1 -1
  109. package/dist/languageService/completionProvider.d.ts +6 -0
  110. package/dist/languageService/completionProvider.js +252 -75
  111. package/dist/languageService/completionProvider.js.map +1 -1
  112. package/dist/languageService/definitionProvider.js +1 -1
  113. package/dist/languageService/definitionProvider.js.map +1 -1
  114. package/dist/languageService/documentSymbolCollector.js +34 -4
  115. package/dist/languageService/documentSymbolCollector.js.map +1 -1
  116. package/dist/languageService/dynamicFeature.d.ts +3 -0
  117. package/dist/languageService/dynamicFeature.js +5 -0
  118. package/dist/languageService/dynamicFeature.js.map +1 -1
  119. package/dist/languageService/hoverProvider.d.ts +2 -1
  120. package/dist/languageService/hoverProvider.js +13 -5
  121. package/dist/languageService/hoverProvider.js.map +1 -1
  122. package/dist/languageService/pullDiagnosticsDynamicFeature.d.ts +1 -0
  123. package/dist/languageService/pullDiagnosticsDynamicFeature.js +4 -0
  124. package/dist/languageService/pullDiagnosticsDynamicFeature.js.map +1 -1
  125. package/dist/languageService/referencesProvider.js +5 -4
  126. package/dist/languageService/referencesProvider.js.map +1 -1
  127. package/dist/languageService/signatureHelpProvider.d.ts +1 -0
  128. package/dist/languageService/signatureHelpProvider.js +76 -2
  129. package/dist/languageService/signatureHelpProvider.js.map +1 -1
  130. package/dist/languageService/symbolIndexer.d.ts +1 -0
  131. package/dist/languageService/symbolIndexer.js.map +1 -1
  132. package/dist/languageService/tooltipUtils.d.ts +11 -4
  133. package/dist/languageService/tooltipUtils.js +195 -7
  134. package/dist/languageService/tooltipUtils.js.map +1 -1
  135. package/dist/parser/characterStream.js +26 -2
  136. package/dist/parser/characterStream.js.map +1 -1
  137. package/dist/parser/parser.d.ts +2 -0
  138. package/dist/parser/parser.js +11 -7
  139. package/dist/parser/parser.js.map +1 -1
  140. package/dist/parser/tokenizer.d.ts +6 -2
  141. package/dist/parser/tokenizer.js +602 -202
  142. package/dist/parser/tokenizer.js.map +1 -1
  143. package/dist/parser/tokenizerTypes.js +115 -39
  144. package/dist/parser/tokenizerTypes.js.map +1 -1
  145. package/dist/partialStubService.d.ts +11 -0
  146. package/dist/partialStubService.js +23 -1
  147. package/dist/partialStubService.js.map +1 -1
  148. package/dist/pyright.js +13 -2
  149. package/dist/pyright.js.map +1 -1
  150. package/dist/tests/benchmarks/parserBenchmark.test.d.ts +1 -0
  151. package/dist/tests/benchmarks/parserBenchmark.test.js +220 -0
  152. package/dist/tests/benchmarks/parserBenchmark.test.js.map +1 -0
  153. package/dist/tests/benchmarks/tokenizerBenchmark.test.d.ts +1 -0
  154. package/dist/tests/benchmarks/tokenizerBenchmark.test.js +236 -0
  155. package/dist/tests/benchmarks/tokenizerBenchmark.test.js.map +1 -0
  156. package/dist/tests/chainedSourceFiles.test.js +138 -0
  157. package/dist/tests/chainedSourceFiles.test.js.map +1 -1
  158. package/dist/tests/checker.test.js +12 -0
  159. package/dist/tests/checker.test.js.map +1 -1
  160. package/dist/tests/completions.test.js +328 -0
  161. package/dist/tests/completions.test.js.map +1 -1
  162. package/dist/tests/config.test.js +54 -0
  163. package/dist/tests/config.test.js.map +1 -1
  164. package/dist/tests/filesystem.test.js +44 -0
  165. package/dist/tests/filesystem.test.js.map +1 -1
  166. package/dist/tests/fourSlashRunner.test.js +1 -1
  167. package/dist/tests/fourSlashRunner.test.js.map +1 -1
  168. package/dist/tests/fourslash/findDefinitions.definitionFilter.preferSource.fourslash.js +25 -1
  169. package/dist/tests/fourslash/findDefinitions.definitionFilter.preferSource.fourslash.js.map +1 -1
  170. package/dist/tests/fourslash/import.multipart3.fourslash.d.ts +1 -0
  171. package/dist/tests/fourslash/import.multipart3.fourslash.js +46 -0
  172. package/dist/tests/fourslash/import.multipart3.fourslash.js.map +1 -0
  173. package/dist/tests/fourslash/import.pytyped.unsupportedDunderAll.fourslash.d.ts +1 -0
  174. package/dist/tests/fourslash/import.pytyped.unsupportedDunderAll.fourslash.js +33 -0
  175. package/dist/tests/fourslash/import.pytyped.unsupportedDunderAll.fourslash.js.map +1 -0
  176. package/dist/tests/harness/fourslash/runner.d.ts +4 -4
  177. package/dist/tests/harness/fourslash/runner.js +5 -5
  178. package/dist/tests/harness/fourslash/runner.js.map +1 -1
  179. package/dist/tests/harness/fourslash/testState.d.ts +20 -5
  180. package/dist/tests/harness/fourslash/testState.js +11 -26
  181. package/dist/tests/harness/fourslash/testState.js.map +1 -1
  182. package/dist/tests/harness/fourslash/testStateUtils.js +2 -0
  183. package/dist/tests/harness/fourslash/testStateUtils.js.map +1 -1
  184. package/dist/tests/harness/testAccessHost.d.ts +3 -1
  185. package/dist/tests/harness/testAccessHost.js +6 -2
  186. package/dist/tests/harness/testAccessHost.js.map +1 -1
  187. package/dist/tests/harness/testHost.js +20 -18
  188. package/dist/tests/harness/testHost.js.map +1 -1
  189. package/dist/tests/harness/vfs/factory.js +4 -1
  190. package/dist/tests/harness/vfs/factory.js.map +1 -1
  191. package/dist/tests/harness/vfs/filesystem.d.ts +8 -1
  192. package/dist/tests/harness/vfs/filesystem.js +84 -30
  193. package/dist/tests/harness/vfs/filesystem.js.map +1 -1
  194. package/dist/tests/hoverProvider.test.js +290 -0
  195. package/dist/tests/hoverProvider.test.js.map +1 -1
  196. package/dist/tests/importResolverSupport.test.d.ts +1 -0
  197. package/dist/tests/importResolverSupport.test.js +319 -0
  198. package/dist/tests/importResolverSupport.test.js.map +1 -0
  199. package/dist/tests/importStatementUtils.test.js +66 -0
  200. package/dist/tests/importStatementUtils.test.js.map +1 -1
  201. package/dist/tests/lsp/{webpack.testserver.config.d.ts → rspack.testserver.config.d.ts} +1 -1
  202. package/dist/tests/lsp/{webpack.testserver.config.js → rspack.testserver.config.js} +4 -11
  203. package/dist/tests/lsp/rspack.testserver.config.js.map +1 -0
  204. package/dist/tests/realTempFile.test.d.ts +1 -0
  205. package/dist/tests/realTempFile.test.js +144 -0
  206. package/dist/tests/realTempFile.test.js.map +1 -0
  207. package/dist/tests/service.test.js +182 -3
  208. package/dist/tests/service.test.js.map +1 -1
  209. package/dist/tests/signatureHelp.test.js +391 -6
  210. package/dist/tests/signatureHelp.test.js.map +1 -1
  211. package/dist/tests/testState.test.js +19 -0
  212. package/dist/tests/testState.test.js.map +1 -1
  213. package/dist/tests/tokenizer.test.js +59 -2
  214. package/dist/tests/tokenizer.test.js.map +1 -1
  215. package/dist/tests/typeEvaluator1.test.js +4 -0
  216. package/dist/tests/typeEvaluator1.test.js.map +1 -1
  217. package/dist/tests/typeEvaluator2.test.js +12 -0
  218. package/dist/tests/typeEvaluator2.test.js.map +1 -1
  219. package/dist/tests/typeEvaluator3.test.js +8 -1
  220. package/dist/tests/typeEvaluator3.test.js.map +1 -1
  221. package/dist/tests/typeEvaluator6.test.js +7 -0
  222. package/dist/tests/typeEvaluator6.test.js.map +1 -1
  223. package/dist/tests/typeEvaluator8.test.js +13 -1
  224. package/dist/tests/typeEvaluator8.test.js.map +1 -1
  225. package/dist/tests/uri.test.js +29 -0
  226. package/dist/tests/uri.test.js.map +1 -1
  227. package/dist/tests/wildcardImportPackageMerge.test.d.ts +1 -0
  228. package/dist/tests/wildcardImportPackageMerge.test.js +97 -0
  229. package/dist/tests/wildcardImportPackageMerge.test.js.map +1 -0
  230. package/dist/tests/workspaceEditUtils.test.js +0 -1
  231. package/dist/tests/workspaceEditUtils.test.js.map +1 -1
  232. package/package.json +16 -14
  233. package/dist/tests/lsp/webpack.testserver.config.js.map +0 -1
@@ -60,6 +60,53 @@ const _keywords = new Map([
60
60
  ['True', 33 /* KeywordType.True */],
61
61
  ]);
62
62
  const _softKeywords = new Set(['match', 'case', 'type']);
63
// Keyword fast-reject data.
//
// _keywordFirstCharTable has one boolean per ASCII code (0-127); an entry is
// true when at least one key of the _keywords Map begins with that character.
// A lookup by charCodeAt(0) lets callers discard most identifiers without
// consulting the Map itself.
const _keywordFirstCharTable = (() => {
    const startsKeyword = new Array(128).fill(false);
    _keywords.forEach((_type, keyword) => {
        const lead = keyword.charCodeAt(0);
        if (lead < 128) {
            startsKeyword[lead] = true;
        }
    });
    return startsKeyword;
})();
// Hard-coded keyword length bounds used for the same fast-reject purpose.
// NOTE(review): these are not derived from _keywords, so they must be kept in
// sync with it by hand.
const _keywordMinLen = 2;
const _keywordMaxLen = 9; // length of '__debug__'
78
// Keywords bucketed by the ASCII code of their first character. Each occupied
// slot holds an array of { text, type } records copied from _keywords; slots
// for characters that start no keyword are left unset. The buckets allow a
// candidate identifier to be compared in place against the source text rather
// than being extracted as a substring first.
const _keywordEntriesByFirstChar = (() => {
    const buckets = new Array(128);
    _keywords.forEach((type, text) => {
        const lead = text.charCodeAt(0);
        if (lead < 128) {
            if (buckets[lead] === undefined || buckets[lead] === null) {
                buckets[lead] = [];
            }
            buckets[lead].push({ text, type });
        }
    });
    return buckets;
})();
91
// Looks up the keyword that exactly occupies text[start, start + length).
//
// text   - the full source text.
// start  - index of the first character of the candidate identifier.
// length - number of characters in the candidate identifier.
//
// Returns the keyword's type value as stored in _keywords, or undefined when
// the slice is not a keyword. No substring of `text` is created.
function getKeywordTypeFromTextSlice(text, start, length) {
    // Cheapest rejection first: the length must fall inside the keyword bounds.
    if (length < _keywordMinLen || length > _keywordMaxLen) {
        return undefined;
    }
    // Next, reject on the first character alone.
    const lead = text.charCodeAt(start);
    if (lead >= 128 || !_keywordFirstCharTable[lead]) {
        return undefined;
    }
    const bucket = _keywordEntriesByFirstChar[lead];
    if (!bucket) {
        return undefined;
    }
    // Compare each same-length keyword against the source text in place.
    for (let i = 0; i < bucket.length; i++) {
        const { text: keyword, type } = bucket[i];
        if (keyword.length === length && text.startsWith(keyword, start)) {
            return type;
        }
    }
    return undefined;
}
63
110
  const _operatorInfo = {
64
111
  [0 /* OperatorType.Add */]: 1 /* OperatorFlags.Unary */ | 2 /* OperatorFlags.Binary */,
65
112
  [1 /* OperatorType.AddEqual */]: 4 /* OperatorFlags.Assignment */,
@@ -104,12 +151,331 @@ const _operatorInfo = {
104
151
  [41 /* OperatorType.In */]: 2 /* OperatorFlags.Binary */,
105
152
  [42 /* OperatorType.NotIn */]: 2 /* OperatorFlags.Binary */,
106
153
  };
154
// Sentinel stored in the operator lookup tables for "no operator here".
const _unsetSingleCharOperatorType = -1;
// Maps an ASCII character code to the operator type of that character used
// on its own; every other slot holds _unsetSingleCharOperatorType.
const _singleCharOperatorTypeTable = (() => {
    const pairs = [
        [61 /* Char.Equal */, 2 /* OperatorType.Assign */],
        [43 /* Char.Plus */, 0 /* OperatorType.Add */],
        [45 /* Char.Hyphen */, 33 /* OperatorType.Subtract */],
        [42 /* Char.Asterisk */, 26 /* OperatorType.Multiply */],
        [47 /* Char.Slash */, 10 /* OperatorType.Divide */],
        [38 /* Char.Ampersand */, 3 /* OperatorType.BitwiseAnd */],
        [124 /* Char.Bar */, 6 /* OperatorType.BitwiseOr */],
        [94 /* Char.Caret */, 8 /* OperatorType.BitwiseXor */],
        [37 /* Char.Percent */, 24 /* OperatorType.Mod */],
        [126 /* Char.Tilde */, 5 /* OperatorType.BitwiseInvert */],
        [64 /* Char.At */, 22 /* OperatorType.MatrixMultiply */],
        [60 /* Char.Less */, 20 /* OperatorType.LessThan */],
        [62 /* Char.Greater */, 15 /* OperatorType.GreaterThan */],
    ];
    const table = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    for (const [charCode, operatorType] of pairs) {
        table[charCode] = operatorType;
    }
    return table;
})();
// Maps an ASCII character code to the augmented-assignment operator type
// listed for it (the "...Equal" operator types, e.g. Plus -> AddEqual); every
// other slot holds _unsetSingleCharOperatorType.
const _singleCharEqualOperatorTypeTable = (() => {
    const pairs = [
        [43 /* Char.Plus */, 1 /* OperatorType.AddEqual */],
        [45 /* Char.Hyphen */, 34 /* OperatorType.SubtractEqual */],
        [42 /* Char.Asterisk */, 27 /* OperatorType.MultiplyEqual */],
        [47 /* Char.Slash */, 11 /* OperatorType.DivideEqual */],
        [38 /* Char.Ampersand */, 4 /* OperatorType.BitwiseAndEqual */],
        [124 /* Char.Bar */, 7 /* OperatorType.BitwiseOrEqual */],
        [94 /* Char.Caret */, 9 /* OperatorType.BitwiseXorEqual */],
        [37 /* Char.Percent */, 25 /* OperatorType.ModEqual */],
        [64 /* Char.At */, 23 /* OperatorType.MatrixMultiplyEqual */],
    ];
    const table = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    for (const [charCode, operatorType] of pairs) {
        table[charCode] = operatorType;
    }
    return table;
})();
187
// Packs two character codes into a single integer key for two-character
// lookup tables.
//
// char1 - code of the first character.
// char2 - code of the second character.
//
// Returns (char1 << 8) | char2 when both codes fit in one byte. If either
// code is above 0xFF the function returns -1, which is never produced for a
// byte-range pair and therefore can never equal a real table key. Without
// this guard the high bits of char2 overlap char1's bits, so a pair such as
// '=' followed by U+013D would collide with the key for '=='.
function getTwoCharKey(char1, char2) {
    if (char1 > 0xff || char2 > 0xff) {
        return -1;
    }
    return (char1 << 8) | char2;
}
190
// Lookup tables for two-character tokens, keyed by getTwoCharKey(first, second).
// Maps are used rather than dense typed arrays because only a handful of
// pairs are ever registered.
//
// _twoCharOperatorTypeMap: character pair -> operator type.
const _twoCharOperatorTypeMap = new Map([
    [61 /* Char.Equal */, 61 /* Char.Equal */, 12 /* OperatorType.Equals */],
    [33 /* Char.ExclamationMark */, 61 /* Char.Equal */, 28 /* OperatorType.NotEquals */],
    [60 /* Char.Less */, 61 /* Char.Equal */, 21 /* OperatorType.LessThanOrEqual */],
    [62 /* Char.Greater */, 61 /* Char.Equal */, 16 /* OperatorType.GreaterThanOrEqual */],
    [60 /* Char.Less */, 62 /* Char.Greater */, 19 /* OperatorType.LessOrGreaterThan */],
].map(([first, second, operatorType]) => [getTwoCharKey(first, second), operatorType]));
// _twoCharSpecialTokenTypeMap: character pair -> token type, for pairs that
// are tokens in their own right rather than operators.
const _twoCharSpecialTokenTypeMap = new Map([
    [45 /* Char.Hyphen */, 62 /* Char.Greater */, 21 /* TokenType.Arrow */],
].map(([first, second, tokenType]) => [getTwoCharKey(first, second), tokenType]));
202
// Maps an ASCII character code to the operator type registered for that
// character repeated (judging by the operator names: '**', '//', '<<', '>>');
// every other slot holds _unsetSingleCharOperatorType.
const _repeatedCharOperatorTypeTable = (() => {
    const table = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    const pairs = [
        [42 /* Char.Asterisk */, 29 /* OperatorType.Power */],
        [47 /* Char.Slash */, 13 /* OperatorType.FloorDivide */],
        [60 /* Char.Less */, 17 /* OperatorType.LeftShift */],
        [62 /* Char.Greater */, 31 /* OperatorType.RightShift */],
    ];
    pairs.forEach(([charCode, operatorType]) => {
        table[charCode] = operatorType;
    });
    return table;
})();
// Same indexing as above, but holding the augmented-assignment counterpart of
// each repeated-character operator (the "...Equal" operator types).
const _repeatedCharEqualOperatorTypeTable = (() => {
    const table = new Int16Array(128).fill(_unsetSingleCharOperatorType);
    const pairs = [
        [42 /* Char.Asterisk */, 30 /* OperatorType.PowerEqual */],
        [47 /* Char.Slash */, 14 /* OperatorType.FloorDivideEqual */],
        [60 /* Char.Less */, 18 /* OperatorType.LeftShiftEqual */],
        [62 /* Char.Greater */, 32 /* OperatorType.RightShiftEqual */],
    ];
    pairs.forEach(([charCode, operatorType]) => {
        table[charCode] = operatorType;
    });
    return table;
})();
107
220
  const _byteOrderMarker = 0xfeff;
108
221
  const defaultTabSize = 8;
109
- const magicsRegEx = /\\\s*$/;
110
- const typeIgnoreCommentRegEx = /((^|#)\s*)type:\s*ignore(\s*\[([\s\w-,]*)\]|\s|$)/;
111
- const pyrightIgnoreCommentRegEx = /((^|#)\s*)pyright:\s*ignore(\s*\[([\s\w-,]*)\]|\s|$)/;
112
- const underscoreRegEx = /_/g;
222
// Boolean table indexed by ASCII code: true for the two quote characters and
// for the letters f, r, b, u, t (either case), i.e. the only characters that
// can open a string literal or one of its prefixes. Consulting it first lets
// the tokenizer skip the string-prefix check for every other character.
const _canStartString = (() => {
    const starters = new Set([
        39 /* Char.SingleQuote */,
        34 /* Char.DoubleQuote */,
        102 /* Char.f */,
        70 /* Char.F */,
        114 /* Char.r */,
        82 /* Char.R */,
        98 /* Char.b */,
        66 /* Char.B */,
        117 /* Char.u */,
        85 /* Char.U */,
        116 /* Char.t */,
        84 /* Char.T */,
    ]);
    return Array.from({ length: 128 }, (_unused, code) => starters.has(code));
})();
235
// ASCII identifier classification tables, each indexed by a char code < 128
// and filled once at module load by querying the `characters` helpers.
//
// _asciiIdentifierContinue[c] is true when isIdentifierChar(c) is truthy,
// _asciiIdentifierStart[c] when isIdentifierStartChar(c) is truthy. A table
// lookup replaces a function call for ASCII input; callers are expected to
// fall back to the generic helpers for codes >= 128.
const _asciiIdentifierContinue = Array.from({ length: 128 }, (_unused, code) => ((0, characters_1.isIdentifierChar)(code) ? true : false));
const _asciiIdentifierStart = Array.from({ length: 128 }, (_unused, code) => ((0, characters_1.isIdentifierStartChar)(code) ? true : false));
258
// Returns a copy of text[start, end) assembled one character at a time.
//
// The copy is deliberately built by concatenating charAt() results rather
// than by slice()/substring(): the stated intent is that the result should
// not keep the (potentially large) source text alive through an engine-level
// sliced-string reference, and should be cheaper than a Buffer round trip.
// NOTE(review): those engine-level effects are the original author's claim
// and are not verifiable from this function alone.
//
// Indices outside the string contribute nothing, because charAt() yields ''.
function detachSubstring(text, start, end) {
    let copy = '';
    let index = start;
    while (index < end) {
        copy = copy + text.charAt(index);
        index += 1;
    }
    return copy;
}
271
// Returns text[start, end) with every underscore removed, without first
// materialising the range as an intermediate substring.
//
// text  - the full source text.
// start - index of the first character of the range.
// end   - index one past the last character of the range.
//
// When the range holds no underscore the result is simply the slice. When it
// does, the part before the first underscore is sliced and the remaining
// non-underscore characters are appended one by one.
function removeUnderscoresFromRange(text, start, end) {
    // Clamp the scan limit to the text length. slice() already clamps on the
    // no-underscore path; without the same clamp here, indexing past the end
    // would append the literal string "undefined" for each missing character.
    const limit = end > text.length ? text.length : end;
    let firstUnderscoreIndex = -1;
    for (let i = start; i < limit; i++) {
        if (text.charCodeAt(i) === 95 /* Char.Underscore */) {
            firstUnderscoreIndex = i;
            break;
        }
    }
    if (firstUnderscoreIndex < 0) {
        return text.slice(start, limit);
    }
    let result = text.slice(start, firstUnderscoreIndex);
    for (let i = firstUnderscoreIndex + 1; i < limit; i++) {
        if (text.charCodeAt(i) !== 95 /* Char.Underscore */) {
            result += text[i];
        }
    }
    return result;
}
292
// Hand-written stand-in for the former magicsRegEx (/\\\s*$/).
//
// Reports whether text[start, end) ends with a backslash that is followed
// only by blanks. "Blank" here means space, tab or form feed; this is
// narrower than the regex's \s class, and any other trailing character
// (including a line terminator) makes the function return false.
function endsWithBackslashContinuation(text, start, end) {
    for (let idx = end - 1; idx >= start; idx--) {
        const code = text.charCodeAt(idx);
        const isBlank = code === 32 /* Char.Space */ || code === 9 /* Char.Tab */ || code === 12 /* Char.FormFeed */;
        if (!isBlank) {
            // First non-blank character from the right decides the answer.
            return code === 92 /* Char.Backslash */;
        }
    }
    // Empty range, or nothing but blanks.
    return false;
}
309
+ // Parses a bracketed rule list starting at `pos` (which must point at '[').
310
+ // Returns the bracket content (without brackets) and the position just past ']',
311
+ // or undefined if the bracket is malformed (e.g. unclosed, or contains invalid chars
312
+ // before a closing bracket is found).
313
+ function parseIgnoreBracketContent(text, pos, rangeEnd, allowColon) {
314
+ pos++; // skip '['
315
+ const bracketStart = pos;
316
+ while (pos < rangeEnd && text.charCodeAt(pos) !== 93 /* Char.CloseBracket */) {
317
+ // Only allow valid bracket content chars: \s, \w, -, ,
318
+ // (plus ':' for type: ignore to support tool-namespaced codes)
319
+ const bc = text.charCodeAt(pos);
320
+ if ((bc >= 97 /* Char.a */ && bc <= 122 /* Char.z */) ||
321
+ (bc >= 65 /* Char.A */ && bc <= 90 /* Char.Z */) ||
322
+ (bc >= 48 /* Char._0 */ && bc <= 57 /* Char._9 */) ||
323
+ bc === 95 /* Char.Underscore */ ||
324
+ bc === 45 /* Char.Hyphen */ ||
325
+ bc === 44 /* Char.Comma */ ||
326
+ bc === 32 /* Char.Space */ ||
327
+ bc === 9 /* Char.Tab */ ||
328
+ (allowColon && bc === 58 /* Char.Colon */)) {
329
+ pos++;
330
+ }
331
+ else {
332
+ break;
333
+ }
334
+ }
335
+ if (pos < rangeEnd && text.charCodeAt(pos) === 93 /* Char.CloseBracket */) {
336
+ return { content: text.slice(bracketStart, pos), newPos: pos + 1 };
337
+ }
338
+ return undefined;
339
+ }
340
// Hand-written stand-in for the former typeIgnoreCommentRegEx and
// pyrightIgnoreCommentRegEx.
//
// Searches text[rangeStart, rangeEnd) for `<directive>: ignore`, optionally
// followed by a bracketed rule list, where `directive` is 'type' or 'pyright'.
//
// A candidate is accepted only when all of the following hold:
//   - the directive keyword is preceded (across optional spaces/tabs) by '#'
//     or by the start of the range;
//   - it is followed by ':', optional spaces/tabs, and the word 'ignore';
//   - 'ignore' is followed by the end of the range, a space/tab, or '['.
// If a '[' follows (directly or after spaces/tabs) it must introduce a
// well-formed rule list, otherwise the candidate is rejected. For the 'type'
// directive the rule list may also contain ':' (tool-namespaced codes).
//
// Returns undefined when nothing matches, otherwise an object with
//   fullMatch      - text from the prefix start to the end of the match,
//   prefix         - text from the prefix start up to the directive keyword,
//   bracketContent - the rule list without brackets, or undefined,
//   index          - absolute position of the prefix start within `text`.
//
// The scan is bounded by rangeEnd on purpose: an unbounded indexOf() could
// run far past the comment when the keyword occurs only later in the file.
function matchIgnoreDirective(text, rangeStart, rangeEnd, directive) {
    const allowColonInBracket = directive === 'type';
    const ignoreWord = 'ignore';
    const directiveLength = directive.length;
    const leadCode = directive.charCodeAt(0);
    // Last index at which the whole directive keyword still fits in the range.
    const lastCandidate = rangeEnd - directiveLength;
    candidates: for (let directiveIdx = rangeStart; directiveIdx <= lastCandidate; directiveIdx++) {
        // --- Locate the directive keyword at this position. ---
        if (text.charCodeAt(directiveIdx) !== leadCode) {
            continue;
        }
        for (let d = 1; d < directiveLength; d++) {
            if (text.charCodeAt(directiveIdx + d) !== directive.charCodeAt(d)) {
                continue candidates;
            }
        }
        // --- Anchor: walk left over spaces/tabs to '#' or the range start. ---
        let anchor = directiveIdx - 1;
        while (anchor >= rangeStart) {
            const before = text.charCodeAt(anchor);
            if (before !== 32 /* Char.Space */ && before !== 9 /* Char.Tab */) {
                break;
            }
            anchor--;
        }
        let prefixStart;
        if (anchor < rangeStart) {
            prefixStart = rangeStart;
        }
        else if (text.charCodeAt(anchor) === 35 /* Char.Hash */) {
            prefixStart = anchor;
        }
        else {
            continue;
        }
        // --- ':' directly after the keyword, then optional spaces/tabs. ---
        let cursor = directiveIdx + directiveLength;
        if (cursor >= rangeEnd || text.charCodeAt(cursor) !== 58 /* Char.Colon */) {
            continue;
        }
        cursor++;
        while (cursor < rangeEnd) {
            const blank = text.charCodeAt(cursor);
            if (blank !== 32 /* Char.Space */ && blank !== 9 /* Char.Tab */) {
                break;
            }
            cursor++;
        }
        // --- The word 'ignore', fully inside the range. ---
        if (cursor + ignoreWord.length > rangeEnd) {
            continue;
        }
        for (let k = 0; k < ignoreWord.length; k++) {
            if (text.charCodeAt(cursor + k) !== ignoreWord.charCodeAt(k)) {
                continue candidates;
            }
        }
        cursor += ignoreWord.length;
        // --- What follows 'ignore': end of range, blanks, or a rule list. ---
        let bracketContent;
        if (cursor < rangeEnd) {
            const follower = text.charCodeAt(cursor);
            const followerIsBlank = follower === 32 /* Char.Space */ || follower === 9 /* Char.Tab */;
            if (!followerIsBlank && follower !== 91 /* Char.OpenBracket */) {
                // e.g. "ignored" - the word merely starts with 'ignore'.
                continue;
            }
            // Consume blanks (no-op when the follower is already '[').
            while (cursor < rangeEnd) {
                const blank = text.charCodeAt(cursor);
                if (blank !== 32 /* Char.Space */ && blank !== 9 /* Char.Tab */) {
                    break;
                }
                cursor++;
            }
            if (cursor < rangeEnd && text.charCodeAt(cursor) === 91 /* Char.OpenBracket */) {
                const parsed = parseIgnoreBracketContent(text, cursor, rangeEnd, allowColonInBracket);
                if (parsed === undefined) {
                    continue;
                }
                bracketContent = parsed.content;
                cursor = parsed.newPos;
            }
        }
        const prefix = text.slice(prefixStart, directiveIdx);
        const fullMatch = text.slice(prefixStart, cursor);
        return {
            fullMatch,
            prefix,
            bracketContent,
            index: prefixStart,
        };
    }
    return undefined;
}
113
479
  var MagicsKind;
114
480
  (function (MagicsKind) {
115
481
  MagicsKind[MagicsKind["None"] = 0] = "None";
@@ -124,6 +490,10 @@ class Tokenizer {
124
490
  this._parenDepth = 0;
125
491
  this._lineRanges = [];
126
492
  this._indentAmounts = [];
493
+ // Cached answer to "are there any non-trivial tokens yet?" Once true it
494
+ // stays true, so the O(n) scan in _handleComment only runs while the token
495
+ // stream consists purely of NewLine / Indent tokens.
496
+ this._hasTokenBeforeIgnoreAll = false;
127
497
  this._typeIgnoreLines = new Map();
128
498
  this._pyrightIgnoreLines = new Map();
129
499
  this._fStringStack = [];
@@ -146,10 +516,7 @@ class Tokenizer {
146
516
  this._doubleQuoteCount = 0;
147
517
  // Assume Jupyter notebook tokenization rules?
148
518
  this._useNotebookMode = false;
149
- // Intern identifier strings within a single tokenization pass. This reduces
150
- // per-identifier allocations while still ensuring we don't retain substrings
151
- // that reference the original source text.
152
- this._identifierInternedStrings = new Map();
519
+ this._identifierCache = new Array(Tokenizer._identifierCacheSize);
153
520
  }
154
521
  tokenize(text, start, length, initialParenDepth = 0, useNotebookMode = false) {
155
522
  if (start === undefined) {
@@ -175,7 +542,8 @@ class Tokenizer {
175
542
  this._lineRanges = [];
176
543
  this._indentAmounts = [];
177
544
  this._useNotebookMode = useNotebookMode;
178
- this._identifierInternedStrings.clear();
545
+ // Clear per-source identifier intern cache.
546
+ this._identifierCache.fill(undefined);
179
547
  const end = start + length;
180
548
  if (start === 0) {
181
549
  this._readIndentationAfterNewLine();
@@ -311,19 +679,22 @@ class Tokenizer {
311
679
  // tokens onto the token list. Returns true if the caller should advance
312
680
  // to the next character.
313
681
  _handleCharacter() {
314
- // f-strings, b-strings, etc
315
- const stringPrefixLength = this._getStringPrefixLength();
316
- if (stringPrefixLength >= 0) {
317
- let stringPrefix = '';
318
- if (stringPrefixLength > 0) {
319
- stringPrefix = this._cs.getText().slice(this._cs.position, this._cs.position + stringPrefixLength);
320
- // Indeed a string
321
- this._cs.advance(stringPrefixLength);
322
- }
323
- const quoteTypeFlags = this._getQuoteTypeFlags(stringPrefix);
324
- if (quoteTypeFlags !== 0 /* StringTokenFlags.None */) {
325
- this._handleString(quoteTypeFlags, stringPrefixLength);
326
- return true;
682
+ // f-strings, b-strings, etc — only check if current char can start a string
683
+ const currentChar = this._cs.currentChar;
684
+ if (currentChar < 128 && _canStartString[currentChar]) {
685
+ const stringPrefixLength = this._getStringPrefixLength();
686
+ if (stringPrefixLength >= 0) {
687
+ let stringPrefix = '';
688
+ if (stringPrefixLength > 0) {
689
+ stringPrefix = this._cs.getText().slice(this._cs.position, this._cs.position + stringPrefixLength);
690
+ // Indeed a string
691
+ this._cs.advance(stringPrefixLength);
692
+ }
693
+ const quoteTypeFlags = this._getQuoteTypeFlags(stringPrefix);
694
+ if (quoteTypeFlags !== 0 /* StringTokenFlags.None */) {
695
+ this._handleString(quoteTypeFlags, stringPrefixLength);
696
+ return true;
697
+ }
327
698
  }
328
699
  }
329
700
  if (this._cs.currentChar === 35 /* Char.Hash */) {
@@ -653,48 +1024,105 @@ class Tokenizer {
653
1024
  }
654
1025
  }
655
1026
  _tryIdentifier() {
656
- const swallowRemainingChars = () => {
657
- while (true) {
658
- if ((0, characters_1.isIdentifierChar)(this._cs.currentChar)) {
659
- this._cs.moveNext();
660
- }
661
- else if ((0, characters_1.isIdentifierChar)(this._cs.currentChar, this._cs.nextChar)) {
662
- this._cs.moveNext();
663
- this._cs.moveNext();
1027
+ const cs = this._cs;
1028
+ const text = cs.getText();
1029
+ const textLen = text.length;
1030
+ const start = cs.position;
1031
+ // Fast path for ASCII identifier start. Avoids the function call and
1032
+ // surrogate logic for the common case (Python source is overwhelmingly
1033
+ // ASCII identifiers).
1034
+ const firstChar = cs.currentChar;
1035
+ let pos = start;
1036
+ if (firstChar < 128) {
1037
+ if (!_asciiIdentifierStart[firstChar]) {
1038
+ // Not an identifier start and not a surrogate candidate.
1039
+ return false;
1040
+ }
1041
+ pos++;
1042
+ // Tight loop: advance while we're still in ASCII identifier chars.
1043
+ while (pos < textLen) {
1044
+ const ch = text.charCodeAt(pos);
1045
+ if (ch < 128 && _asciiIdentifierContinue[ch]) {
1046
+ pos++;
664
1047
  }
665
1048
  else {
666
1049
  break;
667
1050
  }
668
1051
  }
669
- };
670
- const start = this._cs.position;
671
- if ((0, characters_1.isIdentifierStartChar)(this._cs.currentChar)) {
672
- this._cs.moveNext();
673
- swallowRemainingChars();
1052
+ // If we hit a non-ASCII char, fall back to the generic loop to
1053
+ // handle possible unicode identifier continue / surrogate pairs.
1054
+ if (pos < textLen && text.charCodeAt(pos) >= 128) {
1055
+ cs.advance(pos - start);
1056
+ this._swallowNonAsciiIdentifierChars();
1057
+ pos = cs.position;
1058
+ }
1059
+ else {
1060
+ cs.advance(pos - start);
1061
+ }
674
1062
  }
675
- else if ((0, characters_1.isIdentifierStartChar)(this._cs.currentChar, this._cs.nextChar)) {
676
- this._cs.moveNext();
677
- this._cs.moveNext();
678
- swallowRemainingChars();
1063
+ else {
1064
+ // Non-ASCII start: use the generic path (supports surrogates).
1065
+ if ((0, characters_1.isIdentifierStartChar)(firstChar)) {
1066
+ cs.moveNext();
1067
+ }
1068
+ else if ((0, characters_1.isIdentifierStartChar)(firstChar, cs.nextChar)) {
1069
+ cs.moveNext();
1070
+ cs.moveNext();
1071
+ }
1072
+ else {
1073
+ return false;
1074
+ }
1075
+ this._swallowNonAsciiIdentifierChars();
1076
+ pos = cs.position;
679
1077
  }
680
- if (this._cs.position > start) {
681
- const value = this._cs.getText().slice(start, this._cs.position);
682
- const keywordType = _keywords.get(value);
1078
+ if (pos > start) {
1079
+ const end = pos;
1080
+ const length = end - start;
1081
+ const keywordType = getKeywordTypeFromTextSlice(text, start, length);
683
1082
  if (keywordType !== undefined) {
684
- this._tokens.push(tokenizerTypes_1.KeywordToken.create(start, this._cs.position - start, keywordType, this._getComments()));
1083
+ this._tokens.push(tokenizerTypes_1.KeywordToken.create(start, length, keywordType, this._getComments()));
685
1084
  }
686
1085
  else {
687
- const internedValue = this._identifierInternedStrings.get(value) ?? this._internIdentifierString(value);
688
- this._tokens.push(tokenizerTypes_1.IdentifierToken.create(start, this._cs.position - start, internedValue, this._getComments()));
1086
+ const value = this._internIdentifier(text, start, end, length);
1087
+ this._tokens.push(tokenizerTypes_1.IdentifierToken.create(start, length, value, this._getComments()));
689
1088
  }
690
1089
  return true;
691
1090
  }
692
1091
  return false;
693
1092
  }
694
- _internIdentifierString(value) {
695
- const clonedValue = (0, core_1.cloneStr)(value);
696
- this._identifierInternedStrings.set(clonedValue, clonedValue);
697
- return clonedValue;
1093
+ // Per-tokenize identifier intern cache. Direct-mapped, so collisions
1094
+ // simply replace the slot. Common identifiers (self, cls, True, None,
1095
+ // str, int, dict, etc.) get deduplicated to a single string object,
1096
+ // avoiding repeated detachSubstring allocations for the same name.
1097
+ _internIdentifier(text, start, end, length) {
1098
+ const firstChar = text.charCodeAt(start);
1099
+ const lastChar = text.charCodeAt(end - 1);
1100
+ // Hash mixes length, first and last char; multiplier values chosen
1101
+ // to spread hits for common short identifiers across the table.
1102
+ const hash = (firstChar * 31 + lastChar * 7 + length) & Tokenizer._identifierCacheMask;
1103
+ const cached = this._identifierCache[hash];
1104
+ if (cached !== undefined && cached.length === length && text.startsWith(cached, start)) {
1105
+ return cached;
1106
+ }
1107
+ const value = detachSubstring(text, start, end);
1108
+ this._identifierCache[hash] = value;
1109
+ return value;
1110
+ }
1111
+ // Generic identifier-continue loop that handles unicode + surrogate pairs.
1112
+ // Falls back to this when the fast ASCII loop encounters a non-ASCII char.
1113
+ _swallowNonAsciiIdentifierChars() {
1114
+ while (true) {
1115
+ if ((0, characters_1.isIdentifierChar)(this._cs.currentChar)) {
1116
+ this._cs.moveNext();
1117
+ }
1118
+ else if ((0, characters_1.isIdentifierChar)(this._cs.currentChar, this._cs.nextChar)) {
1119
+ this._cs.moveNext();
1120
+ this._cs.moveNext();
1121
+ }
1122
+ else {
1123
+ break;
1124
+ }
1125
+ }
698
1126
  }
699
1127
  _isPossibleNumber() {
700
1128
  if ((0, characters_1.isDecimal)(this._cs.currentChar)) {
@@ -739,8 +1167,9 @@ class Tokenizer {
739
1167
  radix = 8;
740
1168
  }
741
1169
  if (radix > 0) {
742
- const text = this._cs.getText().slice(start, this._cs.position);
743
- const simpleIntText = text.replace(underscoreRegEx, '');
1170
+ const end = this._cs.position;
1171
+ const text = this._cs.getText();
1172
+ const simpleIntText = removeUnderscoresFromRange(text, start, end);
744
1173
  let intValue = parseInt(simpleIntText.slice(leadingChars), radix);
745
1174
  if (!isNaN(intValue)) {
746
1175
  const bigIntValue = BigInt(simpleIntText);
@@ -749,7 +1178,7 @@ class Tokenizer {
749
1178
  intValue > Number.MAX_SAFE_INTEGER) {
750
1179
  intValue = bigIntValue;
751
1180
  }
752
- this._tokens.push(tokenizerTypes_1.NumberToken.create(start, text.length, intValue, true, false, this._getComments()));
1181
+ this._tokens.push(tokenizerTypes_1.NumberToken.create(start, end - start, intValue, true, false, this._getComments()));
753
1182
  return true;
754
1183
  }
755
1184
  }
@@ -783,11 +1212,13 @@ class Tokenizer {
783
1212
  (this._cs.currentChar < 49 /* Char._1 */ || this._cs.currentChar > 57 /* Char._9 */);
784
1213
  }
785
1214
  if (isDecimalInteger) {
786
- let text = this._cs.getText().slice(start, this._cs.position);
787
- const simpleIntText = text.replace(underscoreRegEx, '');
1215
+ const textEnd = this._cs.position;
1216
+ const sourceText = this._cs.getText();
1217
+ const simpleIntText = removeUnderscoresFromRange(sourceText, start, textEnd);
788
1218
  let intValue = parseInt(simpleIntText, 10);
789
1219
  if (!isNaN(intValue)) {
790
1220
  let isImaginary = false;
1221
+ let tokenLength = textEnd - start;
791
1222
  const bigIntValue = BigInt(simpleIntText);
792
1223
  if (!isFinite(intValue) ||
793
1224
  bigIntValue < Number.MIN_SAFE_INTEGER ||
@@ -796,10 +1227,10 @@ class Tokenizer {
796
1227
  }
797
1228
  if (this._cs.currentChar === 106 /* Char.j */ || this._cs.currentChar === 74 /* Char.J */) {
798
1229
  isImaginary = true;
799
- text += String.fromCharCode(this._cs.currentChar);
800
1230
  this._cs.moveNext();
1231
+ tokenLength += 1;
801
1232
  }
802
- this._tokens.push(tokenizerTypes_1.NumberToken.create(start, text.length, intValue, true, isImaginary, this._getComments()));
1233
+ this._tokens.push(tokenizerTypes_1.NumberToken.create(start, tokenLength, intValue, true, isImaginary, this._getComments()));
803
1234
  return true;
804
1235
  }
805
1236
  }
@@ -808,16 +1239,18 @@ class Tokenizer {
808
1239
  if (mightBeFloatingPoint ||
809
1240
  (this._cs.currentChar === 46 /* Char.Period */ && this._cs.nextChar >= 48 /* Char._0 */ && this._cs.nextChar <= 57 /* Char._9 */)) {
810
1241
  if (this._skipFloatingPointCandidate()) {
811
- let text = this._cs.getText().slice(start, this._cs.position);
812
- const value = parseFloat(text);
1242
+ const floatEnd = this._cs.position;
1243
+ const floatText = removeUnderscoresFromRange(this._cs.getText(), start, floatEnd);
1244
+ const value = parseFloat(floatText);
813
1245
  if (!isNaN(value)) {
814
1246
  let isImaginary = false;
1247
+ let tokenLength = floatEnd - start;
815
1248
  if (this._cs.currentChar === 106 /* Char.j */ || this._cs.currentChar === 74 /* Char.J */) {
816
1249
  isImaginary = true;
817
- text += String.fromCharCode(this._cs.currentChar);
818
1250
  this._cs.moveNext();
1251
+ tokenLength += 1;
819
1252
  }
820
- this._tokens.push(tokenizerTypes_1.NumberToken.create(start, this._cs.position - start, value, false, isImaginary, this._getComments()));
1253
+ this._tokens.push(tokenizerTypes_1.NumberToken.create(start, tokenLength, value, false, isImaginary, this._getComments()));
821
1254
  return true;
822
1255
  }
823
1256
  }
@@ -826,122 +1259,63 @@ class Tokenizer {
826
1259
  return false;
827
1260
  }
828
1261
  _tryOperator() {
1262
+ const currentChar = this._cs.currentChar;
829
1263
  let length = 0;
830
1264
  const nextChar = this._cs.nextChar;
831
1265
  let operatorType;
832
- switch (this._cs.currentChar) {
833
- case 43 /* Char.Plus */:
834
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
835
- operatorType = length === 2 ? 1 /* OperatorType.AddEqual */ : 0 /* OperatorType.Add */;
836
- break;
837
- case 38 /* Char.Ampersand */:
838
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
839
- operatorType = length === 2 ? 4 /* OperatorType.BitwiseAndEqual */ : 3 /* OperatorType.BitwiseAnd */;
840
- break;
841
- case 124 /* Char.Bar */:
842
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
843
- operatorType = length === 2 ? 7 /* OperatorType.BitwiseOrEqual */ : 6 /* OperatorType.BitwiseOr */;
844
- break;
845
- case 94 /* Char.Caret */:
846
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
847
- operatorType = length === 2 ? 9 /* OperatorType.BitwiseXorEqual */ : 8 /* OperatorType.BitwiseXor */;
848
- break;
849
- case 61 /* Char.Equal */:
850
- if (this._activeFString?.activeReplacementField &&
851
- this._activeFString?.activeReplacementField.parenDepth === this._parenDepth &&
852
- !this._activeFString.activeReplacementField.inFormatSpecifier &&
853
- nextChar !== 61 /* Char.Equal */) {
854
- length = 1;
855
- operatorType = 2 /* OperatorType.Assign */;
856
- break;
857
- }
858
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
859
- operatorType = length === 2 ? 12 /* OperatorType.Equals */ : 2 /* OperatorType.Assign */;
860
- break;
861
- case 33 /* Char.ExclamationMark */:
862
- if (nextChar !== 61 /* Char.Equal */) {
863
- if (this._activeFString) {
864
- // Handle the conversion separator (!) within an f-string.
865
- this._tokens.push(tokenizerTypes_1.Token.create(23 /* TokenType.ExclamationMark */, this._cs.position, 1, this._getComments()));
866
- this._cs.advance(1);
867
- return true;
868
- }
869
- return false;
870
- }
871
- length = 2;
872
- operatorType = 28 /* OperatorType.NotEquals */;
873
- break;
874
- case 37 /* Char.Percent */:
875
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
876
- operatorType = length === 2 ? 25 /* OperatorType.ModEqual */ : 24 /* OperatorType.Mod */;
877
- break;
878
- case 126 /* Char.Tilde */:
879
- length = 1;
880
- operatorType = 5 /* OperatorType.BitwiseInvert */;
881
- break;
882
- case 45 /* Char.Hyphen */:
883
- if (nextChar === 62 /* Char.Greater */) {
884
- this._tokens.push(tokenizerTypes_1.Token.create(21 /* TokenType.Arrow */, this._cs.position, 2, this._getComments()));
885
- this._cs.advance(2);
1266
+ if (currentChar < 128 && nextChar < 128) {
1267
+ const twoCharKey = (currentChar << 8) | nextChar;
1268
+ const specialTokenType = _twoCharSpecialTokenTypeMap.get(twoCharKey);
1269
+ if (specialTokenType !== undefined) {
1270
+ this._tokens.push(tokenizerTypes_1.Token.create(specialTokenType, this._cs.position, 2, this._getComments()));
1271
+ this._cs.advance(2);
1272
+ return true;
1273
+ }
1274
+ const twoCharOperatorType = _twoCharOperatorTypeMap.get(twoCharKey);
1275
+ if (twoCharOperatorType !== undefined) {
1276
+ this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, 2, twoCharOperatorType, this._getComments()));
1277
+ this._cs.advance(2);
1278
+ return true;
1279
+ }
1280
+ if (currentChar === nextChar) {
1281
+ const repeatedOperatorType = _repeatedCharOperatorTypeTable[currentChar];
1282
+ if (repeatedOperatorType !== _unsetSingleCharOperatorType) {
1283
+ const hasTrailingEqual = this._cs.lookAhead(2) === 61 /* Char.Equal */;
1284
+ const repeatedLength = hasTrailingEqual ? 3 : 2;
1285
+ const operatorType = hasTrailingEqual
1286
+ ? _repeatedCharEqualOperatorTypeTable[currentChar]
1287
+ : repeatedOperatorType;
1288
+ this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, repeatedLength, operatorType, this._getComments()));
1289
+ this._cs.advance(repeatedLength);
886
1290
  return true;
887
1291
  }
888
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
889
- operatorType = length === 2 ? 34 /* OperatorType.SubtractEqual */ : 33 /* OperatorType.Subtract */;
890
- break;
891
- case 42 /* Char.Asterisk */:
892
- if (nextChar === 42 /* Char.Asterisk */) {
893
- length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
894
- operatorType = length === 3 ? 30 /* OperatorType.PowerEqual */ : 29 /* OperatorType.Power */;
895
- }
896
- else {
897
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
898
- operatorType = length === 2 ? 27 /* OperatorType.MultiplyEqual */ : 26 /* OperatorType.Multiply */;
899
- }
900
- break;
901
- case 47 /* Char.Slash */:
902
- if (nextChar === 47 /* Char.Slash */) {
903
- length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
904
- operatorType = length === 3 ? 14 /* OperatorType.FloorDivideEqual */ : 13 /* OperatorType.FloorDivide */;
905
- }
906
- else {
907
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
908
- operatorType = length === 2 ? 11 /* OperatorType.DivideEqual */ : 10 /* OperatorType.Divide */;
909
- }
910
- break;
911
- case 60 /* Char.Less */:
912
- if (nextChar === 60 /* Char.Less */) {
913
- length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
914
- operatorType = length === 3 ? 18 /* OperatorType.LeftShiftEqual */ : 17 /* OperatorType.LeftShift */;
915
- }
916
- else if (nextChar === 62 /* Char.Greater */) {
1292
+ }
1293
+ }
1294
+ if (currentChar < 128) {
1295
+ const singleCharOperatorType = _singleCharOperatorTypeTable[currentChar];
1296
+ if (singleCharOperatorType !== _unsetSingleCharOperatorType) {
1297
+ const equalOperatorType = _singleCharEqualOperatorTypeTable[currentChar];
1298
+ if (nextChar === 61 /* Char.Equal */ && equalOperatorType !== _unsetSingleCharOperatorType) {
917
1299
  length = 2;
918
- operatorType = 19 /* OperatorType.LessOrGreaterThan */;
1300
+ operatorType = equalOperatorType;
919
1301
  }
920
1302
  else {
921
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
922
- operatorType = length === 2 ? 21 /* OperatorType.LessThanOrEqual */ : 20 /* OperatorType.LessThan */;
923
- }
924
- break;
925
- case 62 /* Char.Greater */:
926
- if (nextChar === 62 /* Char.Greater */) {
927
- length = this._cs.lookAhead(2) === 61 /* Char.Equal */ ? 3 : 2;
928
- operatorType = length === 3 ? 32 /* OperatorType.RightShiftEqual */ : 31 /* OperatorType.RightShift */;
929
- }
930
- else {
931
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
932
- operatorType = length === 2 ? 16 /* OperatorType.GreaterThanOrEqual */ : 15 /* OperatorType.GreaterThan */;
1303
+ length = 1;
1304
+ operatorType = singleCharOperatorType;
933
1305
  }
934
- break;
935
- case 64 /* Char.At */:
936
- length = nextChar === 61 /* Char.Equal */ ? 2 : 1;
937
- operatorType = length === 2 ? 23 /* OperatorType.MatrixMultiplyEqual */ : 22 /* OperatorType.MatrixMultiply */;
938
- break;
939
- default:
940
- return false;
1306
+ this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, length, operatorType, this._getComments()));
1307
+ this._cs.advance(length);
1308
+ return true;
1309
+ }
941
1310
  }
942
- this._tokens.push(tokenizerTypes_1.OperatorToken.create(this._cs.position, length, operatorType, this._getComments()));
943
- this._cs.advance(length);
944
- return length > 0;
1311
+ // `!=` is handled by the 2-char fast path above.
1312
+ if (currentChar === 33 /* Char.ExclamationMark */ && this._activeFString) {
1313
+ // Handle the conversion separator (!) within an f-string.
1314
+ this._tokens.push(tokenizerTypes_1.Token.create(23 /* TokenType.ExclamationMark */, this._cs.position, 1, this._getComments()));
1315
+ this._cs.advance(1);
1316
+ return true;
1317
+ }
1318
+ return false;
945
1319
  }
946
1320
  _handleInvalid() {
947
1321
  const start = this._cs.position;
@@ -990,16 +1364,15 @@ class Tokenizer {
990
1364
  }
991
1365
  _handleIPythonMagics(type) {
992
1366
  const start = this._cs.position + 1;
1367
+ const sourceText = this._cs.getText();
993
1368
  let begin = start;
994
1369
  while (true) {
995
1370
  this._cs.skipToEol();
996
1371
  if (type === 1 /* CommentType.IPythonMagic */ || type === 2 /* CommentType.IPythonShellEscape */) {
997
- const length = this._cs.position - begin;
998
- const value = this._cs.getText().slice(begin, begin + length);
999
1372
  // is it multiline magics?
1000
1373
  // %magic command \
1001
1374
  // next arguments
1002
- if (!value.match(magicsRegEx)) {
1375
+ if (!endsWithBackslashContinuation(sourceText, begin, this._cs.position)) {
1003
1376
  break;
1004
1377
  }
1005
1378
  }
@@ -1010,55 +1383,74 @@ class Tokenizer {
1010
1383
  }
1011
1384
  }
1012
1385
  const length = this._cs.position - start;
1013
- const comment = tokenizerTypes_1.Comment.create(start, length, this._cs.getText().slice(start, start + length), type);
1386
+ const comment = tokenizerTypes_1.Comment.create(start, length, sourceText.slice(start, start + length), type);
1014
1387
  this._addComments(comment);
1015
1388
  }
1016
1389
  _handleComment() {
1017
1390
  const start = this._cs.position + 1;
1018
1391
  this._cs.skipToEol();
1019
1392
  const length = this._cs.position - start;
1020
- const comment = tokenizerTypes_1.Comment.create(start, length, this._cs.getText().slice(start, start + length));
1021
- const typeIgnoreRegexMatch = comment.value.match(typeIgnoreCommentRegEx);
1022
- if (typeIgnoreRegexMatch) {
1023
- const commentStart = start + (typeIgnoreRegexMatch.index ?? 0);
1024
- const textRange = {
1025
- start: commentStart + typeIgnoreRegexMatch[1].length,
1026
- length: typeIgnoreRegexMatch[0].length - typeIgnoreRegexMatch[1].length,
1027
- };
1028
- const ignoreComment = {
1029
- range: textRange,
1030
- rulesList: this._getIgnoreCommentRulesList(commentStart, typeIgnoreRegexMatch),
1031
- };
1032
- if (this._tokens.findIndex((t) => t.type !== 2 /* TokenType.NewLine */ && t && t.type !== 3 /* TokenType.Indent */) < 0) {
1033
- this._typeIgnoreAll = ignoreComment;
1393
+ const sourceText = this._cs.getText();
1394
+ const end = start + length;
1395
+ // Fast pre-filter: any ignore directive must contain the substring 'ignore'.
1396
+ // indexOf is a highly-optimized native call and lets us skip the full
1397
+ // directive scan for the vast majority of comments (which are free-form text).
1398
+ const ignoreIdx = sourceText.indexOf('ignore', start);
1399
+ if (ignoreIdx >= 0 && ignoreIdx < end) {
1400
+ const typeIgnoreMatch = matchIgnoreDirective(sourceText, start, end, 'type');
1401
+ if (typeIgnoreMatch) {
1402
+ const commentStart = typeIgnoreMatch.index;
1403
+ const textRange = {
1404
+ start: commentStart + typeIgnoreMatch.prefix.length,
1405
+ length: typeIgnoreMatch.fullMatch.length - typeIgnoreMatch.prefix.length,
1406
+ };
1407
+ const ignoreComment = {
1408
+ range: textRange,
1409
+ rulesList: this._getIgnoreCommentRulesList(commentStart, typeIgnoreMatch),
1410
+ };
1411
+ let isIgnoreAll = false;
1412
+ if (!this._hasTokenBeforeIgnoreAll) {
1413
+ // Are there any tokens other than NewLine / Indent yet?
1414
+ const hasOther = this._tokens.some((t) => t && t.type !== 2 /* TokenType.NewLine */ && t.type !== 3 /* TokenType.Indent */);
1415
+ if (hasOther) {
1416
+ this._hasTokenBeforeIgnoreAll = true;
1417
+ }
1418
+ else {
1419
+ isIgnoreAll = true;
1420
+ }
1421
+ }
1422
+ if (isIgnoreAll) {
1423
+ this._typeIgnoreAll = ignoreComment;
1424
+ }
1425
+ else {
1426
+ this._typeIgnoreLines.set(this._lineRanges.length, ignoreComment);
1427
+ }
1034
1428
  }
1035
- else {
1036
- this._typeIgnoreLines.set(this._lineRanges.length, ignoreComment);
1429
+ const pyrightIgnoreMatch = matchIgnoreDirective(sourceText, start, end, 'pyright');
1430
+ if (pyrightIgnoreMatch) {
1431
+ const commentStart = pyrightIgnoreMatch.index;
1432
+ const textRange = {
1433
+ start: commentStart + pyrightIgnoreMatch.prefix.length,
1434
+ length: pyrightIgnoreMatch.fullMatch.length - pyrightIgnoreMatch.prefix.length,
1435
+ };
1436
+ const ignoreComment = {
1437
+ range: textRange,
1438
+ rulesList: this._getIgnoreCommentRulesList(commentStart, pyrightIgnoreMatch),
1439
+ };
1440
+ this._pyrightIgnoreLines.set(this._lineRanges.length, ignoreComment);
1037
1441
  }
1038
1442
  }
1039
- const pyrightIgnoreRegexMatch = comment.value.match(pyrightIgnoreCommentRegEx);
1040
- if (pyrightIgnoreRegexMatch) {
1041
- const commentStart = start + (pyrightIgnoreRegexMatch.index ?? 0);
1042
- const textRange = {
1043
- start: commentStart + pyrightIgnoreRegexMatch[1].length,
1044
- length: pyrightIgnoreRegexMatch[0].length - pyrightIgnoreRegexMatch[1].length,
1045
- };
1046
- const ignoreComment = {
1047
- range: textRange,
1048
- rulesList: this._getIgnoreCommentRulesList(commentStart, pyrightIgnoreRegexMatch),
1049
- };
1050
- this._pyrightIgnoreLines.set(this._lineRanges.length, ignoreComment);
1051
- }
1443
+ const comment = tokenizerTypes_1.Comment.create(start, length, sourceText.slice(start, end));
1052
1444
  this._addComments(comment);
1053
1445
  }
1054
1446
  // Extracts the individual rules within a "type: ignore [x, y, z]" comment.
1055
1447
  _getIgnoreCommentRulesList(start, match) {
1056
- if (match.length < 5 || match[4] === undefined) {
1448
+ if (match.bracketContent === undefined) {
1057
1449
  return undefined;
1058
1450
  }
1059
- const splitElements = match[4].split(',');
1451
+ const splitElements = match.bracketContent.split(',');
1060
1452
  const commentRules = [];
1061
- let currentOffset = start + match[0].indexOf('[') + 1;
1453
+ let currentOffset = start + match.fullMatch.indexOf('[') + 1;
1062
1454
  for (const element of splitElements) {
1063
1455
  const frontTrimmed = element.trimStart();
1064
1456
  currentOffset += element.length - frontTrimmed.length;
@@ -1413,4 +1805,12 @@ class Tokenizer {
1413
1805
  }
1414
1806
  }
1415
1807
  exports.Tokenizer = Tokenizer;
1808
+ // Direct-mapped identifier intern cache. Indexed by a cheap hash of
1809
+ // (firstChar, lastChar, length). On a hit (slot defined and string
1810
+ // equals the current source range), reuse the cached string instead of
1811
+ // re-allocating via detachSubstring. Collisions simply overwrite the
1812
+ // slot — no chaining, O(1) lookup, no Map overhead. Sized as a power of
1813
+ // two so the mask is a single AND.
1814
+ Tokenizer._identifierCacheSize = 2048;
1815
+ Tokenizer._identifierCacheMask = Tokenizer._identifierCacheSize - 1;
1416
1816
  //# sourceMappingURL=tokenizer.js.map