Typhon-Language 0.1.3-py3-none-any.whl → 0.1.4-py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- Typhon/Driver/configs.py +14 -0
- Typhon/Driver/debugging.py +148 -5
- Typhon/Driver/diagnostic.py +4 -3
- Typhon/Driver/language_server.py +25 -0
- Typhon/Driver/run.py +1 -1
- Typhon/Driver/translate.py +14 -10
- Typhon/Driver/utils.py +39 -1
- Typhon/Grammar/_typhon_parser.py +2738 -2525
- Typhon/Grammar/parser.py +80 -53
- Typhon/Grammar/parser_helper.py +68 -87
- Typhon/Grammar/syntax_errors.py +31 -21
- Typhon/Grammar/token_factory_custom.py +541 -485
- Typhon/Grammar/tokenizer_custom.py +52 -0
- Typhon/Grammar/typhon_ast.py +372 -44
- Typhon/Grammar/typhon_ast_error.py +438 -0
- Typhon/LanguageServer/__init__.py +3 -0
- Typhon/LanguageServer/client/__init__.py +42 -0
- Typhon/LanguageServer/client/pyrefly.py +115 -0
- Typhon/LanguageServer/client/pyright.py +173 -0
- Typhon/LanguageServer/semantic_tokens.py +446 -0
- Typhon/LanguageServer/server.py +376 -0
- Typhon/LanguageServer/utils.py +65 -0
- Typhon/SourceMap/ast_match_based_map.py +199 -152
- Typhon/SourceMap/ast_matching.py +102 -87
- Typhon/SourceMap/datatype.py +27 -16
- Typhon/SourceMap/defined_name_retrieve.py +145 -0
- Typhon/Transform/comprehension_to_function.py +2 -5
- Typhon/Transform/const_member_to_final.py +12 -7
- Typhon/Transform/forbidden_statements.py +1 -0
- Typhon/Transform/optional_operators_to_checked.py +14 -6
- Typhon/Transform/scope_check_rename.py +44 -18
- Typhon/Transform/type_abbrev_desugar.py +11 -15
- Typhon/Transform/type_annotation_check_expand.py +2 -2
- Typhon/Transform/utils/imports.py +39 -4
- Typhon/Transform/utils/make_class.py +18 -23
- Typhon/Transform/visitor.py +25 -0
- Typhon/Typing/pyrefly.py +145 -0
- Typhon/Typing/pyright.py +2 -4
- Typhon/__main__.py +15 -1
- {typhon_language-0.1.3.dist-info → typhon_language-0.1.4.dist-info}/METADATA +7 -5
- typhon_language-0.1.4.dist-info/RECORD +65 -0
- {typhon_language-0.1.3.dist-info → typhon_language-0.1.4.dist-info}/WHEEL +1 -1
- typhon_language-0.1.4.dist-info/licenses/LICENSE +201 -0
- typhon_language-0.1.3.dist-info/RECORD +0 -53
- typhon_language-0.1.3.dist-info/licenses/LICENSE +0 -21
- {typhon_language-0.1.3.dist-info → typhon_language-0.1.4.dist-info}/entry_points.txt +0 -0
- {typhon_language-0.1.3.dist-info → typhon_language-0.1.4.dist-info}/top_level.txt +0 -0
Typhon/Grammar/token_factory_custom.py
@@ -1,485 +1,541 @@
[removed side: all 485 lines of the 0.1.3 file were replaced; their content did not survive this rendering and is omitted]
+import sys
+import token
+from typing import Callable, Iterator, Literal
+from tokenize import TokenInfo, generate_tokens
+import tokenize
+import re
+from dataclasses import dataclass
+from ..Driver.debugging import debug_print, debug_verbose_print
+from enum import Enum, auto
+
+
+def generate_tokens_ignore_error(readline: Callable[[], str]) -> Iterator[TokenInfo]:
+    # yield from _generate_tokens_parso(readline)
+    try:
+        for tok in generate_tokens(readline):
+            yield tok
+    except tokenize.TokenError as e:
+        # Ignore the error on EOF in multiline.
+        message: str
+        lineno: int
+        offset: int
+        message, (lineno, offset) = e.args
+        pos = (lineno, offset)
+        print(f"Tokenization error ignored at {pos}: {e}")
+        yield TokenInfo(token.ENDMARKER, "", pos, pos, "")
+
+
+def _regularize_token_type(token_type: int) -> int:
+    """Convert token type to a regularized form for Typhon.
+
+    NL -> NEWLINE
+    """
+    if token_type == tokenize.NL:
+        return tokenize.NEWLINE
+    return token_type
+
+
+@dataclass
+class _BlockComment:
+    start_line: int
+    start_col: int
+    end_line: int
+    end_col: int
+    comment: str
+    lines: str
+
+    def __hash__(self) -> int:
+        return hash(
+            (
+                self.start_line,
+                self.start_col,
+            )
+        )
+
+
+class _StrKind(Enum):
+    SINGLE_QUOTE = auto()
+    DOUBLE_QUOTE = auto()
+    SINGLE_QUOTE_DOCSTRING = auto()
+    DOUBLE_QUOTE_DOCSTRING = auto()
+    FSTRING_START = auto()
+
+
+@dataclass
+class _StrPrefix:
+    is_raw: bool
+    is_fstring: bool
+
+
+@dataclass
+class _Str:
+    prefix: _StrPrefix
+    kind: _StrKind
+
+    def is_raw(self) -> bool:
+        return self.prefix.is_raw
+
+    def is_fstring(self) -> bool:
+        return self.prefix.is_fstring
+
+
+# Line parser that handles block comments and strings.
+# This is ONLY for implementing block comments that can span multiple lines.
+class _LineParser:
+    def __init__(self, readline: Callable[[], str]) -> None:
+        self.readline = readline
+        self.line = ""
+        self.result_line = ""
+        self.line_num = 0
+        self._column = 0
+        # Is inside string. Note this is false in f-string expression parts unless not in the string in the expression.
+        self.in_string = False
+        self.in_comment = False
+
+        # For f-string interpolation handling.
+        self.interpolation_stack: list[Literal["{"]] = []
+        # String context stack for nested strings (only in f-string expressions).
+        self.str_context: list[_Str] = []
+        # To count the brackets in f-string interpolation.
+        self.bracket_stack_in_interpolation: list[str] = []
+        self.block_comment_begin_stack: list[_BlockComment] = []
+        self.outermost_block_comments: list[_BlockComment] = []
+        self.line_head_spaces: list[str] = []
+
+    def _next_char(self) -> str | None:
+        if self._column >= len(self.line):
+            return None
+        ch = self.line[self._column]
+        self._column += 1
+        return ch
+
+    # Current column of character taken last time.
+    def _get_char_column(self) -> int:
+        return self._column - 1
+
+    def _peek_char(self, offset: int = 0) -> str | None:
+        if self._column + offset >= len(self.line):
+            return None
+        return self.line[self._column + offset]
+
+    def _passed(self) -> str:
+        return self.line[: self._column]
+
+    def _pop_index(self, bracket: str) -> int | None:
+        for idx in range(len(self.bracket_stack_in_interpolation) - 1, -1, -1):
+            if self.bracket_stack_in_interpolation[idx] == bracket:
+                return idx
+        return None
+
+    def _commit(self, ch: str | None) -> None:
+        if ch is not None:
+            if self.block_comment_begin_stack:
+                # Inside block comment, do not commit to result line
+                self.block_comment_begin_stack[0].comment += ch
+            else:
+                # Normal code
+                self.result_line += ch
+
+    def _handle_bracket(self, ch: str) -> None:
+        if self.interpolation_stack:
+            if ch == "{":
+                self.bracket_stack_in_interpolation.append("{")
+            elif ch == "[":
+                self.bracket_stack_in_interpolation.append("[")
+            elif ch == "(":
+                self.bracket_stack_in_interpolation.append("(")
+            # Unclosed brackets to be ignored.
+            elif ch == "}":
+                if (pop_idx := self._pop_index("{")) is not None:
+                    self.bracket_stack_in_interpolation = (
+                        self.bracket_stack_in_interpolation[:pop_idx]
+                    )
+                if not self.bracket_stack_in_interpolation:
+                    # All brackets closed, end of interpolation
+                    self.interpolation_stack.pop()
+                    self.in_string = True
+            elif ch == "]":
+                if (pop_idx := self._pop_index("[")) is not None:
+                    self.bracket_stack_in_interpolation = (
+                        self.bracket_stack_in_interpolation[:pop_idx]
+                    )
+            elif ch == ")":
+                if (pop_idx := self._pop_index("(")) is not None:
+                    self.bracket_stack_in_interpolation = (
+                        self.bracket_stack_in_interpolation[:pop_idx]
+                    )
+        elif self.str_context and self.str_context[-1].is_fstring() and ch == "{":
+            # Start of f-string interpolation
+            debug_verbose_print(
+                f"Starting f-string interpolation at column={self._get_char_column()}"
+            )
+            self.interpolation_stack.append("{")
+            self.bracket_stack_in_interpolation.append("{")
+            self.in_string = False
+
+    def _get_str_prefix(self) -> _StrPrefix:
+        is_raw = False
+        is_fstring = False
+        for back_ch in reversed(self._passed()[:-1]):
+            if back_ch in {"r", "R"}:
+                is_raw = True
+            elif back_ch in {"f", "F", "t", "T"}:
+                is_fstring = True
+            elif back_ch in {"b", "B"}:
+                continue
+            else:
+                break
+        debug_verbose_print(
+            f"Determined string prefix {list(reversed(self._passed()[:-1]))[0:2]} is_raw={is_raw} is_fstring={is_fstring} at column={self._get_char_column()}"
+        )
+        return _StrPrefix(is_raw=is_raw, is_fstring=is_fstring)
+
+    def _handle_string_delim(self, ch: str) -> None:
+        if self.in_string:
+            # Possible string end
+            assert self.str_context, "String context stack should not be empty"
+            prefix = self.str_context[-1].prefix
+            kind = self.str_context[-1].kind
+            debug_verbose_print(
+                f"Handling string may end delim: {ch!r} kind={kind} prefix={prefix} column={self._get_char_column()}"
+            )
+            if kind == _StrKind.SINGLE_QUOTE and ch == "'":
+                self.str_context.pop()
+                self.in_string = False
+                return
+            elif kind == _StrKind.DOUBLE_QUOTE and ch == '"':
+                self.str_context.pop()
+                self.in_string = False
+                return
+            elif kind == _StrKind.SINGLE_QUOTE_DOCSTRING and ch == "'":
+                next_ch = self._peek_char()
+                third_ch = self._peek_char(1)
+                if next_ch == "'" and third_ch == "'":
+                    self._commit(self._next_char())  # consume
+                    self._commit(self._next_char())  # consume
+                    self.str_context.pop()
+                    self.in_string = False
+                    return
+            elif kind == _StrKind.DOUBLE_QUOTE_DOCSTRING and ch == '"':
+                next_ch = self._peek_char()
+                third_ch = self._peek_char(1)
+                if next_ch == '"' and third_ch == '"':
+                    self._commit(self._next_char())  # consume
+                    self._commit(self._next_char())  # consume
+                    self.str_context.pop()
+                    self.in_string = False
+                    return
+        else:
+            # String start
+            prefix = self._get_str_prefix()
+            next_ch = self._peek_char()
+            debug_verbose_print(
+                f"Handling string start delim: {ch!r} next_ch={next_ch!r} prefix={prefix} passed={self._passed()} column={self._get_char_column()}"
+            )
+            self.in_string = True
+            if next_ch == ch:
+                # Maybe triple quote
+                third_ch = self._peek_char(1)
+                if third_ch == ch:
+                    self._commit(self._next_char())  # consume
+                    self._commit(self._next_char())  # consume
+                    # Docstring
+                    if ch == "'":
+                        self.str_context.append(
+                            _Str(prefix, _StrKind.SINGLE_QUOTE_DOCSTRING)
+                        )
+                    else:
+                        self.str_context.append(
+                            _Str(prefix, _StrKind.DOUBLE_QUOTE_DOCSTRING)
+                        )
+                    return
+            if ch == "'":
+                self.str_context.append(_Str(prefix, _StrKind.SINGLE_QUOTE))
+            else:
+                self.str_context.append(_Str(prefix, _StrKind.DOUBLE_QUOTE))
+            return
+
+    def _handle_comment(self) -> None:
+        first_sharp_column = self._get_char_column()
+        debug_verbose_print(
+            f"Handling comment at line {self.line_num} col {first_sharp_column} in line: {self.line!r}"
+        )
+        # Block comment begin in front
+        while self._peek_char() == "#":
+            self._next_char()
+        if self._peek_char() == "(":
+            # Block comment begin
+            # Consume the '('
+            self._next_char()
+            # All # and (
+            comment_starter = self.line[
+                first_sharp_column : self._get_char_column() + 1
+            ]
+            debug_verbose_print(
+                f"Block comment begin detected at col {first_sharp_column} in line comment_starter={comment_starter}: {self.line!r}"
+            )
+            self.block_comment_begin_stack.append(
+                _BlockComment(
+                    start_line=self.line_num,
+                    start_col=first_sharp_column,
+                    end_line=0,
+                    end_col=0,
+                    comment="",
+                    lines=self.line,
+                )
+            )
+            # Accumulate the begin part to the outermost block comment
+            self.block_comment_begin_stack[0].comment += comment_starter
+        elif not self.block_comment_begin_stack:
+            # Normal comment line, skip to end
+            self.result_line += self.line[first_sharp_column:]
+            self._column = len(self.line)
+        else:
+            # Inside block comment, just commit the '#'
+            self.block_comment_begin_stack[0].comment += self.line[
+                first_sharp_column : self._get_char_column()
+            ]
+
+    def _handle_block_comment_end(self) -> None:
+        if self.block_comment_begin_stack:
+            while self._peek_char() == "#":
+                self._commit(self._next_char())
+            debug_verbose_print(
+                f"Block comment end detected at col {self._column} in line: {self.line!r} "
+            )
+            if len(self.block_comment_begin_stack) == 1:
+                block_comment = self.block_comment_begin_stack[-1]
+                block_comment.end_line = self.line_num
+                block_comment.end_col = self._column  # after the last '#'
+                self.outermost_block_comments.append(block_comment)
+                self.in_comment = False
+                debug_verbose_print(
+                    f"block comment from line {block_comment.start_line} col {block_comment.start_col} "
+                    f"to line {block_comment.end_line} col {block_comment.end_col}"
+                )
+                self.result_line += " "  # Replace block comment with space
+            # Pop the block comment begin
+            self.block_comment_begin_stack.pop()
+
+    def _cut_line_head_spaces(
+        self, line: str, line_head_in_string_or_comment: bool
+    ) -> str:
+        if not line_head_in_string_or_comment:
+            match = re.match(r"[ \t]*", line)
+            if match:
+                self.line_head_spaces.append(match.group(0))
+                return line[match.end() :]
+        self.line_head_spaces.append("")
+        return line
+
+    def _next_line(self) -> None:
+        self.line = self.readline()
+        self._column = 0
+        self.line_num += 1
+
+    # Parse the line and return true start/end of block comment.
+    # block comment begin/end is ignored in string/docstring.
+    # They are valid in f-string expressions.
+    def parse_next_line(self) -> str:
+        self._next_line()
+        ch = ""
+        line_head_in_string_or_comment = self.in_string or self.in_comment
+        while True:
+            ch = self._next_char()
+            if ch is None:
+                # End of line. Continue if block comment continues.
+                if self.block_comment_begin_stack:
+                    self._next_line()
+                    continue
+                # True end of line
+                break
+            if self.block_comment_begin_stack:
+                # Inside block comment
+                if ch == "#":
+                    self._handle_comment()
+                if ch == ")" and self._peek_char() == "#":
+                    self._commit(ch)
+                    self._handle_block_comment_end()
+                else:
+                    self._commit(ch)
+            elif self.in_string:  # Inside string
+                self._commit(ch)
+                if ch in {"'", '"'}:
+                    self._handle_string_delim(ch)
+                elif ch == "\\" and not self.str_context[-1].is_raw():
+                    self._commit(self._next_char())  # consume escape character
+                elif (
+                    ch == "{" and self.str_context and self.str_context[-1].is_fstring()
+                ):
+                    # Possible interpolation start
+                    self._handle_bracket(ch)
+            else:  # Normal code
+                if ch == "#":
+                    self._handle_comment()
+                else:
+                    self._commit(ch)
+                    if ch in {"'", '"'}:
+                        self._handle_string_delim(ch)
+                    elif ch in {"{", "}", "(", ")", "[", "]"}:
+                        self._handle_bracket(ch)
+        result = self._cut_line_head_spaces(
+            self.result_line, line_head_in_string_or_comment
+        )
+        self.result_line = ""
+        debug_verbose_print(f"Parsed line {self.line_num} result: {result!r}")
+        return result
+
+
+def _generate_and_postprocess_tokens(
+    readline: Callable[[], str],  # After block comment is processed.
+    unconsumed_block_comment: list[_BlockComment],
+    head_space_lines: list[str],
+) -> Iterator[TokenInfo]:
+    """Generate tokens from readline, handling head space and block comments."""
+    line_offset_already_consumed = 0
+    block_comment_already_output: set[_BlockComment] = set()
+    # Adjust token positions from generated tokens, and mix in block comment tokens.
+    for tok in generate_tokens_ignore_error(readline):
+        debug_verbose_print(
+            f"Generated token: {tok.string!r} type={tok.type} start={tok.start} end={tok.end}"
+        )
+        # Retrieve the line head spaces for this line.
+        start = (
+            tok.start[0],
+            tok.start[1] + len(head_space_lines[tok.start[0] - 1]),
+        )
+        end = (
+            tok.end[0],
+            tok.end[1] + len(head_space_lines[tok.end[0] - 1]),
+        )
+        # Gather unconsumed block comments before this token.
+        tok_start_line, tok_start_col = start
+        while (
+            unconsumed_block_comment
+            and (block_comment := unconsumed_block_comment[0])
+            and (
+                block_comment.end_line < tok_start_line
+                or (
+                    block_comment.end_line == tok_start_line
+                    and block_comment.end_col <= tok_start_col
+                )
+            )
+        ):
+            debug_verbose_print(
+                f"pop block comment token: {block_comment.comment!r} start=({block_comment.start_line}, {block_comment.start_col}) end=({block_comment.end_line}, {block_comment.end_col})"
+            )
+            # Pop away comments that will never affect to remaining tokens.
+            # Remove already passed block comments.
+            line_offset_already_consumed += (
+                block_comment.end_line - block_comment.start_line
+            )
+            unconsumed_block_comment.pop(0)
+        # Adjust the token position if there are block comments before this token.
+        adjusted_start_line, adjusted_start_col = start
+        adjusted_end_line, adjusted_end_col = end
+        adjusted_start_line += line_offset_already_consumed
+        adjusted_end_line += line_offset_already_consumed
+        for block_comment in unconsumed_block_comment:
+            if (block_comment.start_line, block_comment.start_col) >= (
+                adjusted_start_line,
+                adjusted_start_col,
+            ):
+                break
+            # This block comment is before the token, yield here first.
+            if block_comment not in block_comment_already_output:
+                block_comment_already_output.add(block_comment)
+                debug_verbose_print(
+                    f"Yielding block comment at start=({block_comment.start_line}, {block_comment.start_col}) "
+                    f"end=({block_comment.end_line}, {block_comment.end_col})"
+                )
+                yield TokenInfo(
+                    type=tokenize.COMMENT,
+                    string=block_comment.comment,
+                    start=(block_comment.start_line, block_comment.start_col),
+                    end=(block_comment.end_line, block_comment.end_col),
+                    line=block_comment.lines,
+                )
+            # The length of the last line of block comment.
+            block_comment_last_line_len = (
+                block_comment.end_col - block_comment.start_col
+                if block_comment.end_line == block_comment.start_line
+                else block_comment.end_col
+            )
+            # Adjust start position
+            debug_verbose_print(
+                f"Adjusting token start {tok.string!r} adjusted_start: {(adjusted_start_line, adjusted_start_col)} adjusted_end:{(adjusted_end_col, adjusted_end_col)} block_comment.start_col: {block_comment.start_col} block_comment.end_col:{block_comment.end_col} block_comment_last_line_len: {block_comment_last_line_len} block_comment.start_line: {block_comment.start_line} block_comment.end_line: {block_comment.end_line}"
+            )
+            # Line start adjustment: shift down by number of lines in block comment.
+            adjusted_start_line = adjusted_start_line + (
+                block_comment.end_line - block_comment.start_line
+            )
+            # Column start adjustment:
+            adjusted_start_col = (
+                # If last of the comment is on the same line, add the length of block comment end part.
+                (
+                    adjusted_start_col
+                    - block_comment.start_col
+                    + block_comment.end_col
+                    - 1  # Account for space
+                )
+                if adjusted_start_line == block_comment.end_line
+                # If on different lines, the start of the token itself is.
+                else adjusted_start_col
+            )
+            # Adjust end position
+            # Line end adjustment: same as start
+            adjusted_end_line = adjusted_end_line + (
+                block_comment.end_line - block_comment.start_line
+            )
+            # Column end adjustment:
+            adjusted_end_col = (
+                # If last of the comment is on the same line, add the length of block comment end part.
+                (
+                    adjusted_end_col
+                    - block_comment.start_col
+                    + block_comment.end_col
+                    - 1  # Account for space
+                )
+                if adjusted_end_line == block_comment.end_line
+                # If on different lines, the start of the token itself is.
+                else adjusted_end_col
+            )
+        debug_verbose_print(
+            f"Block Comment Adjusting token {tok.string!r} to start=({adjusted_start_line}, {adjusted_start_col}) "
+            f"end=({adjusted_end_line}, {adjusted_end_col}) due to block comment"
+        )
+        debug_verbose_print(
+            f"Yielding token {tok.string!r} at adjusted start=({adjusted_start_line}, {adjusted_start_col}) "
+            f"end=({adjusted_end_line}, {adjusted_end_col})"
+        )
+        yield TokenInfo(
+            type=_regularize_token_type(tok.type),
+            string=tok.string,
+            start=(adjusted_start_line, adjusted_start_col),
+            end=(adjusted_end_line, adjusted_end_col),
+            line=tok.line,
+        )
+    for block_comment in unconsumed_block_comment:
+        # Yield remaining unconsumed block comments at the end.
+        if block_comment not in block_comment_already_output:
+            debug_verbose_print(
+                f"Yielding remaining block comment at end: start=({block_comment.start_line}, {block_comment.start_col}) "
+                f"end=({block_comment.end_line}, {block_comment.end_col})"
+            )
+            yield TokenInfo(
+                type=tokenize.COMMENT,
+                string=block_comment.comment,
+                start=(block_comment.start_line, block_comment.start_col),
+                end=(block_comment.end_line, block_comment.end_col),
+                line=block_comment.lines,
+            )
+
+
+def token_stream_factory(readline: Callable[[], str]) -> Iterator[TokenInfo]:
+    line_parser = _LineParser(readline)
+
+    yield from _generate_and_postprocess_tokens(
+        line_parser.parse_next_line,
+        line_parser.outermost_block_comments,
+        line_parser.line_head_spaces,
+    )
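
For orientation, the sketch below shows how the new tokenizer entry point might be driven. It is a minimal illustration, not part of the package: the sample source and its "#( ... )#" block comment are invented, and it assumes the module is importable as Typhon.Grammar.token_factory_custom (the path shown in the file list above).

import io
import tokenize

from Typhon.Grammar.token_factory_custom import token_stream_factory

# Invented sample source containing a '#( ... )#' block comment
# that spans two lines.
src = (
    "x = 1  #( a block comment\n"
    "spanning two lines )#\n"
    "y = 2\n"
)

# token_stream_factory takes a readline callable, like tokenize.generate_tokens,
# and yields TokenInfo objects; block comments come back as COMMENT tokens and
# the positions of surrounding tokens are mapped back to the original source.
for tok in token_stream_factory(io.StringIO(src).readline):
    print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)

The two-stage design is visible in token_stream_factory itself: _LineParser.parse_next_line yields lines with block comments removed and leading whitespace cut off (recorded in outermost_block_comments and line_head_spaces), and _generate_and_postprocess_tokens runs the standard tokenizer over those cleaned lines, re-injects the comments, and shifts token coordinates back.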