@lewin671/python-vm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/.claude/settings.local.json +3 -0
  2. package/.prettierrc +7 -0
  3. package/Agents.md +66 -0
  4. package/README.md +93 -0
  5. package/README_zh-CN.md +93 -0
  6. package/SETUP.md +171 -0
  7. package/dist/compiler.d.ts +20 -0
  8. package/dist/compiler.js +91 -0
  9. package/dist/compiler_module/compiler.d.ts +8 -0
  10. package/dist/compiler_module/compiler.js +22 -0
  11. package/dist/compiler_module/index.d.ts +2 -0
  12. package/dist/compiler_module/index.js +6 -0
  13. package/dist/index.d.ts +4 -0
  14. package/dist/index.js +67 -0
  15. package/dist/lexer/index.d.ts +2 -0
  16. package/dist/lexer/index.js +6 -0
  17. package/dist/lexer/lexer.d.ts +16 -0
  18. package/dist/lexer/lexer.js +403 -0
  19. package/dist/parser/expressions.d.ts +30 -0
  20. package/dist/parser/expressions.js +483 -0
  21. package/dist/parser/index.d.ts +2 -0
  22. package/dist/parser/index.js +6 -0
  23. package/dist/parser/parser.d.ts +63 -0
  24. package/dist/parser/parser.js +129 -0
  25. package/dist/parser/statements.d.ts +20 -0
  26. package/dist/parser/statements.js +388 -0
  27. package/dist/parser/targets.d.ts +6 -0
  28. package/dist/parser/targets.js +75 -0
  29. package/dist/types/ast.d.ts +63 -0
  30. package/dist/types/ast.js +60 -0
  31. package/dist/types/bytecode.d.ts +38 -0
  32. package/dist/types/bytecode.js +35 -0
  33. package/dist/types/index.d.ts +4 -0
  34. package/dist/types/index.js +20 -0
  35. package/dist/types/token.d.ts +34 -0
  36. package/dist/types/token.js +39 -0
  37. package/dist/vm/builtins.d.ts +4 -0
  38. package/dist/vm/builtins.js +269 -0
  39. package/dist/vm/callable.d.ts +8 -0
  40. package/dist/vm/callable.js +161 -0
  41. package/dist/vm/execution.d.ts +15 -0
  42. package/dist/vm/execution.js +283 -0
  43. package/dist/vm/expression-generator.d.ts +3 -0
  44. package/dist/vm/expression-generator.js +70 -0
  45. package/dist/vm/expressions.d.ts +13 -0
  46. package/dist/vm/expressions.js +390 -0
  47. package/dist/vm/imports.d.ts +7 -0
  48. package/dist/vm/imports.js +99 -0
  49. package/dist/vm/index.d.ts +3 -0
  50. package/dist/vm/index.js +21 -0
  51. package/dist/vm/operations.d.ts +16 -0
  52. package/dist/vm/operations.js +439 -0
  53. package/dist/vm/runtime-types.d.ts +84 -0
  54. package/dist/vm/runtime-types.js +290 -0
  55. package/dist/vm/statements.d.ts +7 -0
  56. package/dist/vm/statements.js +381 -0
  57. package/dist/vm/truthy.d.ts +4 -0
  58. package/dist/vm/truthy.js +47 -0
  59. package/dist/vm/value-utils.d.ts +28 -0
  60. package/dist/vm/value-utils.js +225 -0
  61. package/dist/vm/vm.d.ts +56 -0
  62. package/dist/vm/vm.js +75 -0
  63. package/examples/assert_testing.py +38 -0
  64. package/examples/big_int_precision.py +2 -0
  65. package/examples/boolean_logic.py +35 -0
  66. package/examples/break_continue.py +43 -0
  67. package/examples/classes_objects.py +43 -0
  68. package/examples/compiler_killer_async.py +6 -0
  69. package/examples/compiler_killer_bigint.py +3 -0
  70. package/examples/compiler_killer_bool_int_dict_key.py +5 -0
  71. package/examples/compiler_killer_bool_len.py +9 -0
  72. package/examples/compiler_killer_floor_division.py +4 -0
  73. package/examples/compiler_killer_is_identity.py +3 -0
  74. package/examples/compiler_killer_list_sort_return.py +3 -0
  75. package/examples/compiler_killer_match.py +13 -0
  76. package/examples/compiler_killer_negative_repeat.py +3 -0
  77. package/examples/compiler_killer_negative_zero_repr.py +3 -0
  78. package/examples/compiler_killer_rounding.py +4 -0
  79. package/examples/compiler_killer_slice_assign.py +3 -0
  80. package/examples/comprehensions.py +28 -0
  81. package/examples/conditions.py +13 -0
  82. package/examples/context_manager.py +35 -0
  83. package/examples/decorators.py +50 -0
  84. package/examples/exceptions.py +40 -0
  85. package/examples/fibonacci.py +10 -0
  86. package/examples/functions.py +38 -0
  87. package/examples/generator.py +51 -0
  88. package/examples/global_nonlocal.py +48 -0
  89. package/examples/hello.py +3 -0
  90. package/examples/itertools_example.py +33 -0
  91. package/examples/lists_dicts.py +29 -0
  92. package/examples/loops.py +19 -0
  93. package/examples/math_ops.py +15 -0
  94. package/examples/nan_set.py +6 -0
  95. package/examples/numbers_operators.py +51 -0
  96. package/examples/sets.py +36 -0
  97. package/examples/slicing.py +29 -0
  98. package/examples/starred_unpacking.py +3 -0
  99. package/examples/string_formatting.py +36 -0
  100. package/examples/strings.py +22 -0
  101. package/examples/tuples.py +45 -0
  102. package/examples/type_conversion.py +41 -0
  103. package/jest.config.js +15 -0
  104. package/notes/iterations/compiler-runtime/compiler-runtime_2025-09-16.md +25 -0
  105. package/notes/iterations/compiler-runtime/compiler-runtime_2026-01-16.md +24 -0
  106. package/notes/iterations/compiler-runtime/compiler-runtime_test_2026-01-16.md +21 -0
  107. package/notes/iterations/floor-division/floor-division_2026-01-16.md +29 -0
  108. package/package.json +36 -0
  109. package/prompts/commit.txt +9 -0
  110. package/prompts/task.txt +21 -0
  111. package/prompts/test.txt +23 -0
  112. package/scripts/codex-loop.js +215 -0
  113. package/scripts/verify.sh +12 -0
  114. package/src/compiler.ts +58 -0
  115. package/src/compiler_module/compiler.ts +19 -0
  116. package/src/compiler_module/index.ts +1 -0
  117. package/src/index.ts +39 -0
  118. package/src/lexer/index.ts +1 -0
  119. package/src/lexer/lexer.ts +402 -0
  120. package/src/parser/expressions.ts +462 -0
  121. package/src/parser/index.ts +1 -0
  122. package/src/parser/parser.ts +102 -0
  123. package/src/parser/statements.ts +366 -0
  124. package/src/parser/targets.ts +71 -0
  125. package/src/types/ast.ts +64 -0
  126. package/src/types/bytecode.ts +50 -0
  127. package/src/types/index.ts +3 -0
  128. package/src/types/token.ts +44 -0
  129. package/src/vm/builtins.ts +237 -0
  130. package/src/vm/callable.ts +154 -0
  131. package/src/vm/execution.ts +251 -0
  132. package/src/vm/expression-generator.ts +65 -0
  133. package/src/vm/expressions.ts +373 -0
  134. package/src/vm/imports.ts +61 -0
  135. package/src/vm/index.ts +2 -0
  136. package/src/vm/operations.ts +414 -0
  137. package/src/vm/runtime-types.ts +292 -0
  138. package/src/vm/statements.ts +358 -0
  139. package/src/vm/truthy.ts +36 -0
  140. package/src/vm/value-utils.ts +173 -0
  141. package/src/vm/vm.ts +80 -0
  142. package/tests/compiler.test.ts +111 -0
  143. package/tsconfig.json +20 -0
  144. package/vitest.config.ts +16 -0
@@ -0,0 +1,402 @@
1
+ import { Token, TokenType } from '../types';
2
+
3
/**
 * Lexer - converts Python source code into a flat token stream.
 *
 * Besides ordinary tokens the lexer emits NEWLINE at every line break and
 * INDENT / DEDENT tokens whenever the leading whitespace of a non-blank,
 * non-comment line differs from the enclosing indentation level. A tab counts
 * as four columns when measuring indentation.
 *
 * Every token records the 1-based line and column at which it starts.
 *
 * Known limits of this implementation (unchanged here):
 *  - numeric literals are decimal digits with at most one '.', optionally
 *    followed by a 'j' / 'J' suffix;
 *  - only the 'f' / 'F' string prefix is recognised;
 *  - line breaks are tokenized the same way inside and outside brackets.
 */
export class Lexer {
  /** Reserved words that are emitted as KEYWORD tokens. */
  private static readonly KEYWORDS: ReadonlySet<string> = new Set<string>([
    'def', 'class', 'if', 'elif', 'else',
    'for', 'while', 'return', 'break', 'continue',
    'pass', 'in', 'is', 'and', 'or', 'not',
    'lambda', 'yield', 'try', 'except', 'finally',
    'with', 'as', 'global', 'nonlocal', 'assert',
    'raise', 'del', 'match', 'case', 'import',
    'async',
  ]);

  /**
   * Multi-character operators, longest first so that the first match found is
   * always the longest one (e.g. '**=' wins over '**' and '*=').
   */
  private static readonly MULTI_CHAR_OPERATORS: readonly string[] = [
    '**=', '//=',
    '+=', '-=', '**', '*=', '//', '/=', '%=',
    '==', '!=', '<<', '<=', '>>', '>=',
  ];

  /** Single characters that are emitted as OPERATOR tokens. */
  private static readonly SINGLE_CHAR_OPERATORS: ReadonlySet<string> = new Set<string>([
    '+', '-', '*', '/', '%', '&', '|', '^', '~', '<', '>',
  ]);

  /** Single-character delimiters and the dedicated token type of each. */
  private static readonly DELIMITERS: ReadonlyMap<string, TokenType> = new Map<string, TokenType>([
    ['(', TokenType.LPAREN],
    [')', TokenType.RPAREN],
    ['[', TokenType.LBRACKET],
    [']', TokenType.RBRACKET],
    ['{', TokenType.LBRACE],
    ['}', TokenType.RBRACE],
    [':', TokenType.COLON],
    [',', TokenType.COMMA],
    ['.', TokenType.DOT],
    ['@', TokenType.AT],
  ]);

  private static readonly DIGIT = /[0-9]/;
  private static readonly IDENT_START = /[a-zA-Z_]/;
  private static readonly IDENT_PART = /[a-zA-Z0-9_]/;

  private code: string;
  private pos: number = 0;
  private line: number = 1;
  private column: number = 1;
  private tokens: Token[] = [];
  private indentStack: number[] = [0];
  private atLineStart: boolean = true;

  /**
   * @param code Python source text. CR LF and lone CR line endings are
   *             normalised to LF, mirroring how Python itself treats all three
   *             as equivalent line terminators in source files.
   */
  constructor(code: string) {
    this.code = code.replace(/\r\n?/g, '\n');
  }

  /**
   * Tokenizes the whole source.
   *
   * The scan always starts from the beginning of the source, so calling this
   * method more than once yields an equal, freshly built token list each time.
   *
   * @returns All tokens in source order, followed by one DEDENT per still-open
   *          indentation level and a final EOF token.
   * @throws Error on inconsistent dedent, an unterminated string literal, or a
   *         character that does not start any known token.
   */
  tokenize(): Token[] {
    this.pos = 0;
    this.line = 1;
    this.column = 1;
    this.tokens = [];
    this.indentStack = [0];
    this.atLineStart = true;

    while (this.pos < this.code.length) {
      // Blank lines, comment-only lines and trailing whitespace at EOF are
      // fully consumed by scanLineStart(); there is nothing more to scan then.
      if (this.atLineStart && !this.scanLineStart()) {
        continue;
      }

      const char = this.peek();

      if (char === ' ' || char === '\t') {
        this.advance();
        continue;
      }

      if (char === '#') {
        this.skipComment();
        continue;
      }

      if (char === '\n') {
        this.pushToken(TokenType.NEWLINE, '\n', this.line, this.column);
        this.advance();
        this.atLineStart = true;
        continue;
      }

      if (Lexer.DIGIT.test(char) || (char === '.' && Lexer.DIGIT.test(this.peek(1)))) {
        this.readNumber();
        continue;
      }

      const next = this.peek(1);
      if (char === '"' || char === "'" || ((char === 'f' || char === 'F') && (next === '"' || next === "'"))) {
        this.readString();
        continue;
      }

      if (Lexer.IDENT_START.test(char)) {
        this.readIdentifier();
        continue;
      }

      this.readOperatorOrDelimiter();
    }

    // Close every indentation level that is still open at end of input.
    while (this.indentStack.length > 1) {
      this.pushToken(TokenType.DEDENT, '', this.line, this.column);
      this.indentStack.pop();
    }

    this.pushToken(TokenType.EOF, '', this.line, this.column);

    return this.tokens;
  }

  /** Returns the character `n` positions ahead, or '' past the end of input. */
  private peek(n: number = 0): string {
    return this.code[this.pos + n] || '';
  }

  /** Consumes `n` characters while keeping line / column counters in sync. */
  private advance(n: number = 1): void {
    for (let i = 0; i < n; i++) {
      if (this.code[this.pos] === '\n') {
        this.line++;
        this.column = 1;
      } else {
        this.column++;
      }
      this.pos++;
    }
  }

  /** Appends a token whose first character sits at (`line`, `column`). */
  private pushToken(type: TokenType, value: string, line: number, column: number): void {
    this.tokens.push({ type, value, line, column });
  }

  /** Skips from the current '#' up to, but not including, the line break. */
  private skipComment(): void {
    while (this.pos < this.code.length && this.peek() !== '\n') {
      this.advance();
    }
  }

  /**
   * Handles the beginning of a physical line: measures the indentation and
   * emits INDENT / DEDENT tokens if the line carries code.
   *
   * @returns true when a token follows on this line; false when the line was
   *          blank, comment-only, or only whitespace before end of input (in
   *          which case no indentation tokens are produced).
   */
  private scanLineStart(): boolean {
    let indent = 0;
    while (this.peek() === ' ' || this.peek() === '\t') {
      indent += this.peek() === '\t' ? 4 : 1;
      this.advance();
    }

    // Whitespace running into end of input is a blank line, not a new block.
    if (this.pos >= this.code.length) {
      return false;
    }

    if (this.peek() === '\n') {
      this.pushToken(TokenType.NEWLINE, '\n', this.line, this.column);
      this.advance();
      return false;
    }

    if (this.peek() === '#') {
      // atLineStart stays set, so the line break after the comment is then
      // handled as a blank line on the next pass.
      this.skipComment();
      return false;
    }

    this.emitIndentTokens(indent);
    this.atLineStart = false;
    return true;
  }

  /**
   * Compares `indent` with the current indentation level and emits one INDENT
   * or as many DEDENT tokens as needed.
   *
   * @throws Error when a dedent does not land on an enclosing level.
   */
  private emitIndentTokens(indent: number): void {
    const currentIndent = this.indentStack[this.indentStack.length - 1];
    if (indent > currentIndent) {
      this.pushToken(TokenType.INDENT, '', this.line, this.column);
      this.indentStack.push(indent);
      return;
    }

    while (indent < this.indentStack[this.indentStack.length - 1]) {
      this.pushToken(TokenType.DEDENT, '', this.line, this.column);
      this.indentStack.pop();
    }
    if (indent !== this.indentStack[this.indentStack.length - 1]) {
      throw new Error(`Indentation error at line ${this.line}`);
    }
  }

  /** Consumes a run of decimal digits (possibly empty). */
  private skipDigits(): void {
    while (this.pos < this.code.length && Lexer.DIGIT.test(this.peek())) {
      this.advance();
    }
  }

  /** Reads a numeric literal: digits, at most one '.', optional 'j' / 'J'. */
  private readNumber(): void {
    const startLine = this.line;
    const startColumn = this.column;
    const start = this.pos;

    if (this.peek() === '.') {
      // Literal of the form ".5": the dot is already used up.
      this.advance();
      this.skipDigits();
    } else {
      this.skipDigits();
      if (this.peek() === '.') {
        this.advance();
        this.skipDigits();
      }
    }

    if (this.peek() === 'j' || this.peek() === 'J') {
      this.advance();
    }

    this.pushToken(TokenType.NUMBER, this.code.slice(start, this.pos), startLine, startColumn);
  }

  /**
   * Reads a string literal, including its optional 'f' / 'F' prefix and its
   * quotes. The token value is the raw source text; escapes are not decoded.
   *
   * @throws Error when the input ends before the closing quote(s).
   */
  private readString(): void {
    const startLine = this.line;
    const startColumn = this.column;
    const start = this.pos;

    if (this.peek() === 'f' || this.peek() === 'F') {
      this.advance();
    }
    const quote = this.peek();
    this.advance();

    const isTriple = this.peek() === quote && this.peek(1) === quote;
    if (isTriple) {
      this.advance(2);
    }
    const closer = isTriple ? quote.repeat(3) : quote;

    while (this.pos < this.code.length && !this.code.startsWith(closer, this.pos)) {
      if (this.peek() === '\\') {
        // A backslash always takes the following character with it, so an
        // escaped quote cannot terminate the literal.
        this.advance();
        if (this.pos < this.code.length) {
          this.advance();
        }
      } else {
        this.advance();
      }
    }

    if (this.pos >= this.code.length) {
      throw new Error(`Unterminated string at line ${this.line}`);
    }
    this.advance(closer.length);

    this.pushToken(TokenType.STRING, this.code.slice(start, this.pos), startLine, startColumn);
  }

  /** Reads an identifier and classifies it as keyword, boolean, None or name. */
  private readIdentifier(): void {
    const startLine = this.line;
    const startColumn = this.column;
    const start = this.pos;

    while (this.pos < this.code.length && Lexer.IDENT_PART.test(this.peek())) {
      this.advance();
    }
    const ident = this.code.slice(start, this.pos);

    let type: TokenType;
    if (Lexer.KEYWORDS.has(ident)) {
      type = TokenType.KEYWORD;
    } else if (ident === 'True' || ident === 'False') {
      type = TokenType.BOOLEAN;
    } else if (ident === 'None') {
      type = TokenType.NONE;
    } else {
      type = TokenType.IDENTIFIER;
    }
    this.pushToken(type, ident, startLine, startColumn);
  }

  /**
   * Reads one operator or delimiter at the current position.
   *
   * @throws Error when the current character does not start any known token.
   */
  private readOperatorOrDelimiter(): void {
    const startLine = this.line;
    const startColumn = this.column;

    for (const op of Lexer.MULTI_CHAR_OPERATORS) {
      if (this.code.startsWith(op, this.pos)) {
        this.pushToken(TokenType.OPERATOR, op, startLine, startColumn);
        this.advance(op.length);
        return;
      }
    }

    const char = this.peek();

    // A lone '=' is assignment; '==' was already matched above.
    if (char === '=') {
      this.pushToken(TokenType.ASSIGN, '=', startLine, startColumn);
      this.advance();
      return;
    }

    if (Lexer.SINGLE_CHAR_OPERATORS.has(char)) {
      this.pushToken(TokenType.OPERATOR, char, startLine, startColumn);
      this.advance();
      return;
    }

    const delimiterType = Lexer.DELIMITERS.get(char);
    if (delimiterType !== undefined) {
      this.pushToken(delimiterType, char, startLine, startColumn);
      this.advance();
      return;
    }

    // '!' is only valid as part of '!='.
    if (char === '!') {
      throw new Error(`Unexpected character '!' at line ${this.line}`);
    }
    throw new Error(`Unexpected character '${char}' at line ${this.line}, column ${this.column}`);
  }
}