machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,208 @@
1
+ """Symbol table for variable management during code generation.
2
+
3
+ This module provides scope management and variable resolution for
4
+ local and global variables.
5
+ """
6
+
7
+ from dataclasses import dataclass
8
+ from enum import Enum
9
+ from typing import Optional
10
+
11
+
12
class SymbolType(Enum):
    """Classification of an entry stored in the symbol table."""

    # Variable that lives in a slot of a non-global scope.
    LOCAL = "local"
    # Variable that belongs to the global scope.
    GLOBAL = "global"
    # Function parameter; slotted like a local.
    PARAMETER = "parameter"
18
+
19
+
20
@dataclass
class Symbol:
    """A named entry recorded in a scope.

    Attributes:
        name: Identifier the symbol is registered under.
        symbol_type: Whether the symbol is a local, a global, or a parameter.
        slot: Slot index for locals and parameters; globals carry -1.
    """

    name: str
    symbol_type: SymbolType
    slot: int  # -1 marks a global, which has no slot
33
+
34
+
35
class Scope:
    """A single lexical scope: its own symbols plus a link to the parent scope."""

    def __init__(self, parent: Optional["Scope"] = None, name: str = "global") -> None:
        """Create an empty scope.

        Args:
            parent: Enclosing scope; None marks this scope as the global one.
            name: Label for the scope, used for debugging.
        """
        self.parent = parent
        self.name = name
        self.is_global = parent is None
        self.symbols: dict[str, Symbol] = {}
        self.next_slot = 0  # slot handed to the next local or parameter

    def _claim_slot(self, name: str, kind: SymbolType) -> Symbol:
        """Register ``name`` under the next free slot and advance the counter."""
        entry = Symbol(name, kind, self.next_slot)
        self.next_slot += 1
        self.symbols[name] = entry
        return entry

    def define_local(self, name: str) -> Symbol:
        """Define a local variable in this scope.

        A name that is already present in this scope is returned unchanged
        and no new slot is consumed.

        Args:
            name: Variable name.

        Returns:
            The symbol registered under ``name``.
        """
        if name not in self.symbols:
            self._claim_slot(name, SymbolType.LOCAL)
        return self.symbols[name]

    def define_parameter(self, name: str) -> Symbol:
        """Define a function parameter in this scope.

        Unlike ``define_local``, an existing entry with the same name is
        replaced and a fresh slot is always consumed.

        Args:
            name: Parameter name.

        Returns:
            The newly created symbol.
        """
        return self._claim_slot(name, SymbolType.PARAMETER)

    def define_global(self, name: str) -> Symbol:
        """Define a global variable (slot -1) in this scope.

        Args:
            name: Variable name.

        Returns:
            The newly created symbol.
        """
        entry = Symbol(name, SymbolType.GLOBAL, -1)
        self.symbols[name] = entry
        return entry

    def resolve(self, name: str) -> Symbol | None:
        """Look ``name`` up in this scope, then in each enclosing scope.

        Args:
            name: Variable name to resolve.

        Returns:
            The innermost matching symbol, or None when no scope defines it.
        """
        scope = self
        while scope is not None:
            if name in scope.symbols:
                return scope.symbols[name]
            scope = scope.parent
        return None

    def num_locals(self) -> int:
        """Return the number of slots handed out by this scope so far.

        Returns:
            Number of local slots used.
        """
        return self.next_slot
122
+
123
+
124
class SymbolTable:
    """Manages a chain of nested scopes and resolves names against it."""

    def __init__(self) -> None:
        """Initialize with a global scope as the current scope."""
        self.global_scope = Scope(name="global")
        self.current_scope = self.global_scope

    def enter_scope(self, name: str = "block") -> None:
        """Enter a new scope nested inside the current one.

        Args:
            name: Name of the scope for debugging.
        """
        self.current_scope = Scope(parent=self.current_scope, name=name)

    def exit_scope(self) -> None:
        """Leave the current scope and make its parent current.

        Raises:
            RuntimeError: If the current scope has no parent (global scope).
        """
        parent = self.current_scope.parent
        if parent is None:
            raise RuntimeError("Cannot exit global scope")
        self.current_scope = parent

    def define(self, name: str, is_parameter: bool = False) -> Symbol:
        """Define a variable in the current scope.

        In the global scope the symbol is always defined as a global and
        ``is_parameter`` is ignored. In any other scope the symbol becomes a
        parameter or a local depending on ``is_parameter``.

        Args:
            name: Variable name.
            is_parameter: Whether this is a function parameter.

        Returns:
            The symbol returned by the scope's define method.
        """
        scope = self.current_scope
        if scope.is_global:
            return scope.define_global(name)
        if is_parameter:
            return scope.define_parameter(name)
        return scope.define_local(name)

    def resolve(self, name: str) -> Symbol | None:
        """Resolve a variable name starting from the current scope.

        Args:
            name: Variable name to resolve.

        Returns:
            The symbol found in the current scope or one of its ancestors.
            When nothing is found and the current scope is not the global
            scope, an implicit GLOBAL symbol (slot -1) is returned instead;
            that symbol is not recorded in any scope. None is returned only
            when the lookup fails while the global scope is current.
        """
        symbol = self.current_scope.resolve(name)

        # Identity check: we only care whether we are *in* the global scope.
        if symbol is None and self.current_scope is not self.global_scope:
            # Unknown names inside nested scopes are treated as global references.
            symbol = Symbol(name, SymbolType.GLOBAL, -1)

        return symbol

    def is_global_scope(self) -> bool:
        """Check whether the current scope is the global scope.

        Returns:
            True if in global scope, False otherwise.
        """
        return self.current_scope.is_global

    def num_locals(self) -> int:
        """Get the number of local slots used in the current scope.

        Returns:
            The current scope's local slot count.
        """
        return self.current_scope.num_locals()

    def current_scope_name(self) -> str:
        """Get the name of the current scope.

        Returns:
            Current scope name.
        """
        return self.current_scope.name
@@ -0,0 +1 @@
1
+ """Tests for the codegen module."""
@@ -0,0 +1,295 @@
1
+ """Unit tests for array operations bytecode generation."""
2
+
3
+ from machine_dialect.codegen.opcodes import Opcode
4
+ from machine_dialect.codegen.register_codegen import (
5
+ RegisterAllocation,
6
+ RegisterBytecodeGenerator,
7
+ )
8
+ from machine_dialect.mir.mir_instructions import (
9
+ ArrayFindIndex,
10
+ ArrayInsert,
11
+ ArrayRemove,
12
+ )
13
+ from machine_dialect.mir.mir_types import MIRType
14
+ from machine_dialect.mir.mir_values import Temp
15
+
16
+
17
def create_test_generator() -> RegisterBytecodeGenerator:
    """Build a generator whose register allocation is pre-populated for tests."""
    generator = RegisterBytecodeGenerator(debug=False)

    # Attach a fresh allocation, then configure it through a local alias.
    allocation = RegisterAllocation()
    generator.allocation = allocation
    allocation.max_registers = 256
    allocation.next_register = 0
    # INT temps 0-9 are each mapped to the register with the same number.
    allocation.value_to_register = {Temp(MIRType.INT, number): number for number in range(10)}

    return generator
32
+
33
+
34
class TestArrayFindIndexCodegen:
    """Bytecode generation checks for ArrayFindIndex."""

    def test_generates_loop_structure(self) -> None:
        """ArrayFindIndex should emit the opcodes that make up a search loop."""
        generator = create_test_generator()

        inst = ArrayFindIndex(
            Temp(MIRType.INT, 0),  # dest
            Temp(MIRType.ARRAY, 1),  # array
            Temp(MIRType.INT, 2),  # value
            (1, 1),
        )
        generator.generate_array_find_index(inst)

        bytecode = generator.bytecode
        assert len(bytecode) > 0

        # Operand byte counts that follow each opcode. This is a simplified
        # decoder: opcodes not listed are assumed to carry no operands.
        operand_widths = (
            ((Opcode.LOAD_CONST_R,), 3),  # dst + 16-bit const
            ((Opcode.ARRAY_LEN_R, Opcode.MOVE_R), 2),  # two registers
            ((Opcode.LT_R, Opcode.EQ_R, Opcode.ADD_R, Opcode.ARRAY_GET_R), 3),  # three registers
            ((Opcode.JUMP_R,), 4),  # 32-bit offset
            ((Opcode.JUMP_IF_R, Opcode.JUMP_IF_NOT_R), 5),  # register + 32-bit offset
        )

        opcodes_used = []
        cursor = 0
        while cursor < len(bytecode):
            current = bytecode[cursor]
            opcodes_used.append(current)
            skip = next((width for group, width in operand_widths if current in group), 0)
            cursor += 1 + skip

        # Opcodes the search loop cannot do without.
        assert Opcode.ARRAY_LEN_R in opcodes_used  # Get array length
        assert Opcode.LT_R in opcodes_used  # Compare index < length
        assert Opcode.ARRAY_GET_R in opcodes_used  # Get array element
        assert Opcode.EQ_R in opcodes_used  # Compare element with value
        assert Opcode.ADD_R in opcodes_used  # Increment index

    def test_labels_are_unique(self) -> None:
        """A second ArrayFindIndex must add labels beyond those of the first."""
        generator = create_test_generator()

        first = ArrayFindIndex(Temp(MIRType.INT, 0), Temp(MIRType.ARRAY, 1), Temp(MIRType.INT, 2), (1, 1))
        generator.generate_array_find_index(first)
        labels_before = set(generator.block_offsets.keys())

        second = ArrayFindIndex(Temp(MIRType.INT, 3), Temp(MIRType.ARRAY, 4), Temp(MIRType.INT, 5), (2, 1))
        generator.generate_array_find_index(second)
        labels_after = set(generator.block_offsets.keys())

        added = labels_after - labels_before
        assert len(added) > 0  # New labels were added

        # Every added label follows the "find_" naming convention.
        for label in added:
            assert "find_" in label
101
+
102
+
103
class TestArrayInsertCodegen:
    """Test ArrayInsert bytecode generation."""

    def test_generates_copy_loops(self) -> None:
        """Test that ArrayInsert emits the opcodes needed to copy into a new array."""
        generator = create_test_generator()

        # Create test instruction
        array = Temp(MIRType.ARRAY, 0)
        index = Temp(MIRType.INT, 1)
        value = Temp(MIRType.INT, 2)
        inst = ArrayInsert(array, index, value, (1, 1))

        # Generate bytecode
        generator.generate_array_insert(inst)

        # Check that bytecode was generated
        bytecode = generator.bytecode
        assert len(bytecode) > 0

        # Total instruction sizes (opcode byte included). This is a simplified
        # decoder: any opcode not listed is assumed to be one byte long.
        instruction_sizes = (
            ((Opcode.LOAD_CONST_R,), 4),
            ((Opcode.ARRAY_LEN_R, Opcode.MOVE_R), 3),
            ((Opcode.NEW_ARRAY_R,), 3),
            ((Opcode.ADD_R, Opcode.SUB_R, Opcode.LT_R, Opcode.ARRAY_GET_R, Opcode.ARRAY_SET_R), 4),
            ((Opcode.JUMP_R,), 5),
            ((Opcode.JUMP_IF_NOT_R,), 6),
        )

        opcodes_used = []
        i = 0
        # Scan to the very end so a trailing one-byte opcode is not missed.
        while i < len(bytecode):
            opcode = bytecode[i]
            opcodes_used.append(opcode)
            i += next((size for group, size in instruction_sizes if opcode in group), 1)

        # Verify key operations
        assert Opcode.ARRAY_LEN_R in opcodes_used  # Get original length
        assert Opcode.ADD_R in opcodes_used  # Calculate new length
        assert Opcode.NEW_ARRAY_R in opcodes_used  # Create new array
        assert Opcode.ARRAY_GET_R in opcodes_used  # Copy elements
        assert Opcode.ARRAY_SET_R in opcodes_used  # Set elements in new array
        assert Opcode.MOVE_R in opcodes_used  # Replace original array

    def test_handles_position_correctly(self) -> None:
        """Test that an insert with a register-held position generates labels."""
        generator = create_test_generator()

        # The insert position is supplied through a temp, not a constant.
        array = Temp(MIRType.ARRAY, 0)
        index = Temp(MIRType.INT, 1)
        value = Temp(MIRType.INT, 2)
        inst = ArrayInsert(array, index, value, (1, 1))

        # Should generate without errors
        generator.generate_array_insert(inst)

        # Check that labels were created
        assert any("insert_" in label for label in generator.block_offsets.keys())
167
+
168
+
169
class TestArrayRemoveCodegen:
    """Test ArrayRemove bytecode generation."""

    def test_generates_copy_with_skip(self) -> None:
        """Test that ArrayRemove emits the opcodes of a copy loop that skips one index."""
        generator = create_test_generator()

        # Create test instruction
        array = Temp(MIRType.ARRAY, 0)
        index = Temp(MIRType.INT, 1)
        inst = ArrayRemove(array, index, (1, 1))

        # Generate bytecode
        generator.generate_array_remove(inst)

        # Check that bytecode was generated
        bytecode = generator.bytecode
        assert len(bytecode) > 0

        # Total instruction sizes (opcode byte included). This is a simplified
        # decoder: any opcode not listed is assumed to be one byte long.
        instruction_sizes = (
            ((Opcode.LOAD_CONST_R,), 4),
            ((Opcode.ARRAY_LEN_R, Opcode.MOVE_R), 3),
            ((Opcode.NEW_ARRAY_R,), 3),
            (
                (
                    Opcode.SUB_R,
                    Opcode.LT_R,
                    Opcode.EQ_R,
                    Opcode.ADD_R,
                    Opcode.ARRAY_GET_R,
                    Opcode.ARRAY_SET_R,
                ),
                4,
            ),
            ((Opcode.JUMP_R,), 5),
            ((Opcode.JUMP_IF_R, Opcode.JUMP_IF_NOT_R), 6),
        )

        opcodes_used = []
        i = 0
        # Scan to the very end so a trailing one-byte opcode is not missed.
        while i < len(bytecode):
            opcode = bytecode[i]
            opcodes_used.append(opcode)
            i += next((size for group, size in instruction_sizes if opcode in group), 1)

        # Check for essential operations
        assert Opcode.ARRAY_LEN_R in opcodes_used  # Get original length
        assert Opcode.SUB_R in opcodes_used  # Calculate new length (old - 1)
        assert Opcode.NEW_ARRAY_R in opcodes_used  # Create new array
        assert Opcode.EQ_R in opcodes_used  # Check if current index is removal point
        assert Opcode.ARRAY_GET_R in opcodes_used  # Copy elements
        assert Opcode.ARRAY_SET_R in opcodes_used  # Set elements in new array

    def test_unique_labels(self) -> None:
        """Test that a second remove operation adds new "remove_" labels."""
        generator = create_test_generator()

        # First remove
        inst1 = ArrayRemove(Temp(MIRType.ARRAY, 0), Temp(MIRType.INT, 1), (1, 1))
        generator.generate_array_remove(inst1)
        labels1 = set(generator.block_offsets.keys())

        # Second remove
        inst2 = ArrayRemove(Temp(MIRType.ARRAY, 2), Temp(MIRType.INT, 3), (2, 1))
        generator.generate_array_remove(inst2)
        labels2 = set(generator.block_offsets.keys())

        # Should have new labels
        new_labels = labels2 - labels1
        assert len(new_labels) > 0
        assert any("remove_" in label for label in new_labels)
242
+
243
+
244
class TestIntegration:
    """Integration tests for array operations."""

    def test_combined_operations(self) -> None:
        """Test that find, insert and remove can be generated by one generator."""
        generator = create_test_generator()

        # The array temp is used directly; no array-creation instruction is
        # generated because only find/insert/remove are under test here.
        array = Temp(MIRType.ARRAY, 0)

        # Find index
        find_inst = ArrayFindIndex(Temp(MIRType.INT, 1), array, Temp(MIRType.INT, 2), (2, 1))
        generator.generate_array_find_index(find_inst)

        # Insert element
        insert_inst = ArrayInsert(array, Temp(MIRType.INT, 3), Temp(MIRType.INT, 4), (3, 1))
        generator.generate_array_insert(insert_inst)

        # Remove element
        remove_inst = ArrayRemove(array, Temp(MIRType.INT, 5), (4, 1))
        generator.generate_array_remove(remove_inst)

        # Should generate substantial bytecode
        assert len(generator.bytecode) > 100  # These operations generate lots of code

        # All operations should have their labels
        labels = generator.block_offsets.keys()
        assert any("find_" in label for label in labels)
        assert any("insert_" in label for label in labels)
        assert any("remove_" in label for label in labels)

    def test_register_allocation(self) -> None:
        """Test that ArrayFindIndex bytecode contains high register numbers."""
        generator = create_test_generator()

        inst = ArrayFindIndex(Temp(MIRType.INT, 0), Temp(MIRType.ARRAY, 1), Temp(MIRType.INT, 2), (1, 1))
        generator.generate_array_find_index(inst)

        # Any byte value of 247 or above counts as a high temporary register.
        # NOTE(review): every byte is scanned, not only register operands, so an
        # opcode or offset byte >= 247 would also satisfy this check; confirm
        # that 247 is the intended lower bound of the temporary range.
        high_registers_used = any(byte >= 247 for byte in generator.bytecode)

        assert high_registers_used, "Should use high temporary registers"
@@ -0,0 +1,185 @@
1
+ """Tests for bytecode serialization."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import struct
6
+ import tempfile
7
+ from pathlib import Path
8
+
9
+ import pytest
10
+
11
+ from machine_dialect.codegen.bytecode_serializer import BytecodeWriter, serialize_bytecode_module
12
+
13
+
14
class TestBytecodeWriter:
    """Exercise ``BytecodeWriter`` and ``serialize_bytecode_module``."""

    def test_create_writer(self) -> None:
        """A fresh writer is named ``__main__`` and all of its tables are empty."""
        fresh = BytecodeWriter()

        assert fresh.module_name == "__main__"
        for table_name in ("constants", "instructions", "functions", "global_names"):
            assert len(getattr(fresh, table_name)) == 0

    def test_add_constants(self) -> None:
        """Each ``add_*_constant`` call returns the next index and stores a tagged pair."""
        pool = BytecodeWriter()

        returned_indices = [
            pool.add_int_constant(42),
            pool.add_float_constant(3.14),
            pool.add_string_constant("hello"),
            pool.add_bool_constant(True),
            pool.add_empty_constant(),
        ]

        # Indices are handed out sequentially, starting at zero.
        assert returned_indices == [0, 1, 2, 3, 4]
        assert len(pool.constants) == 5

        # Entries are (tag, value) pairs; the tag differs per constant kind.
        expected_entries = [
            (0x01, 42),
            (0x02, 3.14),
            (0x03, "hello"),
            (0x04, True),
            (0x05, None),
        ]
        for position, entry in enumerate(expected_entries):
            assert pool.constants[position] == entry

    def test_emit_instructions(self) -> None:
        """Emitted instructions are stored as little-endian packed byte strings."""
        emitter = BytecodeWriter()

        # One constant is needed as the operand of the load instruction.
        hundred = emitter.add_int_constant(100)

        emitter.emit_load_const(0, hundred)
        emitter.emit_move(1, 0)
        emitter.emit_add(2, 0, 1)
        emitter.emit_return(2)

        assert len(emitter.instructions) == 4

        # Expected encodings, in emission order: load-const, move, add, return.
        expected_encodings = (
            struct.pack("<BBH", 0, 0, hundred),
            struct.pack("<BBB", 1, 1, 0),
            struct.pack("<BBBB", 7, 2, 0, 1),
            struct.pack("<BBB", 26, 1, 2),
        )
        for slot, encoding in enumerate(expected_encodings):
            assert emitter.instructions[slot] == encoding

    def test_write_bytecode_file(self) -> None:
        """``write_to_file`` produces a ``.mdbc`` file that begins with the expected header."""
        module_writer = BytecodeWriter()
        module_writer.set_module_name("test_module")

        # Minimal program: load one constant and return it.
        answer = module_writer.add_int_constant(42)
        module_writer.emit_load_const(0, answer)
        module_writer.emit_return(0)

        with tempfile.TemporaryDirectory() as scratch:
            target = Path(scratch, "test")
            module_writer.write_to_file(target)

            # The output is looked up under the ``.mdbc`` suffix.
            written = target.with_suffix(".mdbc")
            assert written.exists()

            # Header fields are read sequentially: magic, version, flags.
            with written.open("rb") as handle:
                assert handle.read(4) == b"MDBC"

                (version,) = struct.unpack("<I", handle.read(4))
                assert version == 1

                (flags,) = struct.unpack("<I", handle.read(4))
                assert flags == 1  # Little-endian flag

    def test_serialize_module(self) -> None:
        """``serialize_bytecode_module`` output starts with the magic bytes and version 1."""
        constant_pool = [
            (0x01, 100),
            (0x03, "test"),
            (0x04, False),
        ]

        encoded_instructions = [
            struct.pack("<BBH", 0, 0, 0),  # LoadConstR r0, 0
            struct.pack("<BBH", 0, 1, 1),  # LoadConstR r1, 1
            struct.pack("<BB", 26, 0),  # ReturnR (no value)
        ]

        blob = serialize_bytecode_module(
            "my_module",
            constant_pool,
            encoded_instructions,
            functions={"main": 0},
            global_names=["x", "y"],
        )

        # Magic number occupies the first four bytes.
        assert blob[:4] == b"MDBC"

        # The version follows as a little-endian unsigned 32-bit integer.
        (version,) = struct.unpack("<I", blob[4:8])
        assert version == 1

    def test_bytecode_format(self) -> None:
        """The serialized stream has a 28-byte header with strictly increasing section offsets."""
        # Kept as a function-local import so the module-level imports stay untouched.
        import io

        layout_writer = BytecodeWriter()
        layout_writer.set_module_name("format_test")

        # Constants of several kinds; only the first two indices are used below.
        int_idx = layout_writer.add_int_constant(999)
        float_idx = layout_writer.add_float_constant(2.718)
        layout_writer.add_string_constant("bytecode")
        layout_writer.add_bool_constant(False)

        # Global name slots.
        x_idx = layout_writer.add_global_name("x")
        y_idx = layout_writer.add_global_name("y")

        # Store both constants to globals, load them back, add, and return.
        layout_writer.emit_load_const(0, int_idx)
        layout_writer.emit_store_global(0, x_idx)
        layout_writer.emit_load_const(1, float_idx)
        layout_writer.emit_store_global(1, y_idx)
        layout_writer.emit_load_global(2, x_idx)
        layout_writer.emit_load_global(3, y_idx)
        layout_writer.emit_add(4, 2, 3)
        layout_writer.emit_return(4)

        # Serialize into memory instead of onto disk.
        sink = io.BytesIO()
        layout_writer.write_to_stream(sink)
        data = sink.getvalue()

        # Header = 4 magic + 4 version + 4 flags + 4 * 4 offsets = 28 bytes.
        assert len(data) > 28  # At least header size

        assert data[0:4] == b"MDBC"
        version, flags = struct.unpack_from("<II", data, 4)
        assert version == 1
        assert flags == 1

        # Four section offsets follow the flags field.
        name_offset, const_offset, func_offset, inst_offset = struct.unpack_from("<IIII", data, 12)

        assert name_offset == 28  # Right after header
        assert name_offset < const_offset < func_offset < inst_offset
182
+
183
+
184
if __name__ == "__main__":
    # Executing this file as a script hands it to pytest with verbose output.
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)