machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,1433 @@
1
+ """AST nodes for statement types in Machine Dialect™.
2
+
3
+ This module defines the statement nodes used in the Abstract Syntax Tree (AST)
4
+ for Machine Dialect™. Statements are complete units of execution that perform
5
+ actions but don't produce values (unlike expressions).
6
+
7
+ Statements include:
8
+ - DefineStatement: Defines a variable with explicit type information
9
+ - ExpressionStatement: Wraps an expression as a statement
10
+ - ReturnStatement: Returns a value from a function or procedure
11
+ - SetStatement: Assigns a value to a variable
12
+ - BlockStatement: Contains a list of statements with a specific depth
13
+ - IfStatement: Conditional statement with consequence and optional alternative
14
+ - WhileStatement: Loop that executes while a condition is true
15
+ - ForEachStatement: Loop that iterates over elements in a collection
16
+ - ErrorStatement: Represents a statement that failed to parse
17
+ - Parameter: Represents a parameter with type and optional default value
18
+ """
19
+
20
+ from enum import Enum, auto
21
+
22
+ from machine_dialect.ast import ASTNode, Expression, Identifier
23
+ from machine_dialect.lexer import Token, TokenType
24
+
25
+
26
class FunctionVisibility(Enum):
    """Enumerates how a function statement is exposed.

    Members are numbered automatically in declaration order.
    """

    # Action: a private method.
    PRIVATE = auto()
    # Interaction: a public method.
    PUBLIC = auto()
    # Utility: a function with a return value.
    FUNCTION = auto()
32
+
33
+
34
class Statement(ASTNode):
    """Root of the statement hierarchy in the AST.

    Statements are complete units of execution: they carry out an action
    rather than yielding a value the way an expression does.

    Attributes:
        token: The token that begins the statement.
    """

    def __init__(self, token: Token) -> None:
        """Create a statement anchored at ``token``.

        Args:
            token: The token that begins this statement.
        """
        self.token = token

    def desugar(self) -> "Statement":
        """Return the desugared form of this statement.

        The base implementation performs no rewriting; subclasses override
        it when they have syntactic sugar to remove.

        Returns:
            This very instance, unchanged.
        """
        return self
56
+
57
+
58
class DefineStatement(Statement):
    """Variable definition statement.

    Defines a new variable with explicit type information and optional
    default value. Variables must be defined before they can be used
    in Set statements.

    Attributes:
        name: Variable identifier to define
        type_spec: List of type names (for union type support)
        initial_value: Optional default value expression

    Examples:
        Define `count` as Whole Number.
        Define `message` as Text (default: _"Hello"_).
    """

    def __init__(
        self, token: Token, name: Identifier, type_spec: list[str], initial_value: Expression | None = None
    ) -> None:
        """Initialize a DefineStatement node.

        Args:
            token: The DEFINE keyword token
            name: The variable identifier
            type_spec: List of type names (e.g., ["Whole Number"], ["Text", "Whole Number"])
            initial_value: Optional default value expression
        """
        super().__init__(token)
        self.name = name
        self.type_spec = type_spec
        self.initial_value = initial_value

    def __str__(self) -> str:
        """Return string representation of the define statement.

        Returns:
            ``Define `<name>` as <T1> or <T2>.`` with `` (default: <value>)``
            inserted before the final period when a default is present.
        """
        type_str = " or ".join(self.type_spec)
        base = f"Define `{self.name.value}` as {type_str}"
        # Test for presence explicitly so a default is never omitted merely
        # because its AST node happens to evaluate as falsy.
        if self.initial_value is not None:
            base += f" (default: {self.initial_value})"
        return base + "."

    def _define_then_set(self, value: Expression) -> "BlockStatement":
        """Build the ``Define`` + ``Set`` pair shared by desugar() and to_hir().

        Args:
            value: The already-lowered expression to assign in the Set statement.

        Returns:
            A BlockStatement (default depth) holding a default-free copy of
            this definition followed by a SetStatement assigning ``value``.
        """
        define_only = DefineStatement(self.token, self.name, self.type_spec, None)
        set_stmt = SetStatement(self.token, self.name, value)
        block = BlockStatement(self.token)
        block.statements = [define_only, set_stmt]
        return block

    def desugar(self) -> Statement:
        """Desugar define statement with default value.

        A Define statement with a default value desugars into:
        1. The Define statement itself (without initial_value)
        2. A Set statement for initialization, whose value is the
           desugared default expression

        Returns:
            Self if no initial value, otherwise a BlockStatement containing
            the definition and initialization.
        """
        if self.initial_value is None:
            return self

        # The block returned here is not desugared again by its container,
        # so the default expression has to be desugared at this point, just
        # as SetStatement.desugar() does for its own value.
        return self._define_then_set(self.initial_value.desugar())

    def to_hir(self) -> Statement:
        """Convert DefineStatement to HIR representation.

        The HIR representation includes type annotations and
        desugars default values into separate initialization.

        Returns:
            A fresh default-free DefineStatement when there is no initial
            value, otherwise a BlockStatement with the definition followed
            by the initializing SetStatement.
        """
        if self.initial_value is None:
            # No default value - return an equivalent copy
            return DefineStatement(self.token, self.name, self.type_spec, None)

        # Lower the default expression when it supports HIR conversion.
        value = self.initial_value
        if hasattr(value, "to_hir"):
            value = value.to_hir()
        return self._define_then_set(value)
156
+
157
+
158
class ExpressionStatement(Statement):
    """Statement whose entire content is a single expression.

    This lets an expression stand on its own line as a statement, for
    instance a bare function call such as `print("Hello")`.

    Attributes:
        expression: The wrapped expression, or None when absent.
    """

    def __init__(self, token: Token, expression: Expression | None) -> None:
        """Create the wrapper.

        Args:
            token: The first token of the expression.
            expression: The expression to expose as a statement.
        """
        super().__init__(token)
        self.expression = expression

    def __str__(self) -> str:
        """Render the statement exactly as its wrapped expression renders.

        Returns:
            ``str()`` of the wrapped expression.
        """
        return str(self.expression)

    def desugar(self) -> "ExpressionStatement":
        """Produce a copy whose expression has been desugared.

        Returns:
            A new ExpressionStatement sharing this node's token; its
            expression is the desugared original, or None when this node
            holds no (truthy) expression.
        """
        inner = self.expression
        lowered = inner.desugar() if inner else None
        return ExpressionStatement(self.token, lowered)
197
+
198
+
199
class ReturnStatement(Statement):
    """Statement that leaves a function, optionally handing back a value.

    Attributes:
        return_value: Expression to return to the caller, or None for a
            void return.
    """

    def __init__(self, token: Token, return_value: Expression | None = None) -> None:
        """Create a return statement.

        Args:
            token: The token carrying the return keyword.
            return_value: Optional expression to evaluate and return.
        """
        super().__init__(token)
        self.return_value = return_value

    def __str__(self) -> str:
        """Render the statement on a line of its own.

        Returns:
            A newline followed by the token's literal, then a space and the
            return value when one is present.
        """
        head = f"\n{self.token.literal}"
        if not self.return_value:
            return head
        return f"{head} {self.return_value}"

    def desugar(self) -> "ReturnStatement":
        """Produce a canonical copy of this return statement.

        The keyword literal is replaced by "return" regardless of how it
        was spelled in the source (e.g. "give back" or "gives back"), and
        the return value, if any, is desugared.

        Returns:
            A new ReturnStatement whose token keeps the original type, line
            and position but has the literal "return".
        """
        source = self.token
        # Same token type and location, canonical literal.
        canonical = Token(source.type, "return", source.line, source.position)

        value = self.return_value
        lowered = value.desugar() if value else None
        return ReturnStatement(canonical, lowered)
250
+
251
+
252
class SetStatement(Statement):
    """Assignment statement of the form "Set <variable> to <value>".

    Attributes:
        name: Identifier of the variable being assigned, or None.
        value: Expression whose value is assigned, or None.
    """

    def __init__(self, token: Token, name: Identifier | None = None, value: Expression | None = None) -> None:
        """Create a set statement.

        Args:
            token: The token that begins the statement.
            name: The identifier to assign to.
            value: The expression whose value to assign.
        """
        super().__init__(token)
        self.name = name
        self.value = value

    def __str__(self) -> str:
        """Render the statement as "<literal> <name> to <value>".

        The name and value segments are left out when they are missing;
        the literal and the "to " connector are always emitted.

        Returns:
            The assembled text.
        """
        pieces = [f"{self.token.literal} "]
        if self.name:
            pieces.append(f"{self.name} ")
        pieces.append("to ")
        if self.value:
            pieces.append(str(self.value))
        return "".join(pieces)

    def desugar(self) -> "SetStatement":
        """Produce a copy with the name and value desugared.

        Returns:
            A new SetStatement sharing this node's token. The name is
            desugared only when it offers a ``desugar`` attribute; otherwise
            it is reused as-is. Missing parts stay None.
        """
        lowered_name: Identifier | None = None
        if self.name:
            if hasattr(self.name, "desugar"):
                lowered_name = self.name.desugar()
            else:
                lowered_name = self.name

        lowered_value = self.value.desugar() if self.value else None
        return SetStatement(self.token, lowered_name, lowered_value)
301
+
302
+
303
class CallStatement(Statement):
    """Statement that invokes a function, optionally with arguments.

    The textual form is "<keyword> <function> [with <arguments>]", where
    the keyword is whatever literal the statement's token carries.

    Attributes:
        function_name: Expression identifying what to call, or None.
        arguments: Expression holding the call arguments, or None.
    """

    def __init__(
        self, token: Token, function_name: Expression | None = None, arguments: Expression | None = None
    ) -> None:
        """Create a call statement.

        Args:
            token: The token that begins the statement.
            function_name: The expression identifying the function to call.
            arguments: Optional expression containing the call arguments.
        """
        super().__init__(token)
        self.function_name = function_name
        self.arguments = arguments

    def __str__(self) -> str:
        """Render the statement as "<literal> <function> with <arguments>".

        The function and the " with ..." tail are each emitted only when
        the corresponding part is present.

        Returns:
            The assembled text.
        """
        pieces = [f"{self.token.literal} "]
        if self.function_name:
            pieces.append(str(self.function_name))
        if self.arguments:
            pieces.append(f" with {self.arguments}")
        return "".join(pieces)

    def desugar(self) -> "CallStatement":
        """Produce a copy with the function name and arguments desugared.

        Returns:
            A new CallStatement sharing this node's token; parts that are
            missing stay None.
        """
        callee = self.function_name.desugar() if self.function_name else None
        args = self.arguments.desugar() if self.arguments else None
        return CallStatement(self.token, callee, args)
353
+
354
+
355
class BlockStatement(Statement):
    """Ordered group of statements sharing one nesting depth.

    The depth corresponds to the number of '>' symbols that prefix each
    line of the block.

    Attributes:
        depth: Nesting level of the block (count of '>' symbols).
        statements: The statements held by the block, in order.
    """

    def __init__(self, token: Token, depth: int = 1) -> None:
        """Create an empty block.

        Args:
            token: The token that begins the block.
            depth: The depth level of this block.
        """
        super().__init__(token)
        self.depth = depth
        self.statements: list[Statement] = []

    def __str__(self) -> str:
        """Render the block as a colon line followed by its statements.

        Returns:
            ":" and a newline, then one line per statement, each prefixed
            with ``depth`` '>' characters and a space.
        """
        prefix = ">" * self.depth + " "
        rendered = [prefix + str(stmt) for stmt in self.statements]
        return ":\n" + "\n".join(rendered)

    def desugar(self) -> "Statement | BlockStatement":
        """Produce a block holding the desugared form of every statement.

        A block is always returned, even for a single statement, so that
        the scope it represents is preserved.

        Returns:
            A new BlockStatement with the same token and depth.
        """
        lowered: list[Statement] = []
        for child in self.statements:
            replacement = child.desugar()
            # Any Statement subclass is acceptable here.
            assert isinstance(replacement, Statement)
            lowered.append(replacement)

        clone = BlockStatement(self.token, self.depth)
        clone.statements = lowered
        return clone
409
+
410
+
411
class IfStatement(Statement):
    """A conditional statement with if-then-else structure.

    If statements evaluate a condition and execute different blocks of code
    based on whether the condition is true or false. Supports various keywords:
    if/when/whenever for the condition, else/otherwise for the alternative.

    Attributes:
        condition: The boolean expression to evaluate.
        consequence: The block of statements to execute if condition is true.
        alternative: Optional block of statements to execute if condition is false.
    """

    def __init__(self, token: Token, condition: Expression | None = None) -> None:
        """Initialize an IfStatement node.

        Args:
            token: The 'if', 'when', or 'whenever' token.
            condition: The boolean expression to evaluate.
        """
        super().__init__(token)
        self.condition = condition
        self.consequence: BlockStatement | None = None
        self.alternative: BlockStatement | None = None

    def __str__(self) -> str:
        """Return the string representation of the if statement.

        Returns:
            A string like "if <condition> then: <consequence> [else: <alternative>]".
        """
        out = f"{self.token.literal} {self.condition}"
        if self.consequence is not None:
            out += f" then{self.consequence}"
        if self.alternative is not None:
            out += f"\nelse{self.alternative}"
        return out

    def _desugar_branch(self, branch: BlockStatement) -> BlockStatement:
        """Desugar one branch and guarantee the result is a BlockStatement.

        Args:
            branch: The consequence or alternative block to desugar.

        Returns:
            The desugared block. If desugaring yields some other statement,
            it is wrapped in a new block built from this statement's token
            and the branch's depth.
        """
        lowered = branch.desugar()
        if isinstance(lowered, BlockStatement):
            return lowered

        # BlockStatement.desugar() returns a block, but its declared return
        # type is wider; keep a defensive wrap so both branch attributes
        # always hold a BlockStatement.
        wrapper = BlockStatement(self.token, branch.depth)
        wrapper.statements = [lowered]
        return wrapper

    def desugar(self) -> "IfStatement":
        """Desugar if statement by recursively desugaring all components.

        Returns:
            A new IfStatement with desugared condition, consequence, and alternative.
        """
        desugared = IfStatement(self.token)
        if self.condition is not None:
            desugared.condition = self.condition.desugar()
        if self.consequence is not None:
            desugared.consequence = self._desugar_branch(self.consequence)
        if self.alternative is not None:
            desugared.alternative = self._desugar_branch(self.alternative)
        return desugared
479
+
480
+
481
class ErrorStatement(Statement):
    """Placeholder node for a statement that could not be parsed.

    Keeping an ErrorStatement in the tree lets the parser carry on and report
    several errors in one pass. The node records the tokens skipped during
    panic-mode recovery.

    Attributes:
        skipped_tokens: Tokens that were skipped during panic recovery.
        message: Human-readable description of what went wrong.
    """

    def __init__(self, token: Token, skipped_tokens: list[Token] | None = None, message: str = "") -> None:
        """Create an error statement.

        Args:
            token: The token where the error began.
            skipped_tokens: Tokens that were skipped during panic recovery.
            message: Error message describing the parsing failure.
        """
        super().__init__(token)
        # A missing or empty argument yields a fresh list owned by this node.
        self.skipped_tokens = skipped_tokens if skipped_tokens else []
        self.message = message

    def __str__(self) -> str:
        """Render the error as text.

        Returns:
            "<error: message>" when a message is set, otherwise "<error>".
        """
        return f"<error: {self.message}>" if self.message else "<error>"

    def desugar(self) -> "ErrorStatement":
        """Return this node as-is; error statements have nothing to desugar.

        Returns:
            Self unchanged.
        """
        return self
522
+
523
+
524
class Parameter(ASTNode):
    """Represents an input parameter with type and optional default value.

    Parameters are used in Actions, Interactions, and Utilities to define inputs.
    They follow the syntax: `name` **as** Type (required|optional, default: value)

    Attributes:
        name: The identifier naming the parameter.
        type_name: The type of the parameter (e.g., "Text", "Whole Number", "Status").
        is_required: Whether the parameter is required or optional.
        default_value: The default value for optional parameters.
    """

    def __init__(
        self,
        token: Token,
        name: Identifier,
        type_name: str = "",
        is_required: bool = True,
        default_value: Expression | None = None,
    ) -> None:
        """Initialize a Parameter node.

        Args:
            token: The token that begins this parameter.
            name: The identifier naming the parameter.
            type_name: The type of the parameter.
            is_required: Whether the parameter is required.
            default_value: The default value for optional parameters.
        """
        self.token = token
        self.name = name
        self.type_name = type_name
        self.is_required = is_required
        self.default_value = default_value

    def __str__(self) -> str:
        """Return string representation of the parameter.

        Returns:
            "<name> as <type> (required)" for required parameters, otherwise
            "<name> as <type> (optional)" with ", default: <value>" inserted
            before the closing parenthesis when a default value is present.
        """
        text = f"{self.name} as {self.type_name}"
        if self.is_required:
            return f"{text} (required)"
        # Test for presence, not truthiness, so a default whose node happens to
        # evaluate falsy is still rendered (matches Output.__str__).
        if self.default_value is not None:
            return f"{text} (optional, default: {self.default_value})"
        return f"{text} (optional)"
575
+
576
+
577
class Output(ASTNode):
    """A named, typed output with an optional default value.

    Outputs declare the return values of Actions, Interactions, and Utilities
    using the syntax: `name` **as** Type (default: value)

    Attributes:
        name: The identifier naming the output.
        type_name: The type of the output (e.g., "Text", "Number", "Status").
        default_value: The optional default value for the output.
    """

    def __init__(
        self,
        token: Token,
        name: Identifier,
        type_name: str = "",
        default_value: Expression | None = None,
    ) -> None:
        """Create an output declaration.

        Args:
            token: The token that begins this output.
            name: The identifier naming the output.
            type_name: The type of the output.
            default_value: The optional default value.
        """
        self.token = token
        self.name = name
        self.type_name = type_name
        self.default_value = default_value

    def __str__(self) -> str:
        """Render the output declaration as text.

        Returns:
            "`name` as Type", with " (default: value)" appended when a default
            value is set.
        """
        declaration = f"`{self.name.value}` as {self.type_name}"
        if self.default_value is None:
            return declaration
        return f"{declaration} (default: {self.default_value})"
619
+
620
+
621
class ActionStatement(Statement):
    """An Action definition (private method) in Machine Dialect™.

    Actions are private methods that can only be called within the same scope.
    They are written with the markdown-style header:
    ### **Action**: `name`

    Attributes:
        name: The identifier naming the action.
        inputs: List of input parameters.
        outputs: List of outputs.
        body: The block of statements that make up the action body.
        description: Optional description from the summary tag.
    """

    def __init__(
        self,
        token: Token,
        name: Identifier,
        inputs: list[Parameter] | None = None,
        outputs: list[Output] | None = None,
        body: BlockStatement | None = None,
        description: str = "",
    ) -> None:
        """Create an action definition.

        Args:
            token: The token that begins this statement (KW_ACTION).
            name: The identifier naming the action.
            inputs: List of input parameters (defaults to empty list).
            outputs: List of outputs (defaults to empty list).
            body: The block of statements in the action body; a new empty
                block built from ``token`` is used when omitted.
            description: Optional description from summary tag.
        """
        super().__init__(token)
        self.name = name
        self.inputs = [] if inputs is None else inputs
        self.outputs = [] if outputs is None else outputs
        self.body = BlockStatement(token) if body is None else body
        self.description = description

    def token_literal(self) -> str:
        """Return the literal text of the token that starts this action.

        Returns:
            The literal value of the action keyword token.
        """
        return self.token.literal

    def __str__(self) -> str:
        """Render the action as text.

        Returns:
            "action <name>", an optional "(inputs: ...)" list, an optional
            " -> ..." output list, then the body wrapped in braces.
        """
        rendered_inputs = ", ".join(map(str, self.inputs))
        rendered_outputs = ", ".join(map(str, self.outputs))
        header = f"action {self.name}"
        if rendered_inputs:
            header += f"(inputs: {rendered_inputs})"
        if rendered_outputs:
            header += f" -> {rendered_outputs}"
        return f"{header} {{\n{self.body}\n}}"

    def desugar(self) -> "FunctionStatement":
        """Lower this action to the unified FunctionStatement form.

        Returns:
            A FunctionStatement with PRIVATE visibility.
        """
        block: BlockStatement | None = None
        if self.body:
            lowered = self.body.desugar()
            if not isinstance(lowered, BlockStatement):
                # Keep the function body block-shaped.
                wrapper = BlockStatement(self.token)
                wrapper.statements = [lowered]
                lowered = wrapper
            block = lowered

        function_name = self.name.desugar() if hasattr(self.name, "desugar") else self.name
        return FunctionStatement(
            self.token,
            FunctionVisibility.PRIVATE,
            function_name,
            self.inputs,
            self.outputs,
            block,
            self.description,
        )
712
+
713
+
714
class SayStatement(Statement):
    """A Say statement (output/display) in Machine Dialect™.

    Say statements show the value of an expression to the user, much like a
    print statement in other languages.

    Attributes:
        expression: The expression to output.
    """

    def __init__(self, token: Token, expression: Expression | None = None) -> None:
        """Create a say statement.

        Args:
            token: The token that begins this statement (KW_SAY).
            expression: The expression to output.
        """
        super().__init__(token)
        self.expression = expression

    def token_literal(self) -> str:
        """Return the literal text of the token that starts this statement.

        Returns:
            The literal value of the say keyword token.
        """
        return self.token.literal

    def __str__(self) -> str:
        """Render the statement as text.

        Returns:
            "Say <expression>", or just "Say" when no expression is set.
        """
        if not self.expression:
            return "Say"
        return f"Say {self.expression}"

    def desugar(self) -> "SayStatement":
        """Build a new say statement around the desugared expression.

        Returns:
            A new SayStatement with desugared expression.
        """
        lowered = self.expression.desugar() if self.expression else None
        return SayStatement(self.token, lowered)
760
+
761
+
762
class InteractionStatement(Statement):
    """An Interaction definition (public method) in Machine Dialect™.

    Interactions are public methods that can be called from outside the scope.
    They are written with the markdown-style header:
    ### **Interaction**: `name`

    Attributes:
        name: The identifier naming the interaction.
        inputs: List of input parameters.
        outputs: List of outputs.
        body: The block of statements that make up the interaction body.
        description: Optional description from the summary tag.
    """

    def __init__(
        self,
        token: Token,
        name: Identifier,
        inputs: list[Parameter] | None = None,
        outputs: list[Output] | None = None,
        body: BlockStatement | None = None,
        description: str = "",
    ) -> None:
        """Create an interaction definition.

        Args:
            token: The token that begins this statement (KW_INTERACTION).
            name: The identifier naming the interaction.
            inputs: List of input parameters (defaults to empty list).
            outputs: List of outputs (defaults to empty list).
            body: The block of statements in the interaction body; a new empty
                block built from ``token`` is used when omitted.
            description: Optional description from summary tag.
        """
        super().__init__(token)
        self.name = name
        self.inputs = [] if inputs is None else inputs
        self.outputs = [] if outputs is None else outputs
        self.body = BlockStatement(token) if body is None else body
        self.description = description

    def token_literal(self) -> str:
        """Return the literal text of the token that starts this interaction.

        Returns:
            The literal value of the interaction keyword token.
        """
        return self.token.literal

    def __str__(self) -> str:
        """Render the interaction as text.

        Returns:
            "interaction <name>", an optional "(inputs: ...)" list, an optional
            " -> ..." output list, then the body wrapped in braces.
        """
        rendered_inputs = ", ".join(map(str, self.inputs))
        rendered_outputs = ", ".join(map(str, self.outputs))
        header = f"interaction {self.name}"
        if rendered_inputs:
            header += f"(inputs: {rendered_inputs})"
        if rendered_outputs:
            header += f" -> {rendered_outputs}"
        return f"{header} {{\n{self.body}\n}}"

    def desugar(self) -> "FunctionStatement":
        """Lower this interaction to the unified FunctionStatement form.

        Returns:
            A FunctionStatement with PUBLIC visibility.
        """
        block: BlockStatement | None = None
        if self.body:
            lowered = self.body.desugar()
            if not isinstance(lowered, BlockStatement):
                # Keep the function body block-shaped.
                wrapper = BlockStatement(self.token)
                wrapper.statements = [lowered]
                lowered = wrapper
            block = lowered

        function_name = self.name.desugar() if hasattr(self.name, "desugar") else self.name
        return FunctionStatement(
            self.token,
            FunctionVisibility.PUBLIC,
            function_name,
            self.inputs,
            self.outputs,
            block,
            self.description,
        )
853
+
854
+
855
class UtilityStatement(Statement):
    """A Utility definition (function) in Machine Dialect™.

    Utilities are functions that can be called and return values.
    They are written with the markdown-style header:
    ### **Utility**: `name`

    Attributes:
        name: The identifier naming the utility.
        inputs: List of input parameters.
        outputs: List of outputs.
        body: The block of statements that make up the utility body.
        description: Optional description from the summary tag.
    """

    def __init__(
        self,
        token: Token,
        name: Identifier,
        inputs: list[Parameter] | None = None,
        outputs: list[Output] | None = None,
        body: BlockStatement | None = None,
        description: str = "",
    ) -> None:
        """Create a utility definition.

        Args:
            token: The token that begins this statement (KW_UTILITY).
            name: The identifier naming the utility.
            inputs: List of input parameters (defaults to empty list).
            outputs: List of outputs (defaults to empty list).
            body: The block of statements in the utility body; a new empty
                block built from ``token`` is used when omitted.
            description: Optional description from summary tag.
        """
        super().__init__(token)
        self.name = name
        self.inputs = [] if inputs is None else inputs
        self.outputs = [] if outputs is None else outputs
        self.body = BlockStatement(token) if body is None else body
        self.description = description

    def token_literal(self) -> str:
        """Return the literal text of the token that starts this utility.

        Returns:
            The literal value of the utility keyword token.
        """
        return self.token.literal

    def __str__(self) -> str:
        """Render the utility as text.

        Returns:
            "utility <name>", an optional "(inputs: ...)" list, an optional
            " -> ..." output list, then the body wrapped in braces.
        """
        rendered_inputs = ", ".join(map(str, self.inputs))
        rendered_outputs = ", ".join(map(str, self.outputs))
        header = f"utility {self.name}"
        if rendered_inputs:
            header += f"(inputs: {rendered_inputs})"
        if rendered_outputs:
            header += f" -> {rendered_outputs}"
        return f"{header} {{\n{self.body}\n}}"

    def desugar(self) -> "FunctionStatement":
        """Lower this utility to the unified FunctionStatement form.

        Returns:
            A FunctionStatement with FUNCTION visibility.
        """
        block: BlockStatement | None = None
        if self.body:
            lowered = self.body.desugar()
            if not isinstance(lowered, BlockStatement):
                # Keep the function body block-shaped.
                wrapper = BlockStatement(self.token)
                wrapper.statements = [lowered]
                lowered = wrapper
            block = lowered

        function_name = self.name.desugar() if hasattr(self.name, "desugar") else self.name
        return FunctionStatement(
            self.token,
            FunctionVisibility.FUNCTION,
            function_name,
            self.inputs,
            self.outputs,
            block,
            self.description,
        )
946
+
947
+
948
class CollectionMutationStatement(Statement):
    """Statement for mutating collections (lists and named lists).

    Handles operations like:
        Arrays (Ordered/Unordered Lists):
        - Add _value_ to list
        - Remove _value_ from list
        - Set the second item of list to _value_
        - Set item _5_ of list to _value_
        - Insert _value_ at position _3_ in list
        - Clear list

        Named Lists (Dictionaries):
        - Add "key" to dict with value _value_
        - Remove "key" from dict
        - Update "key" in dict to _value_
        - Clear dict

    Attributes:
        operation: The mutation operation ('add', 'remove', 'set', 'insert', 'clear', 'update').
        collection: The collection expression to mutate.
        value: The value for add/remove/set/insert/update operations.
        position: The position/index/key for set/insert/update operations (can be ordinal, numeric, or key).
        position_type: Type of position ('ordinal', 'numeric', 'key', None).
    """

    # One-based ordinal words that to_hir() rewrites to zero-based indices.
    _ORDINAL_TO_INDEX = {"first": 0, "second": 1, "third": 2}

    def __init__(
        self,
        token: Token,
        operation: str,
        collection: Expression,
        value: Expression | None = None,
        position: Expression | str | int | None = None,
        position_type: str | None = None,
    ) -> None:
        """Initialize a CollectionMutationStatement node.

        Args:
            token: The token that begins this statement (KW_ADD, KW_REMOVE, etc.).
            operation: The mutation operation type.
            collection: The collection to mutate.
            value: The value for the operation (None for 'clear').
            position: The position/index/key for the operation (ordinal string,
                numeric int, or expression).
            position_type: Type of position ('ordinal', 'numeric', 'key', or None).
        """
        super().__init__(token)
        self.operation = operation
        self.collection = collection
        self.value = value
        self.position = position
        self.position_type = position_type

    def __str__(self) -> str:
        """Return string representation of the mutation statement.

        Returns:
            A human-readable sentence for the known operations, or
            "<collection mutation: OPERATION>" for an unrecognised one.
        """
        operation = self.operation
        if operation == "add":
            if self.position_type == "key":
                return f"Add {self.position} to {self.collection} with value {self.value}."
            return f"Add {self.value} to {self.collection}."
        if operation == "remove":
            return f"Remove {self.value} from {self.collection}."
        if operation == "set":
            if self.position_type == "ordinal":
                return f"Set the {self.position} item of {self.collection} to {self.value}."
            # Any non-ordinal position is rendered in the numeric form.
            return f"Set item _{self.position}_ of {self.collection} to {self.value}."
        if operation == "insert":
            return f"Insert {self.value} at position _{self.position}_ in {self.collection}."
        if operation == "clear":
            return f"Clear {self.collection}."
        if operation == "update":
            return f"Update {self.position} in {self.collection} to {self.value}."
        return f"<collection mutation: {operation}>"

    def desugar(self) -> "CollectionMutationStatement":
        """Desugar collection mutation statement by recursively desugaring components.

        Returns:
            A new CollectionMutationStatement with desugared collection, value,
            and (when it is an expression) position.
        """
        collection = self.collection.desugar()
        # Presence check rather than truthiness: a value node must never be
        # dropped just because it evaluates falsy.
        value = self.value.desugar() if self.value is not None else None
        # Ordinal strings and integer positions pass through unchanged.
        position = self.position.desugar() if isinstance(self.position, Expression) else self.position
        return CollectionMutationStatement(
            self.token,
            self.operation,
            collection,
            value,
            position,
            self.position_type,
        )

    def to_hir(self) -> "CollectionMutationStatement":
        """Convert collection mutation to HIR representation.

        Converts one-based user indices to zero-based for internal use:
        the ordinals first/second/third become numeric indices 0/1/2, and an
        integer numeric position is decremented by one. The ordinal "last" is
        kept as the (lower-cased) string "last"; expression positions are
        converted with their own ``to_hir`` when available.

        Returns:
            HIR representation with adjusted indices.
        """
        hir_collection = self.collection.to_hir() if hasattr(self.collection, "to_hir") else self.collection

        hir_value = None
        if self.value is not None:
            hir_value = self.value.to_hir() if hasattr(self.value, "to_hir") else self.value

        hir_position = self.position
        if self.position_type == "ordinal":
            if isinstance(self.position, str):
                ordinal = self.position.lower()
                if ordinal in self._ORDINAL_TO_INDEX:
                    # The position is now an index, so report it as numeric.
                    return CollectionMutationStatement(
                        self.token,
                        self.operation,
                        hir_collection,
                        hir_value,
                        self._ORDINAL_TO_INDEX[ordinal],
                        "numeric",
                    )
                if ordinal == "last":
                    # Matched case-insensitively like the other ordinals and
                    # normalised so later stages see a single spelling.
                    hir_position = "last"
        elif self.position_type == "numeric":
            if isinstance(self.position, int):
                hir_position = self.position - 1  # one-based -> zero-based
            elif isinstance(self.position, Expression):
                # Expression indices are not adjusted here; the original note
                # says this is handled in MIR generation.
                hir_position = self.position.to_hir() if hasattr(self.position, "to_hir") else self.position
        elif isinstance(self.position, Expression):
            hir_position = self.position.to_hir() if hasattr(self.position, "to_hir") else self.position

        return CollectionMutationStatement(
            self.token,
            self.operation,
            hir_collection,
            hir_value,
            hir_position,
            self.position_type,
        )
1098
+
1099
+
1100
class FunctionStatement(Statement):
    """Unified function node for Actions, Interactions, and Utilities.

    ActionStatement, InteractionStatement, and UtilityStatement all desugar to
    this node; the original kind is carried by the visibility modifier.

    Attributes:
        visibility: The visibility level (PRIVATE, PUBLIC, or FUNCTION).
        name: The identifier naming the function.
        inputs: List of input parameters.
        outputs: List of outputs.
        body: The block of statements that make up the function body.
        description: Optional description.
    """

    def __init__(
        self,
        token: Token,
        visibility: FunctionVisibility,
        name: Identifier,
        inputs: list[Parameter] | None = None,
        outputs: list[Output] | None = None,
        body: BlockStatement | None = None,
        description: str = "",
    ) -> None:
        """Create a function statement.

        Args:
            token: The token that begins this statement.
            visibility: The visibility level of the function.
            name: The identifier naming the function.
            inputs: List of input parameters (defaults to empty list).
            outputs: List of outputs (defaults to empty list).
            body: The block of statements in the function body; a new empty
                block built from ``token`` is used when omitted.
            description: Optional description.
        """
        super().__init__(token)
        self.visibility = visibility
        self.name = name
        self.inputs = [] if inputs is None else inputs
        self.outputs = [] if outputs is None else outputs
        self.body = BlockStatement(token) if body is None else body
        self.description = description

    def __str__(self) -> str:
        """Render the function as text.

        Returns:
            The keyword for the visibility ("action", "interaction", or
            "utility") and the name, an optional "(inputs: ...)" list, an
            optional " -> ..." output list, then the body wrapped in braces.
        """
        keyword_by_visibility = {
            FunctionVisibility.PRIVATE: "action",
            FunctionVisibility.PUBLIC: "interaction",
            FunctionVisibility.FUNCTION: "utility",
        }
        keyword = keyword_by_visibility[self.visibility]

        rendered_inputs = ", ".join(map(str, self.inputs))
        rendered_outputs = ", ".join(map(str, self.outputs))
        header = f"{keyword} {self.name}"
        if rendered_inputs:
            header += f"(inputs: {rendered_inputs})"
        if rendered_outputs:
            header += f" -> {rendered_outputs}"
        return f"{header} {{\n{self.body}\n}}"

    def desugar(self) -> "FunctionStatement":
        """Build a new function statement from desugared name and body.

        Inputs and outputs are passed through as-is (they are not desugared).

        Returns:
            A new FunctionStatement with desugared components.
        """
        block: BlockStatement | None = None
        if self.body:
            lowered = self.body.desugar()
            if not isinstance(lowered, BlockStatement):
                # Keep the function body block-shaped.
                wrapper = BlockStatement(self.token)
                wrapper.statements = [lowered]
                lowered = wrapper
            block = lowered

        function_name = self.name.desugar() if hasattr(self.name, "desugar") else self.name
        return FunctionStatement(
            self.token,
            self.visibility,
            function_name,
            self.inputs,
            self.outputs,
            block,
            self.description,
        )
1194
+
1195
+
1196
class WhileStatement(Statement):
    """A while loop statement in Machine Dialect™.

    While statements have the shape "While <condition>: <body>" and repeat the
    body block for as long as the condition evaluates to true.

    Attributes:
        condition: The expression to evaluate for loop continuation.
        body: The block of statements to execute while condition is true.
    """

    def __init__(self, token: Token, condition: Expression | None = None, body: BlockStatement | None = None) -> None:
        """Create a while statement.

        Args:
            token: The 'while' token that begins the statement.
            condition: The loop condition expression.
            body: The block of statements to execute.
        """
        super().__init__(token)
        self.condition = condition
        self.body = body

    def __str__(self) -> str:
        """Render the loop as text.

        Returns:
            "While <condition>:" followed, when a body is set, by a newline
            and the body.
        """
        header = f"While {self.condition}:"
        if not self.body:
            return header
        return f"{header}\n{self.body}"

    def desugar(self) -> "WhileStatement":
        """Build a new while statement from the desugared condition and body.

        Returns:
            A new WhileStatement with desugared components.
        """
        result = WhileStatement(self.token)
        if self.condition:
            result.condition = self.condition.desugar()
        if self.body:
            lowered = self.body.desugar()
            if not isinstance(lowered, BlockStatement):
                # Defensive: keep the loop body block-shaped.
                wrapper = BlockStatement(self.token)
                wrapper.statements = [lowered]
                lowered = wrapper
            result.body = lowered
        return result

    def to_hir(self) -> "WhileStatement":
        """Convert to HIR; for while loops this is the desugared form.

        Returns:
            HIR representation of the while statement.
        """
        return self.desugar()
1257
+
1258
+
1259
+ class ForEachStatement(Statement):
1260
+ """A for-each loop statement in Machine Dialect™.
1261
+
1262
+ For-each statements follow the pattern: "For each <item> in <collection>: <body>"
1263
+ They iterate over each element in a collection.
1264
+
1265
+ Attributes:
1266
+ item: The identifier for the loop variable.
1267
+ collection: The expression that evaluates to the collection to iterate over.
1268
+ body: The block of statements to execute for each item.
1269
+ """
1270
+
1271
+ # Class-level counter for generating unique synthetic variable names
1272
+ _gensym_counter = 0
1273
+
1274
+ def __init__(
1275
+ self,
1276
+ token: Token,
1277
+ item: Identifier | None = None,
1278
+ collection: Expression | None = None,
1279
+ body: BlockStatement | None = None,
1280
+ ) -> None:
1281
+ """Initialize a ForEachStatement node.
1282
+
1283
+ Args:
1284
+ token: The 'for' token that begins the statement.
1285
+ item: The loop variable identifier.
1286
+ collection: The collection to iterate over.
1287
+ body: The block of statements to execute.
1288
+ """
1289
+ super().__init__(token)
1290
+ self.item = item
1291
+ self.collection = collection
1292
+ self.body = body
1293
+
1294
+ def __str__(self) -> str:
1295
+ """Return the string representation of the for-each statement.
1296
+
1297
+ Returns:
1298
+ A string like "For each <item> in <collection>: <body>".
1299
+ """
1300
+ out = f"For each {self.item} in {self.collection}:"
1301
+ if self.body:
1302
+ out += f"\n{self.body}"
1303
+ return out
1304
+
1305
+ @classmethod
1306
+ def _gensym(cls, prefix: str) -> Identifier:
1307
+ """Generate a unique identifier for internal synthetic variables.
1308
+
1309
+ Uses a $ prefix which is not valid in user-defined identifiers
1310
+ to guarantee no name collisions.
1311
+
1312
+ Args:
1313
+ prefix: A descriptive prefix for the synthetic variable.
1314
+
1315
+ Returns:
1316
+ A unique Identifier that cannot collide with user variables.
1317
+ """
1318
+ cls._gensym_counter += 1
1319
+ # Use $ prefix to ensure no collision with user variables
1320
+ # $ is not a valid character in Machine Dialect identifiers
1321
+ name = f"${prefix}_{cls._gensym_counter}"
1322
+ # Create a synthetic token for the identifier
1323
+ synthetic_token = Token(TokenType.MISC_IDENT, name, 0, 0)
1324
+ return Identifier(synthetic_token, name)
1325
+
1326
def desugar(self) -> "Statement":
    """Desugar this for-each loop into an index-driven while loop.

    Transforms:
        For each `item` in `collection`:
            body

    Into (``index`` and ``length`` are fresh synthetic variables produced
    by ``_gensym``):
        index = 0
        length = len(collection)
        While index < length:
            item = collection[index]
            body
            index = index + 1

    Returns:
        A BlockStatement containing, in order, the index initialisation,
        the length initialisation and the WhileStatement. If the loop is
        malformed (``item`` or ``collection`` is missing), an empty
        WhileStatement built from this node's token is returned instead.
    """
    if not self.item or not self.collection:
        # Malformed loop: nothing to iterate over, hand back an empty while.
        return WhileStatement(self.token)

    # Imported here to avoid circular imports.
    from machine_dialect.ast.call_expression import CallExpression
    from machine_dialect.ast.expressions import Arguments, CollectionAccessExpression, InfixExpression
    from machine_dialect.ast.literals import WholeNumberLiteral

    def synthetic(token_type: TokenType, literal: str) -> Token:
        """Build a fresh token for a generated node (no source location)."""
        return Token(token_type, literal, 0, 0)

    # Fresh synthetic variables; the index is generated before the length.
    index_var = self._gensym("foreach_idx")
    length_var = self._gensym("foreach_len")

    # index = 0
    init_index = SetStatement(
        synthetic(TokenType.KW_SET, "Set"),
        index_var,
        WholeNumberLiteral(synthetic(TokenType.LIT_WHOLE_NUMBER, "0"), 0),
    )

    # length = len(collection)
    # The guard above guarantees self.collection is present, so it can be
    # desugared unconditionally. It is desugared once per use site so the
    # len() call and the element access each receive their own result.
    call_args = Arguments(synthetic(TokenType.MISC_IDENT, "args"))
    call_args.positional = [self.collection.desugar()]
    call_args.named = []
    len_call = CallExpression(
        synthetic(TokenType.MISC_IDENT, "len"),
        Identifier(synthetic(TokenType.MISC_IDENT, "len"), "len"),
        call_args,
    )
    init_length = SetStatement(synthetic(TokenType.KW_SET, "Set"), length_var, len_call)

    # While condition: index < length
    condition = InfixExpression(synthetic(TokenType.OP_LT, "<"), "<", index_var)
    condition.right = length_var

    # item = collection[index]
    collection_access = CollectionAccessExpression(
        synthetic(TokenType.MISC_IDENT, "access"),  # Token for the access operation
        self.collection.desugar(),
        index_var,
        "numeric",  # Using numeric access type
    )
    set_item = SetStatement(synthetic(TokenType.KW_SET, "Set"), self.item, collection_access)
    loop_statements = [set_item]

    # Original body: a desugared block is flattened into the loop body,
    # anything else is appended as a single statement.
    if self.body:
        desugared_body = self.body.desugar()
        if isinstance(desugared_body, BlockStatement):
            loop_statements.extend(desugared_body.statements)
        else:
            loop_statements.append(desugared_body)

    # index = index + 1
    increment = InfixExpression(synthetic(TokenType.OP_PLUS, "+"), "+", index_var)
    increment.right = WholeNumberLiteral(synthetic(TokenType.LIT_WHOLE_NUMBER, "1"), 1)
    loop_statements.append(SetStatement(synthetic(TokenType.KW_SET, "Set"), index_var, increment))

    while_body = BlockStatement(self.token)
    while_body.statements = loop_statements
    while_stmt = WhileStatement(synthetic(TokenType.KW_WHILE, "While"), condition, while_body)

    # A single Statement must be returned, so the two initialisers and the
    # loop are wrapped together in one block.
    result_block = BlockStatement(self.token)
    result_block.statements = [init_index, init_length, while_stmt]
    return result_block
1426
+
1427
def to_hir(self) -> "Statement":
    """Produce the HIR form of this for-each loop.

    The conversion is delegated entirely to ``desugar()``; no further
    processing is applied to its result.

    Returns:
        The statement built by ``desugar()``.
    """
    lowered = self.desugar()
    return lowered