machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268):
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,245 @@
1
+ """Test for-each statement desugaring to while loops."""
2
+
3
+ from machine_dialect.ast import Identifier
4
+ from machine_dialect.ast.call_expression import CallExpression
5
+ from machine_dialect.ast.literals import OrderedListLiteral, StringLiteral, WholeNumberLiteral
6
+ from machine_dialect.ast.statements import BlockStatement, ForEachStatement, SetStatement
7
+ from machine_dialect.lexer import Token, TokenType
8
+
9
+
10
class TestForEachDesugaring:
    """Exercise ``ForEachStatement.desugar()`` and its synthetic-variable generator."""

    @staticmethod
    def _ident(name: str, line: int, column: int) -> Identifier:
        """Build an ``Identifier`` backed by a ``MISC_IDENT`` token at the given position."""
        return Identifier(Token(TokenType.MISC_IDENT, name, line, column), name)

    def test_basic_foreach_desugaring(self) -> None:
        """Check the complete shape produced by desugaring a simple for-each loop."""
        from machine_dialect.ast.expressions import CollectionAccessExpression, InfixExpression
        from machine_dialect.ast.statements import WhileStatement

        # Loop being modelled: iterate `item` over `items`, with a body made of a
        # single Set statement that assigns `item` to `result`.
        loop_token = Token(TokenType.KW_FOR, "for", 1, 1)
        loop_var = self._ident("item", 1, 10)
        source = self._ident("items", 1, 20)

        assignment = SetStatement(
            Token(TokenType.KW_SET, "Set", 2, 1),
            self._ident("result", 2, 5),
            loop_var,  # the very same Identifier object that the loop header uses
        )
        loop_body = BlockStatement(loop_token)
        loop_body.statements = [assignment]

        lowered = ForEachStatement(loop_token, item=loop_var, collection=source, body=loop_body).desugar()

        # The result is a block of exactly: index init, length init, while loop.
        assert isinstance(lowered, BlockStatement)
        assert len(lowered.statements) == 3
        index_init, length_init, loop = lowered.statements

        # [0] synthetic index variable initialised to the literal 0
        assert isinstance(index_init, SetStatement)
        assert isinstance(index_init.name, Identifier)
        assert index_init.name.value.startswith("$foreach_idx_")
        assert isinstance(index_init.value, WholeNumberLiteral)
        assert index_init.value.value == 0

        # [1] synthetic length variable initialised from a call to `len`
        assert isinstance(length_init, SetStatement)
        assert isinstance(length_init.name, Identifier)
        assert length_init.name.value.startswith("$foreach_len_")
        length_call = length_init.value
        assert length_call is not None
        assert isinstance(length_call, CallExpression)
        callee = length_call.function_name
        assert callee is not None
        assert isinstance(callee, Identifier)
        assert callee.value == "len"

        # [2] while loop guarded by `<index> < <length>`
        assert isinstance(loop, WhileStatement)
        guard = loop.condition
        assert isinstance(guard, InfixExpression)
        assert guard.operator == "<"
        assert isinstance(guard.left, Identifier)
        assert guard.left.value.startswith("$foreach_idx_")
        assert isinstance(guard.right, Identifier)
        assert guard.right.value.startswith("$foreach_len_")

        # Loop body holds at least: element fetch, the original body, index increment.
        assert isinstance(loop.body, BlockStatement)
        inner = loop.body.statements
        assert len(inner) >= 3
        head, tail = inner[0], inner[-1]

        # First statement binds the user's loop variable to an element access.
        assert isinstance(head, SetStatement)
        assert head.name is not None
        assert head.name.value == "item"
        assert isinstance(head.value, CollectionAccessExpression)

        # Last statement reassigns the synthetic index using a `+` expression.
        assert isinstance(tail, SetStatement)
        assert tail.name is not None
        assert tail.name.value.startswith("$foreach_idx_")
        assert isinstance(tail.value, InfixExpression)
        assert tail.value.operator == "+"

    def test_foreach_with_literal_collection(self) -> None:
        """A list literal used as the collection still yields the three-statement block."""
        loop_token = Token(TokenType.KW_FOR, "for", 1, 1)
        loop_var = self._ident("fruit", 1, 10)

        fruits = OrderedListLiteral(
            Token(TokenType.MISC_IDENT, "[", 1, 20),
            [
                StringLiteral(Token(TokenType.LIT_TEXT, text, 1, column), text)
                for text, column in (("apple", 22), ("banana", 30))
            ],
        )

        statement = ForEachStatement(
            loop_token,
            item=loop_var,
            collection=fruits,
            body=BlockStatement(loop_token),  # body block with no statements assigned
        )
        lowered = statement.desugar()

        assert isinstance(lowered, BlockStatement)
        assert len(lowered.statements) == 3

    def test_foreach_empty_body(self) -> None:
        """Passing ``body=None`` still yields the three-statement block."""
        statement = ForEachStatement(
            Token(TokenType.KW_FOR, "for", 1, 1),
            item=self._ident("x", 1, 10),
            collection=self._ident("xs", 1, 15),
            body=None,
        )
        lowered = statement.desugar()

        assert isinstance(lowered, BlockStatement)
        assert len(lowered.statements) == 3

    def test_foreach_malformed_missing_parts(self) -> None:
        """A for-each without a collection desugars to an empty ``WhileStatement``."""
        from machine_dialect.ast.statements import WhileStatement

        loop_token = Token(TokenType.KW_FOR, "for", 1, 1)
        statement = ForEachStatement(
            loop_token,
            item=self._ident("x", 1, 10),
            collection=None,  # the malformed part under test
            body=BlockStatement(loop_token),
        )
        lowered = statement.desugar()

        assert isinstance(lowered, WhileStatement)
        assert lowered.condition is None
        assert lowered.body is None

    def test_gensym_uniqueness(self) -> None:
        """``_gensym`` numbers names from one shared counter and prefixes them with ``$``."""
        # Pin the class-level counter so the generated suffixes are predictable,
        # and put it back afterwards so other tests are unaffected.
        saved_counter = ForEachStatement._gensym_counter
        ForEachStatement._gensym_counter = 0
        try:
            generated = [ForEachStatement._gensym(prefix) for prefix in ("test", "test", "other")]

            # The counter is shared across prefixes, hence 1, 2, 3.
            assert [symbol.value for symbol in generated] == ["$test_1", "$test_2", "$other_3"]

            # The `$` prefix is what marks these names as synthetic.
            assert all(symbol.value.startswith("$") for symbol in generated)
        finally:
            ForEachStatement._gensym_counter = saved_counter

    def test_nested_foreach_unique_variables(self) -> None:
        """Two separately desugared for-each loops never share synthetic variable names."""

        def build_loop(item_name: str, collection_name: str, line: int) -> ForEachStatement:
            # Header token and body token are distinct Token instances with equal fields.
            return ForEachStatement(
                Token(TokenType.KW_FOR, "for", line, 1),
                item=self._ident(item_name, line, 10),
                collection=self._ident(collection_name, line, 15),
                body=BlockStatement(Token(TokenType.KW_FOR, "for", line, 1)),
            )

        outer_loop = build_loop("x", "xs", 1)
        inner_loop = build_loop("y", "ys", 2)

        outer_block = outer_loop.desugar()
        inner_block = inner_loop.desugar()
        assert isinstance(outer_block, BlockStatement)
        assert isinstance(inner_block, BlockStatement)

        # Collect the index and length variable names (statements 0 and 1) of each block.
        synthetic_names: set[str] = set()
        for block in (outer_block, inner_block):
            for position in (0, 1):
                initializer = block.statements[position]
                assert isinstance(initializer, SetStatement)
                assert initializer.name is not None
                synthetic_names.add(initializer.name.value)

        # Four distinct names means no collisions between the two loops.
        assert len(synthetic_names) == 4, "All synthetic variables should be unique"
        assert all(name.startswith("$") for name in synthetic_names)
@@ -0,0 +1,6 @@
1
+ """Context Free Grammar module for Machine Dialect™ with GPT-5 integration."""
2
+
3
+ from .openai_generation import generate_with_openai, validate_model_support
4
+ from .parser import CFGParser
5
+
6
+ __all__ = ["CFGParser", "generate_with_openai", "validate_model_support"]
@@ -0,0 +1,156 @@
1
+ """Configuration module for Machine Dialect™ AI API settings."""
2
+
3
+ import configparser
4
+ import os
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+
9
+ @dataclass
10
+ class AIAPIConfig:
11
+ """Configuration for AI API settings.
12
+
13
+ Attributes:
14
+ model: The AI model to use (e.g., 'gpt-5', 'gpt-5-mini').
15
+ key: The API key for authentication.
16
+ """
17
+
18
+ model: str | None = None
19
+ key: str | None = None
20
+
21
+ def is_valid(self) -> bool:
22
+ """Check if the configuration has all required fields.
23
+
24
+ Returns:
25
+ True if both model and key are set, False otherwise.
26
+ """
27
+ return self.model is not None and self.key is not None
28
+
29
+ def with_defaults(self) -> "AIAPIConfig":
30
+ """Return a config with default values filled in.
31
+
32
+ Returns:
33
+ Config with defaults applied where values are missing.
34
+ """
35
+ if self.model is None:
36
+ self.model = "gpt-5" # Default to GPT-5 for CFG support
37
+ return self
38
+
39
+
40
class ConfigLoader:
    """Loader for Machine Dialect™ configuration.

    Settings are gathered from a ``.mdconfig`` file in the user's home directory
    and from environment variables. The first successful ``load()`` result is
    cached on the instance and returned by later calls.
    """

    CONFIG_FILE_NAME = ".mdconfig"
    ENV_MODEL_KEY = "MD_AI_API_MODEL"
    ENV_API_KEY = "MD_AI_API_KEY"

    def __init__(self) -> None:
        """Initialize the configuration loader with an empty cache."""
        self._config: AIAPIConfig | None = None

    def load(self) -> AIAPIConfig:
        """Load AI API configuration from file and environment.

        Precedence, highest first:
            1. Environment variables (MD_AI_API_MODEL and MD_AI_API_KEY); a
               non-empty value overrides whatever the file provided.
            2. .mdconfig file in the user's home directory.
            3. Legacy environment variable (OPENAI_API_KEY), consulted for the
               key only when neither of the sources above supplied one.

        Returns:
            AIAPIConfig object with loaded settings. Fields that no source
            provided are left as None.
        """
        if self._config is not None:
            return self._config

        config = AIAPIConfig()

        # Start from the .mdconfig file, if there is one.
        config_file_path = Path.home() / self.CONFIG_FILE_NAME
        if config_file_path.exists():
            config = self._load_from_file(config_file_path)

        # Non-empty environment variables win over file values.
        env_model = os.getenv(self.ENV_MODEL_KEY)
        if env_model:
            config.model = env_model

        env_key = os.getenv(self.ENV_API_KEY)
        if env_key:
            config.key = env_key
        elif not config.key:
            # Fallback to legacy OPENAI_API_KEY for backward compatibility
            config.key = os.getenv("OPENAI_API_KEY")

        self._config = config
        return config

    def _load_from_file(self, config_file_path: Path) -> AIAPIConfig:
        """Load configuration from a .mdconfig file.

        Only the ``[ai-api]`` section is consulted. A missing section, or a file
        that ``configparser`` cannot parse, yields an empty config so that
        environment variables can still supply the settings; in the unparsable
        case a warning is issued. Empty ``model``/``key`` entries are treated as
        unset, matching how empty environment variables are handled in ``load``.

        Args:
            config_file_path: Path to the configuration file.

        Returns:
            AIAPIConfig object with settings from the file.
        """
        import warnings  # local import keeps this fix self-contained

        ai_config = AIAPIConfig()
        config_parser = configparser.ConfigParser()

        try:
            config_parser.read(config_file_path)
            if config_parser.has_section("ai-api"):
                ai_section = config_parser["ai-api"]
                # `or None` turns an empty "model =" / "key =" entry into "unset".
                ai_config.model = ai_section.get("model") or None
                ai_config.key = ai_section.get("key") or None
        except configparser.Error as exc:
            # Report only the exception type: configparser error messages can
            # quote the offending line, which might contain the API key.
            warnings.warn(
                f"Ignoring unreadable configuration file {config_file_path} ({type(exc).__name__})",
                stacklevel=2,
            )
            return AIAPIConfig()

        return ai_config

    def get_error_message(self) -> str:
        """Get a helpful error message for missing configuration.

        Returns:
            Error message with instructions for setting up configuration.
        """
        config_path = Path.home() / self.CONFIG_FILE_NAME

        return f"""AI API configuration not found.

Please configure the AI API in one of the following ways:

1. Create a {self.CONFIG_FILE_NAME} file in your home directory ({config_path}):

[ai-api]
model = gpt-5
key = your_api_key_here

Note: Only GPT-5 models (gpt-5, gpt-5-mini, gpt-5-nano) support
context-free grammar constraints required for Machine Dialect™ generation.

2. Set environment variables:

export {self.ENV_MODEL_KEY}=gpt-5
export {self.ENV_API_KEY}=your_api_key_here

3. For backward compatibility, you can also use:

export OPENAI_API_KEY=your_api_key_here
(Note: Model will default to gpt-5 if not specified)

To get an API key: https://platform.openai.com/api-keys"""
144
+
145
+
146
def get_ai_config() -> AIAPIConfig:
    """Return the AI API configuration currently in effect.

    Convenience wrapper: builds a fresh ConfigLoader on every call and
    returns whatever its ``load()`` produces.

    Returns:
        AIAPIConfig object with current settings.
    """
    return ConfigLoader().load()
@@ -0,0 +1,221 @@
1
+ """Examples and usage of the CFG module for Machine Dialect™.
2
+
3
+ This module provides demonstration examples of how to use the CFG (Context-Free Grammar)
4
+ parser for Machine Dialect™ code. It includes examples of parsing variable assignments,
5
+ conditional statements, logical operations, and code validation.
6
+
7
+ The examples show:
8
+ - Basic parsing of Machine Dialect™ syntax
9
+ - Handling conditional statements with if/else blocks
10
+ - Working with logical operations and boolean values
11
+ - Code validation to check syntax correctness
12
+ - Pretty-printing of Abstract Syntax Trees (AST)
13
+
14
+ Example:
15
+ Run all examples from the command line::
16
+
17
+ $ python -m machine_dialect.cfg.examples
18
+ """
19
+
20
+ from machine_dialect.cfg import CFGParser
21
+
22
+
23
def example_parse_code() -> None:
    """Demonstrate parsing Machine Dialect™ code with the CFG parser.

    Three snippets are parsed in turn with a single ``CFGParser``:
        1. Simple variable assignment and arithmetic operations
        2. Conditional statements with if/else blocks
        3. Logical operations with boolean values

    For each snippet the title and source are printed, followed by either
    the pretty-printed Abstract Syntax Tree (AST) or a ``Parse failed: ...``
    line. A ``ValueError`` raised for one snippet is caught and reported,
    so it never propagates to the caller and the remaining snippets still
    run.

    Example:
        >>> example_parse_code()
        Example 1: Simple arithmetic
        Code: ...
        Parse successful!
        AST:
        ...
    """
    parser = CFGParser()

    # Example 1: Simple variable assignment and output
    code1 = """
Set `x` to _10_.
Set `y` to _20_.
Set `sum` to `x` + `y`.
Say `sum`.
"""

    # Example 2: Conditional statement
    code2 = """
Set `age` to _18_.
If `age` is greater than _17_ then:
> Say _"You are an adult."_.
Else:
> Say _"You are a minor."_.
"""

    # Example 3: Logical operations
    code3 = """
Set `is_raining` to _yes_.
Set `have_umbrella` to _no_.
Set `get_wet` to `is_raining` and not `have_umbrella`.
If `get_wet` then:
> Say _"You will get wet!"_.
"""

    examples = (
        ("Example 1: Simple arithmetic", code1),
        ("Example 2: Conditional", code2),
        ("Example 3: Logical operations", code3),
    )

    for index, (title, code) in enumerate(examples):
        # The divider goes between examples only, not after the last one.
        if index:
            print("\n" + "=" * 50 + "\n")
        _run_parse_example(parser, title, code)


def _run_parse_example(parser, title: str, code: str) -> None:
    """Print one snippet, parse it, and print the AST or the parse error."""
    print(title)
    print("Code:", code)
    try:
        tree = parser.parse(code)
        print("Parse successful!")
        print("AST:")
        print(parser.pretty_print(tree))
    except ValueError as e:
        # Report and continue: the examples are a demo, not a test suite.
        print(f"Parse failed: {e}")
105
+
106
+
107
def example_generate_prompt() -> None:
    """Placeholder for the GPT-5 CFG generation prompt examples.

    No prompt generation happens here yet; the function only prints a
    notice that the feature is coming. It is meant to later show how to
    build prompts that guide AI models towards valid Machine Dialect™
    code following the CFG rules.

    Note:
        This functionality is not yet implemented and will be added
        in a future version.

    Todo:
        * Implement prompt generation for CFG-based code generation
        * Add examples of different prompt types
        * Include validation of generated code
    """
    # Nothing to demonstrate yet; just tell the user.
    notice = "CFG generation functionality coming soon."
    print(notice)
126
+
127
+
128
def example_validate_code() -> None:
    """Demonstrate validation of Machine Dialect™ code syntax.

    Two snippets are passed to ``CFGParser.validate``: one written with
    backticks around variables and a period ending each statement, and one
    that omits both. For each snippet a heading, the snippet itself and a
    verdict line (``✓ Code is valid!`` or ``✗ Code is invalid!``) are
    printed, with a divider between the two sections.

    Example:
        >>> example_validate_code()
        Validating valid code:
        <BLANKLINE>
        Set `name` to _"Alice"_.
        Say `name`.
        <BLANKLINE>
        ...
    """
    parser = CFGParser()

    # Valid code
    valid_code = """
Set `name` to _"Alice"_.
Say `name`.
"""

    # Invalid code
    invalid_code = """
Set x to 10
Say x
"""

    cases = (
        ("Validating valid code:", valid_code),
        ("Validating invalid code (missing backticks and periods):", invalid_code),
    )

    for index, (heading, code) in enumerate(cases):
        # The divider separates the two sections; none is printed at the end.
        if index:
            print("\n" + "=" * 50 + "\n")
        print(heading)
        print(code)
        if parser.validate(code):
            print("✓ Code is valid!")
        else:
            print("✗ Code is invalid!")
176
+
177
+
178
def main() -> None:
    """Run every CFG parser example, one section after another.

    A banner is printed first, then each section gets a numbered heading
    and a dashed rule before its example function runs, and a closing
    banner ends the output.

    Sections, in order:
        1. Parsing examples - demonstrating code parsing
        2. Generation prompt examples - placeholder for future features
        3. Validation examples - showing syntax validation

    Example:
        >>> main()
        ============================================================
        CFG Parser Examples
        ============================================================
        ...
    """
    banner = "=" * 60
    rule = "-" * 40

    print(banner)
    print("CFG Parser Examples")
    print(banner)
    print()

    # Headings after the first start with a newline to space the sections.
    sections = (
        ("1. PARSING EXAMPLES", example_parse_code),
        ("\n2. GENERATION PROMPT EXAMPLES", example_generate_prompt),
        ("\n3. VALIDATION EXAMPLES", example_validate_code),
    )
    for heading, run_section in sections:
        print(heading)
        print(rule)
        run_section()

    print("\n" + banner)
    print("Examples complete!")
218
+
219
+
220
# Run the examples only when executed as a script
# (e.g. ``python -m machine_dialect.cfg.examples``), not on import.
if __name__ == "__main__":
    main()