PyPI - machine-dialect - Versions diffs - 0.1.0a1__py3-none-any.whl - Mend

machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268) hide show

machine_dialect/__main__.py +667 -0
machine_dialect/agent/__init__.py +5 -0
machine_dialect/agent/agent.py +360 -0
machine_dialect/ast/__init__.py +95 -0
machine_dialect/ast/ast_node.py +35 -0
machine_dialect/ast/call_expression.py +82 -0
machine_dialect/ast/dict_extraction.py +60 -0
machine_dialect/ast/expressions.py +439 -0
machine_dialect/ast/literals.py +309 -0
machine_dialect/ast/program.py +35 -0
machine_dialect/ast/statements.py +1433 -0
machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
machine_dialect/ast/tests/test_boolean_literal.py +29 -0
machine_dialect/ast/tests/test_collection_hir.py +138 -0
machine_dialect/ast/tests/test_define_statement.py +142 -0
machine_dialect/ast/tests/test_desugar.py +541 -0
machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
machine_dialect/cfg/__init__.py +6 -0
machine_dialect/cfg/config.py +156 -0
machine_dialect/cfg/examples.py +221 -0
machine_dialect/cfg/generate_with_ai.py +187 -0
machine_dialect/cfg/openai_generation.py +200 -0
machine_dialect/cfg/parser.py +94 -0
machine_dialect/cfg/tests/__init__.py +1 -0
machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
machine_dialect/cfg/tests/test_config.py +188 -0
machine_dialect/cfg/tests/test_examples.py +391 -0
machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
machine_dialect/cfg/tests/test_openai_generation.py +256 -0
machine_dialect/codegen/__init__.py +5 -0
machine_dialect/codegen/bytecode_module.py +89 -0
machine_dialect/codegen/bytecode_serializer.py +300 -0
machine_dialect/codegen/opcodes.py +101 -0
machine_dialect/codegen/register_codegen.py +1996 -0
machine_dialect/codegen/symtab.py +208 -0
machine_dialect/codegen/tests/__init__.py +1 -0
machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
machine_dialect/codegen/tests/test_symtab.py +418 -0
machine_dialect/codegen/vm_serializer.py +621 -0
machine_dialect/compiler/__init__.py +18 -0
machine_dialect/compiler/compiler.py +197 -0
machine_dialect/compiler/config.py +149 -0
machine_dialect/compiler/context.py +149 -0
machine_dialect/compiler/phases/__init__.py +19 -0
machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
machine_dialect/compiler/phases/codegen.py +40 -0
machine_dialect/compiler/phases/hir_generation.py +39 -0
machine_dialect/compiler/phases/mir_generation.py +86 -0
machine_dialect/compiler/phases/optimization.py +110 -0
machine_dialect/compiler/phases/parsing.py +39 -0
machine_dialect/compiler/pipeline.py +143 -0
machine_dialect/compiler/tests/__init__.py +1 -0
machine_dialect/compiler/tests/test_compiler.py +568 -0
machine_dialect/compiler/vm_runner.py +173 -0
machine_dialect/errors/__init__.py +32 -0
machine_dialect/errors/exceptions.py +369 -0
machine_dialect/errors/messages.py +82 -0
machine_dialect/errors/tests/__init__.py +0 -0
machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
machine_dialect/errors/tests/test_name_errors.py +118 -0
machine_dialect/helpers/__init__.py +0 -0
machine_dialect/helpers/stopwords.py +225 -0
machine_dialect/helpers/validators.py +30 -0
machine_dialect/lexer/__init__.py +9 -0
machine_dialect/lexer/constants.py +23 -0
machine_dialect/lexer/lexer.py +907 -0
machine_dialect/lexer/tests/__init__.py +0 -0
machine_dialect/lexer/tests/helpers.py +86 -0
machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
machine_dialect/lexer/tests/test_comments.py +200 -0
machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
machine_dialect/lexer/tests/test_lexer_position.py +113 -0
machine_dialect/lexer/tests/test_list_tokens.py +282 -0
machine_dialect/lexer/tests/test_stopwords.py +80 -0
machine_dialect/lexer/tests/test_strict_equality.py +129 -0
machine_dialect/lexer/tests/test_token.py +41 -0
machine_dialect/lexer/tests/test_tokenization.py +294 -0
machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
machine_dialect/lexer/tests/test_url_literals.py +169 -0
machine_dialect/lexer/tokens.py +487 -0
machine_dialect/linter/__init__.py +10 -0
machine_dialect/linter/__main__.py +144 -0
machine_dialect/linter/linter.py +154 -0
machine_dialect/linter/rules/__init__.py +8 -0
machine_dialect/linter/rules/base.py +112 -0
machine_dialect/linter/rules/statement_termination.py +99 -0
machine_dialect/linter/tests/__init__.py +1 -0
machine_dialect/linter/tests/mdrules/__init__.py +0 -0
machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
machine_dialect/linter/tests/test_linter.py +81 -0
machine_dialect/linter/tests/test_rules.py +110 -0
machine_dialect/linter/tests/test_violations.py +71 -0
machine_dialect/linter/violations.py +51 -0
machine_dialect/mir/__init__.py +69 -0
machine_dialect/mir/analyses/__init__.py +20 -0
machine_dialect/mir/analyses/alias_analysis.py +315 -0
machine_dialect/mir/analyses/dominance_analysis.py +49 -0
machine_dialect/mir/analyses/escape_analysis.py +286 -0
machine_dialect/mir/analyses/loop_analysis.py +272 -0
machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
machine_dialect/mir/analyses/type_analysis.py +448 -0
machine_dialect/mir/analyses/use_def_chains.py +232 -0
machine_dialect/mir/basic_block.py +385 -0
machine_dialect/mir/dataflow.py +445 -0
machine_dialect/mir/debug_info.py +208 -0
machine_dialect/mir/hir_to_mir.py +1738 -0
machine_dialect/mir/mir_dumper.py +366 -0
machine_dialect/mir/mir_function.py +167 -0
machine_dialect/mir/mir_instructions.py +1877 -0
machine_dialect/mir/mir_interpreter.py +556 -0
machine_dialect/mir/mir_module.py +225 -0
machine_dialect/mir/mir_printer.py +480 -0
machine_dialect/mir/mir_transformer.py +410 -0
machine_dialect/mir/mir_types.py +367 -0
machine_dialect/mir/mir_validation.py +455 -0
machine_dialect/mir/mir_values.py +268 -0
machine_dialect/mir/optimization_config.py +233 -0
machine_dialect/mir/optimization_pass.py +251 -0
machine_dialect/mir/optimization_pipeline.py +355 -0
machine_dialect/mir/optimizations/__init__.py +84 -0
machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
machine_dialect/mir/optimizations/branch_prediction.py +372 -0
machine_dialect/mir/optimizations/constant_propagation.py +634 -0
machine_dialect/mir/optimizations/cse.py +398 -0
machine_dialect/mir/optimizations/dce.py +288 -0
machine_dialect/mir/optimizations/inlining.py +551 -0
machine_dialect/mir/optimizations/jump_threading.py +487 -0
machine_dialect/mir/optimizations/licm.py +405 -0
machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
machine_dialect/mir/optimizations/strength_reduction.py +422 -0
machine_dialect/mir/optimizations/tail_call.py +207 -0
machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
machine_dialect/mir/optimizations/type_narrowing.py +397 -0
machine_dialect/mir/optimizations/type_specialization.py +447 -0
machine_dialect/mir/optimizations/type_specific.py +906 -0
machine_dialect/mir/optimize_mir.py +89 -0
machine_dialect/mir/pass_manager.py +391 -0
machine_dialect/mir/profiling/__init__.py +26 -0
machine_dialect/mir/profiling/profile_collector.py +318 -0
machine_dialect/mir/profiling/profile_data.py +372 -0
machine_dialect/mir/profiling/profile_reader.py +272 -0
machine_dialect/mir/profiling/profile_writer.py +226 -0
machine_dialect/mir/register_allocation.py +302 -0
machine_dialect/mir/reporting/__init__.py +17 -0
machine_dialect/mir/reporting/optimization_reporter.py +314 -0
machine_dialect/mir/reporting/report_formatter.py +289 -0
machine_dialect/mir/ssa_construction.py +342 -0
machine_dialect/mir/tests/__init__.py +1 -0
machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
machine_dialect/mir/tests/test_algebraic_division.py +126 -0
machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
machine_dialect/mir/tests/test_basic_block.py +425 -0
machine_dialect/mir/tests/test_branch_prediction.py +459 -0
machine_dialect/mir/tests/test_call_lowering.py +168 -0
machine_dialect/mir/tests/test_collection_lowering.py +604 -0
machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
machine_dialect/mir/tests/test_custom_passes.py +166 -0
machine_dialect/mir/tests/test_debug_info.py +285 -0
machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
machine_dialect/mir/tests/test_double_negation.py +231 -0
machine_dialect/mir/tests/test_escape_analysis.py +233 -0
machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
machine_dialect/mir/tests/test_inlining.py +435 -0
machine_dialect/mir/tests/test_licm.py +472 -0
machine_dialect/mir/tests/test_mir_dumper.py +313 -0
machine_dialect/mir/tests/test_mir_instructions.py +445 -0
machine_dialect/mir/tests/test_mir_module.py +860 -0
machine_dialect/mir/tests/test_mir_printer.py +387 -0
machine_dialect/mir/tests/test_mir_types.py +123 -0
machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
machine_dialect/mir/tests/test_mir_validation.py +378 -0
machine_dialect/mir/tests/test_mir_values.py +168 -0
machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
machine_dialect/mir/tests/test_pass_manager.py +294 -0
machine_dialect/mir/tests/test_pass_registration.py +64 -0
machine_dialect/mir/tests/test_profiling.py +356 -0
machine_dialect/mir/tests/test_register_allocation.py +307 -0
machine_dialect/mir/tests/test_report_formatters.py +372 -0
machine_dialect/mir/tests/test_ssa_construction.py +433 -0
machine_dialect/mir/tests/test_tail_call.py +236 -0
machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
machine_dialect/mir/tests/test_type_narrowing.py +277 -0
machine_dialect/mir/tests/test_type_specialization.py +421 -0
machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
machine_dialect/mir/type_inference.py +368 -0
machine_dialect/parser/__init__.py +12 -0
machine_dialect/parser/enums.py +45 -0
machine_dialect/parser/parser.py +3655 -0
machine_dialect/parser/protocols.py +11 -0
machine_dialect/parser/symbol_table.py +169 -0
machine_dialect/parser/tests/__init__.py +0 -0
machine_dialect/parser/tests/helper_functions.py +193 -0
machine_dialect/parser/tests/test_action_statements.py +334 -0
machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
machine_dialect/parser/tests/test_call_statements.py +154 -0
machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
machine_dialect/parser/tests/test_collection_mutations.py +264 -0
machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
machine_dialect/parser/tests/test_define_integration.py +468 -0
machine_dialect/parser/tests/test_define_statements.py +311 -0
machine_dialect/parser/tests/test_dict_extraction.py +115 -0
machine_dialect/parser/tests/test_empty_literal.py +155 -0
machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
machine_dialect/parser/tests/test_if_empty_block.py +61 -0
machine_dialect/parser/tests/test_if_statements.py +299 -0
machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
machine_dialect/parser/tests/test_infix_expressions.py +680 -0
machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
machine_dialect/parser/tests/test_interaction_statements.py +269 -0
machine_dialect/parser/tests/test_list_literals.py +277 -0
machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
machine_dialect/parser/tests/test_parse_errors.py +114 -0
machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
machine_dialect/parser/tests/test_program.py +13 -0
machine_dialect/parser/tests/test_return_statements.py +89 -0
machine_dialect/parser/tests/test_set_statements.py +152 -0
machine_dialect/parser/tests/test_strict_equality.py +258 -0
machine_dialect/parser/tests/test_symbol_table.py +217 -0
machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
machine_dialect/parser/tests/test_utility_statements.py +423 -0
machine_dialect/parser/token_buffer.py +159 -0
machine_dialect/repl/__init__.py +3 -0
machine_dialect/repl/repl.py +426 -0
machine_dialect/repl/tests/__init__.py +0 -0
machine_dialect/repl/tests/test_repl.py +606 -0
machine_dialect/semantic/__init__.py +12 -0
machine_dialect/semantic/analyzer.py +906 -0
machine_dialect/semantic/error_messages.py +189 -0
machine_dialect/semantic/tests/__init__.py +1 -0
machine_dialect/semantic/tests/test_analyzer.py +364 -0
machine_dialect/semantic/tests/test_error_messages.py +104 -0
machine_dialect/tests/edge_cases/__init__.py +10 -0
machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
machine_dialect/tests/integration/test_list_compilation.py +395 -0
machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
machine_dialect/type_checking/__init__.py +21 -0
machine_dialect/type_checking/tests/__init__.py +1 -0
machine_dialect/type_checking/tests/test_type_system.py +230 -0
machine_dialect/type_checking/type_system.py +270 -0
machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
machine_dialect_vm/__init__.pyi +15 -0

machine_dialect/ast/tests/test_foreach_desugar.py ADDED Viewed

@@ -0,0 +1,245 @@
+"""Test for-each statement desugaring to while loops."""
+from machine_dialect.ast import Identifier
+from machine_dialect.ast.call_expression import CallExpression
+from machine_dialect.ast.literals import OrderedListLiteral, StringLiteral, WholeNumberLiteral
+from machine_dialect.ast.statements import BlockStatement, ForEachStatement, SetStatement
+from machine_dialect.lexer import Token, TokenType
+class TestForEachDesugaring:
+    """Test that for-each statements correctly desugar to while loops."""
+    def test_basic_foreach_desugaring(self) -> None:
+        """Test basic for-each loop desugaring."""
+        # Create tokens
+        for_token = Token(TokenType.KW_FOR, "for", 1, 1)
+        item_token = Token(TokenType.MISC_IDENT, "item", 1, 10)
+        collection_token = Token(TokenType.MISC_IDENT, "items", 1, 20)
+        # Create the for-each statement:
+        # For each `item` in `items`:
+        #     body
+        item_id = Identifier(item_token, "item")
+        collection_id = Identifier(collection_token, "items")
+        # Create a simple body
+        body = BlockStatement(for_token)
+        body.statements = [
+            SetStatement(
+                Token(TokenType.KW_SET, "Set", 2, 1),
+                Identifier(Token(TokenType.MISC_IDENT, "result", 2, 5), "result"),
+                item_id,
+            )
+        ]
+        foreach_stmt = ForEachStatement(for_token, item=item_id, collection=collection_id, body=body)
+        # Desugar the for-each statement
+        desugared = foreach_stmt.desugar()
+        # Should return a BlockStatement containing initialization and while loop
+        assert isinstance(desugared, BlockStatement)
+        assert len(desugared.statements) == 3  # init_index, init_length, while_stmt
+        # Check initialization statements
+        # (each isinstance assert also narrows the type for the attribute access that follows)
+        init_index = desugared.statements[0]
+        assert isinstance(init_index, SetStatement)
+        assert isinstance(init_index.name, Identifier)
+        assert init_index.name.value.startswith("$foreach_idx_")  # Synthetic variable
+        assert isinstance(init_index.value, WholeNumberLiteral)
+        # The synthetic index is expected to start at 0
+        assert init_index.value.value == 0
+        init_length = desugared.statements[1]
+        assert isinstance(init_length, SetStatement)
+        assert isinstance(init_length.name, Identifier)
+        assert init_length.name.value.startswith("$foreach_len_")  # Synthetic variable
+        assert init_length.value is not None
+        assert isinstance(init_length.value, CallExpression)
+        assert init_length.value.function_name is not None
+        assert isinstance(init_length.value.function_name, Identifier)
+        assert init_length.value.function_name.value == "len"
+        # The desugared ForEachStatement returns a BlockStatement containing:
+        # [0] Set $foreach_idx_N to 0
+        # [1] Set $foreach_len_N to len(collection)
+        # [2] WhileStatement with the loop logic
+        while_stmt = desugared.statements[2]
+        # Verify the while statement structure
+        from machine_dialect.ast.expressions import CollectionAccessExpression, InfixExpression
+        from machine_dialect.ast.statements import WhileStatement
+        assert isinstance(while_stmt, WhileStatement)
+        # Check condition: index < length
+        assert isinstance(while_stmt.condition, InfixExpression)
+        assert while_stmt.condition.operator == "<"
+        assert isinstance(while_stmt.condition.left, Identifier)
+        assert while_stmt.condition.left.value.startswith("$foreach_idx_")
+        assert isinstance(while_stmt.condition.right, Identifier)
+        assert while_stmt.condition.right.value.startswith("$foreach_len_")
+        # Check while body
+        assert isinstance(while_stmt.body, BlockStatement)
+        assert len(while_stmt.body.statements) >= 3  # set item, original body, increment
+        # First statement should set item = collection[index]
+        first_stmt = while_stmt.body.statements[0]
+        assert isinstance(first_stmt, SetStatement)
+        assert first_stmt.name is not None
+        assert first_stmt.name.value == "item"  # The original loop variable
+        assert isinstance(first_stmt.value, CollectionAccessExpression)
+        # Last statement should increment index
+        last_stmt = while_stmt.body.statements[-1]
+        assert isinstance(last_stmt, SetStatement)
+        assert last_stmt.name is not None
+        assert last_stmt.name.value.startswith("$foreach_idx_")
+        assert isinstance(last_stmt.value, InfixExpression)
+        assert last_stmt.value.operator == "+"
+    def test_foreach_with_literal_collection(self) -> None:
+        """Test for-each with a literal list as collection."""
+        # Create tokens
+        for_token = Token(TokenType.KW_FOR, "for", 1, 1)
+        item_token = Token(TokenType.MISC_IDENT, "fruit", 1, 10)
+        # Create a literal list
+        # NOTE(review): the "[" token is built with MISC_IDENT here; presumably the token type
+        # does not matter to desugar() - confirm.
+        list_token = Token(TokenType.MISC_IDENT, "[", 1, 20)
+        str1_token = Token(TokenType.LIT_TEXT, "apple", 1, 22)
+        str2_token = Token(TokenType.LIT_TEXT, "banana", 1, 30)
+        collection = OrderedListLiteral(
+            list_token, [StringLiteral(str1_token, "apple"), StringLiteral(str2_token, "banana")]
+        )
+        # Create for-each with literal collection
+        foreach_stmt = ForEachStatement(
+            for_token,
+            item=Identifier(item_token, "fruit"),
+            collection=collection,
+            body=BlockStatement(for_token),  # Empty body
+        )
+        # Desugar
+        desugared = foreach_stmt.desugar()
+        # Should still produce valid desugared form
+        assert isinstance(desugared, BlockStatement)
+        assert len(desugared.statements) == 3
+    def test_foreach_empty_body(self) -> None:
+        """Test for-each whose body is None."""
+        for_token = Token(TokenType.KW_FOR, "for", 1, 1)
+        foreach_stmt = ForEachStatement(
+            for_token,
+            item=Identifier(Token(TokenType.MISC_IDENT, "x", 1, 10), "x"),
+            collection=Identifier(Token(TokenType.MISC_IDENT, "xs", 1, 15), "xs"),
+            body=None,
+        )
+        desugared = foreach_stmt.desugar()
+        # Should still produce valid structure
+        assert isinstance(desugared, BlockStatement)
+        assert len(desugared.statements) == 3
+    def test_foreach_malformed_missing_parts(self) -> None:
+        """Test for-each with a missing collection."""
+        for_token = Token(TokenType.KW_FOR, "for", 1, 1)
+        # Missing collection
+        # NOTE(review): only the collection=None case is covered; a missing item is not exercised.
+        foreach_stmt = ForEachStatement(
+            for_token,
+            item=Identifier(Token(TokenType.MISC_IDENT, "x", 1, 10), "x"),
+            collection=None,
+            body=BlockStatement(for_token),
+        )
+        desugared = foreach_stmt.desugar()
+        # Should return an empty while statement for malformed input
+        from machine_dialect.ast.statements import WhileStatement
+        assert isinstance(desugared, WhileStatement)
+        assert desugared.condition is None
+        assert desugared.body is None
+    def test_gensym_uniqueness(self) -> None:
+        """Test that gensym generates unique variable names."""
+        # Reset counter for predictable testing
+        # (the counter lives on the class, so it is saved here and restored in the finally block)
+        original_counter = ForEachStatement._gensym_counter
+        ForEachStatement._gensym_counter = 0
+        try:
+            # Generate multiple synthetic variables
+            var1 = ForEachStatement._gensym("test")
+            var2 = ForEachStatement._gensym("test")
+            var3 = ForEachStatement._gensym("other")
+            # All should be unique
+            # (one counter is shared across prefixes, so "other" is expected to get 3, not 1)
+            assert var1.value == "$test_1"
+            assert var2.value == "$test_2"
+            assert var3.value == "$other_3"
+            # All should have $ prefix (invalid for user variables)
+            assert all(v.value.startswith("$") for v in [var1, var2, var3])
+        finally:
+            # Restore original counter
+            ForEachStatement._gensym_counter = original_counter
+    def test_nested_foreach_unique_variables(self) -> None:
+        """Test that separately desugared for-each loops get unique synthetic variables."""
+        # NOTE(review): despite the test name, inner_foreach is never placed inside
+        # outer_foreach's body; the two statements are desugared independently.
+        # Create outer for-each
+        outer_foreach = ForEachStatement(
+            Token(TokenType.KW_FOR, "for", 1, 1),
+            item=Identifier(Token(TokenType.MISC_IDENT, "x", 1, 10), "x"),
+            collection=Identifier(Token(TokenType.MISC_IDENT, "xs", 1, 15), "xs"),
+            body=BlockStatement(Token(TokenType.KW_FOR, "for", 1, 1)),
+        )
+        # Create inner for-each
+        inner_foreach = ForEachStatement(
+            Token(TokenType.KW_FOR, "for", 2, 1),
+            item=Identifier(Token(TokenType.MISC_IDENT, "y", 2, 10), "y"),
+            collection=Identifier(Token(TokenType.MISC_IDENT, "ys", 2, 15), "ys"),
+            body=BlockStatement(Token(TokenType.KW_FOR, "for", 2, 1)),
+        )
+        # Desugar both
+        outer_desugared = outer_foreach.desugar()
+        inner_desugared = inner_foreach.desugar()
+        # Extract synthetic variable names from both
+        assert isinstance(outer_desugared, BlockStatement)
+        assert isinstance(inner_desugared, BlockStatement)
+        # Narrow each statement to SetStatement and check name existence
+        outer_set0 = outer_desugared.statements[0]
+        outer_set1 = outer_desugared.statements[1]
+        inner_set0 = inner_desugared.statements[0]
+        inner_set1 = inner_desugared.statements[1]
+        assert isinstance(outer_set0, SetStatement)
+        assert isinstance(outer_set1, SetStatement)
+        assert isinstance(inner_set0, SetStatement)
+        assert isinstance(inner_set1, SetStatement)
+        assert outer_set0.name is not None
+        assert outer_set1.name is not None
+        assert inner_set0.name is not None
+        assert inner_set1.name is not None
+        outer_index_var = outer_set0.name.value
+        outer_length_var = outer_set1.name.value
+        inner_index_var = inner_set0.name.value
+        inner_length_var = inner_set1.name.value
+        # All should be unique
+        # (a set collapses duplicates, so a size of 4 means no two names collided)
+        all_vars = {outer_index_var, outer_length_var, inner_index_var, inner_length_var}
+        assert len(all_vars) == 4, "All synthetic variables should be unique"
+        # All should start with $
+        assert all(v.startswith("$") for v in all_vars)

machine_dialect/cfg/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Context Free Grammar module for Machine Dialect™ with GPT-5 integration."""
+from .openai_generation import generate_with_openai, validate_model_support
+from .parser import CFGParser
+# Public API of this package: the names imported above, re-exported for `from ... import *`.
+__all__ = ["CFGParser", "generate_with_openai", "validate_model_support"]

machine_dialect/cfg/config.py ADDED Viewed

@@ -0,0 +1,156 @@
+"""Configuration module for Machine Dialect™ AI API settings."""
+import configparser
+import os
+from dataclasses import dataclass
+from pathlib import Path
+@dataclass
+class AIAPIConfig:
+    """Configuration for AI API settings.
+    A field counts as "set" only when it holds a non-empty string: both ``None``
+    and ``""`` (for example a blank ``key =`` line in a config file) are treated
+    as missing.
+    Attributes:
+        model: The AI model to use (e.g., 'gpt-5', 'gpt-5-mini').
+        key: The API key for authentication.
+    """
+    model: str | None = None
+    key: str | None = None
+    def is_valid(self) -> bool:
+        """Check if the configuration has all required fields.
+        Returns:
+            True if both model and key are non-empty, False otherwise.
+        """
+        # Truthiness instead of ``is not None``: an empty string is neither a
+        # usable model name nor a usable API key.
+        return bool(self.model) and bool(self.key)
+    def with_defaults(self) -> "AIAPIConfig":
+        """Fill in default values for missing fields.
+        The instance is updated in place and then returned, so the call can be
+        chained.
+        Returns:
+            This same config object, with defaults applied where values are missing.
+        """
+        if not self.model:
+            self.model = "gpt-5"  # Default to GPT-5 for CFG support
+        return self
+class ConfigLoader:
+    """Loader for Machine Dialect™ configuration.
+    Reads AI API settings from a ``.mdconfig`` file in the user's home directory
+    and from environment variables, and caches the result per loader instance.
+    """
+    CONFIG_FILE_NAME = ".mdconfig"
+    ENV_MODEL_KEY = "MD_AI_API_MODEL"
+    ENV_API_KEY = "MD_AI_API_KEY"
+    def __init__(self) -> None:
+        """Initialize the configuration loader."""
+        # Filled in by the first load() call and reused afterwards.
+        self._config: AIAPIConfig | None = None
+    def load(self) -> AIAPIConfig:
+        """Load AI API configuration from file and environment.
+        Sources are applied in this order, later ones overriding earlier ones:
+        1. .mdconfig file in user's home directory
+        2. Environment variables (MD_AI_API_MODEL and MD_AI_API_KEY)
+        3. Legacy environment variable (OPENAI_API_KEY), used for the key only
+           when neither of the sources above supplied one
+        The result is cached, so repeated calls return the same object.
+        Returns:
+            AIAPIConfig object with loaded settings.
+        """
+        if self._config is not None:
+            return self._config
+        config = AIAPIConfig()
+        # Try to load from .mdconfig file
+        config_file_path = Path.home() / self.CONFIG_FILE_NAME
+        if config_file_path.exists():
+            config = self._load_from_file(config_file_path)
+        # Override or fill in with environment variables
+        env_model = os.getenv(self.ENV_MODEL_KEY)
+        if env_model:
+            config.model = env_model
+        env_key = os.getenv(self.ENV_API_KEY)
+        if env_key:
+            config.key = env_key
+        elif not config.key:
+            # Fallback to legacy OPENAI_API_KEY for backward compatibility
+            config.key = os.getenv("OPENAI_API_KEY")
+        self._config = config
+        return config
+    def _load_from_file(self, config_file_path: Path) -> AIAPIConfig:
+        """Load configuration from a .mdconfig file.
+        A file that cannot be parsed (for example one without a section header)
+        is reported with a warning and treated as empty, so that environment
+        variables can still supply the settings.
+        Args:
+            config_file_path: Path to the configuration file.
+        Returns:
+            AIAPIConfig object with settings from the file; empty if the file
+            has no [ai-api] section or cannot be parsed.
+        """
+        import warnings  # local import: only needed on the error path
+        ai_config = AIAPIConfig()
+        config_parser = configparser.ConfigParser()
+        try:
+            config_parser.read(config_file_path)
+            if config_parser.has_section("ai-api"):
+                ai_section = config_parser["ai-api"]
+                ai_config.model = ai_section.get("model")
+                ai_config.key = ai_section.get("key")
+        except configparser.Error as exc:
+            # Malformed file (missing header, duplicate option, bad interpolation, ...):
+            # discard anything partially read rather than crash the caller.
+            warnings.warn(f"Ignoring malformed config file {config_file_path}: {exc}", stacklevel=2)
+            return AIAPIConfig()
+        return ai_config
+    def get_error_message(self) -> str:
+        """Get a helpful error message for missing configuration.
+        Returns:
+            Error message with instructions for setting up configuration.
+        """
+        home_dir = Path.home()
+        config_path = home_dir / self.CONFIG_FILE_NAME
+        return f"""AI API configuration not found.
+Please configure the AI API in one of the following ways:
+1. Create a {self.CONFIG_FILE_NAME} file in your home directory ({config_path}):
+   [ai-api]
+   model = gpt-5
+   key = your_api_key_here
+   Note: Only GPT-5 models (gpt-5, gpt-5-mini, gpt-5-nano) support
+   context-free grammar constraints required for Machine Dialect™ generation.
+2. Set environment variables:
+   export {self.ENV_MODEL_KEY}=gpt-5
+   export {self.ENV_API_KEY}=your_api_key_here
+3. For backward compatibility, you can also use:
+   export OPENAI_API_KEY=your_api_key_here
+   (Note: Model will default to gpt-5 if not specified)
+To get an API key: https://platform.openai.com/api-keys"""
+def get_ai_config() -> AIAPIConfig:
+    """Load and return the current AI API configuration.
+    Shortcut for building a fresh ConfigLoader and calling its load() method.
+    Returns:
+        The AIAPIConfig produced by the loader.
+    """
+    return ConfigLoader().load()

machine_dialect/cfg/examples.py ADDED Viewed

@@ -0,0 +1,221 @@
+"""Examples and usage of the CFG module for Machine Dialect™.
+This module provides demonstration examples of how to use the CFG (Context-Free Grammar)
+parser for Machine Dialect™ code. It includes examples of parsing variable assignments,
+conditional statements, logical operations, and code validation.
+The examples show:
+- Basic parsing of Machine Dialect™ syntax
+- Handling conditional statements with if/else blocks
+- Working with logical operations and boolean values
+- Code validation to check syntax correctness
+- Pretty-printing of Abstract Syntax Trees (AST)
+Example:
+    Run all examples from the command line::
+        $ python -m machine_dialect.cfg.examples
+"""
+from machine_dialect.cfg import CFGParser
+def example_parse_code() -> None:
+    """Demonstrate parsing Machine Dialect™ code with the CFG parser.
+    This function shows three examples of parsing Machine Dialect™ code:
+    1. Simple variable assignment and arithmetic operations
+    2. Conditional statements with if/else blocks
+    3. Logical operations with boolean values
+    Each example prints the original code, attempts to parse it, and displays
+    the resulting Abstract Syntax Tree (AST) if successful.
+    Raises:
+        ValueError: If any of the code examples fail to parse.
+    Example:
+        >>> example_parse_code()
+        Example 1: Simple arithmetic
+        Code: ...
+        Parse successful!
+        AST: ...
+    """
+    parser = CFGParser()
+    # Example 1: Simple variable assignment and output
+    code1 = """
+    Set `x` to _10_.
+    Set `y` to _20_.
+    Set `sum` to `x` + `y`.
+    Say `sum`.
+    """
+    print("Example 1: Simple arithmetic")
+    print("Code:", code1)
+    try:
+        tree = parser.parse(code1)
+        print("Parse successful!")
+        print("AST:")
+        print(parser.pretty_print(tree))
+    except ValueError as e:
+        print(f"Parse failed: {e}")
+    print("\n" + "=" * 50 + "\n")
+    # Example 2: Conditional statement
+    code2 = """
+    Set `age` to _18_.
+    If `age` is greater than _17_ then:
+    > Say _"You are an adult."_.
+    Else:
+    > Say _"You are a minor."_.
+    """
+    print("Example 2: Conditional")
+    print("Code:", code2)
+    try:
+        tree = parser.parse(code2)
+        print("Parse successful!")
+        print("AST:")
+        print(parser.pretty_print(tree))
+    except ValueError as e:
+        print(f"Parse failed: {e}")
+    print("\n" + "=" * 50 + "\n")
+    # Example 3: Logical operations
+    code3 = """
+    Set `is_raining` to _yes_.
+    Set `have_umbrella` to _no_.
+    Set `get_wet` to `is_raining` and not `have_umbrella`.
+    If `get_wet` then:
+    > Say _"You will get wet!"_.
+    """
+    print("Example 3: Logical operations")
+    print("Code:", code3)
+    try:
+        tree = parser.parse(code3)
+        print("Parse successful!")
+        print("AST:")
+        print(parser.pretty_print(tree))
+    except ValueError as e:
+        print(f"Parse failed: {e}")
+def example_generate_prompt() -> None:
+    """Demonstrate creating prompts for GPT-5 CFG generation.
+    This function serves as a placeholder for future CFG generation
+    functionality using GPT-5 or similar language models. Once implemented,
+    it will show how to create prompts that guide AI models to generate
+    valid Machine Dialect™ code following the CFG rules.
+    Note:
+        This functionality is not yet implemented and will be added
+        in a future version.
+    Todo:
+        * Implement prompt generation for CFG-based code generation
+        * Add examples of different prompt types
+        * Include validation of generated code
+    """
+    # Placeholder for CFG generation examples
+    print("CFG generation functionality coming soon.")
+def example_validate_code() -> None:
+    """Demonstrate validation of Machine Dialect™ code syntax.
+    This function shows examples of both valid and invalid Machine Dialect™
+    code to illustrate the validation capabilities of the CFG parser.
+    It demonstrates common syntax errors like missing backticks around
+    variables and missing periods at the end of statements.
+    The function validates:
+    - Valid code with proper syntax (backticks, periods)
+    - Invalid code with missing syntax elements
+    Example:
+        >>> example_validate_code()
+        Validating valid code:
+        Set `name` to "Alice".
+        Say name.
+        ✓ Code is valid!
+    """
+    parser = CFGParser()
+    # Valid code
+    valid_code = """
+    Set `name` to _"Alice"_.
+    Say `name`.
+    """
+    print("Validating valid code:")
+    print(valid_code)
+    if parser.validate(valid_code):
+        print("✓ Code is valid!")
+    else:
+        print("✗ Code is invalid!")
+    print("\n" + "=" * 50 + "\n")
+    # Invalid code
+    invalid_code = """
+    Set x to 10
+    Say x
+    """
+    print("Validating invalid code (missing backticks and periods):")
+    print(invalid_code)
+    if parser.validate(invalid_code):
+        print("✓ Code is valid!")
+    else:
+        print("✗ Code is invalid!")
+def main() -> None:
+    """Run all CFG parser examples in sequence.
+    This function executes all the example functions to demonstrate
+    the full capabilities of the CFG parser for Machine Dialect™.
+    It runs parsing examples, generation prompt examples, and
+    validation examples, separating each section with visual dividers
+    for clarity.
+    The execution order is:
+    1. Parsing examples - demonstrating code parsing
+    2. Generation prompt examples - placeholder for future features
+    3. Validation examples - showing syntax validation
+    Example:
+        >>> main()
+        ============================================================
+        CFG Parser Examples
+        ============================================================
+        ...
+    """
+    print("=" * 60)
+    print("CFG Parser Examples")
+    print("=" * 60)
+    print()
+    print("1. PARSING EXAMPLES")
+    print("-" * 40)
+    example_parse_code()
+    print("\n2. GENERATION PROMPT EXAMPLES")
+    print("-" * 40)
+    example_generate_prompt()
+    print("\n3. VALIDATION EXAMPLES")
+    print("-" * 40)
+    example_validate_code()
+    print("\n" + "=" * 60)
+    print("Examples complete!")
+# Run the examples only when this module is executed as a script
+# (for instance via ``python -m machine_dialect.cfg.examples``), not on import.
+if __name__ == "__main__":
+    main()