machine-dialect 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,282 @@
|
|
1
|
+
"""Test list-related token recognition in the lexer."""
|
2
|
+
|
3
|
+
from machine_dialect.lexer import Lexer, TokenType
|
4
|
+
|
5
|
+
|
6
|
+
class TestListMarkers:
    """Test recognition of list markers (dash vs minus)."""

    def test_dash_at_line_start(self) -> None:
        """Test that dash at line start is recognized as PUNCT_DASH in list context."""
        # Outside list context a leading dash lexes as the minus operator.
        tok = Lexer('- _"apple"_').next_token(in_list_context=False)
        assert tok.type == TokenType.OP_MINUS
        assert tok.literal == "-"

        # A fresh lexer in list context yields a dash punctuation token instead.
        lexer = Lexer('- _"apple"_')
        tok = lexer.next_token(in_list_context=True)
        assert tok.type == TokenType.PUNCT_DASH
        assert tok.literal == "-"

        tok = lexer.next_token(in_list_context=True)
        assert tok.type == TokenType.LIT_TEXT
        assert tok.literal == '"apple"'

    def test_dash_after_whitespace(self) -> None:
        """Test that dash after whitespace at line start is PUNCT_DASH in list context."""
        tok = Lexer(' - _"apple"_').next_token(in_list_context=True)
        assert tok.type == TokenType.PUNCT_DASH
        assert tok.literal == "-"

    def test_dash_after_block_marker(self) -> None:
        """Test that dash after block marker (>) is PUNCT_DASH in list context."""
        lexer = Lexer('> - _"apple"_')

        assert lexer.next_token(in_list_context=True).type == TokenType.OP_GT

        tok = lexer.next_token(in_list_context=True)
        assert tok.type == TokenType.PUNCT_DASH
        assert tok.literal == "-"

    def test_dash_in_expression(self) -> None:
        """Test that dash in expression context is OP_MINUS."""
        lexer = Lexer("_5_ - _3_")

        # number, minus, number
        assert lexer.next_token().type == TokenType.LIT_WHOLE_NUMBER

        minus = lexer.next_token()
        assert minus.type == TokenType.OP_MINUS
        assert minus.literal == "-"

        assert lexer.next_token().type == TokenType.LIT_WHOLE_NUMBER

    def test_multiple_dashes_at_line_start(self) -> None:
        """Test that --- at line start is PUNCT_FRONTMATTER."""
        tok = Lexer("---").next_token()
        assert tok.type == TokenType.PUNCT_FRONTMATTER
        assert tok.literal == "---"

    def test_dash_on_new_line(self) -> None:
        """Test dash recognition across multiple lines in list context."""
        source = """Set `x` to _5_.
- _"apple"_
- _"banana"_"""

        lexer = Lexer(source)

        # Statement line (not in list context): Set `x` to _5_.
        for want in (
            TokenType.KW_SET,
            TokenType.MISC_IDENT,
            TokenType.KW_TO,
            TokenType.LIT_WHOLE_NUMBER,
            TokenType.PUNCT_PERIOD,
        ):
            assert lexer.next_token().type == want

        # Two list lines follow: each is a dash marker plus a text literal.
        for _ in range(2):
            tok = lexer.next_token(in_list_context=True)
            assert tok.type == TokenType.PUNCT_DASH
            assert tok.literal == "-"
            assert lexer.next_token(in_list_context=True).type == TokenType.LIT_TEXT
|
115
|
+
class TestListKeywords:
    """Test new keywords for list operations."""

    def _check_keyword(self, literal: str, expected_type: TokenType) -> None:
        # Lex a lone keyword and verify both its type and preserved literal.
        token = Lexer(literal).next_token()
        assert token.type == expected_type
        assert token.literal == literal

    def test_list_operation_keywords(self) -> None:
        """Test recognition of list operation keywords."""
        for literal, expected_type in (
            ("add", TokenType.KW_ADD),
            ("remove", TokenType.KW_REMOVE),
            ("insert", TokenType.KW_INSERT),
            ("has", TokenType.KW_HAS),
        ):
            self._check_keyword(literal, expected_type)

    def test_list_type_keywords(self) -> None:
        """Test recognition of list type keywords."""
        for literal, expected_type in (
            ("Ordered List", TokenType.KW_ORDERED_LIST),
            ("Unordered List", TokenType.KW_UNORDERED_LIST),
            ("Named List", TokenType.KW_NAMED_LIST),
        ):
            self._check_keyword(literal, expected_type)

    def test_list_access_keywords(self) -> None:
        """Test recognition of list access keywords."""
        for literal, expected_type in (
            ("first", TokenType.KW_FIRST),
            ("second", TokenType.KW_SECOND),
            ("third", TokenType.KW_THIRD),
            ("last", TokenType.KW_LAST),
            ("item", TokenType.KW_ITEM),
            ("of", TokenType.KW_OF),
        ):
            self._check_keyword(literal, expected_type)

    def test_iteration_keywords(self) -> None:
        """Test recognition of iteration keywords."""
        for literal, expected_type in (
            ("for", TokenType.KW_FOR),
            ("each", TokenType.KW_EACH),
            ("in", TokenType.KW_IN),
        ):
            self._check_keyword(literal, expected_type)

    def test_named_list_keywords(self) -> None:
        """Test recognition of named list keywords."""
        # Plural forms map to the same token type; literals are preserved as-is.
        for literal, expected_type in (
            ("name", TokenType.KW_NAME),
            ("names", TokenType.KW_NAME),
            ("content", TokenType.KW_CONTENT),
            ("contents", TokenType.KW_CONTENT),
        ):
            self._check_keyword(literal, expected_type)
+
class TestComplexListScenarios:
    """Test complex scenarios involving list tokens."""

    def test_list_with_colon(self) -> None:
        """Test dash followed by identifier and colon (named list syntax)."""
        lexer = Lexer("- name: `value`")
        expected_types = (
            TokenType.PUNCT_DASH,
            TokenType.KW_NAME,
            TokenType.PUNCT_COLON,
            TokenType.MISC_IDENT,
        )
        tokens = [lexer.next_token(in_list_context=True) for _ in expected_types]
        for token, want in zip(tokens, expected_types):
            assert token.type == want
        # The backticked identifier keeps its bare name as the literal.
        assert tokens[-1].literal == "value"

    def test_numbered_list_marker(self) -> None:
        """Test numbered list markers (1., 2., etc)."""
        lexer = Lexer("1. first\n2. second")

        # Pairs of (expected type, expected literal); None means "don't check".
        for want_type, want_literal in (
            (TokenType.LIT_WHOLE_NUMBER, "1"),
            (TokenType.PUNCT_PERIOD, None),
            (TokenType.KW_FIRST, None),
            (TokenType.LIT_WHOLE_NUMBER, "2"),
            (TokenType.PUNCT_PERIOD, None),
            (TokenType.KW_SECOND, None),
        ):
            token = lexer.next_token()
            assert token.type == want_type
            if want_literal is not None:
                assert token.literal == want_literal

    def test_expression_with_negative_number(self) -> None:
        """Test that negative numbers still work correctly."""
        token = Lexer("_-42_").next_token()
        assert token.type == TokenType.LIT_WHOLE_NUMBER
        assert token.literal == "-42"

    def test_subtraction_vs_list_marker(self) -> None:
        """Test differentiating subtraction from list markers."""
        # Subtraction: identifier, minus, identifier (not in list context).
        lexer = Lexer("`x` - `y`")
        for want in (TokenType.MISC_IDENT, TokenType.OP_MINUS, TokenType.MISC_IDENT):
            assert lexer.next_token().type == want

        # The same dash at the start of a fresh line is a list marker in list context.
        lexer = Lexer('\n- _"apple"_')
        assert lexer.next_token(in_list_context=True).type == TokenType.PUNCT_DASH

    def test_list_with_negative_number(self) -> None:
        """Test list items that include negative numbers."""
        lexer = Lexer("- _-42_")

        # The leading dash is the list marker in list context.
        assert lexer.next_token(in_list_context=True).type == TokenType.PUNCT_DASH

        # The item itself is a negative whole-number literal.
        token = lexer.next_token(in_list_context=True)
        assert token.type == TokenType.LIT_WHOLE_NUMBER
        assert token.literal == "-42"
|
@@ -0,0 +1,80 @@
|
|
1
|
+
import pytest
|
2
|
+
|
3
|
+
from machine_dialect.lexer import Lexer
|
4
|
+
from machine_dialect.lexer.tests.helpers import stream_and_assert_tokens
|
5
|
+
from machine_dialect.lexer.tokens import Token, TokenType
|
6
|
+
|
7
|
+
|
8
|
+
class TestStopwords:
    # Words that must lex as MISC_STOPWORD when they stand alone.
    _STOPWORDS = [
        "the", "a", "an", "on", "at", "by", "about", "against", "between",
        "into", "through", "during", "before", "after", "above", "below",
        "up", "down", "out", "off", "over", "under", "again", "further", "once",
    ]

    @pytest.mark.parametrize(
        "input_text,expected_tokens",
        [(word, [Token(TokenType.MISC_STOPWORD, word, line=1, position=1)]) for word in _STOPWORDS]
        + [
            # Case-insensitive stopword detection (original casing kept in the literal)
            ("The", [Token(TokenType.MISC_STOPWORD, "The", line=1, position=1)]),
            ("THE", [Token(TokenType.MISC_STOPWORD, "THE", line=1, position=1)]),
            # Non-stopwords should be identifiers
            ("variable", [Token(TokenType.MISC_IDENT, "variable", line=1, position=1)]),
            ("myfunction", [Token(TokenType.MISC_IDENT, "myfunction", line=1, position=1)]),
            ("data", [Token(TokenType.MISC_IDENT, "data", line=1, position=1)]),
        ],
    )
    def test_stopword_detection(self, input_text: str, expected_tokens: list[Token]) -> None:
        stream_and_assert_tokens(Lexer(input_text), expected_tokens)

    def test_stopwords_mixed_with_code(self) -> None:
        # Expected tokens: "Set" (keyword), "the" (stopword), "value" (ident), "to" (keyword), "5" (int)
        lexer = Lexer("Set the `value` to 5")
        stream_and_assert_tokens(
            lexer,
            [
                Token(TokenType.KW_SET, "Set", line=1, position=1),
                Token(TokenType.MISC_STOPWORD, "the", line=1, position=5),
                Token(TokenType.MISC_IDENT, "value", line=1, position=10),
                Token(TokenType.KW_TO, "to", line=1, position=17),
                Token(TokenType.LIT_WHOLE_NUMBER, "5", line=1, position=20),
            ],
        )

    def test_parser_ignores_stopwords(self) -> None:
        from machine_dialect.parser import Parser

        # The parser should skip the "the" stopword and parse both statements cleanly.
        parser = Parser()
        program = parser.parse("Define `x` as Whole Number. Set the `x` to _5_")

        assert len(program.statements) == 2  # Define + Set
        # First statement is Define, second is Set
        assert program.statements[1].token.type == TokenType.KW_SET

        # No parsing errors were recorded.
        assert not parser.errors
|
@@ -0,0 +1,129 @@
|
|
1
|
+
"""Tests for strict equality operators in the lexer.
|
2
|
+
|
3
|
+
This module tests that the lexer correctly recognizes strict equality
|
4
|
+
and strict inequality operators in their various natural language forms.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import pytest
|
8
|
+
|
9
|
+
from machine_dialect.lexer import Lexer
|
10
|
+
from machine_dialect.lexer.tokens import TokenType
|
11
|
+
|
12
|
+
|
13
|
+
class TestStrictEqualityOperators:
    """Test the lexer's handling of strict equality operators."""

    def _check_token_stream(self, source: str, expected: list[tuple[TokenType, str]]) -> None:
        """Tokenize *source* and verify the emitted (type, literal) sequence.

        Args:
            source: The source text to feed the lexer.
            expected: Ordered (token type, literal) pairs the lexer must emit.
        """
        lexer = Lexer(source)
        for want_type, want_literal in expected:
            tok = lexer.next_token()
            assert tok.type == want_type
            assert tok.literal == want_literal

    @pytest.mark.parametrize(
        "source,expected_token_type,expected_literal",
        [
            # Strict equality operators
            ("is strictly equal to", TokenType.OP_STRICT_EQ, "is strictly equal to"),
            ("is exactly equal to", TokenType.OP_STRICT_EQ, "is exactly equal to"),
            ("is identical to", TokenType.OP_STRICT_EQ, "is identical to"),
            # Strict inequality operators
            ("is not strictly equal to", TokenType.OP_STRICT_NOT_EQ, "is not strictly equal to"),
            ("is not exactly equal to", TokenType.OP_STRICT_NOT_EQ, "is not exactly equal to"),
            ("is not identical to", TokenType.OP_STRICT_NOT_EQ, "is not identical to"),
            # Value equality (for comparison)
            ("is equal to", TokenType.OP_EQ, "is equal to"),
            ("equals", TokenType.OP_EQ, "equals"),
            ("is the same as", TokenType.OP_EQ, "is the same as"),
            # Value inequality (for comparison)
            ("is not equal to", TokenType.OP_NOT_EQ, "is not equal to"),
            ("does not equal", TokenType.OP_NOT_EQ, "does not equal"),
            ("is different from", TokenType.OP_NOT_EQ, "is different from"),
        ],
    )
    def test_strict_equality_operators(
        self, source: str, expected_token_type: TokenType, expected_literal: str
    ) -> None:
        """Test that strict equality operators are correctly tokenized.

        Args:
            source: The source string containing the operator.
            expected_token_type: The expected token type.
            expected_literal: The expected literal value.
        """
        self._check_token_stream(source, [(expected_token_type, expected_literal)])

    def test_strict_equality_in_expression(self) -> None:
        """Test strict equality operators in complete expressions."""
        self._check_token_stream(
            "if x is strictly equal to 5 then give back Yes",
            [
                (TokenType.KW_IF, "if"),
                (TokenType.MISC_IDENT, "x"),
                (TokenType.OP_STRICT_EQ, "is strictly equal to"),
                (TokenType.LIT_WHOLE_NUMBER, "5"),
                (TokenType.KW_THEN, "then"),
                (TokenType.KW_RETURN, "give back"),
                (TokenType.LIT_YES, "Yes"),
                (TokenType.MISC_EOF, ""),
            ],
        )

    def test_strict_inequality_in_expression(self) -> None:
        """Test strict inequality operators in complete expressions."""
        self._check_token_stream(
            "if `value` is not identical to `null` then `process`",
            [
                (TokenType.KW_IF, "if"),
                (TokenType.MISC_IDENT, "value"),
                (TokenType.OP_STRICT_NOT_EQ, "is not identical to"),
                (TokenType.MISC_IDENT, "null"),
                (TokenType.KW_THEN, "then"),
                (TokenType.MISC_IDENT, "process"),
                (TokenType.MISC_EOF, ""),
            ],
        )

    def test_mixed_equality_operators(self) -> None:
        """Test that different equality operators are distinguished correctly."""
        self._check_token_stream(
            "`a` equals `b` and `c` is strictly equal to `d`",
            [
                (TokenType.MISC_IDENT, "a"),
                (TokenType.OP_EQ, "equals"),
                (TokenType.MISC_IDENT, "b"),
                (TokenType.KW_AND, "and"),
                (TokenType.MISC_IDENT, "c"),
                (TokenType.OP_STRICT_EQ, "is strictly equal to"),
                (TokenType.MISC_IDENT, "d"),
                (TokenType.MISC_EOF, ""),
            ],
        )

    def test_case_insensitive_strict_operators(self) -> None:
        """Test that strict operators are case-insensitive."""
        cases: list[tuple[str, TokenType]] = [
            ("Is Strictly Equal To", TokenType.OP_STRICT_EQ),
            ("IS EXACTLY EQUAL TO", TokenType.OP_STRICT_EQ),
            ("Is Identical To", TokenType.OP_STRICT_EQ),
            ("IS NOT STRICTLY EQUAL TO", TokenType.OP_STRICT_NOT_EQ),
            ("Is Not Exactly Equal To", TokenType.OP_STRICT_NOT_EQ),
            ("is NOT identical TO", TokenType.OP_STRICT_NOT_EQ),
        ]

        # Only the token type matters here; casing must not affect recognition.
        for text, want_type in cases:
            assert Lexer(text).next_token().type == want_type
|
@@ -0,0 +1,41 @@
|
|
1
|
+
from machine_dialect.lexer.tokens import Token, TokenType
|
2
|
+
|
3
|
+
|
4
|
+
class TestToken:
    """Tests for the Token dataclass's location data, repr, and equality."""

    def test_token_with_line_and_position(self) -> None:
        """Test that Token includes line and position information."""
        tok = Token(type=TokenType.MISC_IDENT, literal="test", line=1, position=1)

        assert tok.type == TokenType.MISC_IDENT
        assert tok.literal == "test"
        assert tok.line == 1
        assert tok.position == 1

    def test_token_string_representation_with_location(self) -> None:
        """Test string representation includes line and position."""
        tok = Token(type=TokenType.KW_IF, literal="if", line=5, position=10)

        # The rendered form must spell out all four attributes.
        assert str(tok) == "Type: TokenType.KW_IF, Literal: if, Line: 5, Position: 10"

    def test_token_equality_with_location(self) -> None:
        """Test that tokens are equal if all attributes match."""
        first = Token(type=TokenType.LIT_WHOLE_NUMBER, literal="42", line=1, position=1)
        twin = Token(type=TokenType.LIT_WHOLE_NUMBER, literal="42", line=1, position=1)
        moved = Token(
            type=TokenType.LIT_WHOLE_NUMBER,
            literal="42",
            line=2,  # Different line
            position=1,
        )

        # Identical attributes compare equal; a differing line breaks equality.
        assert first == twin
        assert first != moved

    def test_token_creation_with_defaults(self) -> None:
        """Test Token creation with default line and position values."""
        # This test assumes we might want default values for backward compatibility
        tok = Token(type=TokenType.OP_PLUS, literal="+", line=1, position=1)

        assert tok.line == 1
        assert tok.position == 1
|