machine-dialect 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
machine_dialect/errors/tests/test_expected_token_errors.py
@@ -0,0 +1,188 @@
"""Tests for parser expected token errors.

This module tests the parser's ability to detect and report syntax errors
when expected tokens are not found during parsing.
"""

from machine_dialect.errors.exceptions import MDNameError, MDSyntaxError
from machine_dialect.parser import Parser


class TestExpectedTokenErrors:
    """Test cases for expected token error handling."""

    def test_missing_identifier_after_set(self) -> None:
        """Test error when Set statement is missing the identifier."""
        source = "Set 42 to X"  # 42 is not a valid identifier
        parser = Parser()

        parser.parse(source)

        # Should have name error when non-identifier used as identifier
        assert parser.has_errors() is True
        # With panic recovery, we get 1 error and skip to EOF (no period)
        assert len(parser.errors) == 1
        assert isinstance(parser.errors[0], MDNameError)

        # Error should mention expected identifier and the illegal character
        error_msg = str(parser.errors[0])
        assert "identifier" in error_msg.lower()
        assert "42" in error_msg  # The illegal character should be in the message

    def test_missing_to_keyword(self) -> None:
        """Test error when Set statement is missing the 'to' keyword."""
        source = "Set `X` 42"  # Missing 'to' keyword
        parser = Parser()

        parser.parse(source)

        # Should have two errors: undefined variable + syntax error
        assert parser.has_errors() is True
        assert len(parser.errors) == 2
        # First is NameError for undefined variable
        assert isinstance(parser.errors[0], MDNameError)
        # Second is the syntax error for missing 'to'
        assert isinstance(parser.errors[1], MDSyntaxError)

        # Syntax error should mention expected 'to' keyword
        error_msg = str(parser.errors[1])
        assert "TokenType.KW_TO" in error_msg or "to" in error_msg.lower()

    def test_multiple_expected_token_errors(self) -> None:
        """Test multiple expected token errors in one parse."""
        # Add periods so panic recovery stops at statement boundaries
        source = """Set 42 to X.
Set price 3.14.
Set to "hello".
"""
        parser = Parser()

        parser.parse(source)

        # Should have multiple errors (including undefined variable errors)
        assert parser.has_errors() is True
        # With periods, panic recovery allows finding syntax + name errors
        assert len(parser.errors) == 4

        # Check for expected error types - mix of syntax and name errors
        # Line 1: "Set 42" - MDNameError (42 is not valid identifier)
        # Line 2: "Set price" - MDNameError (undefined variable) + MDSyntaxError (missing 'to')
        # Line 3: "Set to" - MDNameError ('to' is not valid identifier)
        name_errors = [e for e in parser.errors if isinstance(e, MDNameError)]
        syntax_errors = [e for e in parser.errors if isinstance(e, MDSyntaxError)]
        assert len(name_errors) == 3  # 3 name errors
        assert len(syntax_errors) == 1  # 1 syntax error

    def test_empty_identifier(self) -> None:
        """Test error with empty backtick identifier."""
        source = "Set `` to 42"  # Empty backticks produce illegal tokens
        parser = Parser()

        parser.parse(source)

        # Should have errors (from lexer producing illegal tokens)
        # Empty backticks produce two illegal backtick characters
        assert parser.has_errors() is True

    def test_unclosed_backtick(self) -> None:
        """Test error with unclosed backtick identifier."""
        source = "Set `X to 42"  # Missing closing backtick
        parser = Parser()

        parser.parse(source)

        # Should have an error (either from lexer or parser)
        assert parser.has_errors() is True

    def test_error_location_info(self) -> None:
        """Test that expected token errors have correct location information."""
        source = "Set 42 to X"  # Error at position of 42
        parser = Parser()

        parser.parse(source)

        # With panic recovery, we get 1 name error (42 is not valid identifier)
        assert len(parser.errors) == 1
        # The error should be MDNameError since 42 is not a valid identifier
        error = parser.errors[0]
        assert isinstance(error, MDNameError)

        # Check that error has location information
        assert hasattr(error, "_line")
        assert hasattr(error, "_column")
        assert error._line == 1
        assert error._column == 5  # Points to '42'

    def test_error_message_content(self) -> None:
        """Test that error messages contain helpful information."""
        source = "Set `X` something"  # 'to' keyword missing
        parser = Parser()

        parser.parse(source)

        assert len(parser.errors) == 2  # Name error + syntax error
        # Get the syntax error
        error = parser.errors[1] if isinstance(parser.errors[1], MDSyntaxError) else parser.errors[0]
        error_msg = str(error)

        # Error message should contain what was expected and what was found
        assert "expected" in error_msg.lower() or "Expected" in error_msg
        # The parser now expects 'to' after the merged identifier 'X something'
        # and finds EOF, so check for that
        assert "TokenType.KW_TO" in error_msg or "to" in error_msg.lower()

    def test_parser_continues_after_expected_token_error(self) -> None:
        """Test that parser continues parsing after encountering expected token errors."""
        source = """Set 42 to X.
Set `price` to 3.14.
Set `Z` 99.
"""
        parser = Parser()

        program = parser.parse(source)

        # Should have errors for first and third statements
        assert parser.has_errors() is True
        # We expect 4 errors:
        # Line 1: expected identifier (42 is invalid)
        # Line 2: undefined variable 'price'
        # Line 3: undefined variable 'Z' + missing 'to'
        assert len(parser.errors) == 4

        # The parser should attempt to parse all statements, even if some fail
        # Due to error recovery, we may get fewer successfully parsed statements
        # But we should get at least the valid one (second statement)
        assert len(program.statements) >= 1

        # Check that we have the valid statement
        from machine_dialect.ast import SetStatement

        valid_statements = [
            s for s in program.statements if isinstance(s, SetStatement) and s.name and s.name.value == "price"
        ]
        assert len(valid_statements) == 1

    def test_consecutive_errors(self) -> None:
        """Test handling of consecutive expected token errors."""
        source = "Set Set Set"  # Multiple Set keywords without proper syntax
        parser = Parser()

        parser.parse(source)

        # Should have multiple errors
        assert parser.has_errors() is True
        assert len(parser.errors) == 1

    def test_eof_during_parsing(self) -> None:
        """Test error when EOF is encountered while expecting a token."""
        source = "Define `X` as Empty. Set `X`"  # Missing 'to' and value
        parser = Parser()

        parser.parse(source)

        # Should have an error for missing 'to'
        assert parser.has_errors() is True
        assert len(parser.errors) == 1
        # Find syntax error (may not be first if there are name errors)
        syntax_errors = [e for e in parser.errors if isinstance(e, MDSyntaxError)]
        assert len(syntax_errors) == 1
machine_dialect/errors/tests/test_name_errors.py
@@ -0,0 +1,118 @@
"""Tests for parser error handling.

This module tests the parser's ability to collect and report errors
from the lexer, including lexical errors like illegal characters.
"""

from machine_dialect.errors.exceptions import MDSyntaxError
from machine_dialect.parser import Parser


class TestParserErrors:
    """Test cases for parser error handling."""

    def test_parser_collects_lexer_errors(self) -> None:
        """Test that parser reports errors for illegal tokens during parsing."""
        # Source with illegal character
        source = "Define `X` as Empty. Set `X` to @."
        # Lexer instantiation moved to Parser.parse()
        parser = Parser()

        # Errors are reported during parsing, not before
        parser.parse(source)

        # Parser should have reported the error for @ as a syntax error (illegal token)
        assert len(parser.errors) == 1
        assert isinstance(parser.errors[0], MDSyntaxError)
        assert "@" in str(parser.errors[0])

    def test_parser_has_errors_method(self) -> None:
        """Test the has_errors() method."""
        # Valid source - no errors
        source = "Define `X` as Whole Number. Set `X` to 42."
        parser = Parser()
        parser.parse(source)

        assert parser.has_errors() is False
        assert len(parser.errors) == 0

        # Invalid source - with illegal character
        source_with_error = "Define `Y` as Empty. Set `Y` to §."  # § is not a valid token
        parser_with_error = Parser()
        parser_with_error.parse(source_with_error)

        assert parser_with_error.has_errors() is True
        assert len(parser_with_error.errors) == 1

    def test_parser_collects_multiple_errors(self) -> None:
        """Test that parser reports multiple errors through panic recovery."""
        # Source with multiple illegal characters - periods are mandatory
        source = (
            "Define `A` as Empty. Define `B` as Empty. Define `C` as Empty. Set `A` to @. Set `B` to $. Set `C` to %."
        )
        parser = Parser()
        parser.parse(source)

        # Should have 3 errors for illegal characters (all syntax errors)
        assert len(parser.errors) == 3
        assert all(isinstance(error, MDSyntaxError) for error in parser.errors)

        # Check that all illegal characters are in the errors
        error_messages = [str(error) for error in parser.errors]
        assert any("@" in msg for msg in error_messages)
        assert any("$" in msg for msg in error_messages)
        assert any("%" in msg for msg in error_messages)

    def test_parser_continues_after_lexer_errors(self) -> None:
        """Test that parser continues parsing despite lexer errors."""
        # Source with an error but valid structure
        source = "Define `X` as Empty. Define `result` as Whole Number. Set `X` to @. Set `result` to _123_."
        # Lexer instantiation moved to Parser.parse()
        parser = Parser()

        # Parse the program
        program = parser.parse(source)

        # Should have one error for illegal character
        assert parser.has_errors() is True
        assert len(parser.errors) == 1

        # But should still parse the valid statements
        assert len(program.statements) == 4  # 2 defines + 2 sets
        # Type assertions to help mypy
        from machine_dialect.ast import DefineStatement, SetStatement

        assert isinstance(program.statements[0], DefineStatement)
        assert isinstance(program.statements[1], DefineStatement)
        assert isinstance(program.statements[2], SetStatement)
        assert isinstance(program.statements[3], SetStatement)
        assert program.statements[2].name is not None
        assert program.statements[2].name.value == "X"
        assert program.statements[3].name is not None
        assert program.statements[3].name.value == "result"

    def test_empty_source_no_errors(self) -> None:
        """Test that empty source produces no errors."""
        source = ""
        parser = Parser()

        program = parser.parse(source)

        assert parser.has_errors() is False
        assert len(parser.errors) == 0
        assert len(program.statements) == 0

    def test_parser_error_details(self) -> None:
        """Test that parser errors contain correct location information."""
        source = "Define `X` as Empty. Set `X` to &."
        parser = Parser()
        parser.parse(source)

        assert len(parser.errors) == 1
        error = parser.errors[0]

        # Check error has location information
        assert hasattr(error, "_line")
        assert hasattr(error, "_column")
        assert error._line == 1  # First line
        assert error._column > 0  # Should have a column position
machine_dialect/helpers/stopwords.py
@@ -0,0 +1,225 @@
"""English stopwords module for Machine Dialect™.

This module provides a curated set of English stopwords commonly used in
natural language processing. These words are typically filtered out during
text analysis as they carry little semantic meaning on their own.

The stopwords include common articles, prepositions, pronouns, conjunctions,
and other functional words that appear frequently in English text.

Attributes:
    ENGLISH_STOPWORDS (set[str]): A set containing common English stopwords
        in lowercase. Includes contractions and their expanded forms.
"""

ENGLISH_STOPWORDS = {
    "a",
    "about",
    "above",
    "after",
    "again",
    "against",
    "ain",
    "all",
    "am",
    "an",
    "and",
    "any",
    "are",
    "aren",
    "aren't",
    "as",
    "at",
    "be",
    "because",
    "been",
    "before",
    "being",
    "below",
    "between",
    "both",
    "but",
    "by",
    "can",
    "could",
    "couldn",
    "couldn't",
    "d",
    "did",
    "didn",
    "didn't",
    "do",
    "does",
    "doesn",
    "doesn't",
    "doing",
    "don",
    "don't",
    "down",
    "during",
    "each",
    "few",
    "for",
    "from",
    "further",
    "had",
    "hadn",
    "hadn't",
    "has",
    "hasn",
    "hasn't",
    "have",
    "haven",
    "haven't",
    "having",
    "he",
    "he'd",
    "he'll",
    "he's",
    "here's",
    "her",
    "here",
    "hers",
    "herself",
    "him",
    "himself",
    "his",
    "how",
    "how's",
    "i",
    "i'd",
    "i'll",
    "i'm",
    "i've",
    "if",
    "in",
    "into",
    "is",
    "isn",
    "isn't",
    "it",
    "it'd",
    "it'll",
    "it's",
    "its",
    "itself",
    "just",
    "ll",
    "m",
    "ma",
    "me",
    "mightn",
    "mightn't",
    "more",
    "most",
    "mustn",
    "mustn't",
    "my",
    "myself",
    "needn",
    "needn't",
    "no",
    "nor",
    "not",
    "now",
    "o",
    "of",
    "off",
    "on",
    "once",
    "only",
    "or",
    "other",
    "ought",
    "our",
    "ours",
    "ourselves",
    "out",
    "over",
    "own",
    "re",
    "s",
    "same",
    "shan",
    "shan't",
    "she",
    "she'd",
    "she'll",
    "she's",
    "should",
    "should've",
    "shouldn",
    "shouldn't",
    "so",
    "some",
    "such",
    "t",
    "than",
    "that",
    "that'll",
    "that's",
    "the",
    "their",
    "theirs",
    "them",
    "themselves",
    "then",
    "there",
    "there's",
    "they'd",
    "they'll",
    "they're",
    "they've",
    "these",
    "they",
    "this",
    "those",
    "through",
    "to",
    "too",
    "under",
    "until",
    "up",
    "ve",
    "very",
    "was",
    "wasn",
    "wasn't",
    "we",
    "we'd",
    "we'll",
    "we're",
    "we've",
    "were",
    "weren",
    "weren't",
    "what",
    "what's",
    "when",
    "when's",
    "where's",
    "where",
    "which",
    "while",
    "who",
    "who's",
    "whom",
    "why",
    "will",
    "why's",
    "with",
    "won",
    "won't",
    "would",
    "wouldn",
    "wouldn't",
    "you",
    "you'd",
    "you'll",
    "you're",
    "you've",
    "your",
    "yours",
    "yourself",
    "yourselves",
}
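A minimal usage sketch of the stopword set above (illustrative only, not part of the package diff; the strip_stopwords helper is hypothetical and assumes the wheel is installed so machine_dialect.helpers.stopwords is importable):

# Illustrative sketch: filter stopwords out of free text using ENGLISH_STOPWORDS.
from machine_dialect.helpers.stopwords import ENGLISH_STOPWORDS

def strip_stopwords(text: str) -> list[str]:
    """Return the lowercased words of `text` that are not English stopwords (hypothetical helper)."""
    return [word for word in text.lower().split() if word not in ENGLISH_STOPWORDS]

print(strip_stopwords("The parser should have reported the error"))
# Expected: ['parser', 'reported', 'error']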
machine_dialect/helpers/validators.py
@@ -0,0 +1,30 @@
from rfc3986 import uri_reference
from rfc3986.validators import Validator


def is_valid_url(url: str, *, require_scheme: bool = True) -> bool:
    try:
        uri = uri_reference(url).normalize()

        # Use the new Validator API
        validator = Validator()

        # Special handling for certain schemes that don't require host
        if uri.scheme in ("mailto", "data", "file"):
            # These schemes have their own validation rules
            if require_scheme:
                validator = validator.require_presence_of("scheme")
        else:
            # For typical URLs (http, https, ftp, etc.), require host
            if require_scheme:
                validator = validator.require_presence_of("scheme", "host")
            else:
                validator = validator.require_presence_of("host")

        # Check all components
        validator = validator.check_validity_of("scheme", "userinfo", "host", "port", "path", "query", "fragment")

        validator.validate(uri)
        return True
    except Exception:
        return False
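A brief usage sketch for is_valid_url (illustrative, not from the diff; assumes the wheel and its rfc3986 dependency are installed). The comments describe the behavior implied by the branches above rather than verified output:

# Illustrative sketch of calling the validator shown above.
from machine_dialect.helpers.validators import is_valid_url

print(is_valid_url("https://example.com/docs?page=1"))  # expected True: scheme and host present
print(is_valid_url("mailto:dev@example.com"))           # expected True: mailto is special-cased to not require a host
print(is_valid_url("not a url"))                        # expected False: no scheme or host, so validation raises and False is returned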
machine_dialect/lexer/constants.py
@@ -0,0 +1,23 @@
from machine_dialect.lexer.tokens import TokenType

# Single-character tokens
CHAR_TO_TOKEN_MAP = {
    "+": TokenType.OP_PLUS,
    "-": TokenType.OP_MINUS,
    "*": TokenType.OP_STAR,
    "/": TokenType.OP_DIVISION,
    "^": TokenType.OP_CARET,
    "=": TokenType.OP_ASSIGN,
    "<": TokenType.OP_LT,
    ">": TokenType.OP_GT,
    "(": TokenType.DELIM_LPAREN,
    ")": TokenType.DELIM_RPAREN,
    "{": TokenType.DELIM_LBRACE,
    "}": TokenType.DELIM_RBRACE,
    ";": TokenType.PUNCT_SEMICOLON,
    ",": TokenType.PUNCT_COMMA,
    ".": TokenType.PUNCT_PERIOD,
    ":": TokenType.PUNCT_COLON,
    "#": TokenType.PUNCT_HASH,
    "\\": TokenType.PUNCT_BACKSLASH,
}
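A quick sketch of how a single-character dispatch table like CHAR_TO_TOKEN_MAP is typically consumed (an assumption for illustration; lexer.py itself is not shown in this excerpt, and classify_char is a hypothetical helper):

# Illustrative sketch only: look up a single character in the dispatch table above.
from __future__ import annotations

from machine_dialect.lexer.constants import CHAR_TO_TOKEN_MAP
from machine_dialect.lexer.tokens import TokenType

def classify_char(ch: str) -> TokenType | None:
    """Return the token type for a single-character token, or None if unmapped."""
    return CHAR_TO_TOKEN_MAP.get(ch)

assert classify_char(".") is TokenType.PUNCT_PERIOD
assert classify_char("?") is None  # '?' has no single-character mapping in the table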