machine-dialect 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,200 @@
|
|
1
|
+
"""Tests for comment token recognition."""
|
2
|
+
|
3
|
+
from machine_dialect.lexer.lexer import Lexer
|
4
|
+
from machine_dialect.lexer.tokens import TokenType
|
5
|
+
|
6
|
+
|
7
|
+
class TestComments:
|
8
|
+
"""Test comment token recognition."""
|
9
|
+
|
10
|
+
def test_simple_comment(self) -> None:
|
11
|
+
"""Test simple comment within summary tags."""
|
12
|
+
source = "<summary>This is a comment</summary>"
|
13
|
+
lexer = Lexer(source)
|
14
|
+
|
15
|
+
tokens = []
|
16
|
+
while True:
|
17
|
+
token = lexer.next_token()
|
18
|
+
tokens.append(token)
|
19
|
+
if token.type == TokenType.MISC_EOF:
|
20
|
+
break
|
21
|
+
|
22
|
+
assert len(tokens) == 4
|
23
|
+
assert tokens[0].type == TokenType.TAG_SUMMARY_START
|
24
|
+
assert tokens[0].literal == "<summary>"
|
25
|
+
assert tokens[1].type == TokenType.MISC_COMMENT
|
26
|
+
assert tokens[1].literal == "This is a comment"
|
27
|
+
assert tokens[2].type == TokenType.TAG_SUMMARY_END
|
28
|
+
assert tokens[2].literal == "</summary>"
|
29
|
+
assert tokens[3].type == TokenType.MISC_EOF
|
30
|
+
|
31
|
+
def test_multiline_comment(self) -> None:
|
32
|
+
"""Test multiline comment within summary tags."""
|
33
|
+
source = """<summary>
|
34
|
+
This is a comment
|
35
|
+
that spans multiple lines
|
36
|
+
</summary>"""
|
37
|
+
lexer = Lexer(source)
|
38
|
+
|
39
|
+
tokens = []
|
40
|
+
while True:
|
41
|
+
token = lexer.next_token()
|
42
|
+
tokens.append(token)
|
43
|
+
if token.type == TokenType.MISC_EOF:
|
44
|
+
break
|
45
|
+
|
46
|
+
assert len(tokens) == 4
|
47
|
+
assert tokens[0].type == TokenType.TAG_SUMMARY_START
|
48
|
+
assert tokens[1].type == TokenType.MISC_COMMENT
|
49
|
+
assert tokens[1].literal == "\nThis is a comment\nthat spans multiple lines\n"
|
50
|
+
assert tokens[2].type == TokenType.TAG_SUMMARY_END
|
51
|
+
assert tokens[3].type == TokenType.MISC_EOF
|
52
|
+
|
53
|
+
def test_empty_comment(self) -> None:
|
54
|
+
"""Test empty comment within summary tags."""
|
55
|
+
source = "<summary></summary>"
|
56
|
+
lexer = Lexer(source)
|
57
|
+
|
58
|
+
tokens = []
|
59
|
+
while True:
|
60
|
+
token = lexer.next_token()
|
61
|
+
tokens.append(token)
|
62
|
+
if token.type == TokenType.MISC_EOF:
|
63
|
+
break
|
64
|
+
|
65
|
+
assert len(tokens) == 4
|
66
|
+
assert tokens[0].type == TokenType.TAG_SUMMARY_START
|
67
|
+
assert tokens[1].type == TokenType.MISC_COMMENT
|
68
|
+
assert tokens[1].literal == ""
|
69
|
+
assert tokens[2].type == TokenType.TAG_SUMMARY_END
|
70
|
+
assert tokens[3].type == TokenType.MISC_EOF
|
71
|
+
|
72
|
+
def test_comment_with_code_before_and_after(self) -> None:
|
73
|
+
"""Test comment with code before and after."""
|
74
|
+
source = "set x to 10. <summary>This is a comment</summary> set y to 20."
|
75
|
+
lexer = Lexer(source)
|
76
|
+
|
77
|
+
tokens = []
|
78
|
+
while True:
|
79
|
+
token = lexer.next_token()
|
80
|
+
tokens.append(token)
|
81
|
+
if token.type == TokenType.MISC_EOF:
|
82
|
+
break
|
83
|
+
|
84
|
+
# Check first part: set x to 10.
|
85
|
+
assert tokens[0].type == TokenType.KW_SET
|
86
|
+
assert tokens[1].type == TokenType.MISC_IDENT
|
87
|
+
assert tokens[1].literal == "x"
|
88
|
+
assert tokens[2].type == TokenType.KW_TO
|
89
|
+
assert tokens[3].type == TokenType.LIT_WHOLE_NUMBER
|
90
|
+
assert tokens[3].literal == "10"
|
91
|
+
assert tokens[4].type == TokenType.PUNCT_PERIOD
|
92
|
+
|
93
|
+
# Check comment part
|
94
|
+
assert tokens[5].type == TokenType.TAG_SUMMARY_START
|
95
|
+
assert tokens[6].type == TokenType.MISC_COMMENT
|
96
|
+
assert tokens[6].literal == "This is a comment"
|
97
|
+
assert tokens[7].type == TokenType.TAG_SUMMARY_END
|
98
|
+
|
99
|
+
# Check second part: set y to 20.
|
100
|
+
assert tokens[8].type == TokenType.KW_SET
|
101
|
+
assert tokens[9].type == TokenType.MISC_IDENT
|
102
|
+
assert tokens[9].literal == "y"
|
103
|
+
assert tokens[10].type == TokenType.KW_TO
|
104
|
+
assert tokens[11].type == TokenType.LIT_WHOLE_NUMBER
|
105
|
+
assert tokens[11].literal == "20"
|
106
|
+
assert tokens[12].type == TokenType.PUNCT_PERIOD
|
107
|
+
assert tokens[13].type == TokenType.MISC_EOF
|
108
|
+
|
109
|
+
def test_comment_case_insensitive_tags(self) -> None:
|
110
|
+
"""Test that summary tags are case-insensitive."""
|
111
|
+
source = "<SUMMARY>This is a comment</SUMMARY>"
|
112
|
+
lexer = Lexer(source)
|
113
|
+
|
114
|
+
tokens = []
|
115
|
+
while True:
|
116
|
+
token = lexer.next_token()
|
117
|
+
tokens.append(token)
|
118
|
+
if token.type == TokenType.MISC_EOF:
|
119
|
+
break
|
120
|
+
|
121
|
+
assert len(tokens) == 4
|
122
|
+
assert tokens[0].type == TokenType.TAG_SUMMARY_START
|
123
|
+
assert tokens[0].literal == "<summary>" # Canonical form
|
124
|
+
assert tokens[1].type == TokenType.MISC_COMMENT
|
125
|
+
assert tokens[1].literal == "This is a comment"
|
126
|
+
assert tokens[2].type == TokenType.TAG_SUMMARY_END
|
127
|
+
assert tokens[2].literal == "</summary>"
|
128
|
+
|
129
|
+
def test_mixed_case_tags(self) -> None:
|
130
|
+
"""Test mixed case summary tags."""
|
131
|
+
source = "<SuMmArY>Mixed case comment</sUmMaRy>"
|
132
|
+
lexer = Lexer(source)
|
133
|
+
|
134
|
+
tokens = []
|
135
|
+
while True:
|
136
|
+
token = lexer.next_token()
|
137
|
+
tokens.append(token)
|
138
|
+
if token.type == TokenType.MISC_EOF:
|
139
|
+
break
|
140
|
+
|
141
|
+
assert len(tokens) == 4
|
142
|
+
assert tokens[0].type == TokenType.TAG_SUMMARY_START
|
143
|
+
assert tokens[1].type == TokenType.MISC_COMMENT
|
144
|
+
assert tokens[1].literal == "Mixed case comment"
|
145
|
+
assert tokens[2].type == TokenType.TAG_SUMMARY_END
|
146
|
+
|
147
|
+
def test_comment_with_special_characters(self) -> None:
|
148
|
+
"""Test comment containing special characters."""
|
149
|
+
source = "<summary>Comment with special chars: !@#$%^&*()_+-=[]{}|;:'\",.<>?/`~</summary>"
|
150
|
+
lexer = Lexer(source)
|
151
|
+
|
152
|
+
tokens = []
|
153
|
+
while True:
|
154
|
+
token = lexer.next_token()
|
155
|
+
tokens.append(token)
|
156
|
+
if token.type == TokenType.MISC_EOF:
|
157
|
+
break
|
158
|
+
|
159
|
+
assert len(tokens) == 4
|
160
|
+
assert tokens[0].type == TokenType.TAG_SUMMARY_START
|
161
|
+
assert tokens[1].type == TokenType.MISC_COMMENT
|
162
|
+
assert tokens[1].literal == "Comment with special chars: !@#$%^&*()_+-=[]{}|;:'\",.<>?/`~"
|
163
|
+
assert tokens[2].type == TokenType.TAG_SUMMARY_END
|
164
|
+
|
165
|
+
def test_unclosed_summary_tag(self) -> None:
|
166
|
+
"""Test that unclosed summary tag creates a comment up to EOF."""
|
167
|
+
source = "<summary>This is a comment without closing tag"
|
168
|
+
lexer = Lexer(source)
|
169
|
+
|
170
|
+
tokens = []
|
171
|
+
while True:
|
172
|
+
token = lexer.next_token()
|
173
|
+
tokens.append(token)
|
174
|
+
if token.type == TokenType.MISC_EOF:
|
175
|
+
break
|
176
|
+
|
177
|
+
# Should tokenize as summary tag followed by comment content up to EOF
|
178
|
+
assert len(tokens) == 3
|
179
|
+
assert tokens[0].type == TokenType.TAG_SUMMARY_START
|
180
|
+
assert tokens[1].type == TokenType.MISC_COMMENT
|
181
|
+
assert tokens[1].literal == "This is a comment without closing tag"
|
182
|
+
assert tokens[2].type == TokenType.MISC_EOF
|
183
|
+
|
184
|
+
def test_nested_tags_in_comment(self) -> None:
|
185
|
+
"""Test comment containing nested tags."""
|
186
|
+
source = "<summary>Comment with <tag> and </tag> inside</summary>"
|
187
|
+
lexer = Lexer(source)
|
188
|
+
|
189
|
+
tokens = []
|
190
|
+
while True:
|
191
|
+
token = lexer.next_token()
|
192
|
+
tokens.append(token)
|
193
|
+
if token.type == TokenType.MISC_EOF:
|
194
|
+
break
|
195
|
+
|
196
|
+
assert len(tokens) == 4
|
197
|
+
assert tokens[0].type == TokenType.TAG_SUMMARY_START
|
198
|
+
assert tokens[1].type == TokenType.MISC_COMMENT
|
199
|
+
assert tokens[1].literal == "Comment with <tag> and </tag> inside"
|
200
|
+
assert tokens[2].type == TokenType.TAG_SUMMARY_END
|
@@ -0,0 +1,127 @@
|
|
1
|
+
from machine_dialect.lexer import Lexer, TokenType
from machine_dialect.lexer.tests.helpers import collect_all_tokens


class TestDoubleAsteriskKeywords:
    """Tests for Markdown-style ``**keyword**`` recognition in the lexer."""

    @staticmethod
    def _lex(source: str) -> list:
        """Tokenize *source* and return the full token stream."""
        return collect_all_tokens(Lexer(source))

    def test_wrapped_keyword_define(self) -> None:
        """A double-asterisk-wrapped 'define' lexes as the bare keyword."""
        toks = self._lex("**define**")
        assert len(toks) == 1
        assert toks[0].type == TokenType.KW_DEFINE
        assert toks[0].literal == "define"

    def test_wrapped_keyword_rule(self) -> None:
        """A double-asterisk-wrapped 'rule' lexes as the bare keyword."""
        toks = self._lex("**rule**")
        assert len(toks) == 1
        assert toks[0].type == TokenType.KW_RULE
        assert toks[0].literal == "rule"

    def test_wrapped_keyword_set(self) -> None:
        """A double-asterisk-wrapped 'Set' lexes as the bare keyword."""
        toks = self._lex("**Set**")
        assert len(toks) == 1
        assert toks[0].type == TokenType.KW_SET
        assert toks[0].literal == "Set"

    def test_wrapped_multi_word_keyword(self) -> None:
        """A wrapped multi-word keyword lexes as one token."""
        toks = self._lex("**give back**")
        assert len(toks) == 1
        assert toks[0].type == TokenType.KW_RETURN
        assert toks[0].literal == "give back"

    def test_unwrapped_keyword(self) -> None:
        """Bare keywords still work (backward compatibility)."""
        toks = self._lex("define")
        assert len(toks) == 1
        assert toks[0].type == TokenType.KW_DEFINE
        assert toks[0].literal == "define"

    def test_incomplete_wrapped_keyword(self) -> None:
        """Missing closing asterisks yields an operator plus a keyword."""
        toks = self._lex("**define")
        assert len(toks) == 2
        assert toks[0].type == TokenType.OP_TWO_STARS
        assert toks[0].literal == "**"
        assert toks[1].type == TokenType.KW_DEFINE
        assert toks[1].literal == "define"

    def test_non_keyword_wrapped(self) -> None:
        """A wrapped non-keyword stays operator / identifier / operator."""
        toks = self._lex("**notakeyword**")
        assert len(toks) == 3
        assert toks[0].type == TokenType.OP_TWO_STARS
        assert toks[0].literal == "**"
        assert toks[1].type == TokenType.MISC_IDENT
        assert toks[1].literal == "notakeyword"
        assert toks[2].type == TokenType.OP_TWO_STARS
        assert toks[2].literal == "**"

    def test_mixed_usage_in_expression(self) -> None:
        """Wrapped and unwrapped keywords coexist in one expression."""
        toks = self._lex("**define** a rule that takes")
        assert len(toks) == 5
        assert toks[0].type == TokenType.KW_DEFINE
        assert toks[0].literal == "define"
        assert toks[1].type == TokenType.MISC_STOPWORD  # "a"
        assert toks[2].type == TokenType.KW_RULE
        assert toks[2].literal == "rule"
        assert toks[3].type == TokenType.MISC_STOPWORD  # "that"
        assert toks[4].type == TokenType.KW_TAKE
        assert toks[4].literal == "takes"

    def test_operator_usage(self) -> None:
        """The ** exponent operator is unaffected by keyword wrapping."""
        toks = self._lex("2 ** 3")
        assert len(toks) == 3
        assert toks[0].type == TokenType.LIT_WHOLE_NUMBER
        assert toks[0].literal == "2"
        assert toks[1].type == TokenType.OP_TWO_STARS
        assert toks[1].literal == "**"
        assert toks[2].type == TokenType.LIT_WHOLE_NUMBER
        assert toks[2].literal == "3"

    def test_stopword_wrapped(self) -> None:
        """A wrapped stopword is not promoted to a keyword."""
        toks = self._lex("**the**")
        assert len(toks) == 3
        assert toks[0].type == TokenType.OP_TWO_STARS
        assert toks[0].literal == "**"
        assert toks[1].type == TokenType.MISC_STOPWORD
        assert toks[1].literal == "the"
        assert toks[2].type == TokenType.OP_TWO_STARS
        assert toks[2].literal == "**"

    def test_boolean_literal_wrapped(self) -> None:
        """A wrapped boolean literal is not promoted to a keyword."""
        toks = self._lex("**Yes**")
        assert len(toks) == 3
        assert toks[0].type == TokenType.OP_TWO_STARS
        assert toks[0].literal == "**"
        assert toks[1].type == TokenType.LIT_YES
        assert toks[1].literal == "Yes"
        assert toks[2].type == TokenType.OP_TWO_STARS
        assert toks[2].literal == "**"
@@ -0,0 +1,113 @@
|
|
1
|
+
from machine_dialect.lexer.lexer import Lexer
from machine_dialect.lexer.tests.helpers import collect_all_tokens
from machine_dialect.lexer.tokens import Token, TokenType


class TestLexerPosition:
    """Tests that the lexer attaches accurate line/position data to tokens."""

    @staticmethod
    def _lex(source: str) -> list:
        """Tokenize *source* and return every token produced."""
        return collect_all_tokens(Lexer(source))

    def test_single_line_positions(self) -> None:
        """Tokens on a single line carry correct column positions."""
        expected = [
            Token(TokenType.KW_SET, "Set", line=1, position=1),
            Token(TokenType.MISC_IDENT, "x", line=1, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=1, position=7),
            Token(TokenType.LIT_WHOLE_NUMBER, "42", line=1, position=9),
        ]
        assert self._lex("Set x = 42") == expected

    def test_multiline_positions(self) -> None:
        """Tokens across multiple lines carry correct line numbers."""
        source = """if Yes then
    give back 42
else
    gives back 0"""
        expected = [
            Token(TokenType.KW_IF, "if", line=1, position=1),
            Token(TokenType.LIT_YES, "Yes", line=1, position=4),
            Token(TokenType.KW_THEN, "then", line=1, position=8),
            Token(TokenType.KW_RETURN, "give back", line=2, position=5),
            Token(TokenType.LIT_WHOLE_NUMBER, "42", line=2, position=15),
            Token(TokenType.KW_ELSE, "else", line=3, position=1),
            Token(TokenType.KW_RETURN, "gives back", line=4, position=5),
            Token(TokenType.LIT_WHOLE_NUMBER, "0", line=4, position=16),
        ]
        assert self._lex(source) == expected

    def test_string_literal_position(self) -> None:
        """String literals keep the position of their opening quote."""
        expected = [
            Token(TokenType.KW_SET, "Set", line=1, position=1),
            Token(TokenType.MISC_IDENT, "msg", line=1, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=1, position=9),
            Token(TokenType.LIT_TEXT, '"hello world"', line=1, position=11),
        ]
        assert self._lex('Set msg = "hello world"') == expected

    def test_empty_lines_position(self) -> None:
        """Empty lines still advance the line counter."""
        source = """Set x = 1

Set y = 2"""
        expected = [
            Token(TokenType.KW_SET, "Set", line=1, position=1),
            Token(TokenType.MISC_IDENT, "x", line=1, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=1, position=7),
            Token(TokenType.LIT_WHOLE_NUMBER, "1", line=1, position=9),
            Token(TokenType.KW_SET, "Set", line=3, position=1),
            Token(TokenType.MISC_IDENT, "y", line=3, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=3, position=7),
            Token(TokenType.LIT_WHOLE_NUMBER, "2", line=3, position=9),
        ]
        assert self._lex(source) == expected

    def test_tab_position(self) -> None:
        """Position tracking with tabs."""
        # Tabs count as single characters for position
        expected = [
            Token(TokenType.KW_SET, "Set", line=1, position=1),
            Token(TokenType.MISC_IDENT, "x", line=1, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=1, position=7),
            Token(TokenType.LIT_WHOLE_NUMBER, "42", line=1, position=9),
        ]
        assert self._lex("Set\tx\t=\t42") == expected

    def test_illegal_character_position(self) -> None:
        """Illegal characters are tokenized with their correct position."""
        # Lexer no longer reports errors (parser will handle them)
        expected = [
            Token(TokenType.KW_SET, "Set", line=1, position=1),
            Token(TokenType.MISC_IDENT, "x", line=1, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=1, position=7),
            Token(TokenType.MISC_ILLEGAL, "@", line=1, position=9),
        ]
        assert self._lex("Set x = @") == expected