machine_dialect-0.1.0a1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
machine_dialect/lexer/tests/helpers.py

````diff
@@ -0,0 +1,86 @@
+from typing import TYPE_CHECKING
+
+from machine_dialect.lexer.tokens import Token, TokenType
+
+if TYPE_CHECKING:
+    from machine_dialect.lexer import Lexer
+
+
+def assert_expected_token(actual: Token, expected: Token) -> None:
+    """Assert that an actual token matches the expected token.
+
+    Args:
+        actual: The token received from the lexer.
+        expected: The expected token.
+    """
+    assert actual.type == expected.type, f"Token type mismatch: got {actual.type}, expected {expected.type}"
+    assert actual.literal == expected.literal, (
+        f"Token literal mismatch: got '{actual.literal}', expected '{expected.literal}'"
+    )
+    assert actual.line == expected.line, f"Token line mismatch: got {actual.line}, expected {expected.line}"
+    assert actual.position == expected.position, (
+        f"Token position mismatch: got {actual.position}, expected {expected.position}"
+    )
+
+
+def assert_eof(token: Token) -> None:
+    """Assert that a token is an EOF token.
+
+    Args:
+        token: The token to check.
+    """
+    assert token.type == TokenType.MISC_EOF, f"Expected EOF token, got {token.type}"
+
+
+def stream_and_assert_tokens(lexer: "Lexer", expected_tokens: list[Token]) -> None:
+    """Stream tokens from lexer and assert they match expected tokens.
+
+    This helper function:
+    1. Streams tokens one by one from the lexer
+    2. Asserts each token matches the expected token
+    3. Verifies the count matches
+    4. Asserts EOF is reached after all expected tokens
+
+    Args:
+        lexer: The lexer instance to stream tokens from.
+        expected_tokens: List of expected tokens (not including EOF).
+    """
+    actual_count = 0
+
+    for i, expected in enumerate(expected_tokens):
+        actual = lexer.next_token()
+        assert actual.type != TokenType.MISC_EOF, f"Got EOF at token {i}, expected {len(expected_tokens)} tokens"
+        assert_expected_token(actual, expected)
+        actual_count += 1
+
+    # Verify we get EOF next
+    eof_token = lexer.next_token()
+    assert_eof(eof_token)
+
+    # Verify count
+    assert actual_count == len(expected_tokens), f"Expected {len(expected_tokens)} tokens, got {actual_count}"
+
+
+def token(token_type: TokenType, literal: str, line: int = 1, position: int = 0) -> Token:
+    """Helper function to create tokens with default line and position values for tests."""
+    return Token(token_type, literal, line, position)
+
+
+def collect_all_tokens(lexer: "Lexer") -> list[Token]:
+    """Collect all tokens from lexer until EOF (excluding EOF token).
+
+    This is useful for tests that need to examine all tokens but don't
+    want to repeatedly write the streaming loop.
+
+    Args:
+        lexer: The lexer instance to stream tokens from.
+
+    Returns:
+        List of all tokens excluding the EOF token.
+    """
+    tokens = []
+    token = lexer.next_token()
+    while token.type != TokenType.MISC_EOF:
+        tokens.append(token)
+        token = lexer.next_token()
+    return tokens
````
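Taken together, these helpers keep the lexer tests declarative: a test either builds an expected `Token` list (via the `token` factory) or drains the lexer with `collect_all_tokens`, then asserts on what came back. A minimal sketch of a hypothetical test written against them (the test name and assertion style are illustrative; the expected literals mirror `test_mixed_usage_in_expression` further below):

```python
from machine_dialect.lexer import Lexer
from machine_dialect.lexer.tests.helpers import collect_all_tokens


def test_set_statement_sketch() -> None:  # hypothetical test, not part of the package
    tokens = collect_all_tokens(Lexer("Set `x` to y"))
    assert [t.literal for t in tokens] == ["Set", "x", "to", "y"]
```

Note that `stream_and_assert_tokens` also compares `line` and `position`, so expected tokens built with the `token` factory (which defaults to `line=1`, `position=0`) only match when the real coordinates are passed explicitly or happen to coincide with those defaults.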
machine_dialect/lexer/tests/test_apostrophe_identifiers.py

````diff
@@ -0,0 +1,122 @@
+"""Tests for apostrophe support in identifiers."""
+
+from machine_dialect.lexer.lexer import Lexer
+from machine_dialect.lexer.tokens import TokenType, is_valid_identifier
+
+
+class TestApostropheIdentifiers:
+    """Test apostrophe support in identifiers."""
+
+    def test_valid_apostrophe_identifiers(self) -> None:
+        """Test that valid apostrophe patterns are accepted."""
+        valid_identifiers = [
+            "don't",
+            "can't",
+            "won't",
+            "I'm",
+            "it's",
+            "person's",
+            "person's name",
+            "I don't like",
+            "can't wait",
+            "won't stop",
+            "it's working",
+            "user's data",
+            "server's response",
+        ]
+
+        for identifier in valid_identifiers:
+            assert is_valid_identifier(identifier), f"'{identifier}' should be valid"
+
+    def test_invalid_apostrophe_identifiers(self) -> None:
+        """Test that invalid apostrophe patterns are rejected."""
+        invalid_identifiers = [
+            "'hello",  # Starts with apostrophe
+            "hello'",  # Ends with apostrophe
+            "'",  # Just apostrophe
+            "hello 'world",  # Word starts with apostrophe
+            "hello world'",  # Word ends with apostrophe
+            "'hello world",  # Starts with apostrophe
+            "hello world'",  # Ends with apostrophe
+        ]
+
+        for identifier in invalid_identifiers:
+            assert not is_valid_identifier(identifier), f"'{identifier}' should be invalid"
+
+    def test_backtick_identifiers_with_apostrophes(self) -> None:
+        """Test that backtick-wrapped identifiers with apostrophes work correctly."""
+        test_cases = [
+            ("Set `don't` to _true_.", "don't", TokenType.MISC_IDENT),
+            ("Set `I'm happy` to _yes_.", "I'm happy", TokenType.MISC_IDENT),
+            ('Set `person\'s name` to _"John"_.', "person's name", TokenType.MISC_IDENT),
+            ("Set `I don't like` to _no_.", "I don't like", TokenType.MISC_IDENT),
+            ("Set `can't wait` to _true_.", "can't wait", TokenType.MISC_IDENT),
+        ]
+
+        for source, expected_ident, expected_type in test_cases:
+            lexer = Lexer(source)
+
+            # Skip "Set"
+            token = lexer.next_token()
+            assert token.type == TokenType.KW_SET
+
+            # Get the identifier
+            token = lexer.next_token()
+            assert token.type == expected_type, f"Expected {expected_type}, got {token.type}"
+            assert token.literal == expected_ident, f"Expected '{expected_ident}', got '{token.literal}'"
+
+    def test_invalid_apostrophe_patterns_in_backticks(self) -> None:
+        """Test that invalid apostrophe patterns still fail in backticks."""
+        invalid_sources = [
+            "`'hello`",  # Starts with apostrophe
+            "`hello'`",  # Ends with apostrophe
+            "`hello 'world`",  # Word starts with apostrophe
+            "`hello world'`",  # Word ends with apostrophe
+        ]
+
+        for source in invalid_sources:
+            lexer = Lexer(source)
+            token = lexer.next_token()
+            # Should return MISC_ILLEGAL since the identifier is invalid
+            assert token.type == TokenType.MISC_ILLEGAL, f"Source '{source}' should produce MISC_ILLEGAL"
+
+    def test_apostrophe_s_possessive(self) -> None:
+        """Test that possessive 's pattern works correctly."""
+        # Test with our new syntax: possessive followed by string literal
+        source = '`person`\'s "name"'
+        lexer = Lexer(source)
+
+        # Should get the possessive token
+        token = lexer.next_token()
+        assert token.type == TokenType.PUNCT_APOSTROPHE_S
+        assert token.literal == "person"
+
+        # Next should be a string literal
+        token = lexer.next_token()
+        assert token.type == TokenType.LIT_TEXT
+        assert token.literal == '"name"'
+
+        # Also test with a non-keyword identifier in backticks
+        source2 = "`person`'s `property`"
+        lexer2 = Lexer(source2)
+
+        token = lexer2.next_token()
+        assert token.type == TokenType.PUNCT_APOSTROPHE_S
+        assert token.literal == "person"
+
+        token = lexer2.next_token()
+        assert token.type == TokenType.MISC_IDENT
+        assert token.literal == "property"
+
+    def test_mixed_valid_characters(self) -> None:
+        """Test identifiers with mixed valid characters including apostrophes."""
+        valid_identifiers = [
+            "user_1's_data",
+            "don't-stop",
+            "can't_wait_2",
+            "person's-item",
+            "it's_working-now",
+        ]
+
+        for identifier in valid_identifiers:
+            assert is_valid_identifier(identifier), f"'{identifier}' should be valid"
````
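The rule these cases pin down is that an apostrophe is legal only strictly inside a word: never at the start or end of any space-separated word. As an illustration only (this is not the package's `is_valid_identifier`, which also validates the other identifier characters), the apostrophe portion of the rule amounts to:

```python
def apostrophe_rule_holds(name: str) -> bool:
    """Illustrative restatement of the apostrophe rule from the tests above."""
    words = name.split(" ")
    # Every word must be non-empty and must not begin or end with an apostrophe.
    return all(
        word != "" and not word.startswith("'") and not word.endswith("'")
        for word in words
    )


assert apostrophe_rule_holds("person's name")
assert not apostrophe_rule_holds("'hello")
```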
machine_dialect/lexer/tests/test_backtick_identifiers.py

````diff
@@ -0,0 +1,140 @@
+from machine_dialect.lexer import Lexer, TokenType
+from machine_dialect.lexer.tests.helpers import collect_all_tokens
+
+
+class TestBacktickIdentifiers:
+    def test_backtick_wrapped_identifier(self) -> None:
+        """Test backtick-wrapped identifier."""
+        source = "`identifier`"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.MISC_IDENT
+        assert tokens[0].literal == "identifier"
+
+    def test_backtick_wrapped_keyword(self) -> None:
+        """Test that backtick-wrapped keywords become identifiers."""
+        source = "`define`"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.MISC_IDENT
+        assert tokens[0].literal == "define"
+
+    def test_backtick_wrapped_number(self) -> None:
+        """Test that backtick-wrapped numbers are not valid identifiers."""
+        source = "`42`"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        # `42` is not a valid identifier, so backtick is illegal, then 42, then backtick
+        assert len(tokens) == 3
+        assert tokens[0].type == TokenType.MISC_ILLEGAL
+        assert tokens[0].literal == "`"
+        assert tokens[1].type == TokenType.LIT_WHOLE_NUMBER
+        assert tokens[1].literal == "42"
+        assert tokens[2].type == TokenType.MISC_ILLEGAL
+        assert tokens[2].literal == "`"
+
+    def test_empty_backticks(self) -> None:
+        """Test that empty backticks are treated as illegal."""
+        source = "``"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        # Empty content is not a valid identifier, so both backticks are illegal
+        assert len(tokens) == 2
+        assert tokens[0].type == TokenType.MISC_ILLEGAL
+        assert tokens[0].literal == "`"
+        assert tokens[1].type == TokenType.MISC_ILLEGAL
+        assert tokens[1].literal == "`"
+
+    def test_unwrapped_identifier(self) -> None:
+        """Test unwrapped identifier (backward compatibility)."""
+        source = "identifier"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.MISC_IDENT
+        assert tokens[0].literal == "identifier"
+
+    def test_mixed_usage_in_expression(self) -> None:
+        """Test both wrapped and unwrapped identifiers in same expression."""
+        source = "Set `x` to y"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+        assert len(tokens) == 4
+        assert tokens[0].type == TokenType.KW_SET
+        assert tokens[0].literal == "Set"
+        assert tokens[1].type == TokenType.MISC_IDENT
+        assert tokens[1].literal == "x"
+        assert tokens[2].type == TokenType.KW_TO
+        assert tokens[2].literal == "to"
+        assert tokens[3].type == TokenType.MISC_IDENT
+        assert tokens[3].literal == "y"
+
+    def test_unclosed_backtick(self) -> None:
+        """Test unclosed backtick without closing backtick is treated as illegal."""
+        source = "`unclosed"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        # Without closing backtick, the opening backtick is illegal
+        assert len(tokens) == 2
+        assert tokens[0].type == TokenType.MISC_ILLEGAL
+        assert tokens[0].literal == "`"
+        assert tokens[1].type == TokenType.MISC_IDENT
+        assert tokens[1].literal == "unclosed"
+
+    def test_backtick_with_spaces(self) -> None:
+        """Test backtick with spaces inside."""
+        source = "`with spaces`"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.MISC_IDENT
+        assert tokens[0].literal == "with spaces"
+
+    def test_triple_backticks_still_work(self) -> None:
+        """Test that triple backticks still work as string literals."""
+        source = "```code block```"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_TRIPLE_BACKTICK
+        assert tokens[0].literal == "code block"
+
+    def test_backtick_with_hyphens(self) -> None:
+        """Test backtick with hyphens inside."""
+        source = "`my-identifier`"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.MISC_IDENT
+        assert tokens[0].literal == "my-identifier"
+
+    def test_backtick_with_spaces_and_hyphens(self) -> None:
+        """Test backtick with both spaces and hyphens."""
+        source = "`my-complex identifier`"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.MISC_IDENT
+        assert tokens[0].literal == "my-complex identifier"
+
+    def test_backtick_wrapped_type_keyword(self) -> None:
+        """Test that type keywords like 'number' become identifiers in backticks."""
+        source = "`number`"
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.MISC_IDENT
+        assert tokens[0].literal == "number"
````
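Read together, these tests describe one decision the lexer makes at a backtick: find the closing backtick, then either emit the enclosed text as a `MISC_IDENT` (when it is a valid identifier, even if it spells a keyword) or emit the lone backtick as `MISC_ILLEGAL` and let scanning resume on the enclosed text. A hedged sketch of that decision, assuming the `is_valid_identifier` helper from `tokens.py` (the real lexer must additionally special-case triple backticks, per `test_triple_backticks_still_work`, and may be structured quite differently):

```python
from machine_dialect.lexer.tokens import is_valid_identifier


def classify_backtick_span(source: str, start: int) -> tuple[str, str, int]:
    """Illustrative decision logic only; not the package's lexer code.

    Returns (token_kind, literal, next_scan_position).
    """
    close = source.find("`", start + 1)
    if close == -1:
        # Unclosed: the opening backtick alone is illegal (test_unclosed_backtick).
        return ("MISC_ILLEGAL", "`", start + 1)
    content = source[start + 1 : close]
    if is_valid_identifier(content):
        # Backticks force the content to be an identifier, even keywords.
        return ("MISC_IDENT", content, close + 1)
    # Invalid content (e.g. a number or nothing at all): emit the backtick
    # as illegal and rescan the content as ordinary tokens, which is how
    # `42` yields ILLEGAL, 42, ILLEGAL in test_backtick_wrapped_number.
    return ("MISC_ILLEGAL", "`", start + 1)
```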
machine_dialect/lexer/tests/test_boolean_literals.py

````diff
@@ -0,0 +1,108 @@
+from machine_dialect.lexer import Lexer, Token, TokenMetaType, TokenType
+from machine_dialect.lexer.tests.helpers import assert_eof
+
+
+def is_literal_token(token: Token) -> bool:
+    return token.type.meta_type == TokenMetaType.LIT
+
+
+class TestBooleanLiterals:
+    def test_wrapped_yes(self) -> None:
+        """Test underscore-wrapped Yes literal."""
+        source = "_Yes_"
+        lexer = Lexer(source)
+
+        token = lexer.next_token()
+        assert token.type == TokenType.LIT_YES
+        assert token.literal == "Yes"  # Canonical form without underscores
+        assert is_literal_token(token)
+
+        assert_eof(lexer.next_token())
+
+    def test_wrapped_no(self) -> None:
+        """Test underscore-wrapped No literal."""
+        source = "_No_"
+        lexer = Lexer(source)
+
+        token = lexer.next_token()
+        assert token.type == TokenType.LIT_NO
+        assert token.literal == "No"  # Canonical form without underscores
+        assert is_literal_token(token)
+
+        assert_eof(lexer.next_token())
+
+    def test_unwrapped_yes(self) -> None:
+        """Test unwrapped Yes literal."""
+        source = "Yes"
+        lexer = Lexer(source)
+
+        token = lexer.next_token()
+        assert token.type == TokenType.LIT_YES
+        assert token.literal == "Yes"
+        assert is_literal_token(token)
+
+        assert_eof(lexer.next_token())
+
+    def test_unwrapped_no(self) -> None:
+        """Test unwrapped No literal."""
+        source = "No"
+        lexer = Lexer(source)
+
+        token = lexer.next_token()
+        assert token.type == TokenType.LIT_NO
+        assert token.literal == "No"
+        assert is_literal_token(token)
+
+        assert_eof(lexer.next_token())
+
+    def test_boolean_in_expression(self) -> None:
+        """Test boolean literals in expressions."""
+        source = "if x > 0 then give back _Yes_ else give back No"
+        lexer = Lexer(source)
+
+        # Collect all tokens
+        tokens = []
+        while True:
+            token = lexer.next_token()
+            if token.type == TokenType.MISC_EOF:
+                break
+            tokens.append(token)
+
+        # Find the boolean tokens
+        boolean_tokens = [t for t in tokens if t.type in (TokenType.LIT_YES, TokenType.LIT_NO)]
+        assert len(boolean_tokens) == 2
+
+        # Both booleans are stored in canonical form
+        assert boolean_tokens[0].type == TokenType.LIT_YES
+        assert boolean_tokens[0].literal == "Yes"
+
+        assert boolean_tokens[1].type == TokenType.LIT_NO
+        assert boolean_tokens[1].literal == "No"
+
+    def test_lowercase_yes_no(self) -> None:
+        """Test that lowercase yes/no are recognized as boolean literals."""
+        source = "yes no"
+        lexer = Lexer(source)
+
+        # Lowercase yes/no are recognized as boolean literals
+        token1 = lexer.next_token()
+        assert token1.type == TokenType.LIT_YES
+        assert token1.literal == "Yes"  # Canonical form
+
+        token2 = lexer.next_token()
+        assert token2.type == TokenType.LIT_NO
+        assert token2.literal == "No"  # Canonical form
+
+        assert_eof(lexer.next_token())
+
+    def test_incomplete_wrapped_boolean(self) -> None:
+        """Test incomplete wrapped boolean falls back to identifier."""
+        source = "_Yes"  # Missing closing underscore
+        lexer = Lexer(source)
+
+        token = lexer.next_token()
+        assert token.type == TokenType.MISC_IDENT
+        assert token.literal == "_Yes"
+        assert not is_literal_token(token)
+
+        assert_eof(lexer.next_token())
````
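The invariant throughout is that a boolean token's `literal` is stored in canonical form ("Yes"/"No") regardless of how the source spelled it, while a dangling wrapper like `_Yes` falls back to an identifier. Combined with the case-insensitivity tests below (where `True`/`False` also map to `LIT_YES`/`LIT_NO`), the canonicalization amounts to a small lookup; a hypothetical restatement, not the package's code:

```python
def canonical_boolean(word: str) -> str | None:
    """Map any accepted boolean spelling to its canonical literal, else None."""
    return {"yes": "Yes", "true": "Yes", "no": "No", "false": "No"}.get(word.lower())


assert canonical_boolean("yEs") == "Yes"
assert canonical_boolean("FALSE") == "No"
assert canonical_boolean("maybe") is None  # not a boolean; lexed some other way
```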
machine_dialect/lexer/tests/test_case_insensitive_keywords.py

````diff
@@ -0,0 +1,188 @@
+from machine_dialect.lexer.lexer import Lexer
+from machine_dialect.lexer.tests.helpers import collect_all_tokens
+from machine_dialect.lexer.tokens import TokenType, keywords_mapping
+
+
+class TestCaseInsensitiveKeywords:
+    """Test case-insensitive keyword matching while preserving canonical form."""
+
+    def test_all_keywords_case_variations(self) -> None:
+        """Test that all keywords in keywords_mapping are case-insensitive."""
+        for canonical_form, token_type in keywords_mapping.items():
+            # Test different case variations
+            test_cases = [
+                canonical_form,  # Original form
+                canonical_form.lower(),  # Lowercase
+                canonical_form.upper(),  # Uppercase
+                canonical_form.title(),  # Title case
+            ]
+
+            # Add a mixed case variant
+            if len(canonical_form) > 2:
+                # Create a mixed case like "fLoAt" or "tRuE"
+                mixed = ""
+                for i, char in enumerate(canonical_form):
+                    if char == " ":
+                        mixed += " "
+                    elif i % 2 == 0:
+                        mixed += char.lower()
+                    else:
+                        mixed += char.upper()
+                test_cases.append(mixed)
+
+            for variant in test_cases:
+                # Skip if variant is the same as one we already tested
+                if test_cases.count(variant) > 1:
+                    continue
+
+                lexer = Lexer(variant)
+                tokens = collect_all_tokens(lexer)
+                assert len(tokens) == 1, f"Expected 1 token for '{variant}', got {len(tokens)}"
+                assert tokens[0].type == token_type, f"Expected {token_type} for '{variant}', got {tokens[0].type}"
+                # Special case for boolean literals which canonicalize to Yes/No
+                if token_type in (TokenType.LIT_YES, TokenType.LIT_NO):
+                    expected = "Yes" if token_type == TokenType.LIT_YES else "No"
+                    assert tokens[0].literal == expected, (
+                        f"Expected literal '{expected}' for '{variant}', got '{tokens[0].literal}'"
+                    )
+                else:
+                    assert tokens[0].literal == canonical_form, (
+                        f"Expected literal '{canonical_form}' for '{variant}', got '{tokens[0].literal}'"
+                    )
+
+    def test_double_asterisk_keywords_all_cases(self) -> None:
+        """Test that all keywords work with double-asterisk wrapping in different cases."""
+        # Test a subset of keywords with double asterisks
+        test_keywords = ["define", "Float", "Integer", "Boolean", "rule", "Set", "Tell"]
+
+        for keyword in test_keywords:
+            if keyword not in keywords_mapping:
+                continue
+
+            token_type = keywords_mapping[keyword]
+            test_cases = [
+                f"**{keyword}**",
+                f"**{keyword.lower()}**",
+                f"**{keyword.upper()}**",
+            ]
+
+            for source in test_cases:
+                lexer = Lexer(source)
+                tokens = collect_all_tokens(lexer)
+                assert len(tokens) == 1
+                assert tokens[0].type == token_type
+                # Special handling for boolean literals
+                if token_type in (TokenType.LIT_YES, TokenType.LIT_NO):
+                    expected = "Yes" if token_type == TokenType.LIT_YES else "No"
+                    assert tokens[0].literal == expected
+                else:
+                    assert tokens[0].literal == keyword
+
+    def test_backtick_keywords_all_cases(self) -> None:
+        """Test that keywords in backticks become identifiers (case-insensitive)."""
+        # Test a subset of keywords with backticks
+        test_keywords = ["Float", "Integer", "True", "False", "define", "rule"]
+
+        for keyword in test_keywords:
+            if keyword not in keywords_mapping:
+                continue
+
+            test_cases = [
+                f"`{keyword}`",
+                f"`{keyword.lower()}`",
+                f"`{keyword.upper()}`",
+            ]
+
+            for source in test_cases:
+                lexer = Lexer(source)
+                tokens = collect_all_tokens(lexer)
+                assert len(tokens) == 1
+                # Backticks force content to be identifiers
+                assert tokens[0].type == TokenType.MISC_IDENT
+                # The literal should be the actual text within backticks
+                assert tokens[0].literal.lower() == keyword.lower()
+
+    def test_underscore_wrapped_booleans_all_cases(self) -> None:
+        """Test underscore-wrapped boolean literals in different cases."""
+        # Test both True/False and Yes/No inputs
+        test_inputs = [
+            ("True", TokenType.LIT_YES, "Yes"),
+            ("False", TokenType.LIT_NO, "No"),
+            ("Yes", TokenType.LIT_YES, "Yes"),
+            ("No", TokenType.LIT_NO, "No"),
+        ]
+
+        for input_form, token_type, expected_literal in test_inputs:
+            test_cases = [
+                f"_{input_form}_",
+                f"_{input_form.lower()}_",
+                f"_{input_form.upper()}_",
+            ]
+
+            for source in test_cases:
+                lexer = Lexer(source)
+                tokens = collect_all_tokens(lexer)
+                assert len(tokens) == 1
+                assert tokens[0].type == token_type
+                assert tokens[0].literal == expected_literal
+
+    def test_identifiers_preserve_case(self) -> None:
+        """Test that non-keyword identifiers preserve their case."""
+        # These should NOT match any keywords
+        test_cases = [
+            ("myVariable", "myVariable"),
+            ("MyVariable", "MyVariable"),
+            ("MYVARIABLE", "MYVARIABLE"),
+            ("userName", "userName"),
+            ("floatValue", "floatValue"),  # Contains "float" but not a keyword
+            ("integerCount", "integerCount"),  # Contains "integer" but not a keyword
+        ]
+
+        for source, expected_literal in test_cases:
+            lexer = Lexer(source)
+            tokens = collect_all_tokens(lexer)
+            assert len(tokens) == 1
+            assert tokens[0].type == TokenType.MISC_IDENT
+            assert tokens[0].literal == expected_literal
+
+    def test_complex_expression_mixed_case(self) -> None:
+        """Test complex expressions with mixed case keywords."""
+        test_cases = [
+            ("SET x AS integer", ["Set", "x", "as", "integer"]),
+            ("set X as INTEGER", ["Set", "X", "as", "INTEGER"]),
+            ("define RULE myFunc", ["define", "rule", "myFunc"]),
+            ("DEFINE rule MyFunc", ["define", "rule", "MyFunc"]),
+            ("if YES then GIVE BACK no", ["if", "Yes", "then", "give back", "No"]),
+        ]
+
+        for source, expected_literals in test_cases:
+            lexer = Lexer(source)
+            tokens = collect_all_tokens(lexer)
+            assert len(tokens) == len(expected_literals)
+
+            for token, expected_literal in zip(tokens, expected_literals, strict=False):
+                assert token.literal == expected_literal
+
+    def test_multi_word_keywords_preserve_spacing(self) -> None:
+        """Test that multi-word keywords preserve internal spacing but are case-insensitive."""
+        multi_word_keywords = [
+            ("give back", TokenType.KW_RETURN),
+            ("gives back", TokenType.KW_RETURN),
+        ]
+
+        for canonical_form, token_type in multi_word_keywords:
+            # Test with different cases but same spacing
+            test_cases = [
+                canonical_form,
+                canonical_form.upper(),
+                canonical_form.title(),
+                # Mixed case for each word
+                " ".join(word.upper() if i % 2 == 0 else word.lower() for i, word in enumerate(canonical_form.split())),
+            ]
+
+            for variant in test_cases:
+                lexer = Lexer(variant)
+                tokens = collect_all_tokens(lexer)
+                assert len(tokens) == 1
+                assert tokens[0].type == token_type
+                assert tokens[0].literal == canonical_form
````
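One straightforward way to get the behavior these tests demand is to index `keywords_mapping` by lowercased spelling and emit the canonical spelling as the token literal, leaving unmatched words as case-preserving identifiers. A sketch under that assumption (illustrative only, not the package's lexer; it also omits the boolean special case above, where `True`/`False` canonicalize further to `Yes`/`No`):

```python
from machine_dialect.lexer.tokens import TokenType, keywords_mapping

# Case-insensitive index that remembers each keyword's canonical spelling.
_LOOKUP = {spelling.lower(): (spelling, ttype) for spelling, ttype in keywords_mapping.items()}


def match_word(word: str) -> tuple[TokenType, str]:
    """Illustrative keyword lookup; not the package's lexer code."""
    hit = _LOOKUP.get(word.lower())
    if hit is None:
        return (TokenType.MISC_IDENT, word)  # identifiers keep their case
    canonical, token_type = hit
    return (token_type, canonical)  # keywords report their canonical form
```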