machine_dialect-0.1.0a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0

machine_dialect/lexer/tests/helpers.py
@@ -0,0 +1,86 @@
+ from typing import TYPE_CHECKING
+
+ from machine_dialect.lexer.tokens import Token, TokenType
+
+ if TYPE_CHECKING:
+     from machine_dialect.lexer import Lexer
+
+
+ def assert_expected_token(actual: Token, expected: Token) -> None:
+     """Assert that an actual token matches the expected token.
+
+     Args:
+         actual: The token received from the lexer.
+         expected: The expected token.
+     """
+     assert actual.type == expected.type, f"Token type mismatch: got {actual.type}, expected {expected.type}"
+     assert actual.literal == expected.literal, (
+         f"Token literal mismatch: got '{actual.literal}', expected '{expected.literal}'"
+     )
+     assert actual.line == expected.line, f"Token line mismatch: got {actual.line}, expected {expected.line}"
+     assert actual.position == expected.position, (
+         f"Token position mismatch: got {actual.position}, expected {expected.position}"
+     )
+
+
+ def assert_eof(token: Token) -> None:
+     """Assert that a token is an EOF token.
+
+     Args:
+         token: The token to check.
+     """
+     assert token.type == TokenType.MISC_EOF, f"Expected EOF token, got {token.type}"
+
+
+ def stream_and_assert_tokens(lexer: "Lexer", expected_tokens: list[Token]) -> None:
+     """Stream tokens from lexer and assert they match expected tokens.
+
+     This helper function:
+     1. Streams tokens one by one from the lexer
+     2. Asserts each token matches the expected token
+     3. Verifies the count matches
+     4. Asserts EOF is reached after all expected tokens
+
+     Args:
+         lexer: The lexer instance to stream tokens from.
+         expected_tokens: List of expected tokens (not including EOF).
+     """
+     actual_count = 0
+
+     for i, expected in enumerate(expected_tokens):
+         actual = lexer.next_token()
+         assert actual.type != TokenType.MISC_EOF, f"Got EOF at token {i}, expected {len(expected_tokens)} tokens"
+         assert_expected_token(actual, expected)
+         actual_count += 1
+
+     # Verify we get EOF next
+     eof_token = lexer.next_token()
+     assert_eof(eof_token)
+
+     # Verify count
+     assert actual_count == len(expected_tokens), f"Expected {len(expected_tokens)} tokens, got {actual_count}"
+
+
+ def token(token_type: TokenType, literal: str, line: int = 1, position: int = 0) -> Token:
+     """Helper function to create tokens with default line and position values for tests."""
+     return Token(token_type, literal, line, position)
+
+
+ def collect_all_tokens(lexer: "Lexer") -> list[Token]:
+     """Collect all tokens from lexer until EOF (excluding EOF token).
+
+     This is useful for tests that need to examine all tokens but don't
+     want to repeatedly write the streaming loop.
+
+     Args:
+         lexer: The lexer instance to stream tokens from.
+
+     Returns:
+         List of all tokens excluding the EOF token.
+     """
+     tokens = []
+     token = lexer.next_token()
+     while token.type != TokenType.MISC_EOF:
+         tokens.append(token)
+         token = lexer.next_token()
+     return tokens
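
These helpers keep the token-level tests below short. A minimal usage sketch, mirroring the "Set `x` to y" case exercised in test_backtick_identifiers.py further down (assumes the wheel is installed so both imports resolve):

    from machine_dialect.lexer import Lexer, TokenType
    from machine_dialect.lexer.tests.helpers import collect_all_tokens

    # Compare token types and literals; line/position checks are what
    # stream_and_assert_tokens adds on top of this.
    tokens = collect_all_tokens(Lexer("Set `x` to y"))
    assert [t.type for t in tokens] == [
        TokenType.KW_SET,
        TokenType.MISC_IDENT,
        TokenType.KW_TO,
        TokenType.MISC_IDENT,
    ]
    assert [t.literal for t in tokens] == ["Set", "x", "to", "y"]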

machine_dialect/lexer/tests/test_apostrophe_identifiers.py
@@ -0,0 +1,122 @@
+ """Tests for apostrophe support in identifiers."""
+
+ from machine_dialect.lexer.lexer import Lexer
+ from machine_dialect.lexer.tokens import TokenType, is_valid_identifier
+
+
+ class TestApostropheIdentifiers:
+     """Test apostrophe support in identifiers."""
+
+     def test_valid_apostrophe_identifiers(self) -> None:
+         """Test that valid apostrophe patterns are accepted."""
+         valid_identifiers = [
+             "don't",
+             "can't",
+             "won't",
+             "I'm",
+             "it's",
+             "person's",
+             "person's name",
+             "I don't like",
+             "can't wait",
+             "won't stop",
+             "it's working",
+             "user's data",
+             "server's response",
+         ]
+
+         for identifier in valid_identifiers:
+             assert is_valid_identifier(identifier), f"'{identifier}' should be valid"
+
+     def test_invalid_apostrophe_identifiers(self) -> None:
+         """Test that invalid apostrophe patterns are rejected."""
+         invalid_identifiers = [
+             "'hello",  # Starts with apostrophe
+             "hello'",  # Ends with apostrophe
+             "'",  # Just apostrophe
+             "hello 'world",  # Word starts with apostrophe
+             "hello world'",  # Word ends with apostrophe
+             "'hello world",  # Starts with apostrophe
+             "hello world'",  # Ends with apostrophe
+         ]
+
+         for identifier in invalid_identifiers:
+             assert not is_valid_identifier(identifier), f"'{identifier}' should be invalid"
+
+     def test_backtick_identifiers_with_apostrophes(self) -> None:
+         """Test that backtick-wrapped identifiers with apostrophes work correctly."""
+         test_cases = [
+             ("Set `don't` to _true_.", "don't", TokenType.MISC_IDENT),
+             ("Set `I'm happy` to _yes_.", "I'm happy", TokenType.MISC_IDENT),
+             ('Set `person\'s name` to _"John"_.', "person's name", TokenType.MISC_IDENT),
+             ("Set `I don't like` to _no_.", "I don't like", TokenType.MISC_IDENT),
+             ("Set `can't wait` to _true_.", "can't wait", TokenType.MISC_IDENT),
+         ]
+
+         for source, expected_ident, expected_type in test_cases:
+             lexer = Lexer(source)
+
+             # Skip "Set"
+             token = lexer.next_token()
+             assert token.type == TokenType.KW_SET
+
+             # Get the identifier
+             token = lexer.next_token()
+             assert token.type == expected_type, f"Expected {expected_type}, got {token.type}"
+             assert token.literal == expected_ident, f"Expected '{expected_ident}', got '{token.literal}'"
+
+     def test_invalid_apostrophe_patterns_in_backticks(self) -> None:
+         """Test that invalid apostrophe patterns still fail in backticks."""
+         invalid_sources = [
+             "`'hello`",  # Starts with apostrophe
+             "`hello'`",  # Ends with apostrophe
+             "`hello 'world`",  # Word starts with apostrophe
+             "`hello world'`",  # Word ends with apostrophe
+         ]
+
+         for source in invalid_sources:
+             lexer = Lexer(source)
+             token = lexer.next_token()
+             # Should return MISC_ILLEGAL since the identifier is invalid
+             assert token.type == TokenType.MISC_ILLEGAL, f"Source '{source}' should produce MISC_ILLEGAL"
+
+     def test_apostrophe_s_possessive(self) -> None:
+         """Test that possessive 's pattern works correctly."""
+         # Test with our new syntax: possessive followed by string literal
+         source = '`person`\'s "name"'
+         lexer = Lexer(source)
+
+         # Should get the possessive token
+         token = lexer.next_token()
+         assert token.type == TokenType.PUNCT_APOSTROPHE_S
+         assert token.literal == "person"
+
+         # Next should be a string literal
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_TEXT
+         assert token.literal == '"name"'
+
+         # Also test with a non-keyword identifier in backticks
+         source2 = "`person`'s `property`"
+         lexer2 = Lexer(source2)
+
+         token = lexer2.next_token()
+         assert token.type == TokenType.PUNCT_APOSTROPHE_S
+         assert token.literal == "person"
+
+         token = lexer2.next_token()
+         assert token.type == TokenType.MISC_IDENT
+         assert token.literal == "property"
+
+     def test_mixed_valid_characters(self) -> None:
+         """Test identifiers with mixed valid characters including apostrophes."""
+         valid_identifiers = [
+             "user_1's_data",
+             "don't-stop",
+             "can't_wait_2",
+             "person's-item",
+             "it's_working-now",
+         ]
+
+         for identifier in valid_identifiers:
+             assert is_valid_identifier(identifier), f"'{identifier}' should be valid"
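
The rule these tests pin down is that apostrophes (like hyphens and underscores) are legal only in the interior of a word, never at a word's edge, across every space-separated word of a multi-word identifier. A hedged sketch of that rule, not the package's actual is_valid_identifier implementation:

    import re

    # One word: a leading word character, then any mix of word characters,
    # apostrophes, and hyphens (assumed character set, simplified).
    _WORD = re.compile(r"[A-Za-z0-9_][A-Za-z0-9_'-]*")

    def looks_valid(identifier: str) -> bool:
        words = identifier.split(" ")
        return all(
            bool(w)
            and not w.startswith("'")
            and not w.endswith("'")
            and _WORD.fullmatch(w) is not None
            for w in words
        )

    assert looks_valid("person's name")
    assert not looks_valid("'hello")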

machine_dialect/lexer/tests/test_backtick_identifiers.py
@@ -0,0 +1,140 @@
+ from machine_dialect.lexer import Lexer, TokenType
+ from machine_dialect.lexer.tests.helpers import collect_all_tokens
+
+
+ class TestBacktickIdentifiers:
+     def test_backtick_wrapped_identifier(self) -> None:
+         """Test backtick-wrapped identifier."""
+         source = "`identifier`"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         assert len(tokens) == 1
+         assert tokens[0].type == TokenType.MISC_IDENT
+         assert tokens[0].literal == "identifier"
+
+     def test_backtick_wrapped_keyword(self) -> None:
+         """Test that backtick-wrapped keywords become identifiers."""
+         source = "`define`"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         assert len(tokens) == 1
+         assert tokens[0].type == TokenType.MISC_IDENT
+         assert tokens[0].literal == "define"
+
+     def test_backtick_wrapped_number(self) -> None:
+         """Test that backtick-wrapped numbers are not valid identifiers."""
+         source = "`42`"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         # `42` is not a valid identifier, so backtick is illegal, then 42, then backtick
+         assert len(tokens) == 3
+         assert tokens[0].type == TokenType.MISC_ILLEGAL
+         assert tokens[0].literal == "`"
+         assert tokens[1].type == TokenType.LIT_WHOLE_NUMBER
+         assert tokens[1].literal == "42"
+         assert tokens[2].type == TokenType.MISC_ILLEGAL
+         assert tokens[2].literal == "`"
+
+     def test_empty_backticks(self) -> None:
+         """Test that empty backticks are treated as illegal."""
+         source = "``"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         # Empty content is not a valid identifier, so both backticks are illegal
+         assert len(tokens) == 2
+         assert tokens[0].type == TokenType.MISC_ILLEGAL
+         assert tokens[0].literal == "`"
+         assert tokens[1].type == TokenType.MISC_ILLEGAL
+         assert tokens[1].literal == "`"
+
+     def test_unwrapped_identifier(self) -> None:
+         """Test unwrapped identifier (backward compatibility)."""
+         source = "identifier"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         assert len(tokens) == 1
+         assert tokens[0].type == TokenType.MISC_IDENT
+         assert tokens[0].literal == "identifier"
+
+     def test_mixed_usage_in_expression(self) -> None:
+         """Test both wrapped and unwrapped identifiers in same expression."""
+         source = "Set `x` to y"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+         assert len(tokens) == 4
+         assert tokens[0].type == TokenType.KW_SET
+         assert tokens[0].literal == "Set"
+         assert tokens[1].type == TokenType.MISC_IDENT
+         assert tokens[1].literal == "x"
+         assert tokens[2].type == TokenType.KW_TO
+         assert tokens[2].literal == "to"
+         assert tokens[3].type == TokenType.MISC_IDENT
+         assert tokens[3].literal == "y"
+
+     def test_unclosed_backtick(self) -> None:
+         """Test unclosed backtick without closing backtick is treated as illegal."""
+         source = "`unclosed"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         # Without closing backtick, the opening backtick is illegal
+         assert len(tokens) == 2
+         assert tokens[0].type == TokenType.MISC_ILLEGAL
+         assert tokens[0].literal == "`"
+         assert tokens[1].type == TokenType.MISC_IDENT
+         assert tokens[1].literal == "unclosed"
+
+     def test_backtick_with_spaces(self) -> None:
+         """Test backtick with spaces inside."""
+         source = "`with spaces`"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         assert len(tokens) == 1
+         assert tokens[0].type == TokenType.MISC_IDENT
+         assert tokens[0].literal == "with spaces"
+
+     def test_triple_backticks_still_work(self) -> None:
+         """Test that triple backticks still work as string literals."""
+         source = "```code block```"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         assert len(tokens) == 1
+         assert tokens[0].type == TokenType.LIT_TRIPLE_BACKTICK
+         assert tokens[0].literal == "code block"
+
+     def test_backtick_with_hyphens(self) -> None:
+         """Test backtick with hyphens inside."""
+         source = "`my-identifier`"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         assert len(tokens) == 1
+         assert tokens[0].type == TokenType.MISC_IDENT
+         assert tokens[0].literal == "my-identifier"
+
+     def test_backtick_with_spaces_and_hyphens(self) -> None:
+         """Test backtick with both spaces and hyphens."""
+         source = "`my-complex identifier`"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         assert len(tokens) == 1
+         assert tokens[0].type == TokenType.MISC_IDENT
+         assert tokens[0].literal == "my-complex identifier"
+
+     def test_backtick_wrapped_type_keyword(self) -> None:
+         """Test that type keywords like 'number' become identifiers in backticks."""
+         source = "`number`"
+         lexer = Lexer(source)
+         tokens = collect_all_tokens(lexer)
+
+         assert len(tokens) == 1
+         assert tokens[0].type == TokenType.MISC_IDENT
+         assert tokens[0].literal == "number"
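
Taken together, the backtick behaviour looks like a two-step rule: scan ahead for the closing backtick, then accept the enclosed text only if it forms a valid identifier (triple backticks are handled separately as string literals). A sketch of that shape, reusing the is_valid_identifier helper the apostrophe tests import; this is an assumed decomposition, not the lexer's real code:

    from machine_dialect.lexer.tokens import is_valid_identifier

    def read_backtick_identifier(source: str, pos: int) -> tuple[str, int] | None:
        """Return (identifier, position after closing backtick), or None.

        None tells the caller to emit the "`" as MISC_ILLEGAL and re-lex
        the rest normally, matching the `42`, ``, and `unclosed cases.
        """
        end = source.find("`", pos + 1)
        if end == -1:
            return None  # unclosed backtick
        content = source[pos + 1 : end]
        if not is_valid_identifier(content):
            return None  # e.g. numbers or empty content
        return content, end + 1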

machine_dialect/lexer/tests/test_boolean_literals.py
@@ -0,0 +1,108 @@
+ from machine_dialect.lexer import Lexer, Token, TokenMetaType, TokenType
+ from machine_dialect.lexer.tests.helpers import assert_eof
+
+
+ def is_literal_token(token: Token) -> bool:
+     return token.type.meta_type == TokenMetaType.LIT
+
+
+ class TestBooleanLiterals:
+     def test_wrapped_yes(self) -> None:
+         """Test underscore-wrapped Yes literal."""
+         source = "_Yes_"
+         lexer = Lexer(source)
+
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_YES
+         assert token.literal == "Yes"  # Canonical form without underscores
+         assert is_literal_token(token)
+
+         assert_eof(lexer.next_token())
+
+     def test_wrapped_no(self) -> None:
+         """Test underscore-wrapped No literal."""
+         source = "_No_"
+         lexer = Lexer(source)
+
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_NO
+         assert token.literal == "No"  # Canonical form without underscores
+         assert is_literal_token(token)
+
+         assert_eof(lexer.next_token())
+
+     def test_unwrapped_yes(self) -> None:
+         """Test unwrapped Yes literal."""
+         source = "Yes"
+         lexer = Lexer(source)
+
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_YES
+         assert token.literal == "Yes"
+         assert is_literal_token(token)
+
+         assert_eof(lexer.next_token())
+
+     def test_unwrapped_no(self) -> None:
+         """Test unwrapped No literal."""
+         source = "No"
+         lexer = Lexer(source)
+
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_NO
+         assert token.literal == "No"
+         assert is_literal_token(token)
+
+         assert_eof(lexer.next_token())
+
+     def test_boolean_in_expression(self) -> None:
+         """Test boolean literals in expressions."""
+         source = "if x > 0 then give back _Yes_ else give back No"
+         lexer = Lexer(source)
+
+         # Collect all tokens
+         tokens = []
+         while True:
+             token = lexer.next_token()
+             if token.type == TokenType.MISC_EOF:
+                 break
+             tokens.append(token)
+
+         # Find the boolean tokens
+         boolean_tokens = [t for t in tokens if t.type in (TokenType.LIT_YES, TokenType.LIT_NO)]
+         assert len(boolean_tokens) == 2
+
+         # Both booleans are stored in canonical form
+         assert boolean_tokens[0].type == TokenType.LIT_YES
+         assert boolean_tokens[0].literal == "Yes"
+
+         assert boolean_tokens[1].type == TokenType.LIT_NO
+         assert boolean_tokens[1].literal == "No"
+
+     def test_lowercase_yes_no(self) -> None:
+         """Test that lowercase yes/no are recognized as boolean literals."""
+         source = "yes no"
+         lexer = Lexer(source)
+
+         # Lowercase yes/no are recognized as boolean literals
+         token1 = lexer.next_token()
+         assert token1.type == TokenType.LIT_YES
+         assert token1.literal == "Yes"  # Canonical form
+
+         token2 = lexer.next_token()
+         assert token2.type == TokenType.LIT_NO
+         assert token2.literal == "No"  # Canonical form
+
+         assert_eof(lexer.next_token())
+
+     def test_incomplete_wrapped_boolean(self) -> None:
+         """Test incomplete wrapped boolean falls back to identifier."""
+         source = "_Yes"  # Missing closing underscore
+         lexer = Lexer(source)
+
+         token = lexer.next_token()
+         assert token.type == TokenType.MISC_IDENT
+         assert token.literal == "_Yes"
+         assert not is_literal_token(token)
+
+         assert_eof(lexer.next_token())
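
The pattern to note is the asymmetry: matching is case-insensitive on the way in, but the stored literal is always the canonical spelling. A sketch of the mapping these tests imply (hypothetical helper, not from the package; the True/False aliases come from the case-insensitivity tests below):

    _CANONICAL_BOOLEANS = {"yes": "Yes", "true": "Yes", "no": "No", "false": "No"}

    def canonical_boolean(word: str) -> str | None:
        """Return the canonical boolean literal, or None for non-booleans."""
        return _CANONICAL_BOOLEANS.get(word.lower())

    assert canonical_boolean("YES") == "Yes"
    assert canonical_boolean("_Yes") is None  # incomplete wrap stays an identifier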

machine_dialect/lexer/tests/test_case_insensitive_keywords.py
@@ -0,0 +1,188 @@
+ from machine_dialect.lexer.lexer import Lexer
+ from machine_dialect.lexer.tests.helpers import collect_all_tokens
+ from machine_dialect.lexer.tokens import TokenType, keywords_mapping
+
+
+ class TestCaseInsensitiveKeywords:
+     """Test case-insensitive keyword matching while preserving canonical form."""
+
+     def test_all_keywords_case_variations(self) -> None:
+         """Test that all keywords in keywords_mapping are case-insensitive."""
+         for canonical_form, token_type in keywords_mapping.items():
+             # Test different case variations
+             test_cases = [
+                 canonical_form,  # Original form
+                 canonical_form.lower(),  # Lowercase
+                 canonical_form.upper(),  # Uppercase
+                 canonical_form.title(),  # Title case
+             ]
+
+             # Add a mixed case variant
+             if len(canonical_form) > 2:
+                 # Create a mixed case like "fLoAt" or "tRuE"
+                 mixed = ""
+                 for i, char in enumerate(canonical_form):
+                     if char == " ":
+                         mixed += " "
+                     elif i % 2 == 0:
+                         mixed += char.lower()
+                     else:
+                         mixed += char.upper()
+                 test_cases.append(mixed)
+
+             for variant in test_cases:
+                 # Skip if variant is the same as one we already tested
+                 if test_cases.count(variant) > 1:
+                     continue
+
+                 lexer = Lexer(variant)
+                 tokens = collect_all_tokens(lexer)
+                 assert len(tokens) == 1, f"Expected 1 token for '{variant}', got {len(tokens)}"
+                 assert tokens[0].type == token_type, f"Expected {token_type} for '{variant}', got {tokens[0].type}"
+                 # Special case for boolean literals which canonicalize to Yes/No
+                 if token_type in (TokenType.LIT_YES, TokenType.LIT_NO):
+                     expected = "Yes" if token_type == TokenType.LIT_YES else "No"
+                     assert tokens[0].literal == expected, (
+                         f"Expected literal '{expected}' for '{variant}', got '{tokens[0].literal}'"
+                     )
+                 else:
+                     assert tokens[0].literal == canonical_form, (
+                         f"Expected literal '{canonical_form}' for '{variant}', got '{tokens[0].literal}'"
+                     )
+
+     def test_double_asterisk_keywords_all_cases(self) -> None:
+         """Test that all keywords work with double-asterisk wrapping in different cases."""
+         # Test a subset of keywords with double asterisks
+         test_keywords = ["define", "Float", "Integer", "Boolean", "rule", "Set", "Tell"]
+
+         for keyword in test_keywords:
+             if keyword not in keywords_mapping:
+                 continue
+
+             token_type = keywords_mapping[keyword]
+             test_cases = [
+                 f"**{keyword}**",
+                 f"**{keyword.lower()}**",
+                 f"**{keyword.upper()}**",
+             ]
+
+             for source in test_cases:
+                 lexer = Lexer(source)
+                 tokens = collect_all_tokens(lexer)
+                 assert len(tokens) == 1
+                 assert tokens[0].type == token_type
+                 # Special handling for boolean literals
+                 if token_type in (TokenType.LIT_YES, TokenType.LIT_NO):
+                     expected = "Yes" if token_type == TokenType.LIT_YES else "No"
+                     assert tokens[0].literal == expected
+                 else:
+                     assert tokens[0].literal == keyword
+
+     def test_backtick_keywords_all_cases(self) -> None:
+         """Test that keywords in backticks become identifiers (case-insensitive)."""
+         # Test a subset of keywords with backticks
+         test_keywords = ["Float", "Integer", "True", "False", "define", "rule"]
+
+         for keyword in test_keywords:
+             if keyword not in keywords_mapping:
+                 continue
+
+             test_cases = [
+                 f"`{keyword}`",
+                 f"`{keyword.lower()}`",
+                 f"`{keyword.upper()}`",
+             ]
+
+             for source in test_cases:
+                 lexer = Lexer(source)
+                 tokens = collect_all_tokens(lexer)
+                 assert len(tokens) == 1
+                 # Backticks force content to be identifiers
+                 assert tokens[0].type == TokenType.MISC_IDENT
+                 # The literal should be the actual text within backticks
+                 assert tokens[0].literal.lower() == keyword.lower()
+
+     def test_underscore_wrapped_booleans_all_cases(self) -> None:
+         """Test underscore-wrapped boolean literals in different cases."""
+         # Test both True/False and Yes/No inputs
+         test_inputs = [
+             ("True", TokenType.LIT_YES, "Yes"),
+             ("False", TokenType.LIT_NO, "No"),
+             ("Yes", TokenType.LIT_YES, "Yes"),
+             ("No", TokenType.LIT_NO, "No"),
+         ]
+
+         for input_form, token_type, expected_literal in test_inputs:
+             test_cases = [
+                 f"_{input_form}_",
+                 f"_{input_form.lower()}_",
+                 f"_{input_form.upper()}_",
+             ]
+
+             for source in test_cases:
+                 lexer = Lexer(source)
+                 tokens = collect_all_tokens(lexer)
+                 assert len(tokens) == 1
+                 assert tokens[0].type == token_type
+                 assert tokens[0].literal == expected_literal
+
+     def test_identifiers_preserve_case(self) -> None:
+         """Test that non-keyword identifiers preserve their case."""
+         # These should NOT match any keywords
+         test_cases = [
+             ("myVariable", "myVariable"),
+             ("MyVariable", "MyVariable"),
+             ("MYVARIABLE", "MYVARIABLE"),
+             ("userName", "userName"),
+             ("floatValue", "floatValue"),  # Contains "float" but not a keyword
+             ("integerCount", "integerCount"),  # Contains "integer" but not a keyword
+         ]
+
+         for source, expected_literal in test_cases:
+             lexer = Lexer(source)
+             tokens = collect_all_tokens(lexer)
+             assert len(tokens) == 1
+             assert tokens[0].type == TokenType.MISC_IDENT
+             assert tokens[0].literal == expected_literal
+
+     def test_complex_expression_mixed_case(self) -> None:
+         """Test complex expressions with mixed case keywords."""
+         test_cases = [
+             ("SET x AS integer", ["Set", "x", "as", "integer"]),
+             ("set X as INTEGER", ["Set", "X", "as", "INTEGER"]),
+             ("define RULE myFunc", ["define", "rule", "myFunc"]),
+             ("DEFINE rule MyFunc", ["define", "rule", "MyFunc"]),
+             ("if YES then GIVE BACK no", ["if", "Yes", "then", "give back", "No"]),
+         ]
+
+         for source, expected_literals in test_cases:
+             lexer = Lexer(source)
+             tokens = collect_all_tokens(lexer)
+             assert len(tokens) == len(expected_literals)
+
+             for token, expected_literal in zip(tokens, expected_literals, strict=False):
+                 assert token.literal == expected_literal
+
+     def test_multi_word_keywords_preserve_spacing(self) -> None:
+         """Test that multi-word keywords preserve internal spacing but are case-insensitive."""
+         multi_word_keywords = [
+             ("give back", TokenType.KW_RETURN),
+             ("gives back", TokenType.KW_RETURN),
+         ]
+
+         for canonical_form, token_type in multi_word_keywords:
+             # Test with different cases but same spacing
+             test_cases = [
+                 canonical_form,
+                 canonical_form.upper(),
+                 canonical_form.title(),
+                 # Mixed case for each word
+                 " ".join(word.upper() if i % 2 == 0 else word.lower() for i, word in enumerate(canonical_form.split())),
+             ]
+
+             for variant in test_cases:
+                 lexer = Lexer(variant)
+                 tokens = collect_all_tokens(lexer)
+                 assert len(tokens) == 1
+                 assert tokens[0].type == token_type
+                 assert tokens[0].literal == canonical_form
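
All of these cases reduce to one case-insensitive lookup that preserves canonical spelling. A minimal sketch of how such a table can be derived from keywords_mapping (assumed approach, not the package's lexer internals):

    from machine_dialect.lexer.tokens import TokenType, keywords_mapping

    # Index keywords by lowercase form once; keeping the canonical key
    # alongside the token type maps any casing back to one literal.
    _KEYWORDS_LOWER = {k.lower(): (v, k) for k, v in keywords_mapping.items()}

    def lookup_keyword(word: str) -> tuple[TokenType, str] | None:
        return _KEYWORDS_LOWER.get(word.lower())

    # e.g. lookup_keyword("SET") -> (TokenType.KW_SET, "Set")

Multi-word keywords such as "give back" cannot be found word by word; the lexer has to join candidate words before the lookup, which is exactly what the spacing test above exercises.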