machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,200 @@
1
"""Tests for comment token recognition."""

from machine_dialect.lexer.lexer import Lexer
from machine_dialect.lexer.tokens import Token, TokenType


def _tokenize(source: str) -> list[Token]:
    """Lex *source* and return every token produced, including the final EOF.

    Every test in this module needs the full token stream; draining the lexer
    here once removes the identical while-loop previously repeated in each test.
    """
    lexer = Lexer(source)
    tokens: list[Token] = []
    while True:
        token = lexer.next_token()
        tokens.append(token)
        if token.type == TokenType.MISC_EOF:
            break
    return tokens


class TestComments:
    """Test comment token recognition."""

    def test_simple_comment(self) -> None:
        """Test simple comment within summary tags."""
        tokens = _tokenize("<summary>This is a comment</summary>")

        assert len(tokens) == 4
        assert tokens[0].type == TokenType.TAG_SUMMARY_START
        assert tokens[0].literal == "<summary>"
        assert tokens[1].type == TokenType.MISC_COMMENT
        assert tokens[1].literal == "This is a comment"
        assert tokens[2].type == TokenType.TAG_SUMMARY_END
        assert tokens[2].literal == "</summary>"
        assert tokens[3].type == TokenType.MISC_EOF

    def test_multiline_comment(self) -> None:
        """Test multiline comment within summary tags."""
        source = """<summary>
This is a comment
that spans multiple lines
</summary>"""
        tokens = _tokenize(source)

        assert len(tokens) == 4
        assert tokens[0].type == TokenType.TAG_SUMMARY_START
        assert tokens[1].type == TokenType.MISC_COMMENT
        assert tokens[1].literal == "\nThis is a comment\nthat spans multiple lines\n"
        assert tokens[2].type == TokenType.TAG_SUMMARY_END
        assert tokens[3].type == TokenType.MISC_EOF

    def test_empty_comment(self) -> None:
        """Test empty comment within summary tags."""
        tokens = _tokenize("<summary></summary>")

        assert len(tokens) == 4
        assert tokens[0].type == TokenType.TAG_SUMMARY_START
        assert tokens[1].type == TokenType.MISC_COMMENT
        assert tokens[1].literal == ""
        assert tokens[2].type == TokenType.TAG_SUMMARY_END
        assert tokens[3].type == TokenType.MISC_EOF

    def test_comment_with_code_before_and_after(self) -> None:
        """Test comment with code before and after."""
        tokens = _tokenize("set x to 10. <summary>This is a comment</summary> set y to 20.")

        # Check first part: set x to 10.
        assert tokens[0].type == TokenType.KW_SET
        assert tokens[1].type == TokenType.MISC_IDENT
        assert tokens[1].literal == "x"
        assert tokens[2].type == TokenType.KW_TO
        assert tokens[3].type == TokenType.LIT_WHOLE_NUMBER
        assert tokens[3].literal == "10"
        assert tokens[4].type == TokenType.PUNCT_PERIOD

        # Check comment part
        assert tokens[5].type == TokenType.TAG_SUMMARY_START
        assert tokens[6].type == TokenType.MISC_COMMENT
        assert tokens[6].literal == "This is a comment"
        assert tokens[7].type == TokenType.TAG_SUMMARY_END

        # Check second part: set y to 20.
        assert tokens[8].type == TokenType.KW_SET
        assert tokens[9].type == TokenType.MISC_IDENT
        assert tokens[9].literal == "y"
        assert tokens[10].type == TokenType.KW_TO
        assert tokens[11].type == TokenType.LIT_WHOLE_NUMBER
        assert tokens[11].literal == "20"
        assert tokens[12].type == TokenType.PUNCT_PERIOD
        assert tokens[13].type == TokenType.MISC_EOF

    def test_comment_case_insensitive_tags(self) -> None:
        """Test that summary tags are case-insensitive."""
        tokens = _tokenize("<SUMMARY>This is a comment</SUMMARY>")

        assert len(tokens) == 4
        assert tokens[0].type == TokenType.TAG_SUMMARY_START
        assert tokens[0].literal == "<summary>"  # Canonical form
        assert tokens[1].type == TokenType.MISC_COMMENT
        assert tokens[1].literal == "This is a comment"
        assert tokens[2].type == TokenType.TAG_SUMMARY_END
        assert tokens[2].literal == "</summary>"

    def test_mixed_case_tags(self) -> None:
        """Test mixed case summary tags."""
        tokens = _tokenize("<SuMmArY>Mixed case comment</sUmMaRy>")

        assert len(tokens) == 4
        assert tokens[0].type == TokenType.TAG_SUMMARY_START
        assert tokens[1].type == TokenType.MISC_COMMENT
        assert tokens[1].literal == "Mixed case comment"
        assert tokens[2].type == TokenType.TAG_SUMMARY_END

    def test_comment_with_special_characters(self) -> None:
        """Test comment containing special characters."""
        tokens = _tokenize("<summary>Comment with special chars: !@#$%^&*()_+-=[]{}|;:'\",.<>?/`~</summary>")

        assert len(tokens) == 4
        assert tokens[0].type == TokenType.TAG_SUMMARY_START
        assert tokens[1].type == TokenType.MISC_COMMENT
        assert tokens[1].literal == "Comment with special chars: !@#$%^&*()_+-=[]{}|;:'\",.<>?/`~"
        assert tokens[2].type == TokenType.TAG_SUMMARY_END

    def test_unclosed_summary_tag(self) -> None:
        """Test that unclosed summary tag creates a comment up to EOF."""
        tokens = _tokenize("<summary>This is a comment without closing tag")

        # Should tokenize as summary tag followed by comment content up to EOF
        assert len(tokens) == 3
        assert tokens[0].type == TokenType.TAG_SUMMARY_START
        assert tokens[1].type == TokenType.MISC_COMMENT
        assert tokens[1].literal == "This is a comment without closing tag"
        assert tokens[2].type == TokenType.MISC_EOF

    def test_nested_tags_in_comment(self) -> None:
        """Test comment containing nested tags."""
        tokens = _tokenize("<summary>Comment with <tag> and </tag> inside</summary>")

        assert len(tokens) == 4
        assert tokens[0].type == TokenType.TAG_SUMMARY_START
        assert tokens[1].type == TokenType.MISC_COMMENT
        assert tokens[1].literal == "Comment with <tag> and </tag> inside"
        assert tokens[2].type == TokenType.TAG_SUMMARY_END
@@ -0,0 +1,127 @@
1
from machine_dialect.lexer import Lexer, TokenType
from machine_dialect.lexer.tests.helpers import collect_all_tokens


class TestDoubleAsteriskKeywords:
    def test_wrapped_keyword_define(self) -> None:
        """A keyword wrapped in double asterisks lexes as the bare keyword."""
        tokens = collect_all_tokens(Lexer("**define**"))
        assert len(tokens) == 1
        assert (tokens[0].type, tokens[0].literal) == (TokenType.KW_DEFINE, "define")

    def test_wrapped_keyword_rule(self) -> None:
        """'rule' wrapped in double asterisks lexes as KW_RULE."""
        tokens = collect_all_tokens(Lexer("**rule**"))
        assert len(tokens) == 1
        assert (tokens[0].type, tokens[0].literal) == (TokenType.KW_RULE, "rule")

    def test_wrapped_keyword_set(self) -> None:
        """'Set' wrapped in double asterisks lexes as KW_SET, case preserved."""
        tokens = collect_all_tokens(Lexer("**Set**"))
        assert len(tokens) == 1
        assert (tokens[0].type, tokens[0].literal) == (TokenType.KW_SET, "Set")

    def test_wrapped_multi_word_keyword(self) -> None:
        """A multi-word keyword survives double-asterisk wrapping as one token."""
        tokens = collect_all_tokens(Lexer("**give back**"))
        assert len(tokens) == 1
        assert (tokens[0].type, tokens[0].literal) == (TokenType.KW_RETURN, "give back")

    def test_unwrapped_keyword(self) -> None:
        """A bare keyword still lexes normally (backward compatibility)."""
        tokens = collect_all_tokens(Lexer("define"))
        assert len(tokens) == 1
        assert (tokens[0].type, tokens[0].literal) == (TokenType.KW_DEFINE, "define")

    def test_incomplete_wrapped_keyword(self) -> None:
        """Missing closing asterisks yields a ** operator then the keyword."""
        tokens = collect_all_tokens(Lexer("**define"))
        assert len(tokens) == 2
        assert (tokens[0].type, tokens[0].literal) == (TokenType.OP_TWO_STARS, "**")
        assert (tokens[1].type, tokens[1].literal) == (TokenType.KW_DEFINE, "define")

    def test_non_keyword_wrapped(self) -> None:
        """A non-keyword in double asterisks stays operator/ident/operator."""
        tokens = collect_all_tokens(Lexer("**notakeyword**"))
        assert len(tokens) == 3
        assert (tokens[0].type, tokens[0].literal) == (TokenType.OP_TWO_STARS, "**")
        assert (tokens[1].type, tokens[1].literal) == (TokenType.MISC_IDENT, "notakeyword")
        assert (tokens[2].type, tokens[2].literal) == (TokenType.OP_TWO_STARS, "**")

    def test_mixed_usage_in_expression(self) -> None:
        """Wrapped and unwrapped keywords coexist inside one expression."""
        tokens = collect_all_tokens(Lexer("**define** a rule that takes"))
        assert len(tokens) == 5
        assert (tokens[0].type, tokens[0].literal) == (TokenType.KW_DEFINE, "define")
        assert tokens[1].type == TokenType.MISC_STOPWORD  # "a"
        assert (tokens[2].type, tokens[2].literal) == (TokenType.KW_RULE, "rule")
        assert tokens[3].type == TokenType.MISC_STOPWORD  # "that"
        assert (tokens[4].type, tokens[4].literal) == (TokenType.KW_TAKE, "takes")

    def test_operator_usage(self) -> None:
        """The ** exponent operator is unaffected by keyword wrapping."""
        tokens = collect_all_tokens(Lexer("2 ** 3"))
        assert len(tokens) == 3
        assert (tokens[0].type, tokens[0].literal) == (TokenType.LIT_WHOLE_NUMBER, "2")
        assert (tokens[1].type, tokens[1].literal) == (TokenType.OP_TWO_STARS, "**")
        assert (tokens[2].type, tokens[2].literal) == (TokenType.LIT_WHOLE_NUMBER, "3")

    def test_stopword_wrapped(self) -> None:
        """A wrapped stopword is not promoted to a keyword."""
        tokens = collect_all_tokens(Lexer("**the**"))
        assert len(tokens) == 3
        assert (tokens[0].type, tokens[0].literal) == (TokenType.OP_TWO_STARS, "**")
        assert (tokens[1].type, tokens[1].literal) == (TokenType.MISC_STOPWORD, "the")
        assert (tokens[2].type, tokens[2].literal) == (TokenType.OP_TWO_STARS, "**")

    def test_boolean_literal_wrapped(self) -> None:
        """A wrapped boolean literal is not promoted to a keyword."""
        tokens = collect_all_tokens(Lexer("**Yes**"))
        assert len(tokens) == 3
        assert (tokens[0].type, tokens[0].literal) == (TokenType.OP_TWO_STARS, "**")
        assert (tokens[1].type, tokens[1].literal) == (TokenType.LIT_YES, "Yes")
        assert (tokens[2].type, tokens[2].literal) == (TokenType.OP_TWO_STARS, "**")
@@ -0,0 +1,113 @@
1
from machine_dialect.lexer.lexer import Lexer
from machine_dialect.lexer.tests.helpers import collect_all_tokens
from machine_dialect.lexer.tokens import Token, TokenType


class TestLexerPosition:
    def test_single_line_positions(self) -> None:
        """Tokens on one source line report the expected column positions."""
        tokens = collect_all_tokens(Lexer("Set x = 42"))

        assert tokens == [
            Token(TokenType.KW_SET, "Set", line=1, position=1),
            Token(TokenType.MISC_IDENT, "x", line=1, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=1, position=7),
            Token(TokenType.LIT_WHOLE_NUMBER, "42", line=1, position=9),
        ]

    def test_multiline_positions(self) -> None:
        """Line numbers advance correctly across a multi-line source."""
        source = """if Yes then
    give back 42
else
    gives back 0"""

        tokens = collect_all_tokens(Lexer(source))

        assert tokens == [
            Token(TokenType.KW_IF, "if", line=1, position=1),
            Token(TokenType.LIT_YES, "Yes", line=1, position=4),
            Token(TokenType.KW_THEN, "then", line=1, position=8),
            Token(TokenType.KW_RETURN, "give back", line=2, position=5),
            Token(TokenType.LIT_WHOLE_NUMBER, "42", line=2, position=15),
            Token(TokenType.KW_ELSE, "else", line=3, position=1),
            Token(TokenType.KW_RETURN, "gives back", line=4, position=5),
            Token(TokenType.LIT_WHOLE_NUMBER, "0", line=4, position=16),
        ]

    def test_string_literal_position(self) -> None:
        """A string literal token keeps the position of its opening quote."""
        tokens = collect_all_tokens(Lexer('Set msg = "hello world"'))

        assert tokens == [
            Token(TokenType.KW_SET, "Set", line=1, position=1),
            Token(TokenType.MISC_IDENT, "msg", line=1, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=1, position=9),
            Token(TokenType.LIT_TEXT, '"hello world"', line=1, position=11),
        ]

    def test_empty_lines_position(self) -> None:
        """Blank lines still count toward the reported line numbers."""
        source = """Set x = 1

Set y = 2"""

        tokens = collect_all_tokens(Lexer(source))

        assert tokens == [
            Token(TokenType.KW_SET, "Set", line=1, position=1),
            Token(TokenType.MISC_IDENT, "x", line=1, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=1, position=7),
            Token(TokenType.LIT_WHOLE_NUMBER, "1", line=1, position=9),
            Token(TokenType.KW_SET, "Set", line=3, position=1),
            Token(TokenType.MISC_IDENT, "y", line=3, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=3, position=7),
            Token(TokenType.LIT_WHOLE_NUMBER, "2", line=3, position=9),
        ]

    def test_tab_position(self) -> None:
        """Tab characters advance the position by one column each."""
        tokens = collect_all_tokens(Lexer("Set\tx\t=\t42"))

        # Tabs count as single characters for position
        assert tokens == [
            Token(TokenType.KW_SET, "Set", line=1, position=1),
            Token(TokenType.MISC_IDENT, "x", line=1, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=1, position=7),
            Token(TokenType.LIT_WHOLE_NUMBER, "42", line=1, position=9),
        ]

    def test_illegal_character_position(self) -> None:
        """An illegal character is tokenized in place with its position intact."""
        tokens = collect_all_tokens(Lexer("Set x = @"))

        # Lexer no longer reports errors (parser will handle them)
        assert tokens == [
            Token(TokenType.KW_SET, "Set", line=1, position=1),
            Token(TokenType.MISC_IDENT, "x", line=1, position=5),
            Token(TokenType.OP_ASSIGN, "=", line=1, position=7),
            Token(TokenType.MISC_ILLEGAL, "@", line=1, position=9),
        ]