machine_dialect-0.1.0a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
machine_dialect/lexer/tests/test_tokenization.py
@@ -0,0 +1,294 @@
+ import pytest
+
+ from machine_dialect.lexer import Lexer
+ from machine_dialect.lexer.tests.helpers import stream_and_assert_tokens
+ from machine_dialect.lexer.tokens import Token, TokenType
+
+
+ class TestLexer:
+     @pytest.mark.parametrize(
+         "input_text,expected_tokens",
+         [
+             # Boolean
+             ("Yes", [Token(TokenType.LIT_YES, "Yes", line=1, position=1)]),
+             ("No", [Token(TokenType.LIT_NO, "No", line=1, position=1)]),
+             # Numbers
+             ("123", [Token(TokenType.LIT_WHOLE_NUMBER, "123", line=1, position=1)]),
+             ("3.14", [Token(TokenType.LIT_FLOAT, "3.14", line=1, position=1)]),
+             ("0", [Token(TokenType.LIT_WHOLE_NUMBER, "0", line=1, position=1)]),
+             # Strings
+             ('"hello"', [Token(TokenType.LIT_TEXT, '"hello"', line=1, position=1)]),
+             ("'world'", [Token(TokenType.LIT_TEXT, "'world'", line=1, position=1)]),
+             ('""', [Token(TokenType.LIT_TEXT, '""', line=1, position=1)]),
+             # Backtick identifiers (backticks consumed by lexer)
+             ("`code`", [Token(TokenType.MISC_IDENT, "code", line=1, position=1)]),
+             ("`variable_name`", [Token(TokenType.MISC_IDENT, "variable_name", line=1, position=1)]),
+             # Numbers in backticks are not valid identifiers, so we get illegal tokens
+             (
+                 "`42`",
+                 [
+                     Token(TokenType.MISC_ILLEGAL, "`", line=1, position=1),
+                     Token(TokenType.LIT_WHOLE_NUMBER, "42", line=1, position=2),
+                     Token(TokenType.MISC_ILLEGAL, "`", line=1, position=4),
+                 ],
+             ),
+             # Empty backticks produce two illegal backtick tokens
+             (
+                 "``",
+                 [
+                     Token(TokenType.MISC_ILLEGAL, "`", line=1, position=1),
+                     Token(TokenType.MISC_ILLEGAL, "`", line=1, position=2),
+                 ],
+             ),
+             # Triple backtick strings
+             ("```python```", [Token(TokenType.LIT_TRIPLE_BACKTICK, "python", line=1, position=1)]),
+             (
+                 "```\ncode block\n```",
+                 [Token(TokenType.LIT_TRIPLE_BACKTICK, "\ncode block\n", line=1, position=1)],
+             ),
+             (
+                 "```js\nconst x = 42;\n```",
+                 [Token(TokenType.LIT_TRIPLE_BACKTICK, "js\nconst x = 42;\n", line=1, position=1)],
+             ),
+             ("``````", [Token(TokenType.LIT_TRIPLE_BACKTICK, "", line=1, position=1)]),
+             # Identifiers
+             ("variable", [Token(TokenType.MISC_IDENT, "variable", line=1, position=1)]),
+             ("_underscore", [Token(TokenType.MISC_IDENT, "_underscore", line=1, position=1)]),
+             ("camelCase", [Token(TokenType.MISC_IDENT, "camelCase", line=1, position=1)]),
+             ("var123", [Token(TokenType.MISC_IDENT, "var123", line=1, position=1)]),
+             # Keywords
+             ("if", [Token(TokenType.KW_IF, "if", line=1, position=1)]),
+             ("else", [Token(TokenType.KW_ELSE, "else", line=1, position=1)]),
+             ("define", [Token(TokenType.KW_DEFINE, "define", line=1, position=1)]),
+             ("empty", [Token(TokenType.KW_EMPTY, "empty", line=1, position=1)]),
+             ("entrypoint", [Token(TokenType.KW_ENTRYPOINT, "entrypoint", line=1, position=1)]),
+             ("filter", [Token(TokenType.KW_FILTER, "filter", line=1, position=1)]),
+             ("prompt", [Token(TokenType.KW_PROMPT, "prompt", line=1, position=1)]),
+             ("template", [Token(TokenType.KW_TEMPLATE, "template", line=1, position=1)]),
+             ("give back", [Token(TokenType.KW_RETURN, "give back", line=1, position=1)]),
+             ("gives back", [Token(TokenType.KW_RETURN, "gives back", line=1, position=1)]),
+             ("and", [Token(TokenType.KW_AND, "and", line=1, position=1)]),
+             ("or", [Token(TokenType.KW_OR, "or", line=1, position=1)]),
+             ("is", [Token(TokenType.KW_IS, "is", line=1, position=1)]),
+             ("as", [Token(TokenType.KW_AS, "as", line=1, position=1)]),
+             ("with", [Token(TokenType.KW_WITH, "with", line=1, position=1)]),
+             ("then", [Token(TokenType.KW_THEN, "then", line=1, position=1)]),
+             # More keywords
+             ("action", [Token(TokenType.KW_ACTION, "action", line=1, position=1)]),
+             ("actions", [Token(TokenType.KW_ACTION, "actions", line=1, position=1)]),
+             # "apply" is reserved for future use, currently not mapped
+             ("behavior", [Token(TokenType.KW_BEHAVIOR, "behavior", line=1, position=1)]),
+             # Backslash
+             ("\\", [Token(TokenType.PUNCT_BACKSLASH, "\\", line=1, position=1)]),
+             (
+                 "x\\y",
+                 [
+                     Token(TokenType.MISC_IDENT, "x", line=1, position=1),
+                     Token(TokenType.PUNCT_BACKSLASH, "\\", line=1, position=2),
+                     Token(TokenType.MISC_IDENT, "y", line=1, position=3),
+                 ],
+             ),
+             # Frontmatter delimiter
+             ("---", [Token(TokenType.PUNCT_FRONTMATTER, "---", line=1, position=1)]),
+             (
+                 "-- -",
+                 [
+                     Token(TokenType.OP_MINUS, "-", line=1, position=1),
+                     Token(TokenType.OP_MINUS, "-", line=1, position=2),
+                     Token(TokenType.OP_MINUS, "-", line=1, position=4),
+                 ],
+             ),
+             (
+                 "--",
+                 [
+                     Token(TokenType.OP_MINUS, "-", line=1, position=1),
+                     Token(TokenType.OP_MINUS, "-", line=1, position=2),
+                 ],
+             ),
+             ("behaviors", [Token(TokenType.KW_BEHAVIOR, "behaviors", line=1, position=1)]),
+             ("behaviour", [Token(TokenType.KW_BEHAVIOR, "behaviour", line=1, position=1)]),
+             ("behaviours", [Token(TokenType.KW_BEHAVIOR, "behaviours", line=1, position=1)]),
+             ("Yes/No", [Token(TokenType.KW_YES_NO, "Yes/No", line=1, position=1)]),
+             ("Float", [Token(TokenType.KW_FLOAT, "Float", line=1, position=1)]),
+             ("Floats", [Token(TokenType.KW_FLOAT, "Floats", line=1, position=1)]),
+             ("from", [Token(TokenType.KW_FROM, "from", line=1, position=1)]),
+             ("interaction", [Token(TokenType.KW_INTERACTION, "interaction", line=1, position=1)]),
+             ("interactions", [Token(TokenType.KW_INTERACTION, "interactions", line=1, position=1)]),
+             ("List", [Token(TokenType.KW_LIST, "List", line=1, position=1)]),
+             ("not", [Token(TokenType.KW_NEGATION, "not", line=1, position=1)]),
+             ("Number", [Token(TokenType.KW_NUMBER, "Number", line=1, position=1)]),
+             ("Numbers", [Token(TokenType.KW_NUMBER, "Numbers", line=1, position=1)]),
+             ("otherwise", [Token(TokenType.KW_ELSE, "otherwise", line=1, position=1)]),
+             ("rule", [Token(TokenType.KW_RULE, "rule", line=1, position=1)]),
+             ("Set", [Token(TokenType.KW_SET, "Set", line=1, position=1)]),
+             ("Utility", [Token(TokenType.KW_UTILITY, "Utility", line=1, position=1)]),
+             ("take", [Token(TokenType.KW_TAKE, "take", line=1, position=1)]),
+             ("takes", [Token(TokenType.KW_TAKE, "takes", line=1, position=1)]),
+             ("Tell", [Token(TokenType.KW_TELL, "Tell", line=1, position=1)]),
+             ("text", [Token(TokenType.KW_TEXT, "text", line=1, position=1)]),
+             ("texts", [Token(TokenType.KW_TEXT, "texts", line=1, position=1)]),
+             ("to", [Token(TokenType.KW_TO, "to", line=1, position=1)]),
+             ("trait", [Token(TokenType.KW_TRAIT, "trait", line=1, position=1)]),
+             ("traits", [Token(TokenType.KW_TRAIT, "traits", line=1, position=1)]),
+             ("Use", [Token(TokenType.KW_USE, "Use", line=1, position=1)]),
+             ("URL", [Token(TokenType.KW_URL, "URL", line=1, position=1)]),
+             ("URLs", [Token(TokenType.KW_URL, "URLs", line=1, position=1)]),
+             ("Date", [Token(TokenType.KW_DATE, "Date", line=1, position=1)]),
+             ("Dates", [Token(TokenType.KW_DATE, "Dates", line=1, position=1)]),
+             ("DateTime", [Token(TokenType.KW_DATETIME, "DateTime", line=1, position=1)]),
+             ("DateTimes", [Token(TokenType.KW_DATETIME, "DateTimes", line=1, position=1)]),
+             ("Time", [Token(TokenType.KW_TIME, "Time", line=1, position=1)]),
+             ("Times", [Token(TokenType.KW_TIME, "Times", line=1, position=1)]),
+             ("DataType", [Token(TokenType.KW_DATATYPE, "DataType", line=1, position=1)]),
+             # Single character operators
+             ("+", [Token(TokenType.OP_PLUS, "+", line=1, position=1)]),
+             ("-", [Token(TokenType.OP_MINUS, "-", line=1, position=1)]),
+             ("/", [Token(TokenType.OP_DIVISION, "/", line=1, position=1)]),
+             ("=", [Token(TokenType.OP_ASSIGN, "=", line=1, position=1)]),
+             ("<", [Token(TokenType.OP_LT, "<", line=1, position=1)]),
+             (">", [Token(TokenType.OP_GT, ">", line=1, position=1)]),
+             ("*", [Token(TokenType.OP_STAR, "*", line=1, position=1)]),
+             # Multi-character operators
+             ("**", [Token(TokenType.OP_TWO_STARS, "**", line=1, position=1)]),
+             # Delimiters
+             ("(", [Token(TokenType.DELIM_LPAREN, "(", line=1, position=1)]),
+             (")", [Token(TokenType.DELIM_RPAREN, ")", line=1, position=1)]),
+             ("{", [Token(TokenType.DELIM_LBRACE, "{", line=1, position=1)]),
+             ("}", [Token(TokenType.DELIM_RBRACE, "}", line=1, position=1)]),
+             # Punctuation
+             (";", [Token(TokenType.PUNCT_SEMICOLON, ";", line=1, position=1)]),
+             (",", [Token(TokenType.PUNCT_COMMA, ",", line=1, position=1)]),
+             (".", [Token(TokenType.PUNCT_PERIOD, ".", line=1, position=1)]),
+             (":", [Token(TokenType.PUNCT_COLON, ":", line=1, position=1)]),
+             ("#", [Token(TokenType.PUNCT_HASH, "#", line=1, position=1)]),
+             # Complex expressions
+             (
+                 "x = 42",
+                 [
+                     Token(TokenType.MISC_IDENT, "x", line=1, position=1),
+                     Token(TokenType.OP_ASSIGN, "=", line=1, position=3),
+                     Token(TokenType.LIT_WHOLE_NUMBER, "42", line=1, position=5),
+                 ],
+             ),
+             (
+                 "if (x > 0)",
+                 [
+                     Token(TokenType.KW_IF, "if", line=1, position=1),
+                     Token(TokenType.DELIM_LPAREN, "(", line=1, position=4),
+                     Token(TokenType.MISC_IDENT, "x", line=1, position=5),
+                     Token(TokenType.OP_GT, ">", line=1, position=7),
+                     Token(TokenType.LIT_WHOLE_NUMBER, "0", line=1, position=9),
+                     Token(TokenType.DELIM_RPAREN, ")", line=1, position=10),
+                 ],
+             ),
+             (
+                 "x # comment",
+                 [
+                     Token(TokenType.MISC_IDENT, "x", line=1, position=1),
+                     Token(TokenType.PUNCT_HASH, "#", line=1, position=3),
+                     Token(TokenType.MISC_IDENT, "comment", line=1, position=5),
+                 ],
+             ),
+             (
+                 'Set `name` to _"John"_',
+                 [
+                     Token(TokenType.KW_SET, "Set", line=1, position=1),
+                     Token(TokenType.MISC_IDENT, "name", line=1, position=6),
+                     Token(TokenType.KW_TO, "to", line=1, position=12),
+                     Token(TokenType.LIT_TEXT, '"John"', line=1, position=15),
+                 ],
+             ),
+             (
+                 "if **x** is greater than 0, then give back _Yes_",
+                 [
+                     Token(TokenType.KW_IF, "if", line=1, position=1),
+                     Token(TokenType.OP_TWO_STARS, "**", line=1, position=4),
+                     Token(TokenType.MISC_IDENT, "x", line=1, position=6),
+                     Token(TokenType.OP_TWO_STARS, "**", line=1, position=7),
+                     Token(TokenType.OP_GT, "is greater than", line=1, position=10),
+                     Token(TokenType.LIT_WHOLE_NUMBER, "0", line=1, position=26),
+                     Token(TokenType.PUNCT_COMMA, ",", line=1, position=27),
+                     Token(TokenType.KW_THEN, "then", line=1, position=29),
+                     Token(TokenType.KW_RETURN, "give back", line=1, position=34),
+                     Token(TokenType.LIT_YES, "Yes", line=1, position=44),
+                 ],
+             ),
+             (
+                 "if x > 0 then gives back Yes",
+                 [
+                     Token(TokenType.KW_IF, "if", line=1, position=1),
+                     Token(TokenType.MISC_IDENT, "x", line=1, position=4),
+                     Token(TokenType.OP_GT, ">", line=1, position=6),
+                     Token(TokenType.LIT_WHOLE_NUMBER, "0", line=1, position=8),
+                     Token(TokenType.KW_THEN, "then", line=1, position=10),
+                     Token(TokenType.KW_RETURN, "gives back", line=1, position=15),
+                     Token(TokenType.LIT_YES, "Yes", line=1, position=26),
+                 ],
+             ),
+             (
+                 "define rule that give back 42",
+                 [
+                     Token(TokenType.KW_DEFINE, "define", line=1, position=1),
+                     Token(TokenType.KW_RULE, "rule", line=1, position=8),
+                     Token(TokenType.MISC_STOPWORD, "that", line=1, position=13),
+                     Token(TokenType.KW_RETURN, "give back", line=1, position=18),
+                     Token(TokenType.LIT_WHOLE_NUMBER, "42", line=1, position=28),
+                 ],
+             ),
+         ],
+     )
+     def test_lexer_tokenization(self, input_text: str, expected_tokens: list[Token]) -> None:
+         lexer = Lexer(input_text)
+         stream_and_assert_tokens(lexer, expected_tokens)
+
+     @pytest.mark.parametrize(
+         "input_text, expected_tokens",
+         [
+             # Basic tag tokens
+             ("<summary>", [Token(TokenType.TAG_SUMMARY_START, "<summary>", line=1, position=1)]),
+             ("</summary>", [Token(TokenType.TAG_SUMMARY_END, "</summary>", line=1, position=1)]),
+             ("<details>", [Token(TokenType.TAG_DETAILS_START, "<details>", line=1, position=1)]),
+             ("</details>", [Token(TokenType.TAG_DETAILS_END, "</details>", line=1, position=1)]),
+             # Case insensitive
+             ("<SUMMARY>", [Token(TokenType.TAG_SUMMARY_START, "<summary>", line=1, position=1)]),
+             ("</Summary>", [Token(TokenType.TAG_SUMMARY_END, "</summary>", line=1, position=1)]),
+             ("<DETAILS>", [Token(TokenType.TAG_DETAILS_START, "<details>", line=1, position=1)]),
+             ("</Details>", [Token(TokenType.TAG_DETAILS_END, "</details>", line=1, position=1)]),
+             # Mixed case
+             ("<SuMmArY>", [Token(TokenType.TAG_SUMMARY_START, "<summary>", line=1, position=1)]),
+             ("</DeTaIlS>", [Token(TokenType.TAG_DETAILS_END, "</details>", line=1, position=1)]),
+             # Tags with content
+             (
+                 "<summary>This is a summary</summary>",
+                 [
+                     Token(TokenType.TAG_SUMMARY_START, "<summary>", line=1, position=1),
+                     Token(TokenType.MISC_COMMENT, "This is a summary", line=1, position=10),
+                     Token(TokenType.TAG_SUMMARY_END, "</summary>", line=1, position=27),
+                 ],
+             ),
+             # Now "summary" and "details" as words should be identifiers
+             ("summary", [Token(TokenType.MISC_IDENT, "summary", line=1, position=1)]),
+             ("details", [Token(TokenType.MISC_IDENT, "details", line=1, position=1)]),
+             # Invalid tags should not be recognized as tags
+             (
+                 "<invalid>",
+                 [
+                     Token(TokenType.OP_LT, "<", line=1, position=1),
+                     Token(TokenType.MISC_IDENT, "invalid", line=1, position=2),
+                     Token(TokenType.OP_GT, ">", line=1, position=9),
+                 ],
+             ),
+             # Less than operator should still work
+             (
+                 "x < 5",
+                 [
+                     Token(TokenType.MISC_IDENT, "x", line=1, position=1),
+                     Token(TokenType.OP_LT, "<", line=1, position=3),
+                     Token(TokenType.LIT_WHOLE_NUMBER, "5", line=1, position=5),
+                 ],
+             ),
+         ],
+     )
+     def test_tag_tokens(self, input_text: str, expected_tokens: list[Token]) -> None:
+         lexer = Lexer(input_text)
+         stream_and_assert_tokens(lexer, expected_tokens)
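Note: both test classes above depend on stream_and_assert_tokens from machine_dialect.lexer.tests.helpers (file 70 in the listing), which this diff does not show. A minimal sketch of what such a helper plausibly does, assuming Token exposes type, literal, line, and position attributes and that the lexer marks end of input with a MISC_EOF token; this is a reconstruction for illustration, not the package's actual helper:

from machine_dialect.lexer import Lexer
from machine_dialect.lexer.tokens import Token, TokenType


def stream_and_assert_tokens(lexer: Lexer, expected_tokens: list[Token]) -> None:
    # Hypothetical reconstruction: drain the lexer one token at a time,
    # compare each against the expected stream, then require EOF.
    for expected in expected_tokens:
        actual = lexer.next_token()
        assert actual.type == expected.type
        assert actual.literal == expected.literal
        assert actual.line == expected.line
        assert actual.position == expected.position
    assert lexer.next_token().type == TokenType.MISC_EOF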
machine_dialect/lexer/tests/test_underscore_literals.py
@@ -0,0 +1,343 @@
+ from machine_dialect.lexer import Lexer
+ from machine_dialect.lexer.tests.helpers import assert_eof, assert_expected_token
+ from machine_dialect.lexer.tokens import Token, TokenMetaType, TokenType
+
+
+ def is_literal_token(token: Token) -> bool:
+     return token.type.meta_type == TokenMetaType.LIT
+
+
+ class TestUnderscoreLiterals:
+     def test_wrapped_integer(self) -> None:
+         """Test underscore-wrapped integer literals."""
+         source = "_42_"
+         lexer = Lexer(source)
+
+         # Expected token
+         expected = Token(TokenType.LIT_WHOLE_NUMBER, "42", line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+         assert is_literal_token(actual)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_wrapped_float(self) -> None:
+         """Test underscore-wrapped float literals."""
+         source = "_3.14_"
+         lexer = Lexer(source)
+
+         # Expected token
+         expected = Token(TokenType.LIT_FLOAT, "3.14", line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+         assert is_literal_token(actual)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_wrapped_string(self) -> None:
+         """Test underscore-wrapped string literals."""
+         source = '_"Hello, World!"_'
+         lexer = Lexer(source)
+
+         # Expected token
+         expected = Token(TokenType.LIT_TEXT, '"Hello, World!"', line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+         assert is_literal_token(actual)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_unwrapped_integer(self) -> None:
+         """Test unwrapped integer literals (backward compatibility)."""
+         source = "42"
+         lexer = Lexer(source)
+
+         # Expected token
+         expected = Token(TokenType.LIT_WHOLE_NUMBER, "42", line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+         assert is_literal_token(actual)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_unwrapped_float(self) -> None:
+         """Test unwrapped float literals (backward compatibility)."""
+         source = "3.14"
+         lexer = Lexer(source)
+
+         # Expected token
+         expected = Token(TokenType.LIT_FLOAT, "3.14", line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+         assert is_literal_token(actual)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_unwrapped_string(self) -> None:
+         """Test unwrapped string literals (backward compatibility)."""
+         source = '"Hello, World!"'
+         lexer = Lexer(source)
+
+         # Expected token
+         expected = Token(TokenType.LIT_TEXT, '"Hello, World!"', line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+         assert is_literal_token(actual)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_mixed_literals_in_expression(self) -> None:
+         """Test both wrapped and unwrapped literals in same expression."""
+         source = "Set `x` to _42_ and `y` to 3.14"
+         lexer = Lexer(source)
+
+         # Stream tokens and collect numeric literals
+         numeric_literals = []
+         while True:
+             token = lexer.next_token()
+             if token.type == TokenType.MISC_EOF:
+                 break
+             if token.type in (TokenType.LIT_WHOLE_NUMBER, TokenType.LIT_FLOAT):
+                 numeric_literals.append(token)
+
+         assert len(numeric_literals) == 2
+
+         # First literal is wrapped (underscore wrapping handled by lexer)
+         expected_int = Token(TokenType.LIT_WHOLE_NUMBER, "42", line=1, position=12)
+         assert_expected_token(numeric_literals[0], expected_int)
+
+         # Second literal is unwrapped
+         expected_float = Token(TokenType.LIT_FLOAT, "3.14", line=1, position=28)
+         assert_expected_token(numeric_literals[1], expected_float)
+
+     def test_underscore_in_identifier(self) -> None:
+         """Test that underscores in identifiers don't interfere with literal syntax."""
+         source = "_var_name_"
+         lexer = Lexer(source)
+
+         # Expected token
+         expected = Token(TokenType.MISC_IDENT, "_var_name_", line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_incomplete_wrapped_literal(self) -> None:
+         """Test incomplete wrapped literal with invalid pattern is marked as illegal."""
+         source = "_42"  # Missing closing underscore and starts with _ followed by digits
+         lexer = Lexer(source)
+
+         # Get the token
+         token = lexer.next_token()
+
+         # Lexer no longer reports errors (parser will handle them)
+         assert token.type == TokenType.MISC_ILLEGAL
+         assert token.literal == "_42"
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_wrapped_negative_integer(self) -> None:
+         """Test underscore-wrapped negative integer literals."""
+         source = "_-42_"
+         lexer = Lexer(source)
+
+         # Expected token
+         expected = Token(TokenType.LIT_WHOLE_NUMBER, "-42", line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+         assert is_literal_token(actual)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_wrapped_negative_float(self) -> None:
+         """Test underscore-wrapped negative float literals."""
+         source = "_-3.14_"
+         lexer = Lexer(source)
+
+         # Expected token
+         expected = Token(TokenType.LIT_FLOAT, "-3.14", line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+         assert is_literal_token(actual)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_wrapped_negative_decimal_only(self) -> None:
+         """Test underscore-wrapped negative float starting with decimal point."""
+         source = "_-.5_"
+         lexer = Lexer(source)
+
+         # Expected token
+         expected = Token(TokenType.LIT_FLOAT, "-0.5", line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+         assert is_literal_token(actual)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_wrapped_positive_decimal_only(self) -> None:
+         """Test underscore-wrapped positive float starting with decimal point."""
+         source = "_.5_"
+         lexer = Lexer(source)
+
+         # Expected token (should normalize .5 to 0.5)
+         expected = Token(TokenType.LIT_FLOAT, "0.5", line=1, position=1)
+
+         # Get and verify token
+         actual = lexer.next_token()
+         assert_expected_token(actual, expected)
+         assert is_literal_token(actual)
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_invalid_negative_patterns(self) -> None:
+         """Test various invalid negative patterns in underscore literals."""
+         # Test _-_ (minus with no number)
+         source = "_-_"
+         lexer = Lexer(source)
+
+         # Should produce identifier "_" followed by minus and another identifier
+         token1 = lexer.next_token()
+         assert token1.type == TokenType.MISC_IDENT
+         assert token1.literal == "_"
+
+         token2 = lexer.next_token()
+         assert token2.type == TokenType.OP_MINUS
+         assert token2.literal == "-"
+
+         token3 = lexer.next_token()
+         assert token3.type == TokenType.MISC_IDENT
+         assert token3.literal == "_"
+
+         assert_eof(lexer.next_token())
+
+     def test_double_negative_invalid(self) -> None:
+         """Test that double negative is not valid in underscore literals."""
+         source = "_--5_"
+         lexer = Lexer(source)
+
+         # Should not parse as a literal
+         token1 = lexer.next_token()
+         assert token1.type == TokenType.MISC_IDENT
+         assert token1.literal == "_"
+
+         # Followed by two minus operators
+         token2 = lexer.next_token()
+         assert token2.type == TokenType.OP_MINUS
+
+         token3 = lexer.next_token()
+         assert token3.type == TokenType.OP_MINUS
+
+         # Then illegal pattern 5_
+         token4 = lexer.next_token()
+         assert token4.type == TokenType.MISC_ILLEGAL
+         assert token4.literal == "5_"
+
+         assert_eof(lexer.next_token())
+
+     def test_negative_in_expression(self) -> None:
+         """Test negative literal in an expression context."""
+         source = "Set **x** to _-5_."
+         lexer = Lexer(source)
+
+         # Collect all tokens
+         tokens = []
+         while True:
+             token = lexer.next_token()
+             if token.type == TokenType.MISC_EOF:
+                 break
+             tokens.append(token)
+
+         # Find the negative integer literal
+         int_literals = [t for t in tokens if t.type == TokenType.LIT_WHOLE_NUMBER]
+         assert len(int_literals) == 1
+         assert int_literals[0].literal == "-5"
+
+     def test_malformed_underscore_string_literal(self) -> None:
+         """Test malformed underscore string literal like _\"unclosed."""
+         source = '_"unclosed.'
+         lexer = Lexer(source)
+
+         # This should be treated as a single ILLEGAL token
+         token = lexer.next_token()
+         assert token.type == TokenType.MISC_ILLEGAL
+         assert token.literal == '_"unclosed.'
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_malformed_underscore_single_quote_literal(self) -> None:
+         """Test malformed underscore string literal with single quotes."""
+         source = "_'unclosed string"
+         lexer = Lexer(source)
+
+         # This should be treated as a single ILLEGAL token
+         token = lexer.next_token()
+         assert token.type == TokenType.MISC_ILLEGAL
+         assert token.literal == "_'unclosed string"
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_underscore_string_missing_closing_underscore(self) -> None:
+         """Test underscore string literal missing closing underscore."""
+         source = '_"complete string"'
+         lexer = Lexer(source)
+
+         # Without closing underscore, the opening _ is an identifier
+         # and the string is a separate token
+         token1 = lexer.next_token()
+         assert token1.type == TokenType.MISC_IDENT
+         assert token1.literal == "_"
+
+         token2 = lexer.next_token()
+         assert token2.type == TokenType.LIT_TEXT
+         assert token2.literal == '"complete string"'
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
+
+     def test_underscore_with_escaped_quote(self) -> None:
+         """Test underscore literal with escaped quote inside."""
+         source = '_"text with \\" escaped quote"_'
+         lexer = Lexer(source)
+
+         # Should parse correctly as a string literal
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_TEXT
+         assert token.literal == '"text with \\" escaped quote"'
+
+         # Verify EOF
+         assert_eof(lexer.next_token())
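Similarly, assert_expected_token and assert_eof come from the same helpers module and are not part of this diff. A plausible minimal sketch under the same assumptions (attribute names inferred from the tests above, not confirmed against the package):

from machine_dialect.lexer.tokens import Token, TokenType


def assert_expected_token(actual: Token, expected: Token) -> None:
    # Hypothetical reconstruction: compare field by field so a failure
    # pinpoints which attribute of the token mismatched.
    assert actual.type == expected.type
    assert actual.literal == expected.literal
    assert actual.line == expected.line
    assert actual.position == expected.position


def assert_eof(token: Token) -> None:
    # Hypothetical reconstruction: the lexer signals end of input with a
    # MISC_EOF token rather than raising or returning None.
    assert token.type == TokenType.MISC_EOF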