machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,907 @@
1
+ """Streaming lexer implementation for Machine Dialect™.
2
+
3
+ This module provides a Lexer class that generates tokens one at a time
4
+ instead of all at once, enabling memory-efficient parsing of large files.
5
+ """
6
+
7
+ from machine_dialect.helpers.validators import is_valid_url
8
+ from machine_dialect.lexer.constants import CHAR_TO_TOKEN_MAP
9
+ from machine_dialect.lexer.tokens import Token, TokenType, lookup_tag_token, lookup_token_type
10
+
11
+
12
+ class Lexer:
13
+ """Streaming lexer for Machine Dialect™ language.
14
+
15
+ Generates tokens one at a time from the source code.
16
+ """
17
+
18
+ def __init__(self, source: str) -> None:
19
+ """Initialize the lexer with source code.
20
+
21
+ Args:
22
+ source: The source code to tokenize.
23
+ """
24
+ self.source = source
25
+ self.position = 0
26
+ self.line = 1
27
+ self.column = 1
28
+ self.current_char: str | None = self.source[0] if source else None
29
+ self.in_summary_comment = False
30
+
31
+ @property
32
+ def at_line_start(self) -> bool:
33
+ """Check if we're at the start of a logical line.
34
+
35
+ A logical line start means we're at column 1 or only have whitespace
36
+ and block markers (>) before current position on this line.
37
+
38
+ Returns:
39
+ True if we're at the start of a logical line.
40
+ """
41
+ if self.column == 1:
42
+ return True
43
+
44
+ # Check if we only have whitespace or block markers before current position on this line
45
+ # Find the start of the current line
46
+ line_start = self.position - (self.column - 1)
47
+ for i in range(line_start, self.position):
48
+ if i < len(self.source):
49
+ char = self.source[i]
50
+ if not char.isspace() and char != ">":
51
+ return False
52
+ return True
53
+
54
+ def advance(self) -> None:
55
+ """Move to the next character in the source."""
56
+ if self.current_char == "\n":
57
+ self.line += 1
58
+ self.column = 1
59
+ else:
60
+ self.column += 1
61
+
62
+ self.position += 1
63
+ if self.position >= len(self.source):
64
+ self.current_char = None
65
+ else:
66
+ self.current_char = self.source[self.position]
67
+
68
+ def _restore_position(self, pos: int) -> None:
69
+ """Restore position and recalculate column.
70
+
71
+ Args:
72
+ pos: The position to restore to.
73
+ """
74
+ self.position = pos
75
+ self.current_char = self.source[pos] if pos < len(self.source) else None
76
+
77
+ # Recalculate column by counting from start of current line
78
+ line_start = pos
79
+ while line_start > 0 and self.source[line_start - 1] != "\n":
80
+ line_start -= 1
81
+ self.column = pos - line_start + 1
82
+
83
+ def peek(self, offset: int = 1) -> str | None:
84
+ """Look ahead at a character without consuming it.
85
+
86
+ Args:
87
+ offset: How many characters ahead to look.
88
+
89
+ Returns:
90
+ The character at the offset, or None if out of bounds.
91
+ """
92
+ peek_pos = self.position + offset
93
+ if peek_pos >= len(self.source):
94
+ return None
95
+ return self.source[peek_pos]
96
+
97
+ def skip_whitespace(self) -> None:
98
+ """Skip whitespace characters."""
99
+ while self.current_char and self.current_char.isspace():
100
+ self.advance()
101
+
102
+ def read_number(self) -> tuple[str, bool, int, int]:
103
+ """Read a number literal.
104
+
105
+ Returns:
106
+ Tuple of (literal, is_float, line, column).
107
+ """
108
+ start_pos = self.position
109
+ start_line = self.line
110
+ start_column = self.column
111
+ has_dot = False
112
+
113
+ while self.current_char and (self.current_char.isdigit() or self.current_char == "."):
114
+ if self.current_char == ".":
115
+ # Only allow one decimal point
116
+ if has_dot:
117
+ break
118
+ # Check if next character is a digit
119
+ next_char = self.peek()
120
+ if not next_char or not next_char.isdigit():
121
+ break
122
+ has_dot = True
123
+ self.advance()
124
+
125
+ return self.source[start_pos : self.position], has_dot, start_line, start_column
126
+
127
+ def read_identifier(self) -> tuple[str, int, int]:
128
+ """Read an identifier.
129
+
130
+ Returns:
131
+ Tuple of (identifier, line, column).
132
+ """
133
+ start_pos = self.position
134
+ start_line = self.line
135
+ start_column = self.column
136
+ while self.current_char and (self.current_char.isalnum() or self.current_char == "_"):
137
+ self.advance()
138
+
139
+ # Check for contractions like 't or 's
140
+ peek_char = self.peek()
141
+ if self.current_char == "'" and peek_char and peek_char.isalpha():
142
+ self.advance() # Skip apostrophe
143
+ while self.current_char and self.current_char.isalpha():
144
+ self.advance()
145
+
146
+ return self.source[start_pos : self.position], start_line, start_column
147
+
148
+ def read_string(self) -> tuple[str, int, int]:
149
+ """Read a string literal.
150
+
151
+ Returns:
152
+ Tuple of (string_literal, line, column).
153
+ """
154
+ start_pos = self.position
155
+ start_line = self.line
156
+ start_column = self.column
157
+ quote_char = self.current_char
158
+ self.advance() # Skip opening quote
159
+
160
+ while self.current_char and self.current_char != quote_char:
161
+ if self.current_char == "\\":
162
+ self.advance() # Skip escape character
163
+ if self.current_char:
164
+ self.advance() # Skip escaped character
165
+ else:
166
+ self.advance()
167
+
168
+ if self.current_char == quote_char:
169
+ self.advance() # Skip closing quote
170
+
171
+ return self.source[start_pos : self.position], start_line, start_column
172
+
173
+ def read_triple_backtick_string(self) -> tuple[str, int, int]:
174
+ """Read a triple backtick string.
175
+
176
+ Returns:
177
+ Tuple of (string_content, line, column).
178
+ """
179
+ start_line = self.line
180
+ start_column = self.column
181
+
182
+ # Skip the three backticks
183
+ self.advance() # First backtick
184
+ self.advance() # Second backtick
185
+ self.advance() # Third backtick
186
+
187
+ # Read until we find three closing backticks
188
+ content_start = self.position
189
+ while self.current_char:
190
+ if self.current_char == "`" and self.peek() == "`" and self.peek(2) == "`":
191
+ content = self.source[content_start : self.position]
192
+ # Skip the closing backticks
193
+ self.advance()
194
+ self.advance()
195
+ self.advance()
196
+ return content, start_line, start_column
197
+ self.advance()
198
+
199
+ # Unclosed triple backtick string
200
+ content = self.source[content_start : self.position]
201
+ return content, start_line, start_column
202
+
203
    def check_multi_word_keyword(self, first_word: str, line: int, pos: int) -> tuple[str | None, int]:
        """Check if the identifier starts a multi-word keyword.

        Greedily scans forward word by word, remembering the longest prefix
        of words that forms a recognized keyword. On success the lexer is
        left positioned after the matched keyword; on failure the lexer is
        restored to where it was on entry.

        Args:
            first_word: The first word that was read.
            line: Line number of the first word.
                NOTE(review): currently unused in the body — confirm intent.
            pos: Column position of the first word.
                NOTE(review): currently unused in the body — confirm intent.

        Returns:
            Tuple of (multi_word_keyword, end_position) if found, otherwise (None, current_position).
        """
        # Save current state so we can roll back if no keyword matches.
        saved_position = self.position
        saved_line = self.line
        saved_column = self.column
        saved_char = self.current_char

        words = [first_word]
        # Longest keyword seen so far, plus the lexer state just after it.
        longest_match: str | None = None
        longest_match_position = self.position
        longest_match_line = self.line
        longest_match_column = self.column
        longest_match_char = self.current_char

        # Try to build progressively longer multi-word sequences
        while True:
            # Skip whitespace
            start_whitespace = self.position
            self.skip_whitespace()

            # If no whitespace was skipped, we can't have a multi-word keyword
            if self.position == start_whitespace:
                break

            # Try to read the next word
            if not self.current_char or not self.current_char.isalpha():
                break

            next_word, _, _ = self.read_identifier()
            if not next_word:
                break

            words.append(next_word)
            potential_keyword = " ".join(words)

            # Check if this forms a valid multi-word keyword
            token_type, _ = lookup_token_type(potential_keyword)
            # Only accept actual keywords, not just any valid identifier
            if token_type not in (TokenType.MISC_ILLEGAL, TokenType.MISC_IDENT, TokenType.MISC_STOPWORD):
                # Found a valid multi-word keyword; record it but keep
                # scanning in case an even longer keyword matches.
                longest_match = potential_keyword
                longest_match_position = self.position
                longest_match_line = self.line
                longest_match_column = self.column
                longest_match_char = self.current_char

        if longest_match:
            # Use the longest matching multi-word keyword: jump the lexer to
            # the state recorded just after that match.
            self.position = longest_match_position
            self.line = longest_match_line
            self.column = longest_match_column
            self.current_char = longest_match_char
            return longest_match, self.position
        else:
            # No multi-word keyword found, restore original position
            self.position = saved_position
            self.line = saved_line
            self.column = saved_column
            self.current_char = saved_char
            return None, self.position
273
+
274
+ def read_double_asterisk_keyword(self) -> tuple[str, TokenType, int, int] | None:
275
+ """Read a double-asterisk wrapped keyword.
276
+
277
+ Returns:
278
+ Tuple of (literal, token_type, line, column) or None if not a valid keyword.
279
+ """
280
+ start_pos = self.position
281
+ start_line = self.line
282
+ start_column = self.column
283
+
284
+ # Skip first two asterisks
285
+ self.advance() # First *
286
+ self.advance() # Second *
287
+
288
+ # Check what comes after the asterisks
289
+ if not self.current_char or not self.current_char.isalpha():
290
+ # Restore position
291
+ self.position = start_pos
292
+ self.line = start_line
293
+ self.column = start_column
294
+ self.current_char = self.source[self.position] if self.position < len(self.source) else None
295
+ return None
296
+
297
+ # Read the keyword (can be multi-word)
298
+ words = []
299
+
300
+ while True:
301
+ # Read a word
302
+ if not self.current_char or not self.current_char.isalpha():
303
+ break
304
+
305
+ word_start = self.position
306
+ while self.current_char and self.current_char.isalpha():
307
+ self.advance()
308
+ words.append(self.source[word_start : self.position])
309
+
310
+ # Check if there's a space and another word
311
+ if self.current_char == " ":
312
+ # Peek ahead to see if there's another word or closing **
313
+ saved_pos = self.position
314
+ saved_line = self.line
315
+ saved_column = self.column
316
+ saved_char = self.current_char
317
+
318
+ self.advance() # Skip space
319
+
320
+ if self.current_char == "*" and self.peek() == "*":
321
+ # It's the closing **, restore and break
322
+ self.position = saved_pos
323
+ self.line = saved_line
324
+ self.column = saved_column
325
+ self.current_char = saved_char
326
+ break
327
+ elif self.current_char and self.current_char.isalpha():
328
+ # Another word follows, continue
329
+ continue
330
+ else:
331
+ # Not a word, restore and break
332
+ self.position = saved_pos
333
+ self.line = saved_line
334
+ self.column = saved_column
335
+ self.current_char = saved_char
336
+ break
337
+ else:
338
+ break
339
+
340
+ keyword = " ".join(words) if words else ""
341
+
342
+ # Check for closing double asterisk
343
+ if self.current_char == "*" and self.peek() == "*":
344
+ self.advance() # First closing *
345
+ self.advance() # Second closing *
346
+
347
+ # Check if it's a valid keyword
348
+ from machine_dialect.lexer.tokens import lookup_token_type
349
+
350
+ token_type, canonical = lookup_token_type(keyword)
351
+
352
+ # Only accept actual keywords, not identifiers, stopwords, or boolean literals
353
+ if token_type not in (
354
+ TokenType.MISC_ILLEGAL,
355
+ TokenType.MISC_IDENT,
356
+ TokenType.MISC_STOPWORD,
357
+ TokenType.LIT_YES,
358
+ TokenType.LIT_NO,
359
+ ):
360
+ return canonical, token_type, start_line, start_column
361
+
362
+ # Not a valid double-asterisk keyword, restore position
363
+ self.position = start_pos
364
+ self.line = start_line
365
+ self.column = start_column
366
+ self.current_char = self.source[self.position] if self.position < len(self.source) else None
367
+ return None
368
+
369
+ def read_tag_token(self) -> tuple[str, TokenType, int, int] | None:
370
+ """Read a tag token like <summary>, </summary>, <details>, </details>.
371
+
372
+ Returns:
373
+ Tuple of (literal, token_type, line, column) or None if not a valid tag.
374
+ """
375
+ start_pos = self.position
376
+ start_line = self.line
377
+ start_column = self.column
378
+
379
+ # Must start with '<'
380
+ if self.current_char != "<":
381
+ return None
382
+
383
+ self.advance() # Skip '<'
384
+
385
+ # Check for closing tag
386
+ is_closing = False
387
+ if self.current_char == "/":
388
+ is_closing = True
389
+ self.advance() # Skip '/'
390
+
391
+ # Read the tag name
392
+ tag_name_start = self.position
393
+ while self.current_char and self.current_char.isalpha():
394
+ self.advance()
395
+
396
+ tag_name = self.source[tag_name_start : self.position]
397
+
398
+ # Must end with '>'
399
+ if self.current_char != ">":
400
+ # Not a valid tag, restore position
401
+ self.position = start_pos
402
+ self.line = start_line
403
+ self.column = start_column
404
+ self.current_char = self.source[self.position] if self.position < len(self.source) else None
405
+ return None
406
+
407
+ self.advance() # Skip '>'
408
+
409
+ # Construct the full tag literal
410
+ if is_closing:
411
+ tag_literal = f"</{tag_name}>"
412
+ else:
413
+ tag_literal = f"<{tag_name}>"
414
+
415
+ # Check if it's a valid tag token
416
+ token_type, canonical_literal = lookup_tag_token(tag_literal)
417
+ if token_type:
418
+ return canonical_literal, token_type, start_line, start_column
419
+
420
+ # Not a recognized tag, restore position
421
+ self.position = start_pos
422
+ self.line = start_line
423
+ self.column = start_column
424
+ self.current_char = self.source[self.position] if self.position < len(self.source) else None
425
+ return None
426
+
427
+ def read_comment_content(self) -> tuple[str, int, int]:
428
+ """Read comment content until </summary> tag is found.
429
+
430
+ Returns:
431
+ Tuple of (comment_content, line, column).
432
+ """
433
+ start_line = self.line
434
+ start_column = self.column
435
+ content_start = self.position
436
+
437
+ while self.current_char:
438
+ # Look for potential closing tag
439
+ if self.current_char == "<":
440
+ # Save position before checking
441
+ saved_pos = self.position
442
+ saved_line = self.line
443
+ saved_column = self.column
444
+ saved_char = self.current_char
445
+
446
+ # Check if it's </summary>
447
+ self.advance() # Skip '<'
448
+ if self.current_char == "/":
449
+ self.advance() # Skip '/'
450
+ # Check for "summary"
451
+ tag_start = self.position
452
+ while self.current_char and self.current_char.isalpha():
453
+ self.advance()
454
+ tag_name = self.source[tag_start : self.position]
455
+
456
+ if tag_name.lower() == "summary" and self.current_char == ">":
457
+ # Found closing tag, restore to before the tag
458
+ self.position = saved_pos
459
+ self.line = saved_line
460
+ self.column = saved_column
461
+ self.current_char = saved_char
462
+ # Return the content before the closing tag
463
+ content = self.source[content_start:saved_pos]
464
+ return content, start_line, start_column
465
+
466
+ # Not a closing summary tag, restore and continue
467
+ self.position = saved_pos
468
+ self.line = saved_line
469
+ self.column = saved_column
470
+ self.current_char = saved_char
471
+
472
+ self.advance()
473
+
474
+ # No closing tag found, return content up to EOF
475
+ content = self.source[content_start : self.position]
476
+ return content, start_line, start_column
477
+
478
    def read_underscore_literal(self) -> tuple[str, TokenType, int, int] | None:
        """Read an underscore-wrapped literal such as ``_42_``, ``_-1.5_``,
        ``_"text"_``, ``_yes_`` or ``_empty_``.

        On success the underscores are stripped and a canonical literal is
        returned. On an ordinary mismatch the lexer state is fully restored.
        Two malformed cases deliberately do NOT restore (extra trailing
        underscores after a number, and an unclosed string): the consumed span
        lets the caller emit an illegal token.

        Returns:
            Tuple of (literal, token_type, line, column) or None if not a
            valid underscore-wrapped literal.
        """
        start_pos = self.position
        start_line = self.line
        start_column = self.column

        self.advance()  # Skip first underscore

        # The literal is reported at the opening underscore's own column.
        literal_column = start_column

        # Underscore at EOF: nothing can follow, restore and bail out.
        if not self.current_char:
            # Restore position
            self.position = start_pos
            self.line = start_line
            self.column = start_column
            self.current_char = self.source[self.position] if self.position < len(self.source) else None
            return None

        # Optional negative sign — only meaningful before a number.
        has_minus = False
        if self.current_char == "-":
            has_minus = True
            self.advance()  # Skip minus sign

            # Minus at EOF: restore and bail out.
            if not self.current_char:
                # Restore position
                self.position = start_pos
                self.line = start_line
                self.column = start_column
                self.current_char = self.source[self.position] if self.position < len(self.source) else None
                return None

        # Dispatch on what follows: number, quoted string, or bare word.
        next_char = self.peek()
        if self.current_char.isdigit() or (self.current_char == "." and next_char and next_char.isdigit()):
            # Number literal (read_number yields the text and a float flag;
            # the remaining tuple items are unused here).
            literal, is_float, _, _ = self.read_number()

            # Normalize decimal-only floats (e.g., ".5" -> "0.5")
            if is_float and literal.startswith("."):
                literal = "0" + literal

            # Re-attach the sign consumed above.
            if has_minus:
                literal = "-" + literal

            # A valid literal needs exactly one closing underscore.
            if self.current_char == "_":
                self.advance()

                # Extra trailing underscores make the whole span invalid.
                if self.current_char == "_":
                    # Deliberately keep the consumed position so the caller
                    # can turn the span into an illegal token.
                    return None

                # Canonical form carries no underscores.
                token_type = TokenType.LIT_FLOAT if is_float else TokenType.LIT_WHOLE_NUMBER
                return literal, token_type, start_line, literal_column
        elif self.current_char in ('"', "'"):
            # String literal — a minus sign before a string is never valid.
            if has_minus:
                # Restore position
                self.position = start_pos
                self.line = start_line
                self.column = start_column
                self.current_char = self.source[self.position] if self.position < len(self.source) else None
                return None
            quote_char = self.current_char
            self.advance()  # Skip opening quote

            # Scan to the matching quote, skipping backslash escapes.
            string_content_start = self.position
            while self.current_char and self.current_char != quote_char:
                if self.current_char == "\\":
                    self.advance()
                    if self.current_char:
                        self.advance()
                else:
                    self.advance()

            if self.current_char == quote_char:
                self.advance()  # Skip closing quote

                # A valid literal needs a closing underscore.
                if self.current_char == "_":
                    self.advance()
                    # Content sits between the quotes; position is now past
                    # both the closing quote and the closing underscore.
                    string_content = self.source[string_content_start : self.position - 2]
                    # Canonical form keeps the quotes but drops the underscores.
                    full_literal = f"{quote_char}{string_content}{quote_char}"

                    # URLs get their own token type.
                    url_to_check = string_content
                    token_type = TokenType.LIT_URL if is_valid_url(url_to_check) else TokenType.LIT_TEXT
                    return full_literal, token_type, start_line, literal_column
            else:
                # Unclosed string: the span is malformed. Keep the consumed
                # position (no restore) and let the caller flag it.
                return None
        elif self.current_char.isalpha():
            # Bare word: only boolean/empty keywords are valid here, and a
            # minus sign before them is never valid.
            if has_minus:
                # Restore position
                self.position = start_pos
                self.line = start_line
                self.column = start_column
                self.current_char = self.source[self.position] if self.position < len(self.source) else None
                return None
            ident_start = self.position
            while self.current_char and self.current_char.isalpha():
                self.advance()

            literal = self.source[ident_start : self.position]

            # Only accept the word if it is immediately closed by '_'.
            if self.current_char == "_":
                # Recognized words: booleans (true/false/yes/no) and "empty".
                if literal.lower() in ("true", "false", "yes", "no", "empty"):
                    self.advance()  # Consume the closing underscore
                    # Emit the canonical, underscore-free form.
                    if literal.lower() == "empty":
                        return "empty", TokenType.KW_EMPTY, start_line, literal_column
                    else:
                        # All boolean spellings canonicalize to Yes/No.
                        is_true = literal.lower() in ("true", "yes")
                        canonical_literal = "Yes" if is_true else "No"
                        token_type = TokenType.LIT_YES if is_true else TokenType.LIT_NO
                        return canonical_literal, token_type, start_line, literal_column

        # Not a valid underscore-wrapped literal, restore position
        # (this also covers a minus sign with no valid literal after it,
        # and a number/word missing its closing underscore).
        self.position = start_pos
        self.line = start_line
        self.column = start_column
        self.current_char = self.source[self.position] if self.position < len(self.source) else None
        return None
623
+
624
    def next_token(self, in_block: bool = False, in_list_context: bool = False) -> Token:
        """Get the next token from the source.

        Branches are tried in a fixed order: pending summary-comment content,
        EOF, tag tokens, underscore literals, ``**`` keywords/operator,
        numbers, identifiers/keywords, strings, backticks, list-context dash,
        single-character tokens, and finally an illegal-character fallback.

        Args:
            in_block: Whether we're currently parsing inside a block (currently unused).
            in_list_context: Whether we're in a list definition context (after Set ... to:).

        Returns:
            The next token, or an EOF token if no more tokens are available.
        """
        # A prior <summary> tag armed this flag: the very next token is the
        # raw comment body, whitespace included.
        if self.in_summary_comment:
            self.in_summary_comment = False
            # Don't skip whitespace - it's part of the comment.
            # At EOF there is no comment body to emit.
            if self.current_char is None:
                return Token(TokenType.MISC_EOF, "", self.line, self.column)
            content, line, pos = self.read_comment_content()
            return Token(TokenType.MISC_COMMENT, content, line, pos)

        # Skip whitespace
        self.skip_whitespace()

        # End of input.
        if self.current_char is None:
            return Token(TokenType.MISC_EOF, "", self.line, self.column)

        # Remember where this token starts for position reporting.
        token_line = self.line
        token_column = self.column

        # Tag tokens (<summary>, </summary>, <details>, </details>). On a
        # miss read_tag_token restores state and we fall through.
        if self.current_char == "<":
            tag_result = self.read_tag_token()
            if tag_result:
                literal, token_type, line, pos = tag_result
                # A summary start tag arms comment mode for the next call.
                if token_type == TokenType.TAG_SUMMARY_START:
                    self.in_summary_comment = True
                return Token(token_type, literal, line, pos)

        # Underscore-wrapped literals (_42_, _"x"_, _yes_, ...).
        if self.current_char == "_":
            start_pos = self.position
            literal_result = self.read_underscore_literal()
            if literal_result:
                literal, token_type, line, pos = literal_result
                return Token(token_type, literal, line, pos)

            # read_underscore_literal returns None WITHOUT restoring for some
            # malformed spans; consumed characters mean an illegal token.
            if self.position > start_pos:
                # Sweep up any remaining underscores into the illegal span.
                while self.current_char == "_":
                    self.advance()
                illegal_literal = self.source[start_pos : self.position]
                return Token(TokenType.MISC_ILLEGAL, illegal_literal, token_line, token_column)

            # Nothing consumed: check for an incomplete pattern like "_5"
            # (underscore directly followed by a number, no closing '_').
            next_char = self.peek()
            next_next_char = self.peek(2)
            if next_char and (
                next_char.isdigit() or (next_char == "." and next_next_char is not None and next_next_char.isdigit())
            ):
                # Invalid underscore pattern — consume it whole.
                self.advance()  # Skip underscore

                # Read the number part
                if self.current_char == "." or (self.current_char and self.current_char.isdigit()):
                    self.read_number()

                # Consume trailing underscores
                while self.current_char == "_":
                    self.advance()

                illegal_literal = self.source[start_pos : self.position]
                return Token(TokenType.MISC_ILLEGAL, illegal_literal, token_line, token_column)

        # Double-asterisk wrapped keywords (e.g. **if**) or the ** operator.
        if self.current_char == "*" and self.peek() == "*":
            asterisk_result = self.read_double_asterisk_keyword()
            if asterisk_result:
                literal, token_type, line, pos = asterisk_result
                return Token(token_type, literal, line, pos)
            else:
                # Not a wrapped keyword, treat as ** operator
                self.advance()  # First *
                self.advance()  # Second *
                return Token(TokenType.OP_TWO_STARS, "**", token_line, token_column)

        # Numbers (including ".5"-style floats).
        next_char = self.peek()
        if self.current_char.isdigit() or (self.current_char == "." and next_char and next_char.isdigit()):
            literal, is_float, _, _ = self.read_number()

            # A trailing underscore after a bare number is invalid (e.g. "5_").
            if self.current_char == "_":
                start_pos = self.position - len(literal)
                self.advance()
                illegal_literal = self.source[start_pos : self.position]
                return Token(TokenType.MISC_ILLEGAL, illegal_literal, token_line, token_column)

            # Normalize ".5" -> "0.5".
            if literal.startswith("."):
                literal = "0" + literal

            token_type = TokenType.LIT_FLOAT if is_float else TokenType.LIT_WHOLE_NUMBER
            return Token(token_type, literal, token_line, token_column)

        # Identifiers and keywords
        if self.current_char.isalpha() or self.current_char == "_":
            # Special-case runs of underscores directly followed by a number
            # (e.g. "__5_"): two or more leading underscores make it illegal.
            if self.current_char == "_":
                underscore_count = 0
                temp_pos = self.position
                while temp_pos < len(self.source) and self.source[temp_pos] == "_":
                    underscore_count += 1
                    temp_pos += 1

                if temp_pos < len(self.source) and (
                    self.source[temp_pos].isdigit()
                    or (
                        self.source[temp_pos] == "."
                        and temp_pos + 1 < len(self.source)
                        and self.source[temp_pos + 1].isdigit()
                    )
                ):
                    if underscore_count > 1:
                        # Invalid pattern: consume underscores, number, and
                        # any trailing underscores as one illegal token.
                        start_pos = self.position
                        for _ in range(underscore_count):
                            self.advance()

                        self.read_number()

                        while self.current_char == "_":
                            self.advance()

                        illegal_literal = self.source[start_pos : self.position]
                        return Token(TokenType.MISC_ILLEGAL, illegal_literal, token_line, token_column)

            # Read identifier
            literal, _, _ = self.read_identifier()

            # Special check for the "Yes/No" type keyword spelled with a slash.
            if (
                literal is not None
                and literal.lower() == "yes"
                and self.current_char == "/"
                and self.peek() is not None
                and self.peek().lower() == "n"  # type: ignore[union-attr]
                and self.peek(2) is not None
                and self.peek(2).lower() == "o"  # type: ignore[union-attr]
            ):
                # Consume "/No"
                self.advance()  # Skip '/'
                self.advance()  # Skip 'N' or 'n'
                self.advance()  # Skip 'o' or 'O'
                # Return the Yes/No keyword token
                return Token(TokenType.KW_YES_NO, "Yes/No", token_line, token_column)

            # Multi-word keywords take precedence over single words.
            multi_word, _ = self.check_multi_word_keyword(literal, token_line, token_column)
            if multi_word:
                token_type, canonical_literal = lookup_token_type(multi_word)
                return Token(token_type, canonical_literal, token_line, token_column)

            # Single word keyword or identifier
            token_type, canonical_literal = lookup_token_type(literal)
            return Token(token_type, canonical_literal, token_line, token_column)

        # Quoted strings; URLs get a dedicated token type.
        if self.current_char in ('"', "'"):
            literal, _, _ = self.read_string()

            # Strip the surrounding quotes before the URL check.
            url_to_check = literal[1:-1] if len(literal) > 2 else literal
            token_type = TokenType.LIT_URL if is_valid_url(url_to_check) else TokenType.LIT_TEXT
            return Token(token_type, literal, token_line, token_column)

        # Backticks
        if self.current_char == "`":
            # Triple backticks delimit a raw block.
            if self.peek() == "`" and self.peek(2) == "`":
                literal, _, _ = self.read_triple_backtick_string()
                return Token(TokenType.LIT_TRIPLE_BACKTICK, literal, token_line, token_column)

            # Single backtick identifier
            start_pos = self.position
            self.advance()  # Skip opening backtick

            # For backtick identifiers:
            # - If backtick is at column 1, report column 1
            # - Otherwise, report the column after the backtick
            identifier_column = token_column if token_column == 1 else self.column
            identifier_start = self.position
            while self.current_char and self.current_char != "`":
                self.advance()

            identifier = self.source[identifier_start : self.position]

            if self.current_char == "`" and identifier:
                from machine_dialect.lexer.tokens import is_valid_identifier

                if is_valid_identifier(identifier):
                    self.advance()  # Skip closing backtick
                    token_type, canonical_literal = lookup_token_type(identifier)

                    # Backticks force the content to be treated as an
                    # identifier, even for keywords, stopwords and booleans.
                    from machine_dialect.lexer.tokens import TokenMetaType

                    if (
                        token_type == TokenType.MISC_STOPWORD
                        or token_type.meta_type == TokenMetaType.KW
                        or token_type in (TokenType.LIT_YES, TokenType.LIT_NO)
                    ):
                        token_type = TokenType.MISC_IDENT
                        canonical_literal = identifier

                    if token_type != TokenType.MISC_ILLEGAL:
                        # `person`'s — possessive access on a backtick
                        # identifier gets its own token type.
                        if self.current_char == "'" and self.peek() == "s":
                            # Skip the apostrophe and 's'
                            self.advance()  # Skip '
                            self.advance()  # Skip 's'
                            # The literal carries the identifier for context.
                            return Token(TokenType.PUNCT_APOSTROPHE_S, canonical_literal, token_line, identifier_column)
                        return Token(token_type, canonical_literal, token_line, identifier_column)

            # Invalid backtick usage: rewind to the opening backtick.
            self._restore_position(start_pos)

            # A lone backtick is illegal.
            self.advance()
            return Token(TokenType.MISC_ILLEGAL, "`", token_line, token_column)

        # In list context, a dash at line start is a list-item marker.
        if self.current_char == "-" and self.at_line_start and in_list_context:
            # In list context, dash at line start is a list marker
            self.advance()
            return Token(TokenType.PUNCT_DASH, "-", token_line, token_column)

        # Single character tokens (operators, delimiters, punctuation)
        if self.current_char in CHAR_TO_TOKEN_MAP:
            char = self.current_char
            self.advance()

            # Multi-character operators built from a mapped first character.
            if char == "<" and self.current_char == "=":
                self.advance()
                return Token(TokenType.OP_LTE, "<=", token_line, token_column)
            elif char == ">" and self.current_char == "=":
                self.advance()
                return Token(TokenType.OP_GTE, ">=", token_line, token_column)
            elif char == "#":
                # Check for ##, ###, or #### (heading markers).
                if self.current_char == "#":
                    self.advance()
                    if self.current_char == "#":
                        self.advance()
                        if self.current_char == "#":
                            self.advance()
                            return Token(TokenType.PUNCT_HASH_QUAD, "####", token_line, token_column)
                        return Token(TokenType.PUNCT_HASH_TRIPLE, "###", token_line, token_column)
                    return Token(TokenType.PUNCT_HASH_DOUBLE, "##", token_line, token_column)
            elif char == "-":
                # Check for --- (frontmatter delimiter)
                if self.current_char == "-" and self.peek() == "-":
                    self.advance()  # Second dash
                    self.advance()  # Third dash
                    return Token(TokenType.PUNCT_FRONTMATTER, "---", token_line, token_column)

            # Plain single-character token.
            token_type = CHAR_TO_TOKEN_MAP[char]
            return Token(token_type, char, token_line, token_column)

        # Unknown character - illegal token
        char = self.current_char
        self.advance()
        return Token(TokenType.MISC_ILLEGAL, char, token_line, token_column)