machine_dialect-0.1.0a1-py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in the public registry.
Files changed (268)
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
machine_dialect/lexer/tests/test_list_tokens.py
@@ -0,0 +1,282 @@
+ """Test list-related token recognition in the lexer."""
+
+ from machine_dialect.lexer import Lexer, TokenType
+
+
+ class TestListMarkers:
+     """Test recognition of list markers (dash vs minus)."""
+
+     def test_dash_at_line_start(self) -> None:
+         """Test that dash at line start is recognized as PUNCT_DASH in list context."""
+         lexer = Lexer('- _"apple"_')
+
+         # Without list context, it's OP_MINUS
+         token = lexer.next_token(in_list_context=False)
+         assert token.type == TokenType.OP_MINUS
+         assert token.literal == "-"
+
+         # Reset lexer
+         lexer = Lexer('- _"apple"_')
+
+         # With list context, it's PUNCT_DASH
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.PUNCT_DASH
+         assert token.literal == "-"
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.LIT_TEXT
+         assert token.literal == '"apple"'
+
+     def test_dash_after_whitespace(self) -> None:
+         """Test that dash after whitespace at line start is PUNCT_DASH in list context."""
+         lexer = Lexer(' - _"apple"_')
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.PUNCT_DASH
+         assert token.literal == "-"
+
+     def test_dash_after_block_marker(self) -> None:
+         """Test that dash after block marker (>) is PUNCT_DASH in list context."""
+         lexer = Lexer('> - _"apple"_')
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.OP_GT
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.PUNCT_DASH
+         assert token.literal == "-"
+
+     def test_dash_in_expression(self) -> None:
+         """Test that dash in expression context is OP_MINUS."""
+         lexer = Lexer("_5_ - _3_")
+
+         # First number
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_WHOLE_NUMBER
+
+         # Minus operator
+         token = lexer.next_token()
+         assert token.type == TokenType.OP_MINUS
+         assert token.literal == "-"
+
+         # Second number
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_WHOLE_NUMBER
+
+     def test_multiple_dashes_at_line_start(self) -> None:
+         """Test that --- at line start is PUNCT_FRONTMATTER."""
+         lexer = Lexer("---")
+
+         token = lexer.next_token()
+         assert token.type == TokenType.PUNCT_FRONTMATTER
+         assert token.literal == "---"
+
+     def test_dash_on_new_line(self) -> None:
+         """Test dash recognition across multiple lines in list context."""
+         source = """Set `x` to _5_.
+ - _"apple"_
+ - _"banana"_"""
+
+         lexer = Lexer(source)
+
+         # First line: Set `x` to _5_ (not in list context)
+         token = lexer.next_token()
+         assert token.type == TokenType.KW_SET
+
+         token = lexer.next_token()
+         assert token.type == TokenType.MISC_IDENT
+
+         token = lexer.next_token()
+         assert token.type == TokenType.KW_TO
+
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_WHOLE_NUMBER
+
+         token = lexer.next_token()
+         assert token.type == TokenType.PUNCT_PERIOD
+
+         # Second line: - _"apple"_ (in list context)
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.PUNCT_DASH
+         assert token.literal == "-"
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.LIT_TEXT
+
+         # Third line: - _"banana"_ (in list context)
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.PUNCT_DASH
+         assert token.literal == "-"
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.LIT_TEXT
+
+
+ class TestListKeywords:
+     """Test new keywords for list operations."""
+
+     def test_list_operation_keywords(self) -> None:
+         """Test recognition of list operation keywords."""
+         keywords = [
+             ("add", TokenType.KW_ADD),
+             ("remove", TokenType.KW_REMOVE),
+             ("insert", TokenType.KW_INSERT),
+             ("has", TokenType.KW_HAS),
+         ]
+
+         for literal, expected_type in keywords:
+             lexer = Lexer(literal)
+             token = lexer.next_token()
+             assert token.type == expected_type
+             assert token.literal == literal
+
+     def test_list_type_keywords(self) -> None:
+         """Test recognition of list type keywords."""
+         keywords = [
+             ("Ordered List", TokenType.KW_ORDERED_LIST),
+             ("Unordered List", TokenType.KW_UNORDERED_LIST),
+             ("Named List", TokenType.KW_NAMED_LIST),
+         ]
+
+         for literal, expected_type in keywords:
+             lexer = Lexer(literal)
+             token = lexer.next_token()
+             assert token.type == expected_type
+             assert token.literal == literal
+
+     def test_list_access_keywords(self) -> None:
+         """Test recognition of list access keywords."""
+         keywords = [
+             ("first", TokenType.KW_FIRST),
+             ("second", TokenType.KW_SECOND),
+             ("third", TokenType.KW_THIRD),
+             ("last", TokenType.KW_LAST),
+             ("item", TokenType.KW_ITEM),
+             ("of", TokenType.KW_OF),
+         ]
+
+         for literal, expected_type in keywords:
+             lexer = Lexer(literal)
+             token = lexer.next_token()
+             assert token.type == expected_type
+             assert token.literal == literal
+
+     def test_iteration_keywords(self) -> None:
+         """Test recognition of iteration keywords."""
+         keywords = [
+             ("for", TokenType.KW_FOR),
+             ("each", TokenType.KW_EACH),
+             ("in", TokenType.KW_IN),
+         ]
+
+         for literal, expected_type in keywords:
+             lexer = Lexer(literal)
+             token = lexer.next_token()
+             assert token.type == expected_type
+             assert token.literal == literal
+
+     def test_named_list_keywords(self) -> None:
+         """Test recognition of named list keywords."""
+         keywords = [
+             ("name", TokenType.KW_NAME),
+             ("names", TokenType.KW_NAME),  # Plural maps to same token type
+             ("content", TokenType.KW_CONTENT),
+             ("contents", TokenType.KW_CONTENT),  # Plural maps to same token type
+         ]
+
+         for literal, expected_type in keywords:
+             lexer = Lexer(literal)
+             token = lexer.next_token()
+             assert token.type == expected_type
+             # Literals are preserved as-is in the token
+             assert token.literal == literal
+
+
+ class TestComplexListScenarios:
+     """Test complex scenarios involving list tokens."""
+
+     def test_list_with_colon(self) -> None:
+         """Test dash followed by identifier and colon (named list syntax)."""
+         lexer = Lexer("- name: `value`")
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.PUNCT_DASH
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.KW_NAME
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.PUNCT_COLON
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.MISC_IDENT
+         assert token.literal == "value"
+
+     def test_numbered_list_marker(self) -> None:
+         """Test numbered list markers (1., 2., etc.)."""
+         lexer = Lexer("1. first\n2. second")
+
+         # 1.
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_WHOLE_NUMBER
+         assert token.literal == "1"
+
+         token = lexer.next_token()
+         assert token.type == TokenType.PUNCT_PERIOD
+
+         # first
+         token = lexer.next_token()
+         assert token.type == TokenType.KW_FIRST
+
+         # 2.
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_WHOLE_NUMBER
+         assert token.literal == "2"
+
+         token = lexer.next_token()
+         assert token.type == TokenType.PUNCT_PERIOD
+
+         # second
+         token = lexer.next_token()
+         assert token.type == TokenType.KW_SECOND
+
+     def test_expression_with_negative_number(self) -> None:
+         """Test that negative numbers still work correctly."""
+         lexer = Lexer("_-42_")
+
+         token = lexer.next_token()
+         assert token.type == TokenType.LIT_WHOLE_NUMBER
+         assert token.literal == "-42"
+
+     def test_subtraction_vs_list_marker(self) -> None:
+         """Test differentiating subtraction from list markers."""
+         # Subtraction (not in list context)
+         lexer = Lexer("`x` - `y`")
+
+         token = lexer.next_token()
+         assert token.type == TokenType.MISC_IDENT
+
+         token = lexer.next_token()
+         assert token.type == TokenType.OP_MINUS
+
+         token = lexer.next_token()
+         assert token.type == TokenType.MISC_IDENT
+
+         # List marker on new line (in list context)
+         lexer = Lexer('\n- _"apple"_')
+
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.PUNCT_DASH
+
+     def test_list_with_negative_number(self) -> None:
+         """Test list items that include negative numbers."""
+         lexer = Lexer("- _-42_")
+
+         # First dash is list marker in list context
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.PUNCT_DASH
+
+         # The literal with negative number
+         token = lexer.next_token(in_list_context=True)
+         assert token.type == TokenType.LIT_WHOLE_NUMBER
+         assert token.literal == "-42"
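
The tests above pin down a context-sensitive dash: the lexer resolves the ambiguity at the call site, so the same `-` character tokenizes as OP_MINUS in expressions and as PUNCT_DASH at the start of a list line when the caller passes `next_token(in_list_context=True)`. A minimal driver sketch using only the `Lexer.next_token` API shown in this diff (the `classify_dash` helper is hypothetical, not part of the package):

    from machine_dialect.lexer import Lexer, TokenType

    def classify_dash(source: str, in_list: bool) -> TokenType:
        # Hypothetical helper: report which token type the lexer
        # assigns to the leading dash of `source`.
        return Lexer(source).next_token(in_list_context=in_list).type

    # Identical source, different context, different token type.
    assert classify_dash('- _"apple"_', in_list=False) == TokenType.OP_MINUS
    assert classify_dash('- _"apple"_', in_list=True) == TokenType.PUNCT_DASH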
machine_dialect/lexer/tests/test_stopwords.py
@@ -0,0 +1,80 @@
+ import pytest
+
+ from machine_dialect.lexer import Lexer
+ from machine_dialect.lexer.tests.helpers import stream_and_assert_tokens
+ from machine_dialect.lexer.tokens import Token, TokenType
+
+
+ class TestStopwords:
+     @pytest.mark.parametrize(
+         "input_text,expected_tokens",
+         [
+             # Common stopwords
+             ("the", [Token(TokenType.MISC_STOPWORD, "the", line=1, position=1)]),
+             ("a", [Token(TokenType.MISC_STOPWORD, "a", line=1, position=1)]),
+             ("an", [Token(TokenType.MISC_STOPWORD, "an", line=1, position=1)]),
+             ("on", [Token(TokenType.MISC_STOPWORD, "on", line=1, position=1)]),
+             ("at", [Token(TokenType.MISC_STOPWORD, "at", line=1, position=1)]),
+             ("by", [Token(TokenType.MISC_STOPWORD, "by", line=1, position=1)]),
+             ("about", [Token(TokenType.MISC_STOPWORD, "about", line=1, position=1)]),
+             ("against", [Token(TokenType.MISC_STOPWORD, "against", line=1, position=1)]),
+             ("between", [Token(TokenType.MISC_STOPWORD, "between", line=1, position=1)]),
+             ("into", [Token(TokenType.MISC_STOPWORD, "into", line=1, position=1)]),
+             ("through", [Token(TokenType.MISC_STOPWORD, "through", line=1, position=1)]),
+             ("during", [Token(TokenType.MISC_STOPWORD, "during", line=1, position=1)]),
+             ("before", [Token(TokenType.MISC_STOPWORD, "before", line=1, position=1)]),
+             ("after", [Token(TokenType.MISC_STOPWORD, "after", line=1, position=1)]),
+             ("above", [Token(TokenType.MISC_STOPWORD, "above", line=1, position=1)]),
+             ("below", [Token(TokenType.MISC_STOPWORD, "below", line=1, position=1)]),
+             ("up", [Token(TokenType.MISC_STOPWORD, "up", line=1, position=1)]),
+             ("down", [Token(TokenType.MISC_STOPWORD, "down", line=1, position=1)]),
+             ("out", [Token(TokenType.MISC_STOPWORD, "out", line=1, position=1)]),
+             ("off", [Token(TokenType.MISC_STOPWORD, "off", line=1, position=1)]),
+             ("over", [Token(TokenType.MISC_STOPWORD, "over", line=1, position=1)]),
+             ("under", [Token(TokenType.MISC_STOPWORD, "under", line=1, position=1)]),
+             ("again", [Token(TokenType.MISC_STOPWORD, "again", line=1, position=1)]),
+             ("further", [Token(TokenType.MISC_STOPWORD, "further", line=1, position=1)]),
+             ("once", [Token(TokenType.MISC_STOPWORD, "once", line=1, position=1)]),
+             # Case-insensitive stopword detection
+             ("The", [Token(TokenType.MISC_STOPWORD, "The", line=1, position=1)]),
+             ("THE", [Token(TokenType.MISC_STOPWORD, "THE", line=1, position=1)]),
+             # Non-stopwords should be identifiers
+             ("variable", [Token(TokenType.MISC_IDENT, "variable", line=1, position=1)]),
+             ("myfunction", [Token(TokenType.MISC_IDENT, "myfunction", line=1, position=1)]),
+             ("data", [Token(TokenType.MISC_IDENT, "data", line=1, position=1)]),
+         ],
+     )
+     def test_stopword_detection(self, input_text: str, expected_tokens: list[Token]) -> None:
+         lexer = Lexer(input_text)
+         stream_and_assert_tokens(lexer, expected_tokens)
+
+     def test_stopwords_mixed_with_code(self) -> None:
+         input_text = "Set the `value` to 5"
+         lexer = Lexer(input_text)
+
+         # Expected tokens: "Set" (keyword), "the" (stopword), "value" (ident), "to" (keyword), "5" (int)
+         expected_tokens = [
+             Token(TokenType.KW_SET, "Set", line=1, position=1),
+             Token(TokenType.MISC_STOPWORD, "the", line=1, position=5),
+             Token(TokenType.MISC_IDENT, "value", line=1, position=10),
+             Token(TokenType.KW_TO, "to", line=1, position=17),
+             Token(TokenType.LIT_WHOLE_NUMBER, "5", line=1, position=20),
+         ]
+
+         stream_and_assert_tokens(lexer, expected_tokens)
+
+     def test_parser_ignores_stopwords(self) -> None:
+         from machine_dialect.parser import Parser
+
+         # Test that parser skips stopwords correctly
+         input_text = "Define `x` as Whole Number. Set the `x` to _5_"
+         parser = Parser()
+         program = parser.parse(input_text)
+
+         # The parser should skip "the" stopword and parse correctly
+         assert len(program.statements) == 2  # Define + Set
+         # First statement is Define, second is Set
+         assert program.statements[1].token.type == TokenType.KW_SET
+
+         # Check no parsing errors
+         assert len(parser.errors) == 0
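
The stopword tests rely on two layers: the lexer tags words such as "the" as MISC_STOPWORD rather than discarding them, and the parser skips those tokens. A small sketch of that filtering step, assuming only the `Lexer.next_token` API and the MISC_EOF sentinel shown elsewhere in this diff (the `tokens_without_stopwords` helper is hypothetical):

    from machine_dialect.lexer import Lexer
    from machine_dialect.lexer.tokens import Token, TokenType

    def tokens_without_stopwords(source: str) -> list[Token]:
        # Hypothetical helper: tokenize `source` and drop stopword tokens,
        # mirroring the skipping behavior the parser test asserts.
        lexer = Lexer(source)
        kept: list[Token] = []
        while (token := lexer.next_token()).type != TokenType.MISC_EOF:
            if token.type != TokenType.MISC_STOPWORD:
                kept.append(token)
        return kept

    # "the" is tagged as a stopword and filtered out of the stream.
    kinds = [t.type for t in tokens_without_stopwords("Set the `value` to 5")]
    assert kinds == [TokenType.KW_SET, TokenType.MISC_IDENT, TokenType.KW_TO, TokenType.LIT_WHOLE_NUMBER]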
machine_dialect/lexer/tests/test_strict_equality.py
@@ -0,0 +1,129 @@
+ """Tests for strict equality operators in the lexer.
+
+ This module tests that the lexer correctly recognizes strict equality
+ and strict inequality operators in their various natural language forms.
+ """
+
+ import pytest
+
+ from machine_dialect.lexer import Lexer
+ from machine_dialect.lexer.tokens import TokenType
+
+
+ class TestStrictEqualityOperators:
+     """Test the lexer's handling of strict equality operators."""
+
+     @pytest.mark.parametrize(
+         "source,expected_token_type,expected_literal",
+         [
+             # Strict equality operators
+             ("is strictly equal to", TokenType.OP_STRICT_EQ, "is strictly equal to"),
+             ("is exactly equal to", TokenType.OP_STRICT_EQ, "is exactly equal to"),
+             ("is identical to", TokenType.OP_STRICT_EQ, "is identical to"),
+             # Strict inequality operators
+             ("is not strictly equal to", TokenType.OP_STRICT_NOT_EQ, "is not strictly equal to"),
+             ("is not exactly equal to", TokenType.OP_STRICT_NOT_EQ, "is not exactly equal to"),
+             ("is not identical to", TokenType.OP_STRICT_NOT_EQ, "is not identical to"),
+             # Value equality (for comparison)
+             ("is equal to", TokenType.OP_EQ, "is equal to"),
+             ("equals", TokenType.OP_EQ, "equals"),
+             ("is the same as", TokenType.OP_EQ, "is the same as"),
+             # Value inequality (for comparison)
+             ("is not equal to", TokenType.OP_NOT_EQ, "is not equal to"),
+             ("does not equal", TokenType.OP_NOT_EQ, "does not equal"),
+             ("is different from", TokenType.OP_NOT_EQ, "is different from"),
+         ],
+     )
+     def test_strict_equality_operators(
+         self, source: str, expected_token_type: TokenType, expected_literal: str
+     ) -> None:
+         """Test that strict equality operators are correctly tokenized.
+
+         Args:
+             source: The source string containing the operator.
+             expected_token_type: The expected token type.
+             expected_literal: The expected literal value.
+         """
+         lexer = Lexer(source)
+         token = lexer.next_token()
+
+         assert token.type == expected_token_type
+         assert token.literal == expected_literal
+
+     def test_strict_equality_in_expression(self) -> None:
+         """Test strict equality operators in complete expressions."""
+         source = "if x is strictly equal to 5 then give back Yes"
+         lexer = Lexer(source)
+
+         expected_tokens = [
+             (TokenType.KW_IF, "if"),
+             (TokenType.MISC_IDENT, "x"),
+             (TokenType.OP_STRICT_EQ, "is strictly equal to"),
+             (TokenType.LIT_WHOLE_NUMBER, "5"),
+             (TokenType.KW_THEN, "then"),
+             (TokenType.KW_RETURN, "give back"),
+             (TokenType.LIT_YES, "Yes"),
+             (TokenType.MISC_EOF, ""),
+         ]
+
+         for expected_type, expected_literal in expected_tokens:
+             token = lexer.next_token()
+             assert token.type == expected_type
+             assert token.literal == expected_literal
+
+     def test_strict_inequality_in_expression(self) -> None:
+         """Test strict inequality operators in complete expressions."""
+         source = "if `value` is not identical to `null` then `process`"
+         lexer = Lexer(source)
+
+         expected_tokens = [
+             (TokenType.KW_IF, "if"),
+             (TokenType.MISC_IDENT, "value"),
+             (TokenType.OP_STRICT_NOT_EQ, "is not identical to"),
+             (TokenType.MISC_IDENT, "null"),
+             (TokenType.KW_THEN, "then"),
+             (TokenType.MISC_IDENT, "process"),
+             (TokenType.MISC_EOF, ""),
+         ]
+
+         for expected_type, expected_literal in expected_tokens:
+             token = lexer.next_token()
+             assert token.type == expected_type
+             assert token.literal == expected_literal
+
+     def test_mixed_equality_operators(self) -> None:
+         """Test that different equality operators are distinguished correctly."""
+         source = "`a` equals `b` and `c` is strictly equal to `d`"
+         lexer = Lexer(source)
+
+         expected_tokens = [
+             (TokenType.MISC_IDENT, "a"),
+             (TokenType.OP_EQ, "equals"),
+             (TokenType.MISC_IDENT, "b"),
+             (TokenType.KW_AND, "and"),
+             (TokenType.MISC_IDENT, "c"),
+             (TokenType.OP_STRICT_EQ, "is strictly equal to"),
+             (TokenType.MISC_IDENT, "d"),
+             (TokenType.MISC_EOF, ""),
+         ]
+
+         for expected_type, expected_literal in expected_tokens:
+             token = lexer.next_token()
+             assert token.type == expected_type
+             assert token.literal == expected_literal
+
+     def test_case_insensitive_strict_operators(self) -> None:
+         """Test that strict operators are case-insensitive."""
+         test_cases = [
+             ("Is Strictly Equal To", TokenType.OP_STRICT_EQ),
+             ("IS EXACTLY EQUAL TO", TokenType.OP_STRICT_EQ),
+             ("Is Identical To", TokenType.OP_STRICT_EQ),
+             ("IS NOT STRICTLY EQUAL TO", TokenType.OP_STRICT_NOT_EQ),
+             ("Is Not Exactly Equal To", TokenType.OP_STRICT_NOT_EQ),
+             ("is NOT identical TO", TokenType.OP_STRICT_NOT_EQ),
+         ]
+
+         for source, expected_type in test_cases:
+             lexer = Lexer(source)
+             token = lexer.next_token()
+             assert token.type == expected_type
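
These tests show that multi-word comparison phrases lex as single operator tokens, matched case-insensitively while the literal preserves the source spelling. A sketch of draining a source to the MISC_EOF sentinel, the loop the expression tests above unroll by hand (the `token_types` helper is hypothetical):

    from machine_dialect.lexer import Lexer
    from machine_dialect.lexer.tokens import TokenType

    def token_types(source: str) -> list[TokenType]:
        # Hypothetical helper: collect token types until end of input.
        lexer = Lexer(source)
        types: list[TokenType] = []
        while (token := lexer.next_token()).type != TokenType.MISC_EOF:
            types.append(token.type)
        return types

    # The four-word phrase collapses into one OP_STRICT_EQ token.
    assert token_types("x is strictly equal to 5") == [
        TokenType.MISC_IDENT,
        TokenType.OP_STRICT_EQ,
        TokenType.LIT_WHOLE_NUMBER,
    ]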
machine_dialect/lexer/tests/test_token.py
@@ -0,0 +1,41 @@
+ from machine_dialect.lexer.tokens import Token, TokenType
+
+
+ class TestToken:
+     def test_token_with_line_and_position(self) -> None:
+         """Test that Token includes line and position information."""
+         token = Token(type=TokenType.MISC_IDENT, literal="test", line=1, position=1)
+
+         assert token.type == TokenType.MISC_IDENT
+         assert token.literal == "test"
+         assert token.line == 1
+         assert token.position == 1
+
+     def test_token_string_representation_with_location(self) -> None:
+         """Test string representation includes line and position."""
+         token = Token(type=TokenType.KW_IF, literal="if", line=5, position=10)
+
+         expected = "Type: TokenType.KW_IF, Literal: if, Line: 5, Position: 10"
+         assert str(token) == expected
+
+     def test_token_equality_with_location(self) -> None:
+         """Test that tokens are equal if all attributes match."""
+         token1 = Token(type=TokenType.LIT_WHOLE_NUMBER, literal="42", line=1, position=1)
+         token2 = Token(type=TokenType.LIT_WHOLE_NUMBER, literal="42", line=1, position=1)
+         token3 = Token(
+             type=TokenType.LIT_WHOLE_NUMBER,
+             literal="42",
+             line=2,  # Different line
+             position=1,
+         )
+
+         assert token1 == token2
+         assert token1 != token3
+
+     def test_token_creation_with_defaults(self) -> None:
+         """Test Token creation with default line and position values."""
+         # This test assumes we might want default values for backward compatibility
+         token = Token(type=TokenType.OP_PLUS, literal="+", line=1, position=1)
+
+         assert token.line == 1
+         assert token.position == 1
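
Token here behaves as a value object: equality compares type, literal, line, and position, and `str` renders all four fields. A short usage sketch based only on the behavior these tests assert:

    from machine_dialect.lexer.tokens import Token, TokenType

    # Two tokens with identical fields compare equal, so expected-token
    # lists (as in the stopword tests) can be checked with plain ==.
    a = Token(type=TokenType.OP_PLUS, literal="+", line=3, position=7)
    b = Token(type=TokenType.OP_PLUS, literal="+", line=3, position=7)
    assert a == b
    assert str(a) == "Type: TokenType.OP_PLUS, Literal: +, Line: 3, Position: 7"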