machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268):
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,188 @@
1
+ """Tests for parser expected token errors.
2
+
3
+ This module tests the parser's ability to detect and report syntax errors
4
+ when expected tokens are not found during parsing.
5
+ """
6
+
7
+ from machine_dialect.errors.exceptions import MDNameError, MDSyntaxError
8
+ from machine_dialect.parser import Parser
9
+
10
+
11
class TestExpectedTokenErrors:
    """Checks on the errors the parser records when a required token is absent."""

    def test_missing_identifier_after_set(self) -> None:
        """A number in the identifier slot of ``Set`` is reported as one name error."""
        parser = Parser()
        parser.parse("Set 42 to X")  # 42 is not a valid identifier

        assert parser.has_errors() is True
        # The input has no period, so panic recovery runs to EOF after the first error.
        assert len(parser.errors) == 1
        first = parser.errors[0]
        assert isinstance(first, MDNameError)

        # The message must say an identifier was expected and quote the offending text.
        message = str(first)
        assert "identifier" in message.lower()
        assert "42" in message

    def test_missing_to_keyword(self) -> None:
        """Omitting ``to`` in a ``Set`` statement yields a name error, then a syntax error."""
        parser = Parser()
        parser.parse("Set `X` 42")  # no 'to' between the identifier and the value

        assert parser.has_errors() is True
        assert len(parser.errors) == 2
        name_error, syntax_error = parser.errors[0], parser.errors[1]
        # `X` was never defined, so the name error comes first ...
        assert isinstance(name_error, MDNameError)
        # ... followed by the complaint about the missing 'to'.
        assert isinstance(syntax_error, MDSyntaxError)

        # NOTE: the second clause is loose -- any message containing "to" satisfies it.
        message = str(syntax_error)
        assert "TokenType.KW_TO" in message or "to" in message.lower()

    def test_multiple_expected_token_errors(self) -> None:
        """Several malformed ``Set`` statements each contribute their own errors."""
        # Each statement ends with a period so panic recovery stops at the statement boundary.
        # NOTE(review): indentation of the continuation lines in the original literal is
        # not recoverable here; none is assumed -- confirm against the repository.
        text = (
            "Set 42 to X.\n"
            "Set price 3.14.\n"
            'Set to "hello".\n'
        )
        parser = Parser()
        parser.parse(text)

        assert parser.has_errors() is True
        assert len(parser.errors) == 4

        # Breakdown the test expects:
        #   line 1 "Set 42"    -> MDNameError (42 is not a valid identifier)
        #   line 2 "Set price" -> MDNameError (undefined variable) + MDSyntaxError (missing 'to')
        #   line 3 "Set to"    -> MDNameError ('to' is not a valid identifier)
        name_count = sum(1 for err in parser.errors if isinstance(err, MDNameError))
        syntax_count = sum(1 for err in parser.errors if isinstance(err, MDSyntaxError))
        assert name_count == 3
        assert syntax_count == 1

    def test_empty_identifier(self) -> None:
        """An empty pair of backticks where an identifier belongs produces an error."""
        parser = Parser()
        parser.parse("Set `` to 42")

        # Only the presence of an error is checked, not its kind or count.
        assert parser.has_errors() is True

    def test_unclosed_backtick(self) -> None:
        """A backtick identifier without its closing backtick produces an error."""
        parser = Parser()
        parser.parse("Set `X to 42")

        # The error may originate in the lexer or the parser; either is accepted.
        assert parser.has_errors() is True

    def test_error_location_info(self) -> None:
        """The name error for ``Set 42 to X`` carries line 1, column 5."""
        parser = Parser()
        parser.parse("Set 42 to X")

        assert len(parser.errors) == 1
        error = parser.errors[0]
        assert isinstance(error, MDNameError)

        # Location is exposed through the private ``_line`` / ``_column`` attributes.
        assert all(hasattr(error, attr) for attr in ("_line", "_column"))
        assert (error._line, error._column) == (1, 5)  # column 5 is where '42' starts

    def test_error_message_content(self) -> None:
        """The syntax error for a missing ``to`` says what was expected."""
        parser = Parser()
        parser.parse("Set `X` something")  # 'to' keyword missing

        assert len(parser.errors) == 2  # name error + syntax error
        # Prefer the second entry when it is the syntax error; otherwise use the first.
        second = parser.errors[1]
        error = second if isinstance(second, MDSyntaxError) else parser.errors[0]
        message = str(error)

        # A case-insensitive check covers both "expected" and "Expected".
        assert "expected" in message.lower()
        # Per the original note, the parser merges 'X something' into one identifier,
        # then looks for 'to' and hits EOF.
        assert "TokenType.KW_TO" in message or "to" in message.lower()

    def test_parser_continues_after_expected_token_error(self) -> None:
        """Parsing carries on past a bad statement and still yields the good one."""
        # NOTE(review): indentation of the continuation lines in the original literal is
        # not recoverable here; none is assumed -- confirm against the repository.
        text = (
            "Set 42 to X.\n"
            "Set `price` to 3.14.\n"
            "Set `Z` 99.\n"
        )
        parser = Parser()
        program = parser.parse(text)

        assert parser.has_errors() is True
        # Four errors expected:
        #   line 1 -> expected identifier (42 is invalid)
        #   line 2 -> undefined variable 'price'
        #   line 3 -> undefined variable 'Z' + missing 'to'
        assert len(parser.errors) == 4

        # Recovery may drop the broken statements, but the well-formed one must survive.
        assert len(program.statements) >= 1

        from machine_dialect.ast import SetStatement

        price_assignments = 0
        for stmt in program.statements:
            if isinstance(stmt, SetStatement) and stmt.name and stmt.name.value == "price":
                price_assignments += 1
        assert price_assignments == 1

    def test_consecutive_errors(self) -> None:
        """Back-to-back ``Set`` keywords with nothing else are reported as one error."""
        parser = Parser()
        parser.parse("Set Set Set")

        assert parser.has_errors() is True
        # Exactly one error is recorded for the whole run of keywords.
        assert len(parser.errors) == 1

    def test_eof_during_parsing(self) -> None:
        """Reaching EOF right after the ``Set`` identifier gives one syntax error."""
        parser = Parser()
        parser.parse("Define `X` as Empty. Set `X`")  # 'to' and the value are missing

        assert parser.has_errors() is True
        assert len(parser.errors) == 1
        # Filter by type rather than by position in the error list.
        syntax_only = [err for err in parser.errors if isinstance(err, MDSyntaxError)]
        assert len(syntax_only) == 1
@@ -0,0 +1,118 @@
1
+ """Tests for parser error handling.
2
+
3
+ This module tests the parser's ability to collect and report errors
4
+ from the lexer, including lexical errors like illegal characters.
5
+ """
6
+
7
+ from machine_dialect.errors.exceptions import MDSyntaxError
8
+ from machine_dialect.parser import Parser
9
+
10
+
11
class TestParserErrors:
    """Checks on how the parser records errors caused by illegal characters."""

    def test_parser_collects_lexer_errors(self) -> None:
        """An illegal ``@`` in a value position is reported as one syntax error."""
        parser = Parser()
        # The error is recorded while parsing; nothing is reported beforehand.
        parser.parse("Define `X` as Empty. Set `X` to @.")

        assert len(parser.errors) == 1
        only_error = parser.errors[0]
        assert isinstance(only_error, MDSyntaxError)
        assert "@" in str(only_error)

    def test_parser_has_errors_method(self) -> None:
        """``has_errors()`` is False for clean input and True once an error is recorded."""
        clean_parser = Parser()
        clean_parser.parse("Define `X` as Whole Number. Set `X` to 42.")

        assert clean_parser.has_errors() is False
        assert len(clean_parser.errors) == 0

        failing_parser = Parser()
        failing_parser.parse("Define `Y` as Empty. Set `Y` to §.")  # § is not a valid token

        assert failing_parser.has_errors() is True
        assert len(failing_parser.errors) == 1

    def test_parser_collects_multiple_errors(self) -> None:
        """Each illegal character in its own statement produces its own syntax error."""
        # Every statement is terminated with a period (periods are mandatory).
        text = (
            "Define `A` as Empty. Define `B` as Empty. Define `C` as Empty. "
            "Set `A` to @. Set `B` to $. Set `C` to %."
        )
        parser = Parser()
        parser.parse(text)

        assert len(parser.errors) == 3
        assert all(isinstance(err, MDSyntaxError) for err in parser.errors)

        # Every illegal character must be quoted in at least one message.
        messages = [str(err) for err in parser.errors]
        for symbol in ("@", "$", "%"):
            assert any(symbol in msg for msg in messages)

    def test_parser_continues_after_lexer_errors(self) -> None:
        """An illegal character does not stop the remaining statements from being parsed."""
        parser = Parser()
        program = parser.parse(
            "Define `X` as Empty. Define `result` as Whole Number. Set `X` to @. Set `result` to _123_."
        )

        # One error, for the '@'.
        assert parser.has_errors() is True
        assert len(parser.errors) == 1

        # All four statements (2 defines + 2 sets) are still present.
        statements = program.statements
        assert len(statements) == 4

        from machine_dialect.ast import DefineStatement, SetStatement

        # The isinstance assertions also narrow the types for mypy.
        assert isinstance(statements[0], DefineStatement)
        assert isinstance(statements[1], DefineStatement)

        set_x, set_result = statements[2], statements[3]
        assert isinstance(set_x, SetStatement)
        assert isinstance(set_result, SetStatement)
        assert set_x.name is not None
        assert set_x.name.value == "X"
        assert set_result.name is not None
        assert set_result.name.value == "result"

    def test_empty_source_no_errors(self) -> None:
        """Parsing the empty string gives no errors and no statements."""
        parser = Parser()
        program = parser.parse("")

        assert parser.has_errors() is False
        assert len(parser.errors) == 0
        assert len(program.statements) == 0

    def test_parser_error_details(self) -> None:
        """A recorded error exposes a line number and a positive column."""
        parser = Parser()
        parser.parse("Define `X` as Empty. Set `X` to &.")

        assert len(parser.errors) == 1
        error = parser.errors[0]

        # Location is exposed through the private ``_line`` / ``_column`` attributes.
        assert all(hasattr(error, attr) for attr in ("_line", "_column"))
        assert error._line == 1  # the whole input is on the first line
        assert error._column > 0  # only positivity is checked, not the exact column
File without changes
@@ -0,0 +1,225 @@
1
+ """English stopwords module for Machine Dialect™.
2
+
3
+ This module provides a curated set of English stopwords commonly used in
4
+ natural language processing. These words are typically filtered out during
5
+ text analysis as they carry little semantic meaning on their own.
6
+
7
+ The stopwords include common articles, prepositions, pronouns, conjunctions,
8
+ and other functional words that appear frequently in English text.
9
+
10
+ Attributes:
11
+ ENGLISH_STOPWORDS (set[str]): A set containing common English stopwords
12
+ in lowercase. Includes contractions and their expanded forms.
13
+ """
14
+
15
# Lowercase English stopwords, grouped by initial letter and sorted within each group.
# Bare fragments such as "ain", "ll", "re", "ve", "s" and "t" are kept alongside the
# full contractions.
# fmt: off
ENGLISH_STOPWORDS = {
    # a
    "a", "about", "above", "after", "again", "against", "ain", "all", "am", "an", "and", "any",
    "are", "aren", "aren't", "as", "at",
    # b
    "be", "because", "been", "before", "being", "below", "between", "both", "but", "by",
    # c
    "can", "could", "couldn", "couldn't",
    # d
    "d", "did", "didn", "didn't", "do", "does", "doesn", "doesn't", "doing", "don", "don't",
    "down", "during",
    # e
    "each",
    # f
    "few", "for", "from", "further",
    # h
    "had", "hadn", "hadn't", "has", "hasn", "hasn't", "have", "haven", "haven't", "having",
    "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him",
    "himself", "his", "how", "how's",
    # i
    "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn", "isn't", "it", "it'd",
    "it'll", "it's", "its", "itself",
    # j
    "just",
    # l
    "ll",
    # m
    "m", "ma", "me", "mightn", "mightn't", "more", "most", "mustn", "mustn't", "my", "myself",
    # n
    "needn", "needn't", "no", "nor", "not", "now",
    # o
    "o", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves",
    "out", "over", "own",
    # r
    "re",
    # s
    "s", "same", "shan", "shan't", "she", "she'd", "she'll", "she's", "should", "should've",
    "shouldn", "shouldn't", "so", "some", "such",
    # t
    "t", "than", "that", "that'll", "that's", "the", "their", "theirs", "them", "themselves",
    "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've",
    "this", "those", "through", "to", "too",
    # u
    "under", "until", "up",
    # v
    "ve", "very",
    # w
    "was", "wasn", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren",
    "weren't", "what", "what's", "when", "when's", "where", "where's", "which", "while",
    "who", "who's", "whom", "why", "why's", "will", "with", "won", "won't", "would",
    "wouldn", "wouldn't",
    # y
    "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves",
}
# fmt: on
@@ -0,0 +1,30 @@
1
+ from rfc3986 import uri_reference
2
+ from rfc3986.validators import Validator
3
+
4
+
5
def is_valid_url(url: str, *, require_scheme: bool = True) -> bool:
    """Report whether ``url`` passes ``rfc3986`` validation.

    The input is parsed with ``uri_reference`` and normalized, then checked
    with a ``Validator`` configured as follows:

    * ``mailto``, ``data`` and ``file`` URIs need no host; the scheme is
      required only when ``require_scheme`` is true.
    * Every other URI must have a host, plus a scheme when
      ``require_scheme`` is true.
    * The scheme, userinfo, host, port, path, query and fragment components
      are all checked for validity.

    Args:
        url: The text to validate.
        require_scheme: Whether a scheme component must be present.

    Returns:
        ``True`` when parsing and validation finish without raising;
        ``False`` when anything raises, since every exception is treated
        as "not a valid URL".
    """
    try:
        parsed = uri_reference(url).normalize()

        # Work out which components must be present before building the validator.
        required = ["scheme"] if require_scheme else []
        if parsed.scheme not in ("mailto", "data", "file"):
            # Typical URLs (http, https, ftp, ...) are meaningless without a host.
            required.append("host")

        checker = Validator()
        if required:
            checker = checker.require_presence_of(*required)
        checker = checker.check_validity_of("scheme", "userinfo", "host", "port", "path", "query", "fragment")

        checker.validate(parsed)
    except Exception:
        # Deliberately broad: any parse or validation failure means "invalid".
        return False
    return True
@@ -0,0 +1,9 @@
1
+ from .lexer import Lexer
2
+ from .tokens import Token, TokenMetaType, TokenType
3
+
4
+ __all__ = [
5
+ "Lexer",
6
+ "Token",
7
+ "TokenMetaType",
8
+ "TokenType",
9
+ ]
@@ -0,0 +1,23 @@
1
+ from machine_dialect.lexer.tokens import TokenType
2
+
3
# Lookup table from a single source character to the token type it produces.
# Entries are grouped by the OP_ / DELIM_ / PUNCT_ prefix of the token name.
CHAR_TO_TOKEN_MAP = {
    # Operators
    "+": TokenType.OP_PLUS,
    "-": TokenType.OP_MINUS,
    "*": TokenType.OP_STAR,
    "/": TokenType.OP_DIVISION,
    "^": TokenType.OP_CARET,
    "=": TokenType.OP_ASSIGN,
    "<": TokenType.OP_LT,
    ">": TokenType.OP_GT,
    # Delimiters
    "(": TokenType.DELIM_LPAREN,
    ")": TokenType.DELIM_RPAREN,
    "{": TokenType.DELIM_LBRACE,
    "}": TokenType.DELIM_RBRACE,
    # Punctuation
    ";": TokenType.PUNCT_SEMICOLON,
    ",": TokenType.PUNCT_COMMA,
    ".": TokenType.PUNCT_PERIOD,
    ":": TokenType.PUNCT_COLON,
    "#": TokenType.PUNCT_HASH,
    "\\": TokenType.PUNCT_BACKSLASH,
}