machine-dialect 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
machine_dialect/parser/parser.py
@@ -0,0 +1,3655 @@
# mypy: disable-error-code="comparison-overlap"

import re
from collections.abc import Callable
from copy import copy

from machine_dialect.ast import (
    ActionStatement,
    Arguments,
    BlockStatement,
    CallExpression,
    CallStatement,
    CollectionAccessExpression,
    ConditionalExpression,
    DefineStatement,
    EmptyLiteral,
    ErrorExpression,
    ErrorStatement,
    Expression,
    ExpressionStatement,
    FloatLiteral,
    ForEachStatement,
    Identifier,
    IfStatement,
    InfixExpression,
    InteractionStatement,
    NamedListLiteral,
    OrderedListLiteral,
    Output,
    Parameter,
    PrefixExpression,
    Program,
    ReturnStatement,
    SayStatement,
    SetStatement,
    Statement,
    StringLiteral,
    UnorderedListLiteral,
    URLLiteral,
    UtilityStatement,
    WhileStatement,
    WholeNumberLiteral,
    YesNoLiteral,
)
from machine_dialect.errors.exceptions import MDBaseException, MDNameError, MDSyntaxError, MDTypeError
from machine_dialect.errors.messages import (
    EMPTY_ELSE_BLOCK,
    EMPTY_FOR_EACH_BODY,
    EMPTY_IF_CONSEQUENCE,
    EMPTY_WHILE_BODY,
    EXPECTED_DETAILS_CLOSE,
    EXPECTED_EXPRESSION,
    EXPECTED_FUNCTION_NAME,
    EXPECTED_IDENTIFIER_AFTER,
    EXPECTED_TOKEN,
    EXPECTED_TOKEN_AFTER,
    ILLEGAL_TOKEN,
    INVALID_ARGUMENT_VALUE,
    INVALID_FLOAT_LITERAL,
    INVALID_INTEGER_LITERAL,
    INVALID_TYPE_NAME,
    MISSING_COMMA_BETWEEN_ARGS,
    MISSING_DEPTH_TRANSITION,
    NAME_UNDEFINED,
    NO_PARSE_FUNCTION,
    UNEXPECTED_BLOCK_DEPTH,
    UNEXPECTED_STATEMENT,
    UNEXPECTED_TOKEN_AT_START,
    UNHANDLED_OPERATION,
    VARIABLE_ALREADY_DEFINED,
    VARIABLE_NOT_DEFINED,
    ErrorTemplate,
)
from machine_dialect.lexer import Lexer
from machine_dialect.lexer.tokens import Token, TokenType
from machine_dialect.parser import Precedence
from machine_dialect.parser.protocols import (
    InfixParseFuncs,
    PostfixParseFuncs,
    PrefixParseFuncs,
)
from machine_dialect.parser.symbol_table import SymbolTable
from machine_dialect.parser.token_buffer import TokenBuffer
from machine_dialect.type_checking import TypeSpec, check_type_compatibility, get_type_from_value

PRECEDENCES: dict[TokenType, Precedence] = {
    # Ternary conditional
    TokenType.KW_IF: Precedence.TERNARY,
    # Logical operators
    TokenType.KW_OR: Precedence.LOGICAL_OR,
    TokenType.KW_AND: Precedence.LOGICAL_AND,
    # Comparison operators
    TokenType.OP_EQ: Precedence.REL_SYM_COMP,
    TokenType.OP_NOT_EQ: Precedence.REL_SYM_COMP,
    TokenType.OP_STRICT_EQ: Precedence.REL_SYM_COMP,
    TokenType.OP_STRICT_NOT_EQ: Precedence.REL_SYM_COMP,
    TokenType.OP_LT: Precedence.REL_ASYM_COMP,
    TokenType.OP_GT: Precedence.REL_ASYM_COMP,
    TokenType.OP_LTE: Precedence.REL_ASYM_COMP,
    TokenType.OP_GTE: Precedence.REL_ASYM_COMP,
    # Arithmetic operators
    TokenType.OP_PLUS: Precedence.MATH_ADD_SUB,
    TokenType.OP_MINUS: Precedence.MATH_ADD_SUB,
    TokenType.OP_STAR: Precedence.MATH_PROD_DIV_MOD,
    TokenType.OP_DIVISION: Precedence.MATH_PROD_DIV_MOD,
    TokenType.OP_CARET: Precedence.MATH_EXPONENT,
    # Dictionary extraction operators (postfix-like)
    TokenType.OP_THE_NAMES_OF: Precedence.UNARY_POST_OPERATOR,
    TokenType.OP_THE_CONTENTS_OF: Precedence.UNARY_POST_OPERATOR,
}
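
# Illustration (assumed: Precedence is an ordered enum, which is consistent
# with the `<` comparisons in _parse_expression below, not a quoted example):
# because MATH_PROD_DIV_MOD outranks MATH_ADD_SUB in this table,
# `a + b * c` parses as `a + (b * c)`, while operators of equal rank
# associate to the left, so `a - b - c` parses as `(a - b) - c`.
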
TYPING_MAP: dict[TokenType, str] = {
    TokenType.KW_TEXT: "Text",
    TokenType.KW_WHOLE_NUMBER: "Whole Number",
    TokenType.KW_FLOAT: "Float",
    TokenType.KW_NUMBER: "Number",
    TokenType.KW_YES_NO: "Yes/No",
    TokenType.KW_URL: "URL",
    TokenType.KW_DATE: "Date",
    TokenType.KW_DATETIME: "DateTime",
    TokenType.KW_TIME: "Time",
    TokenType.KW_LIST: "List",
    TokenType.KW_ORDERED_LIST: "Ordered List",
    TokenType.KW_UNORDERED_LIST: "Unordered List",
    TokenType.KW_NAMED_LIST: "Named List",
    TokenType.KW_EMPTY: "Empty",
}

__all__ = ["Parser"]


class Parser:
    """Parser for Machine Dialect™ language.

    Transforms source code into an Abstract Syntax Tree (AST) by first
    tokenizing it with the lexer and then parsing the tokens.
    Also collects any lexical errors from the tokenizer.

    Attributes:
        errors (list[MDBaseException]): List of errors encountered during parsing,
            including lexical errors from the tokenizer.
    """

    def __init__(self) -> None:
        """Initialize the parser."""
        self._current_token: Token | None = None
        self._peek_token: Token | None = None
        self._token_buffer: TokenBuffer | None = None
        self._errors: list[MDBaseException] = []
        self._panic_count = 0  # Track panic-mode recoveries
        self._block_depth = 0  # Track if we're inside block statements
        self._symbol_table: SymbolTable = SymbolTable()  # Track variable definitions

        self._prefix_parse_funcs: PrefixParseFuncs = self._register_prefix_funcs()
        self._infix_parse_funcs: InfixParseFuncs = self._register_infix_funcs()
        self._postfix_parse_funcs: PostfixParseFuncs = self._register_postfix_funcs()

    def parse(self, source: str, as_hir: bool = False, check_semantics: bool = True) -> Program:
        """Parse the source code into an AST.

        Args:
            source: The source code to parse.
            as_hir: If True, return a HIR (High level Intermediate Representation).
            check_semantics: If True, perform semantic analysis.

        Returns:
            The root Program node of the AST.

        Note:
            Any errors encountered during parsing are added to the
            errors attribute. The parser attempts to continue parsing
            even after encountering errors using panic-mode recovery.
        """
        # Reset parser state for new parse
        self._reset_state()

        # Create lexer and token buffer for streaming
        lexer = Lexer(source)
        self._token_buffer = TokenBuffer(lexer)

        # Initialize token pointers
        self._advance_tokens()
        self._advance_tokens()

        # Skip frontmatter if present
        self._skip_frontmatter()

        # Parse the program
        program: Program = Program(statements=[])

        assert self._current_token is not None
        while self._current_token.type != TokenType.MISC_EOF and self._panic_count < 20:
            # Skip standalone periods
            if self._current_token.type == TokenType.PUNCT_PERIOD:
                self._advance_tokens()
                continue

            # Save the token position before parsing
            token_before = self._current_token

            statement = self._parse_statement()
            program.statements.append(statement)

            # If we haven't advanced past the token we started with, we need to advance
            # This happens when expression parsing leaves us at the last token
            if self._current_token == token_before:
                self._advance_tokens()
            # After parsing a statement, skip any trailing period
            elif self._current_token and self._current_token.type == TokenType.PUNCT_PERIOD:
                self._advance_tokens()

        # Perform semantic analysis if requested
        if check_semantics and not self._errors:
            from machine_dialect.semantic.analyzer import SemanticAnalyzer

            analyzer = SemanticAnalyzer()
            program, semantic_errors = analyzer.analyze(program)
            self._errors.extend(semantic_errors)

        return program.desugar() if as_hir else program
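
    # A minimal usage sketch (illustrative; the one-line program below is an
    # assumption based on this package's Set-statement and underscore-literal
    # tests, not a quoted example):
    #
    #     from machine_dialect.parser.parser import Parser
    #
    #     parser = Parser()
    #     program = parser.parse('Set `x` to _42_.')
    #     if parser.has_errors():
    #         for error in parser.errors:
    #             print(error)
    #     else:
    #         print(program)
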
    def _reset_state(self) -> None:
        """Reset the parser state for a new parse."""
        self._current_token = None
        self._peek_token = None
        self._token_buffer = None
        self._errors = []
        self._panic_count = 0
        self._block_depth = 0
        self._symbol_table = SymbolTable()  # Reset symbol table for new parse

    def has_errors(self) -> bool:
        """Check if any errors were encountered during parsing.

        Returns:
            True if there are any errors, False otherwise.
        """
        return len(self._errors) > 0

    @property
    def errors(self) -> list[MDBaseException]:
        """Get the list of errors encountered during parsing.

        This includes both lexical errors from the tokenizer and syntax errors
        from the parser. Errors are collected in the order they were encountered.

        Returns:
            List of MDBaseException instances representing all errors found
            during lexical analysis and parsing.
        """
        return copy(self._errors)

    def _skip_frontmatter(self) -> None:
        """Skip YAML frontmatter section if present at the beginning of the document.

        Frontmatter starts with --- and ends with another --- on its own line.
        Everything between these delimiters is skipped.
        """
        # Check if we're at the beginning with a frontmatter delimiter
        if self._current_token and self._current_token.type == TokenType.PUNCT_FRONTMATTER:
            # Skip tokens until we find the closing frontmatter delimiter
            self._advance_tokens()

            while self._current_token and self._current_token.type != TokenType.MISC_EOF:
                if self._current_token.type == TokenType.PUNCT_FRONTMATTER:
                    # Found closing delimiter, skip it and exit
                    self._advance_tokens()
                    break
                # Skip any token that's not a closing frontmatter delimiter
                self._advance_tokens()

    def _advance_tokens(self) -> None:
        """Advance to the next token in the stream.

        Moves the peek token to current token and reads the next token
        into peek token from the buffer. Automatically skips MISC_STOPWORD tokens.
        """
        self._current_token = self._peek_token

        # Skip any stopword tokens
        if self._token_buffer:
            while True:
                self._peek_token = self._token_buffer.current()
                if self._peek_token is None:
                    self._peek_token = Token(TokenType.MISC_EOF, "", line=1, position=1)
                    break

                self._token_buffer.advance()

                # Skip stopwords and backslashes
                if self._peek_token.type not in (TokenType.MISC_STOPWORD, TokenType.PUNCT_BACKSLASH):
                    break
        else:
            # No buffer available
            self._peek_token = Token(TokenType.MISC_EOF, "", line=1, position=1)
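
    # Illustration (assumed token stream, not from the package): after the
    # two initial _advance_tokens() calls in parse(), a stream of
    # [KW_SET, MISC_IDENT, LIT_WHOLE_NUMBER, PUNCT_PERIOD] leaves
    # current=KW_SET and peek=MISC_IDENT; each later call slides the
    # two-token window right, and stopword/backslash tokens are dropped
    # before they ever reach the peek slot.
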
    def _current_precedence(self) -> Precedence:
        """Get the precedence of the current token.

        Returns:
            The precedence level of the current token, or LOWEST if not found.
        """
        assert self._current_token is not None
        return PRECEDENCES.get(self._current_token.type, Precedence.LOWEST)

    def _peek_precedence(self) -> Precedence:
        """Get the precedence of the peek token.

        Returns:
            The precedence level of the peek token, or LOWEST if not found.
        """
        assert self._peek_token is not None
        return PRECEDENCES.get(self._peek_token.type, Precedence.LOWEST)

    def _panic_recovery(self, stop_at: list[TokenType] | None = None, stop_at_types: bool = False) -> list[Token]:
        """Unified error recovery: skip tokens until finding synchronization point.

        Args:
            stop_at: Token types to stop at (default: [PERIOD, EOF])
            stop_at_types: If True, also stop at type keywords

        Returns:
            List of tokens that were skipped during recovery.
        """
        self._panic_count += 1  # Always increment to prevent infinite loops

        if stop_at is None:
            stop_at = [TokenType.PUNCT_PERIOD, TokenType.MISC_EOF]

        skipped_tokens = []

        # Skip tokens until finding a synchronization point
        while self._peek_token is not None and self._peek_token.type not in stop_at:
            self._advance_tokens()
            if self._current_token is not None:
                # Check if we should stop at type keywords
                if stop_at_types and self._is_type_token(self._current_token.type):
                    break
                skipped_tokens.append(self._current_token)

        # Advance one more token to move past the last error token
        # This prevents the main loop from trying to parse the last token again
        if self._current_token is not None and self._current_token.type != TokenType.MISC_EOF:
            self._advance_tokens()

        return skipped_tokens
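
    # Illustration (assumed input, not from the package): if a statement
    # fails mid-parse, _panic_recovery skips ahead to the next period or EOF,
    # returns the skipped tokens so an ErrorStatement can carry them, and
    # leaves the parser just past the bad token so the main loop can resume
    # at the next statement. The counter it bumps is what caps total
    # recoveries at 20 in parse().
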
    def _report_error_and_recover(
        self,
        template: ErrorTemplate,
        error_type: str = "syntax",
        expected_token: TokenType | None = None,
        recovery_tokens: list[TokenType] | None = None,
        recovery_to_types: bool = False,
        skip_recovery: bool = False,
        is_expression: bool = False,
        **kwargs: str,
    ) -> ErrorStatement | ErrorExpression:
        """Unified error handling: always adds error to list and returns appropriate error node.

        Args:
            template: ErrorTemplate with the error message
            error_type: Type of error - "syntax", "name", or "type"
            expected_token: Expected token type (for syntax errors)
            recovery_tokens: Specific tokens to recover to
            recovery_to_types: If True, recover to type keywords
            skip_recovery: If True, skip error recovery
            is_expression: If True, return ErrorExpression instead of ErrorStatement
            **kwargs: Template substitution parameters

        Returns:
            ErrorStatement or ErrorExpression with consistent error reporting.
        """
        # Determine which token to use for error reporting
        # If we have an expected_token, we're likely checking peek_token
        # Otherwise, use current_token
        if expected_token and self._peek_token:
            token = self._peek_token or Token(TokenType.MISC_EOF, "", 0, 0)
        else:
            token = self._current_token or Token(TokenType.MISC_EOF, "", 0, 0)

        # Create appropriate error type
        error: MDBaseException
        if error_type == "name":
            error = MDNameError(message=template, line=token.line, column=token.position, **kwargs)
        elif error_type == "type":
            error = MDTypeError(message=template, line=token.line, column=token.position, **kwargs)
        else:  # syntax
            # Special case: if we expected an identifier and got something else
            # Check if it's an illegal token (syntax error) or something else (name error)
            if expected_token == TokenType.MISC_IDENT and token.type != TokenType.MISC_IDENT:
                # If it's an illegal token, it's a syntax error
                if token.type == TokenType.MISC_ILLEGAL:
                    error = MDSyntaxError(
                        message=ILLEGAL_TOKEN,
                        line=token.line,
                        column=token.position,
                        token=token.literal,
                    )
                else:
                    # Otherwise, it's still a name error (e.g., keyword used as identifier)
                    from machine_dialect.errors.messages import ILLEGAL_CHARACTER

                    # Get a human-readable name for the expected token
                    expected_name = "identifier"
                    error = MDNameError(
                        message=ILLEGAL_CHARACTER,
                        line=token.line,
                        column=token.position,
                        expected=expected_name,
                        character=token.literal,
                    )
            else:
                error = MDSyntaxError(message=template, line=token.line, column=token.position, **kwargs)

        # ALWAYS add the error to ensure consistency
        self._errors.append(error)

        # Perform recovery unless explicitly skipped
        skipped = []
        if not skip_recovery:
            skipped = self._panic_recovery(stop_at=recovery_tokens, stop_at_types=recovery_to_types)

        # Get formatted message
        formatted_message = template.format(**kwargs) if kwargs else template.substitute()

        # Return appropriate error node type
        if is_expression:
            return ErrorExpression(token=token, message=formatted_message)
        else:
            return ErrorStatement(token=token, skipped_tokens=skipped, message=formatted_message)

    def _expect_token(
        self,
        token_type: TokenType,
        context: str | None = None,
        error_message: str | None = None,
        error_node: ErrorExpression | None = None,
    ) -> ErrorStatement | ErrorExpression | None:
        """Unified token expectation with automatic error handling.

        Args:
            token_type: Expected token type
            context: Context for error message (e.g., "after 'Set'")
            error_message: Full custom error message (overrides auto-generated)
            error_node: Custom ErrorExpression to return (for expression contexts)

        Returns:
            None if token matches (advances and continues)
            ErrorExpression if error_node provided and token doesn't match
            ErrorStatement if token doesn't match (with appropriate recovery)
        """
        if self._peek_token and self._peek_token.type == token_type:
            self._advance_tokens()
            return None  # Success, continue parsing

        # Token doesn't match - use appropriate error template
        from machine_dialect.errors.messages import EXPECTED_TOKEN, EXPECTED_TOKEN_AFTER

        if context:
            # Use "Expected X after Y" template
            token_name = token_type.name.lower().replace("_", " ")
            template = EXPECTED_TOKEN_AFTER
            kwargs = {"expected": token_name, "after": context}
        else:
            # Use simple "Expected X" template
            token_name = token_type.name.lower().replace("_", " ")
            template = EXPECTED_TOKEN
            # Get the actual token type that was found
            actual_token = self._peek_token if self._peek_token else None
            got_type = actual_token.type.name if actual_token else "EOF"
            kwargs = {"token": token_name, "got_token_type": got_type}

        # Special handling for period (statement terminator)
        skip_recovery = token_type == TokenType.PUNCT_PERIOD

        # Always use _report_error_and_recover for error reporting
        error_statement = self._report_error_and_recover(
            template=template,
            expected_token=token_type,
            skip_recovery=skip_recovery or error_node is not None,  # Skip recovery for expressions too
            error_type="syntax",
            recovery_tokens=None,
            recovery_to_types=False,
            is_expression=False,
            **kwargs,
        )

        # If custom error node provided, return it instead of the ErrorStatement
        if error_node is not None:
            return error_node

        return error_statement

    def _parse_expression(self, precedence: Precedence = Precedence.LOWEST) -> Expression:
        """Parse an expression with a given precedence level.

        Args:
            precedence: The minimum precedence level to parse. Defaults to LOWEST.

        Returns:
            An Expression AST node if successful, ErrorExpression if parsing fails, None if no expression.
        """
        assert self._current_token is not None

        # Handle illegal tokens
        if self._current_token.type == TokenType.MISC_ILLEGAL:
            # Report as syntax error, not name error
            result = self._report_error_and_recover(
                template=ILLEGAL_TOKEN,
                error_type="syntax",
                token=self._current_token.literal,
                skip_recovery=True,  # Don't recover - let caller handle advancement
                is_expression=True,
            )
            assert isinstance(result, ErrorExpression)
            return result

        if self._current_token.type not in self._prefix_parse_funcs:
            # Check if it's an infix operator at the start
            # Determine which error template to use and its parameters
            if self._current_token.type in self._infix_parse_funcs:
                error_expr = self._report_error_and_recover(
                    template=UNEXPECTED_TOKEN_AT_START,
                    token=self._current_token.literal,
                    skip_recovery=True,
                    is_expression=True,
                )
            elif self._current_token.type == TokenType.MISC_EOF:
                error_expr = self._report_error_and_recover(
                    template=EXPECTED_EXPRESSION,
                    got="<end-of-file>",
                    skip_recovery=True,
                    is_expression=True,
                )
            else:
                error_expr = self._report_error_and_recover(
                    template=NO_PARSE_FUNCTION,
                    literal=self._current_token.literal,
                    skip_recovery=True,
                    is_expression=True,
                )

            # Advance past the problematic token so we can continue parsing
            if self._current_token.type != TokenType.MISC_EOF:
                self._advance_tokens()
            assert isinstance(error_expr, ErrorExpression)
            return error_expr

        prefix_parse_fn = self._prefix_parse_funcs[self._current_token.type]

        left_expression = prefix_parse_fn()

        # Handle infix operators
        assert self._peek_token is not None
        while self._peek_token.type != TokenType.PUNCT_PERIOD and precedence < self._peek_precedence():
            if self._peek_token.type not in self._infix_parse_funcs:
                return left_expression

            self._advance_tokens()

            assert self._current_token is not None
            infix_parse_fn = self._infix_parse_funcs[self._current_token.type]
            left_expression = infix_parse_fn(left_expression)

            assert self._peek_token is not None

        return left_expression
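
    # Worked example (assumed token stream; the infix handlers are defined
    # elsewhere in this file and are assumed to parse their right operand at
    # the operator's own precedence, as is standard for Pratt parsers): for
    # `1 + 2 * 3`, _parse_expression(LOWEST) parses `1` with a prefix
    # function, then enters the loop since LOWEST < MATH_ADD_SUB. The `+`
    # handler's recursive call uses MATH_ADD_SUB as its floor, so `*`
    # (MATH_PROD_DIV_MOD, higher) is consumed into the right operand,
    # yielding `(1 + (2 * 3))`. Equal precedence fails the `<` test, which
    # is why chains like `a - b - c` group to the left.
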
    def _parse_expression_statement(self) -> ExpressionStatement | ErrorStatement:
        assert self._current_token is not None

        expression = self._parse_expression()

        expression_statement = ExpressionStatement(
            token=self._current_token,
            expression=expression,
        )

        # Require trailing period if not at EOF or if we're in a block
        assert self._peek_token is not None
        if self._peek_token.type != TokenType.MISC_EOF or self._block_depth > 0:
            if error := self._expect_token(TokenType.PUNCT_PERIOD):
                assert isinstance(error, ErrorStatement)
                return error

        # Advance past the last token of the expression
        # Expression parsing leaves us at the last token, not after it
        self._advance_tokens()

        return expression_statement

    def _parse_float_literal(self) -> FloatLiteral | ErrorExpression:
        assert self._current_token is not None

        # The lexer has already validated and cleaned the literal
        # so we can directly parse it as a float
        try:
            value = float(self._current_token.literal)
        except ValueError:
            # This shouldn't happen if the lexer is working correctly
            result = self._report_error_and_recover(
                template=INVALID_FLOAT_LITERAL,
                literal=self._current_token.literal,
                skip_recovery=True,
                is_expression=True,
            )
            assert isinstance(result, ErrorExpression)
            return result

        return FloatLiteral(
            token=self._current_token,
            value=value,
        )

    def _parse_identifier(self) -> Identifier:
        assert self._current_token is not None

        return Identifier(
            token=self._current_token,
            value=self._current_token.literal,
        )

    def _parse_identifier_or_keyword_as_identifier(self) -> Identifier | None:
        """Parse an identifier, accepting keywords as identifiers when appropriate.

        This is useful when a keyword appears where we expect an identifier,
        like variable names that happen to match keywords.

        Returns:
            An Identifier AST node, or None if current token can't be used as identifier.
        """
        if not self._current_token:
            return None

        # Accept actual identifiers
        if self._current_token.type == TokenType.MISC_IDENT:
            return self._parse_identifier()

        # Also accept any keyword that has a literal value as an identifier
        # This allows using words like "items", "first", etc. as variable names
        if self._current_token.literal:
            # Create an identifier from the keyword's literal
            return Identifier(
                token=self._current_token,
                value=self._current_token.literal,
            )

        return None

    def _parse_integer_literal(self) -> WholeNumberLiteral | ErrorExpression:
        assert self._current_token is not None

        # The lexer has already validated and cleaned the literal
        # so we can directly parse it as an integer
        try:
            value = int(self._current_token.literal)
        except ValueError:
            # This shouldn't happen if the lexer is working correctly
            result = self._report_error_and_recover(
                template=INVALID_INTEGER_LITERAL,
                literal=self._current_token.literal,
                skip_recovery=True,
                is_expression=True,
            )
            assert isinstance(result, ErrorExpression)
            return result

        return WholeNumberLiteral(
            token=self._current_token,
            value=value,
        )

    def _parse_boolean_literal(self) -> YesNoLiteral:
        """Parse a boolean literal.

        Returns:
            A YesNoLiteral AST node.

        Note:
            The lexer has already validated and provided the canonical
            representation of the boolean literal ("True" or "False").
        """
        assert self._current_token is not None

        # Determine the boolean value based on the token type
        value = self._current_token.type == TokenType.LIT_YES

        return YesNoLiteral(
            token=self._current_token,
            value=value,
        )

    def _parse_empty_literal(self) -> EmptyLiteral:
        """Parse an empty literal.

        Returns:
            An EmptyLiteral AST node.
        """
        assert self._current_token is not None

        return EmptyLiteral(
            token=self._current_token,
        )

    def _parse_string_literal(self) -> StringLiteral:
        """Parse a string literal.

        Returns:
            A StringLiteral AST node.
        """
        assert self._current_token is not None

        # Extract the actual string value without quotes
        literal = self._current_token.literal
        if literal.startswith('"') and literal.endswith('"'):
            value = literal[1:-1]
        elif literal.startswith("'") and literal.endswith("'"):
            value = literal[1:-1]
        else:
            # Fallback if no quotes found
            value = literal

        return StringLiteral(
            token=self._current_token,
            value=value,
        )
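
    # Illustration: _parse_string_literal trusts the lexer, so a LIT_TEXT
    # token whose literal is '"hello"' yields StringLiteral(value='hello'),
    # a single-quoted literal is unwrapped the same way, and an unquoted
    # literal falls through to the fallback branch unchanged.
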
    def _parse_list_literal(self) -> Expression:
        """Parse a list literal (unordered, ordered, or named).

        Called when current token is the first list item marker after "Set x to:"
        Determines the list type based on the first item marker and
        delegates to the appropriate specialized parser.

        Returns:
            UnorderedListLiteral, OrderedListLiteral, or NamedListLiteral
        """
        # SetStatement has already advanced past the colon to the first list item
        # Current token should be the first list item marker (dash, number, or EOF for empty list)
        if not self._current_token:
            return ErrorExpression(
                token=Token(TokenType.MISC_EOF, "", 0, 0),
                message="Unexpected EOF while parsing list",
            )

        # Save the starting token for error reporting
        list_token = self._current_token

        # List context should already be set by SetStatement

        # Check for empty list (no items)
        current_type = self._current_token.type if self._current_token else None
        if current_type in (None, TokenType.MISC_EOF) or (
            # Also check if we hit a statement terminator or new statement
            current_type in (TokenType.PUNCT_PERIOD, TokenType.KW_SET, TokenType.KW_DEFINE)
        ):
            # Empty list - default to unordered
            return UnorderedListLiteral(token=list_token, elements=[])

        # Skip any stopwords that might appear
        while self._current_token and self._current_token.type in (TokenType.MISC_STOPWORD,):
            self._advance_tokens()
        # Update current type after skipping stopwords
        current_type = self._current_token.type if self._current_token else None

        # Now current_token should be the first list item marker (dash or number)
        # Update current_type after advancing
        current_type = self._current_token.type if self._current_token else None

        # Look at what type of list this is
        if current_type == TokenType.PUNCT_DASH:
            # Check if it's a named list by looking for pattern: dash, key, colon
            # Named lists have patterns like: - _"key"_: value

            # Use the token buffer to peek ahead without advancing
            is_named_list = False

            # We're at the dash, peek_token is the key
            if self._peek_token:
                # Check token after the key using buffer
                if self._token_buffer:
                    # The buffer's current token is the token after our peek_token
                    colon_after_key = self._token_buffer.current()

                    # Check if we have the pattern: dash, key, colon
                    if colon_after_key and colon_after_key.type == TokenType.PUNCT_COLON:
                        # Check if peek_token (the key) is valid for a named list
                        peek_type = self._peek_token.type
                        if peek_type in (
                            TokenType.LIT_TEXT,
                            TokenType.MISC_IDENT,
                            TokenType.KW_NAME,
                            TokenType.KW_CONTENT,
                        ):
                            is_named_list = True
                        elif self._peek_token.literal and self._peek_token.literal.lower() in (
                            "age",
                            "active",
                            "profession",
                        ):
                            is_named_list = True

            result: Expression
            if is_named_list:
                result = self._parse_named_list_literal(list_token)
            else:
                # Not a named list, it's an unordered list
                result = self._parse_unordered_list_literal(list_token)

        # Check if it's an ordered list (number followed by period)
        elif (
            current_type == TokenType.LIT_WHOLE_NUMBER
            and self._peek_token
            and self._peek_token.type == TokenType.PUNCT_PERIOD
        ):
            result = self._parse_ordered_list_literal(list_token)
        else:
            # Invalid list format - return error expression
            result = ErrorExpression(
                token=self._current_token or list_token,
                message=(
                    f"Expected list item marker (dash or number), got "
                    f"{self._current_token.type if self._current_token else 'EOF'}"
                ),
            )

        return result
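
    # Illustration (assumed Machine Dialect surface syntax, inferred from the
    # token checks in the method above): unordered lists use dash items,
    # ordered lists use `1.`-style markers, and named lists pair
    # string-literal keys with values; every item ends with a period.
    #
    #     Set `colors` to:
    #     - _"red"_.
    #     - _"blue"_.
    #
    #     Set `steps` to:
    #     1. _"mix"_.
    #     2. _"bake"_.
    #
    #     Set `person` to:
    #     - "name": _"Ada"_.
    #     - "age": _36_.
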
    def _parse_unordered_list_literal(self, list_token: Token) -> UnorderedListLiteral:
        """Parse an unordered list (dash-prefixed items).

        Args:
            list_token: The token marking the start of the list

        Returns:
            UnorderedListLiteral with parsed items
        """
        items: list[Expression] = []

        # We might not be at the dash yet if we came from lookahead
        # Go back to find the dash
        if self._current_token and self._current_token.type != TokenType.PUNCT_DASH:
            # We're past the dash (probably at 'name' from lookahead), go back
            # Actually, this is complex. Let's just handle where we are.
            pass

        while True:
            # Check if we're at a dash (list item marker)
            if not self._current_token:
                break
            token_type = self._current_token.type
            if token_type != TokenType.PUNCT_DASH:
                break

            # Move past dash
            self._advance_tokens()

            # Parse the item expression
            item = self._parse_expression(Precedence.LOWEST)
            items.append(item)

            # After parsing expression, advance to check for period
            self._advance_tokens()

            # Each list item must end with a period
            if self._current_token and self._current_token.type == TokenType.PUNCT_PERIOD:
                # Good, we have the required period
                # Now check if there's another list item
                if self._peek_token and self._peek_token.type == TokenType.PUNCT_DASH:
                    # There's another list item, advance to it
                    self._advance_tokens()
                else:
                    # No more list items - we're done
                    break
            else:
                # Missing period after list item - use unified error handling
                from machine_dialect.errors.messages import EXPECTED_TOKEN_AFTER

                self._report_error_and_recover(
                    template=EXPECTED_TOKEN_AFTER,
                    expected_token=TokenType.PUNCT_PERIOD,
                    skip_recovery=True,  # We'll handle recovery manually below
                    expected="period",
                    after="list item",
                )

                # Check if we're at another dash (next item) or done
                if self._current_token and self._current_token.type == TokenType.PUNCT_DASH:
                    # Continue with next item despite missing period
                    continue
                else:
                    # No more items
                    break

        return UnorderedListLiteral(token=list_token, elements=items)

    def _parse_ordered_list_literal(self, list_token: Token) -> OrderedListLiteral:
        """Parse an ordered list (numbered items like 1., 2., etc).

        Args:
            list_token: The token marking the start of the list

        Returns:
            OrderedListLiteral with parsed items
        """
        items: list[Expression] = []

        while True:
            # Check if we're at a number (ordered list item marker)
            if not self._current_token:
                break
            token_type = self._current_token.type
            if token_type != TokenType.LIT_WHOLE_NUMBER:
                break

            # Skip the number
            self._advance_tokens()

            # Check for period after number (this is the list marker period, e.g., "1.")
            if not self._current_token or self._current_token.type != TokenType.PUNCT_PERIOD:
                break

            # Move past the list marker period
            self._advance_tokens()

            # Parse the item expression
            item = self._parse_expression(Precedence.LOWEST)
            items.append(item)

            # After parsing expression, advance to check for item-terminating period
            self._advance_tokens()

            # Each list item must end with a period
            if self._current_token and self._current_token.type == TokenType.PUNCT_PERIOD:
                # Good, we have the required period
                # Check if there's another list item
                if self._peek_token and self._peek_token.type == TokenType.LIT_WHOLE_NUMBER:
                    # There's another list item, advance to it
                    self._advance_tokens()
                else:
                    # No more list items - we're done
                    break
            else:
                # Missing period after list item - use unified error handling
                from machine_dialect.errors.messages import EXPECTED_TOKEN_AFTER

                self._report_error_and_recover(
                    template=EXPECTED_TOKEN_AFTER,
                    expected_token=TokenType.PUNCT_PERIOD,
                    skip_recovery=True,  # We'll handle recovery manually below
                    expected="period",
                    after="list item",
                )

                # Check if we're at another number (next item) or done
                if self._current_token and self._current_token.type == TokenType.LIT_WHOLE_NUMBER:
                    # Continue with next item despite missing period
                    continue
                else:
                    # No more items
                    break

        return OrderedListLiteral(token=list_token, elements=items)

    def _parse_named_list_literal(self, list_token: Token) -> NamedListLiteral:
        """Parse a named list (dictionary with key:value pairs).

        Format:
        - key1: value1
        - key2: value2

        Args:
            list_token: The token marking the start of the list

        Returns:
            NamedListLiteral with parsed key-value pairs
        """
        entries: list[tuple[str, Expression]] = []

        # Parse entries while we have dash-prefixed lines
        while True:
            # Check if we're at a dash (named list item marker)
            if not self._current_token:
                break
            token_type = self._current_token.type
            if token_type != TokenType.PUNCT_DASH:
                break

            # Move past the dash
            self._advance_tokens()

            # Parse the key (MUST be a string literal)
            key = ""
            current_type_after_dash: TokenType | None = self._current_token.type if self._current_token else None
            if current_type_after_dash == TokenType.LIT_TEXT:
                key = self._current_token.literal.strip('"')
                self._advance_tokens()
            else:
                # Invalid key - named lists require string literal keys only
                if self._current_token:
                    from machine_dialect.errors.messages import INVALID_NAMED_LIST_KEY

                    self._report_error_and_recover(
                        template=INVALID_NAMED_LIST_KEY,
                        error_type="type",
                        literal=self._current_token.literal,
                        recovery_tokens=[TokenType.PUNCT_DASH, TokenType.MISC_EOF],
                    )
                else:
                    self._panic_recovery(stop_at=[TokenType.PUNCT_DASH, TokenType.MISC_EOF])
                continue

            # Expect colon
            current_type_for_colon: TokenType | None = self._current_token.type if self._current_token else None
            if current_type_for_colon != TokenType.PUNCT_COLON:
                # Missing colon, this might be an unordered list item
                # Add error and try to continue
                entries.append(
                    (key, ErrorExpression(token=self._current_token or list_token, message="Expected colon after key"))
                )
                self._panic_recovery(stop_at=[TokenType.PUNCT_DASH, TokenType.MISC_EOF])
                continue

            self._advance_tokens()  # Move past colon

            # Parse the value expression
            value = self._parse_expression(Precedence.LOWEST)
            if not value:
                entries.append(
                    (
                        key,
                        ErrorExpression(token=self._current_token or list_token, message="Expected value after colon"),
                    )
                )
                self._panic_recovery(stop_at=[TokenType.PUNCT_DASH, TokenType.MISC_EOF])
                continue

            # After parsing expression, advance to check for period
            self._advance_tokens()

            # Each named list entry must end with a period
            if self._current_token and self._current_token.type == TokenType.PUNCT_PERIOD:
                # Good, we have the required period
                entries.append((key, value))
                # Check if there's another entry
                if self._peek_token and self._peek_token.type == TokenType.PUNCT_DASH:
                    # There's another entry, advance to it
                    self._advance_tokens()
                else:
                    # No more entries - we're done
                    break
            else:
                # Missing period after entry - add error but include the entry
                from machine_dialect.errors.messages import EXPECTED_TOKEN_AFTER

                self._report_error_and_recover(
                    template=EXPECTED_TOKEN_AFTER,
                    expected_token=TokenType.PUNCT_PERIOD,
                    skip_recovery=True,  # We'll handle recovery manually below
                    expected="period",
                    after="named list entry",
                )
                entries.append((key, value))

                # Check if we're at another dash (next entry) or done
                if self._current_token and self._current_token.type == TokenType.PUNCT_DASH:
                    # Continue with next entry despite missing period
                    continue
                else:
                    # No more entries
                    break

        return NamedListLiteral(token=list_token, entries=entries)
|
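    # Illustrative sketch (added commentary, not in the original diff): a named
    # list pairs string-literal keys with expression values, each entry ending
    # in a period, e.g. (underscore literal styling is an assumption):
    #
    #     - "host": _"localhost"_.
    #     - "port": _8080_.
    #
    # which yields NamedListLiteral(entries=[("host", ...), ("port", ...)]).
    # Non-string keys are reported via INVALID_NAMED_LIST_KEY and skipped.
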
    def _parse_url_literal(self) -> URLLiteral:
        """Parse a URL literal.

        Returns:
            A URLLiteral AST node.
        """
        assert self._current_token is not None

        # Extract the actual URL value without quotes (like string literals)
        literal = self._current_token.literal
        if literal.startswith('"') and literal.endswith('"'):
            value = literal[1:-1]
        elif literal.startswith("'") and literal.endswith("'"):
            value = literal[1:-1]
        else:
            # Fallback if no quotes found
            value = literal

        return URLLiteral(
            token=self._current_token,
            value=value,
        )

    def _parse_prefix_expression(self) -> PrefixExpression:
        """Parse a prefix expression.

        Prefix expressions consist of a prefix operator followed by an expression.
        Examples: -42, not True, --5, not not False

        Returns:
            A PrefixExpression AST node (the right operand may be an
            ErrorExpression if parsing it fails).
        """
        assert self._current_token is not None

        # Create the prefix expression with the operator
        expression = PrefixExpression(
            token=self._current_token,
            operator=self._current_token.literal,
        )

        # Advance past the operator
        self._advance_tokens()

        # Parse the right-hand expression with the appropriate precedence.
        # All unary operators (including 'not') have high precedence.
        expression.right = self._parse_expression(Precedence.UNARY_SIMPLIFIED)

        return expression

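    # Illustrative note (added): because the right operand is parsed at
    # UNARY_SIMPLIFIED precedence, stacked operators nest to the right, e.g.
    # 'not not False' parses as
    # PrefixExpression("not", PrefixExpression("not", BooleanLiteral(False))).
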
    def _parse_ordinal_list_access(self) -> Expression:
        """Parse ordinal list access: '[the] first item of list'.

        Handles both forms:
        - 'the first item of list' (with optional 'the')
        - 'first item of list' (without 'the')

        Returns:
            A CollectionAccessExpression for ordinal access.
        """
        assert self._current_token is not None

        # Check if we're starting with an ordinal directly or with 'the' (stopword)
        if self._current_token.type == TokenType.MISC_STOPWORD and self._current_token.literal.lower() == "the":
            # Skip optional 'the'
            self._advance_tokens()

        # Now we should have an ordinal (first, second, third, last)
        if self._current_token is None or self._current_token.type not in [
            TokenType.KW_FIRST,
            TokenType.KW_SECOND,
            TokenType.KW_THIRD,
            TokenType.KW_LAST,
        ]:
            # Not a valid ordinal access pattern
            return ErrorExpression(
                token=self._current_token or Token(TokenType.MISC_ILLEGAL, "", 0, 0),
                message="Not a valid ordinal access pattern",
            )

        ordinal_token = self._current_token
        ordinal = self._current_token.literal

        # Skip ordinal
        self._advance_tokens()

        # Expect 'item'
        if self._current_token is None or self._current_token.type != TokenType.KW_ITEM:
            msg = f"Expected 'item' after ordinal, got {self._current_token.type if self._current_token else 'EOF'}"
            return ErrorExpression(token=self._current_token or ordinal_token, message=msg)

        # Skip 'item'
        self._advance_tokens()

        # Expect 'of' - check the new current token after advancing
        current = self._current_token
        if current is None or current.type != TokenType.KW_OF:
            msg = f"Expected 'of' after 'item', got {self._current_token.type if self._current_token else 'EOF'}"
            return ErrorExpression(token=self._current_token or ordinal_token, message=msg)

        # Skip 'of'
        self._advance_tokens()

        # Parse the collection expression
        collection = self._parse_expression(Precedence.LOWEST)

        return CollectionAccessExpression(
            token=ordinal_token, collection=collection, accessor=ordinal, access_type="ordinal"
        )

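    # Illustrative note (added): 'the first item of `scores`' and
    # 'first item of `scores`' both yield a
    # CollectionAccessExpression(access_type="ordinal") whose accessor is the
    # ordinal token's literal; the leading 'the' is an optional stopword.
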
    def _parse_stopword_expression(self) -> Expression:
        """Parse expressions that start with stopwords.

        Handles:
        - 'the' for list access patterns (the first/second/third/last item of)

        Returns:
            An appropriate expression, or an ErrorExpression if the stopword
            does not start a recognized pattern.
        """
        assert self._current_token is not None

        # Check if it's 'the', which might start various patterns
        if self._current_token.literal.lower() == "the":
            # Look ahead to see if it's followed by an ordinal
            if self._peek_token and self._peek_token.type in [
                TokenType.KW_FIRST,
                TokenType.KW_SECOND,
                TokenType.KW_THIRD,
                TokenType.KW_LAST,
            ]:
                return self._parse_ordinal_list_access()

        # Otherwise, stopwords aren't valid expression starters
        return ErrorExpression(
            token=self._current_token,
            message=f"Unexpected stopword '{self._current_token.literal}' at start of expression",
        )

    def _parse_dict_extraction_prefix(self) -> Expression:
        """Parse dictionary extraction as a prefix operator.

        Examples:
            the names of `person` -> DictExtraction(dictionary=person, extract_type="names")
            the contents of `config` -> DictExtraction(dictionary=config, extract_type="contents")

        Returns:
            A DictExtraction expression.
        """
        assert self._current_token is not None
        operator_token = self._current_token

        # Determine the extraction type based on the operator
        if operator_token.type == TokenType.OP_THE_NAMES_OF:
            extract_type = "names"
        elif operator_token.type == TokenType.OP_THE_CONTENTS_OF:
            extract_type = "contents"
        else:
            msg = f"Unknown dictionary extraction operator: {operator_token.type}"
            return ErrorExpression(token=operator_token, message=msg)

        # Skip the operator
        self._advance_tokens()

        # Parse the dictionary expression
        dictionary = self._parse_expression(Precedence.UNARY_POST_OPERATOR)

        if dictionary is None:
            msg = "Expected dictionary expression after extraction operator"
            return ErrorExpression(token=self._current_token or operator_token, message=msg)

        # Import here to avoid circular dependency
        from machine_dialect.ast.dict_extraction import DictExtraction

        return DictExtraction(token=operator_token, dictionary=dictionary, extract_type=extract_type)

    def _parse_dict_extraction_infix(self, dictionary: Expression) -> Expression:
        """Parse dictionary extraction as an infix operator.

        Examples:
            `person` the names of -> DictExtraction(dictionary=person, extract_type="names")
            `config` the contents of -> DictExtraction(dictionary=config, extract_type="contents")

        Args:
            dictionary: The dictionary expression to extract from.

        Returns:
            A DictExtraction expression.
        """
        assert self._current_token is not None
        operator_token = self._current_token

        # Determine the extraction type based on the operator
        if operator_token.type == TokenType.OP_THE_NAMES_OF:
            extract_type = "names"
        elif operator_token.type == TokenType.OP_THE_CONTENTS_OF:
            extract_type = "contents"
        else:
            msg = f"Unknown dictionary extraction operator: {operator_token.type}"
            return ErrorExpression(token=operator_token, message=msg)

        # Import here to avoid circular dependency
        from machine_dialect.ast.dict_extraction import DictExtraction

        return DictExtraction(token=operator_token, dictionary=dictionary, extract_type=extract_type)

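    # Illustrative note (added): both entry points build the same node, so
    # 'the names of `person`' yields
    # DictExtraction(dictionary=Identifier("person"), extract_type="names")
    # whether the operator is met in prefix or infix position; only the prefix
    # form has to parse the dictionary operand itself.
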
    # TODO: Refactor this function to an infix expression
    def _parse_possessive_access(self) -> Expression:
        """Parse possessive property access: `person`'s _"name"_.

        When the lexer sees `person`'s, it emits a PUNCT_APOSTROPHE_S token
        with the identifier as the literal. We then need to parse the property name
        as a string literal.

        Returns:
            A CollectionAccessExpression for property access.
        """
        assert self._current_token is not None
        assert self._current_token.type == TokenType.PUNCT_APOSTROPHE_S

        # The literal contains the identifier name (e.g., "person")
        dict_name = self._current_token.literal
        token = self._current_token

        # Create an identifier for the dictionary
        dict_identifier = Identifier(Token(TokenType.MISC_IDENT, dict_name, token.line, token.position), dict_name)

        # Skip the possessive token
        self._advance_tokens()

        # Now we expect a string literal for the property name.
        # Note: after _advance_tokens(), current_token has changed from PUNCT_APOSTROPHE_S.
        if self._current_token is None or self._current_token.type != TokenType.LIT_TEXT:
            msg = (
                "Expected string literal for property name after possessive, got "
                f"{self._current_token.type if self._current_token else 'EOF'}"
            )
            return ErrorExpression(token=self._current_token or token, message=msg)

        # Extract the property name from the string literal (remove quotes)
        property_literal = self._current_token.literal
        # Remove quotes from the literal
        if property_literal.startswith('"') and property_literal.endswith('"'):
            property_name = property_literal[1:-1]
        elif property_literal.startswith("'") and property_literal.endswith("'"):
            property_name = property_literal[1:-1]
        else:
            property_name = property_literal

        # Note: We do NOT advance past the property name here.
        # Expression parsers should leave current_token AT the last token of the
        # expression, not after it. The caller will advance when needed.

        # Create a collection access expression with the property access type
        return CollectionAccessExpression(
            token=token, collection=dict_identifier, accessor=property_name, access_type="property"
        )

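    # Illustrative note (added): for input like `person`'s _"name"_, the lexer
    # has already folded the identifier into the PUNCT_APOSTROPHE_S token, so
    # the parse yields
    # CollectionAccessExpression(collection=Identifier("person"),
    #                            accessor="name", access_type="property").
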
    def _parse_numeric_list_access(self) -> Expression:
        """Parse numeric list access: 'item _5_ of list'.

        Returns:
            A CollectionAccessExpression for numeric access.
        """
        assert self._current_token is not None
        assert self._current_token.type == TokenType.KW_ITEM

        item_token = self._current_token

        # Skip 'item'
        self._advance_tokens()

        # Expect a number literal - check the new current token after advancing
        current = self._current_token
        if current is None or current.type != TokenType.LIT_WHOLE_NUMBER:
            msg = f"Expected number after 'item', got {self._current_token.type if self._current_token else 'EOF'}"
            return ErrorExpression(token=self._current_token or item_token, message=msg)

        # Get the index (one-based in Machine Dialect™)
        index = int(self._current_token.literal)

        # Skip the number
        self._advance_tokens()

        # Expect 'of' - check the new current token after advancing
        current = self._current_token
        if current is None or current.type != TokenType.KW_OF:
            msg = f"Expected 'of' after number, got {self._current_token.type if self._current_token else 'EOF'}"
            return ErrorExpression(token=self._current_token or item_token, message=msg)

        # Skip 'of'
        self._advance_tokens()

        # Parse the collection expression
        collection = self._parse_expression(Precedence.LOWEST)

        return CollectionAccessExpression(
            token=item_token, collection=collection, accessor=index, access_type="numeric"
        )

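    # Illustrative note (added): 'item _5_ of `scores`' yields
    # CollectionAccessExpression(accessor=5, access_type="numeric"). Indices
    # are one-based per the comment above; bounds checking presumably happens
    # in a later phase, since only the literal is validated here.
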
    def _parse_infix_expression(self, left: Expression) -> InfixExpression:
        """Parse an infix expression.

        Infix expressions consist of a left expression, an infix operator, and a
        right expression. Examples: 5 + 3, x == y, a and b.

        Args:
            left: The left-hand expression that was already parsed.

        Returns:
            An InfixExpression AST node.
        """
        assert self._current_token is not None

        # Map token type to operator string
        operator_map = {
            TokenType.OP_PLUS: "+",
            TokenType.OP_MINUS: "-",
            TokenType.OP_STAR: "*",
            TokenType.OP_DIVISION: "/",
            TokenType.OP_EQ: "equals",
            TokenType.OP_NOT_EQ: "is not",
            TokenType.OP_STRICT_EQ: "is strictly equal to",
            TokenType.OP_STRICT_NOT_EQ: "is not strictly equal to",
            TokenType.OP_LT: "<",
            TokenType.OP_GT: ">",
            TokenType.OP_LTE: "<=",
            TokenType.OP_GTE: ">=",
            TokenType.KW_AND: "and",
            TokenType.KW_OR: "or",
        }

        # Get the operator string
        operator = operator_map.get(self._current_token.type, self._current_token.literal)

        # Create the infix expression with the operator and left operand
        expression = InfixExpression(
            token=self._current_token,
            operator=operator,
            left=left,
        )

        # Get the precedence of this operator
        precedence = self._current_precedence()

        # Advance past the operator
        self._advance_tokens()

        # Parse the right-hand expression
        expression.right = self._parse_expression(precedence)

        return expression

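    # Illustrative note (added): precedence climbing falls out of reusing the
    # operator's own precedence for the right-hand side. '5 + 3 * 2' parses as
    # InfixExpression("+", 5, InfixExpression("*", 3, 2)): while parsing the
    # right side of '+', only the higher-precedence '*' gets consumed into it.
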
    def _parse_grouped_expression(self) -> Expression:
        """Parse a grouped expression (expression in parentheses).

        Grouped expressions are expressions wrapped in parentheses, which
        can be used to override operator precedence.

        Returns:
            The expression inside the parentheses, or an ErrorExpression if
            the closing parenthesis is missing.
        """
        # Advance past the opening parenthesis
        self._advance_tokens()

        # Parse the inner expression
        expression = self._parse_expression(Precedence.LOWEST)

        # Expect the closing parenthesis
        assert self._current_token is not None
        if error := self._expect_token(
            TokenType.DELIM_RPAREN,
            error_node=ErrorExpression(token=self._current_token, message="Expected closing parenthesis"),
        ):
            assert isinstance(error, ErrorExpression)
            return error

        return expression

    def _parse_conditional_expression(self, consequence: Expression) -> ConditionalExpression:
        """Parse a conditional (ternary) expression.

        Formats supported (any combination of 'if'/'when' with 'else'/'otherwise',
        with either a comma or a semicolon before the else clause):
        - consequence if condition, else alternative
        - consequence when condition; otherwise alternative

        Args:
            consequence: The expression to return if the condition is true.

        Returns:
            A ConditionalExpression node.
        """
        assert self._current_token is not None
        # Create the conditional expression with the consequence
        expression = ConditionalExpression(token=self._current_token, consequence=consequence)

        # Move past 'if' or 'when'
        self._advance_tokens()

        # Parse the condition with TERNARY precedence to stop at the comma
        expression.condition = self._parse_expression(Precedence.TERNARY)

        # After parsing the condition, we need to advance to the next token:
        # _parse_expression leaves us at the last token of the parsed expression.
        self._advance_tokens()

        # Check for a comma or semicolon before 'else'/'otherwise'
        if self._current_token and self._current_token.type in (TokenType.PUNCT_COMMA, TokenType.PUNCT_SEMICOLON):
            self._advance_tokens()  # Move past comma/semicolon

        # Expect 'else' or 'otherwise' (both map to KW_ELSE)
        if not self._current_token or self._current_token.type != TokenType.KW_ELSE:
            return expression  # Return the incomplete expression if no else clause

        # Move past 'else' or 'otherwise'
        self._advance_tokens()

        # Parse the alternative expression
        expression.alternative = self._parse_expression(Precedence.LOWEST)

        return expression

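    # Illustrative note (added): '_1_ if `flag`, else _0_' yields
    # ConditionalExpression(consequence=1, condition=flag, alternative=0);
    # parsing the condition at TERNARY precedence is what keeps it from
    # swallowing the comma before 'else'.
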
    def _parse_define_statement(self) -> DefineStatement | ErrorStatement:
        """Parse a Define statement.

        Grammar:
            define_statement ::= "Define" identifier "as" type_spec
                                 ["(" "default" ":" expression ")"] "."

        Examples:
            Define `x` as Whole Number.
            Define `name` as Text (default: _"Unknown"_).
            Define `value` as Whole Number or Text.

        Returns:
            DefineStatement on success, ErrorStatement on parse error.
        """
        statement_token = self._current_token
        assert statement_token is not None

        # Move past "Define" to get to the identifier
        self._advance_tokens()

        # Check if we have an identifier
        if not self._current_token or self._current_token.type != TokenType.MISC_IDENT:
            # Report the error and get the recovery result
            error_stmt = self._report_error_and_recover(
                template=EXPECTED_IDENTIFIER_AFTER,
                expected_token=TokenType.MISC_IDENT,
                what="variable",
                after="'Define'",
                recovery_tokens=[TokenType.KW_AS, TokenType.PUNCT_PERIOD, TokenType.MISC_EOF],
            )

            # Try to continue parsing if we recovered at the 'as' keyword
            if self._current_token and self._current_token.type == TokenType.KW_AS:
                # Found 'as', try to continue parsing from here
                name = Identifier(statement_token, "<error>")  # Placeholder name
                self._advance_tokens()  # Skip 'as'
                type_spec = self._parse_type_spec()
                if type_spec:
                    return DefineStatement(statement_token, name, type_spec, None)

            assert isinstance(error_stmt, ErrorStatement)
            return error_stmt

        # Parse the identifier
        name = self._parse_identifier()

        # Move past the identifier
        self._advance_tokens()

        # Skip any stopwords between the identifier and "as"
        while self._current_token and self._current_token.type == TokenType.MISC_STOPWORD:
            self._advance_tokens()

        # Expect the "as" keyword - we should be at "as" now.
        # Re-check current_token to help MyPy's type narrowing.
        if self._current_token is None or self._current_token.type != TokenType.KW_AS:
            # Report the error with recovery to type keywords
            error_stmt = self._report_error_and_recover(
                template=EXPECTED_TOKEN_AFTER,
                expected_token=TokenType.KW_AS,
                expected="'as'",
                after=f"variable name '{name.value}'",
                recovery_to_types=True,
            )

            # If we recovered at a type keyword, try to continue parsing
            if self._current_token and self._is_type_token(self._current_token.type):
                # Found a type, try to continue parsing
                type_spec = self._parse_type_spec()
                if type_spec:
                    # Still register the variable even with a syntax error
                    self._register_variable_definition(
                        name.value, type_spec, statement_token.line, statement_token.position
                    )
                    return DefineStatement(statement_token, name, type_spec, None)

            # Need additional recovery if we didn't find a type
            if not isinstance(error_stmt, ErrorStatement):
                # This shouldn't happen, but handle it just in case
                skipped = self._panic_recovery()
                return ErrorStatement(
                    token=statement_token,
                    skipped_tokens=skipped,
                    message=f"Expected 'as' after variable name '{name.value}'",
                )
            return error_stmt

        # Move past "as"
        self._advance_tokens()

        # Parse the type specification
        type_spec = self._parse_type_spec()
        if not type_spec:
            # Get the invalid type name token
            invalid_name = self._current_token.literal if self._current_token else "unknown"
            # Generate the valid types list from TYPING_MAP
            valid_types = list(TYPING_MAP.values())
            return self._report_error_and_recover(
                template=INVALID_TYPE_NAME,
                name=invalid_name,
                valid_types=", ".join(valid_types),
                expected_token=TokenType.KW_TEXT,  # Use TEXT as representative type
                expected="type name",
                after="'as'",
            )

        # Optional: (default: value) clause
        initial_value = None
        if self._current_token and self._current_token.type == TokenType.DELIM_LPAREN:
            self._advance_tokens()  # Move past "("

            # Expect "default" - we should be at "default" now
            if not self._current_token or self._current_token.type != TokenType.KW_DEFAULT:
                # Report the error and handle recovery to the closing paren
                error_stmt = self._report_error_and_recover(
                    template=EXPECTED_TOKEN_AFTER,
                    expected_token=TokenType.KW_DEFAULT,
                    expected="'default'",
                    after="'('",
                    recovery_tokens=[TokenType.DELIM_RPAREN, TokenType.PUNCT_PERIOD, TokenType.MISC_EOF],
                )
                # If we found the closing paren, advance past it
                if self._current_token and self._current_token.type == TokenType.DELIM_RPAREN:
                    self._advance_tokens()
                return error_stmt

            # Move past "default"
            self._advance_tokens()

            # Expect ":" - we should be at ":"
            if not self._current_token or self._current_token.type != TokenType.PUNCT_COLON:
                # Report the error and handle recovery to the closing paren
                error_stmt = self._report_error_and_recover(
                    template=EXPECTED_TOKEN_AFTER,
                    expected_token=TokenType.PUNCT_COLON,
                    expected="':'",
                    after="'default'",
                    recovery_tokens=[TokenType.DELIM_RPAREN, TokenType.PUNCT_PERIOD, TokenType.MISC_EOF],
                )
                # If we found the closing paren, advance past it
                if self._current_token and self._current_token.type == TokenType.DELIM_RPAREN:
                    self._advance_tokens()
                return error_stmt

            # Move past ":"
            self._advance_tokens()

            # Parse the default value expression
            initial_value = self._parse_expression(Precedence.LOWEST)

            # Expect ")" - check if we're at the closing paren
            if self._peek_token and self._peek_token.type != TokenType.DELIM_RPAREN:
                # Report the error but don't return - continue to create the statement
                self._report_error_and_recover(
                    template=EXPECTED_TOKEN_AFTER,
                    expected_token=TokenType.DELIM_RPAREN,
                    expected="')'",
                    after="default value",
                    skip_recovery=True,  # Don't recover, continue processing
                )
            elif self._peek_token:
                self._advance_tokens()  # Move to ")"
                self._advance_tokens()  # Skip ")"

        # Check for a period at the statement end (optional for now)
        if self._peek_token and self._peek_token.type == TokenType.PUNCT_PERIOD:
            self._advance_tokens()  # Move to the period

        # Register the variable definition in the symbol table
        self._register_variable_definition(name.value, type_spec, statement_token.line, statement_token.position)

        return DefineStatement(statement_token, name, type_spec, initial_value)

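    # Illustrative note (added): 'Define `name` as Text (default: _"Unknown"_).'
    # both registers `name` in the symbol table and yields a DefineStatement
    # whose type_spec is ["Text"] and whose initial_value is the parsed
    # default expression.
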
    def _parse_type_spec(self) -> list[str]:
        """Parse a type specification, handling union types.

        Grammar:
            type_spec ::= type_name ["or" type_name]*
            type_name ::= "Text" | "Whole Number" | "Float" | "Number" | "Yes/No"
                        | "URL" | "Date" | "DateTime" | "Time" | "List" | "Empty"

        Examples:
            Whole Number -> ["Whole Number"]
            Whole Number or Text -> ["Whole Number", "Text"]
            Number or Yes/No or Empty -> ["Number", "Yes/No", "Empty"]

        Returns:
            List of type names; an empty list if no valid type was found.
        """
        types = []

        # Parse the first type
        type_name = self._parse_type_name()
        if type_name:
            types.append(type_name)
        else:
            return types  # Return an empty list if no type was found

        # Parse additional types joined with "or" (union types)
        while self._current_token and self._current_token.type == TokenType.KW_OR:
            self._advance_tokens()  # Skip "or"

            type_name = self._parse_type_name()
            if type_name:
                types.append(type_name)
            else:
                # A missing type name after "or" is an error
                self._report_error_and_recover(
                    template=EXPECTED_TOKEN_AFTER,
                    expected_token=TokenType.KW_TEXT,  # Use TEXT as representative
                    expected="type name",
                    after="'or'",
                    skip_recovery=True,  # Continue with what we have
                )
                break

        return types

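    # Illustrative note (added): union types chain with 'or', so
    # 'Whole Number or Text or Empty' produces
    # ["Whole Number", "Text", "Empty"]; a dangling 'or' reports an error but
    # keeps the types collected so far.
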
    def _parse_type_name(self) -> str | None:
        """Parse a single type name.

        Only handles keyword-based types as specified in the grammar.

        Returns:
            The type name as a string, or None if the current token is not a type.
        """
        if not self._current_token:
            return None

        if self._current_token.type in TYPING_MAP:
            type_name = TYPING_MAP[self._current_token.type]
            self._advance_tokens()
            return type_name

        return None

    def _parse_set_statement(self) -> SetStatement | ErrorStatement | Statement:
        """Parse a Set statement.

        Expects:
        - Set `identifier` to expression
        - Set the second item of `list` to expression
        - Set item _5_ of `list` to expression

        Returns:
            A SetStatement AST node if successful, ErrorStatement if parsing fails.
        """
        from machine_dialect.ast.statements import CollectionMutationStatement

        assert self._current_token is not None
        statement_token = self._current_token  # Save the 'Set' token

        # Check for collection item assignment patterns.
        # We need to handle both "Set the first item of" and "Set item _1_ of".
        if self._peek_token:
            # Since stopwords are auto-skipped, after "Set" we might be directly at
            # "first"/"second" etc. if the user wrote "Set the first...",
            # because "the" gets skipped.
            if self._peek_token.type in (
                TokenType.KW_FIRST,
                TokenType.KW_SECOND,
                TokenType.KW_THIRD,
                TokenType.KW_LAST,
            ):
                # Pattern: Set [the] [ordinal] item of `list` to value
                # ("the" was already skipped if present)
                self._advance_tokens()  # Move to the ordinal

                # We're now at the ordinal
                if not self._current_token or self._current_token.type not in (
                    TokenType.KW_FIRST,
                    TokenType.KW_SECOND,
                    TokenType.KW_THIRD,
                    TokenType.KW_LAST,
                ):
                    return ErrorStatement(
                        token=statement_token, message="Expected ordinal (first, second, third, last)"
                    )

                ordinal = self._current_token.literal.lower()
                self._advance_tokens()  # Move past the ordinal

                # Expect "item"
                if self._current_token and self._current_token.type == TokenType.KW_ITEM:
                    self._advance_tokens()  # Move past 'item'

                # Expect "of"
                if self._current_token and self._current_token.type == TokenType.KW_OF:
                    self._advance_tokens()  # Move past 'of'

                # Parse the collection identifier
                collection = self._parse_identifier_or_keyword_as_identifier()
                if not collection:
                    error_stmt = self._report_error_and_recover(
                        template=EXPECTED_TOKEN,
                        expected_token=TokenType.MISC_IDENT,
                        token="collection identifier",
                        got_token_type=self._current_token.type.name if self._current_token else "EOF",
                    )
                    assert isinstance(error_stmt, ErrorStatement)
                    return error_stmt
                self._advance_tokens()

                # Expect "to"
                if self._current_token and self._current_token.type == TokenType.KW_TO:
                    self._advance_tokens()

                # Parse the value
                value = self._parse_expression(Precedence.LOWEST)
                self._advance_tokens()

                # Expect a period
                if self._current_token and self._current_token.type != TokenType.PUNCT_PERIOD:
                    if error := self._expect_token(TokenType.PUNCT_PERIOD):
                        assert isinstance(error, ErrorStatement)
                        return error

                return CollectionMutationStatement(
                    token=statement_token,
                    operation="set",
                    collection=collection,
                    value=value,
                    position=ordinal,
                    position_type="ordinal",
                )

            elif self._peek_token.type == TokenType.KW_ITEM:
                # Pattern: Set item _5_ of `list` to value
                self._advance_tokens()  # Move to 'item'
                self._advance_tokens()  # Move past 'item'

                # Parse the numeric index
                index = self._parse_expression(Precedence.LOWEST)
                self._advance_tokens()

                # Expect "of"
                if self._current_token and self._current_token.type == TokenType.KW_OF:
                    self._advance_tokens()

                # Parse the collection identifier
                collection = self._parse_identifier_or_keyword_as_identifier()
                if not collection:
                    error_stmt = self._report_error_and_recover(
                        template=EXPECTED_TOKEN,
                        expected_token=TokenType.MISC_IDENT,
                        token="collection identifier",
                        got_token_type=self._current_token.type.name if self._current_token else "EOF",
                    )
                    assert isinstance(error_stmt, ErrorStatement)
                    return error_stmt
                self._advance_tokens()

                # Expect "to"
                if self._current_token and self._current_token.type == TokenType.KW_TO:
                    self._advance_tokens()

                # Parse the value
                value = self._parse_expression(Precedence.LOWEST)
                self._advance_tokens()

                # Expect a period
                if self._current_token and self._current_token.type != TokenType.PUNCT_PERIOD:
                    if error := self._expect_token(TokenType.PUNCT_PERIOD):
                        assert isinstance(error, ErrorStatement)
                        return error

                return CollectionMutationStatement(
                    token=statement_token,
                    operation="set",
                    collection=collection,
                    value=value,
                    position=index,
                    position_type="numeric",
                )

        # Normal Set statement: Set `identifier` to expression
        let_statement = SetStatement(token=statement_token)

        # Expect an identifier (which may have come from backticks)
        if error := self._expect_token(TokenType.MISC_IDENT, "'Set'"):
            assert isinstance(error, ErrorStatement)
            return error

        # Use the identifier value directly (backticks already stripped by the lexer)
        let_statement.name = self._parse_identifier()

        # Variables MUST be defined before use - no exceptions
        variable_defined = self._check_variable_defined(
            let_statement.name.value, let_statement.name.token.line, let_statement.name.token.position
        )

        # Check for the "to" or "using" keyword
        assert self._peek_token is not None
        skip_period_advance = False  # Set when a branch already leaves us positioned at the end
        if self._peek_token.type == TokenType.KW_TO:
            # Standard assignment: Set x to value
            self._advance_tokens()  # Move to 'to'

            # Check if this is a list definition (colon after 'to').
            # After advancing, peek_token is now the next token.
            next_token_type: TokenType | None = self._peek_token.type if self._peek_token else None
            if next_token_type == TokenType.PUNCT_COLON:
                # This will be a list - set the context NOW, before advancing.
                # This ensures the dash tokens after the colon are properly tokenized.
                if self._token_buffer:
                    self._token_buffer.set_list_context(True)

                self._advance_tokens()  # Move past 'to' to the colon

                # Advance past the colon to get to the first list item
                self._advance_tokens()

                # Parse the list - the current token should now be the first list item marker
                let_statement.value = self._parse_list_literal()

                # Disable the list context after parsing
                if self._token_buffer:
                    self._token_buffer.set_list_context(False)

                # After parsing a list, we're already properly positioned
                # (either at EOF, a period, or the next statement),
                # so skip the advance and period check below.
                skip_period_advance = True
            else:
                # Not a list, advance past 'to' and parse the expression normally
                self._advance_tokens()  # Move past 'to'

                # Check for the "blank" keyword for empty collections
                if self._current_token and self._current_token.type == TokenType.KW_BLANK:
                    from machine_dialect.ast import BlankLiteral

                    let_statement.value = BlankLiteral(self._current_token)
                    # Don't advance here - let the normal flow handle it
                else:
                    # Parse the value expression normally
                    let_statement.value = self._parse_expression()

        elif self._peek_token.type == TokenType.KW_USING:
            # Function call assignment: Set x using function_name
            self._advance_tokens()  # Move to 'using'
            self._advance_tokens()  # Move past 'using'
            # Parse a function call (similar to a Use statement, but returns the value)
            func_call = self._parse_function_call_expression()
            # CallExpression is an Expression, so this is valid
            let_statement.value = func_call
            # Note: _parse_function_call_expression already leaves us at the period,
            # so we skip the advance_tokens() call below for this branch
            skip_period_advance = True
        else:
            # Report the error using unified error handling
            assert self._peek_token is not None
            error_stmt = self._report_error_and_recover(
                template=EXPECTED_TOKEN,
                expected_token=TokenType.KW_TO,
                token="'to' or 'using' keyword",
                got_token_type=self._peek_token.type.name if self._peek_token else "EOF",
            )
            assert isinstance(error_stmt, ErrorStatement)
            return error_stmt

        # Advance past the last token of the expression.
        # Expression parsing leaves us at the last token, not after it.
        # BUT: the 'using' and list branches already leave us positioned correctly.
        if not skip_period_advance:
            self._advance_tokens()

        # Type-check the assignment if the variable is defined
        if variable_defined and let_statement.value and not isinstance(let_statement.value, ErrorExpression):
            self._validate_assignment_type(
                let_statement.name.value,
                let_statement.value,
                let_statement.name.token.line,
                let_statement.name.token.position,
            )

        # If the expression failed, skip to a synchronization point
        if isinstance(let_statement.value, ErrorExpression):
            # Skip remaining tokens until we're at a period or EOF
            while self._current_token is not None and self._current_token.type not in (
                TokenType.PUNCT_PERIOD,
                TokenType.MISC_EOF,
            ):
                self._advance_tokens()

        # Require a trailing period if not at EOF or if we're in a block.
        # But if we're already at a period (after error recovery), don't expect another.
        assert self._peek_token is not None
        if self._current_token and self._current_token.type == TokenType.PUNCT_PERIOD:
            # Already at a period, no need to expect one
            pass
        elif self._peek_token.type != TokenType.MISC_EOF or self._block_depth > 0:
            if error := self._expect_token(TokenType.PUNCT_PERIOD):
                assert isinstance(error, ErrorStatement)
                return error

        return let_statement

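    # Illustrative note (added): the accepted shapes dispatch to different
    # nodes (`compute` is a made-up function name):
    #
    #     Set `x` to _5_.                     -> SetStatement
    #     Set the first item of `xs` to _5_.  -> CollectionMutationStatement (ordinal)
    #     Set item _2_ of `xs` to _5_.        -> CollectionMutationStatement (numeric)
    #     Set `y` using `compute` with _5_.   -> SetStatement wrapping a CallExpression
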
    def _parse_return_statement(self) -> ReturnStatement | ErrorStatement:
        """Parse a return statement.

        Expects: "give back" expression or "gives back" expression.

        Returns:
            A ReturnStatement AST node.
        """
        assert self._current_token is not None
        return_statement = ReturnStatement(token=self._current_token)

        # Advance past "give back" or "gives back"
        self._advance_tokens()

        # Parse the return value expression
        return_statement.return_value = self._parse_expression()

        # Advance past the last token of the expression.
        # Expression parsing leaves us at the last token, not after it.
        self._advance_tokens()

        # If the expression failed, don't require a period since we're already in error recovery
        if not isinstance(return_statement.return_value, ErrorExpression):
            # Require a trailing period if not at EOF or if we're in a block.
            # But if we're already at a period (after error recovery), don't expect another.
            assert self._peek_token is not None
            if self._current_token and self._current_token.type == TokenType.PUNCT_PERIOD:
                # Already at a period, no need to expect one
                pass
            elif self._peek_token.type != TokenType.MISC_EOF or self._block_depth > 0:
                if error := self._expect_token(TokenType.PUNCT_PERIOD):
                    assert isinstance(error, ErrorStatement)
                    return error

        return return_statement

    def _parse_say_statement(self) -> SayStatement | ErrorStatement:
        """Parse a Say or Tell statement.

        Syntax: Say <expression>. or Tell <expression>.

        Returns:
            A SayStatement AST node.
        """
        assert self._current_token is not None
        assert self._current_token.type in (TokenType.KW_SAY, TokenType.KW_TELL)

        statement_token = self._current_token

        # Move past 'Say' or 'Tell'
        self._advance_tokens()

        # Parse the expression to output
        expression = self._parse_expression(Precedence.LOWEST)

        # Create the Say statement
        say_statement = SayStatement(statement_token, expression)

        # Expect a period at the end
        if self._peek_token and self._peek_token.type == TokenType.PUNCT_PERIOD:
            self._advance_tokens()
        # But if we're already at a period (after error recovery), don't expect another
        elif self._current_token and self._current_token.type != TokenType.PUNCT_PERIOD:
            if error := self._expect_token(TokenType.PUNCT_PERIOD):
                assert isinstance(error, ErrorStatement)
                return error

        return say_statement

    def _parse_collection_mutation_statement(self) -> Statement:
        """Parse a collection mutation statement.

        Handles:
        - Add _value_ to `list`.
        - Remove _value_ from `list`.
        - Set the second item of `list` to _value_.
        - Set item _5_ of `list` to _value_.
        - Insert _value_ at position _3_ in `list`.
        - Update "key" in `dict` to _value_.
        - Clear `list`.

        Returns:
            A CollectionMutationStatement AST node.
        """
        from machine_dialect.ast.statements import CollectionMutationStatement

        assert self._current_token is not None
        start_token = self._current_token
        operation = start_token.literal.lower()

        # Move past the operation keyword
        self._advance_tokens()

        if operation == "add":
            # Two syntaxes:
            # 1. Add _value_ to `list`. (for arrays)
            # 2. Add "key" to `dict` with value _value_. (for named lists)

            # Parse the first value/key
            first_value = self._parse_expression(Precedence.LOWEST)
            self._advance_tokens()

            # Skip "to"
            if self._current_token and self._current_token.type == TokenType.KW_TO:
                self._advance_tokens()

            # Parse the collection
            collection = self._parse_identifier_or_keyword_as_identifier()
            if not collection:
                error_stmt = self._report_error_and_recover(
                    template=EXPECTED_IDENTIFIER_AFTER,
                    expected_token=TokenType.MISC_IDENT,
                    what="collection",
                    after="'Add ... to'",
                )
                assert isinstance(error_stmt, ErrorStatement)
                return error_stmt
            self._advance_tokens()

            # Check if this is dictionary syntax (with value)
            current_token_type = self._current_token.type if self._current_token else None
            if current_token_type == TokenType.KW_WITH:
                self._advance_tokens()

                # Skip "value" if present
                if self._current_token and self._current_token.type == TokenType.KW_VALUE:
                    self._advance_tokens()

                # Parse the actual value
                dict_value = self._parse_expression(Precedence.LOWEST)
                self._advance_tokens()

                # Expect a period
                if self._current_token and self._current_token.type != TokenType.PUNCT_PERIOD:
                    if error := self._expect_token(TokenType.PUNCT_PERIOD):
                        assert isinstance(error, ErrorStatement)
                        return error

                return CollectionMutationStatement(
                    token=start_token,
                    operation="add",
                    collection=collection,
                    value=dict_value,
                    position=first_value,  # The key
                    position_type="key",
                )
            else:
                # Regular array syntax
                # Expect a period
                if self._current_token and self._current_token.type != TokenType.PUNCT_PERIOD:
                    if error := self._expect_token(TokenType.PUNCT_PERIOD):
                        assert isinstance(error, ErrorStatement)
                        return error

                return CollectionMutationStatement(
                    token=start_token,
                    operation="add",
                    collection=collection,
                    value=first_value,
                )

        elif operation == "remove":
            # Two syntaxes:
            # 1. Remove _value_ from `list`. (for arrays - removes by value)
            # 2. Remove "key" from `dict`. (for named lists - removes by key)
            # Note: The semantic analyzer determines which one based on the collection type.

            value = self._parse_expression(Precedence.LOWEST)
            self._advance_tokens()

            # Skip "from"
            if self._current_token and self._current_token.type == TokenType.KW_FROM:
                self._advance_tokens()

            # Parse the collection
            collection = self._parse_identifier_or_keyword_as_identifier()
            if not collection:
                return ErrorStatement(
                    token=start_token, message="Expected collection identifier after 'Remove ... from'"
                )
            self._advance_tokens()

            # Expect a period
            if self._current_token and self._current_token.type != TokenType.PUNCT_PERIOD:
                if error := self._expect_token(TokenType.PUNCT_PERIOD):
                    assert isinstance(error, ErrorStatement)
                    return error

            return CollectionMutationStatement(
                token=start_token,
                operation="remove",
                collection=collection,
                value=value,
            )

        elif operation == "insert":
            # Insert _value_ at position _3_ in `list`.
            value = self._parse_expression(Precedence.LOWEST)
            self._advance_tokens()

            # Skip "at"
            if self._current_token and self._current_token.literal and self._current_token.literal.lower() == "at":
                self._advance_tokens()

            # Skip "position" if present
            if self._current_token and self._current_token.literal.lower() == "position":
                self._advance_tokens()

            # Parse the position (should be a number)
            position = self._parse_expression(Precedence.LOWEST)
            self._advance_tokens()

            # Skip "in"
            if self._current_token and self._current_token.type == TokenType.KW_IN:
                self._advance_tokens()

            # Parse the collection
            collection = self._parse_identifier_or_keyword_as_identifier()
            if not collection:
                return ErrorStatement(
                    token=start_token, message="Expected collection identifier after 'Insert ... at position ... in'"
                )
            self._advance_tokens()

            # Expect a period
            if self._current_token and self._current_token.type != TokenType.PUNCT_PERIOD:
                if error := self._expect_token(TokenType.PUNCT_PERIOD):
                    assert isinstance(error, ErrorStatement)
                    return error

            return CollectionMutationStatement(
                token=start_token,
                operation="insert",
                collection=collection,
                value=value,
                position=position,
                position_type="numeric",
            )

        elif operation == "clear":
            # Clear `collection`.
            # Parse the collection identifier
            collection = self._parse_identifier_or_keyword_as_identifier()
            if not collection:
                error_stmt = self._report_error_and_recover(
                    template=EXPECTED_IDENTIFIER_AFTER,
                    expected_token=TokenType.MISC_IDENT,
                    what="collection",
                    after="'Clear'",
                )
                assert isinstance(error_stmt, ErrorStatement)
                return error_stmt

            # Advance past the identifier to check for the period
            self._advance_tokens()

            # Expect a period
            if self._current_token and self._current_token.type != TokenType.PUNCT_PERIOD:
                if error := self._expect_token(TokenType.PUNCT_PERIOD):
                    assert isinstance(error, ErrorStatement)
                    return error

            return CollectionMutationStatement(
                token=start_token,
                operation="clear",
                collection=collection,
            )

        elif operation == "update":
            # Update "key" in `dict` to _value_.
            # Parse the key (should be a string literal)
            key = self._parse_expression(Precedence.LOWEST)
            self._advance_tokens()

            # Skip "in"
            if self._current_token and self._current_token.type == TokenType.KW_IN:
                self._advance_tokens()

            # Parse the collection
            collection = self._parse_identifier_or_keyword_as_identifier()
            if not collection:
                error_stmt = self._report_error_and_recover(
                    template=EXPECTED_IDENTIFIER_AFTER,
                    expected_token=TokenType.MISC_IDENT,
                    what="collection",
                    after="'Update ... in'",
                )
                assert isinstance(error_stmt, ErrorStatement)
                return error_stmt
            self._advance_tokens()

            # Skip "to"
            if self._current_token and self._current_token.type == TokenType.KW_TO:
                self._advance_tokens()

            # Parse the value
            value = self._parse_expression(Precedence.LOWEST)
            self._advance_tokens()

            # Expect a period
            if self._current_token and self._current_token.type != TokenType.PUNCT_PERIOD:
                if error := self._expect_token(TokenType.PUNCT_PERIOD):
                    assert isinstance(error, ErrorStatement)
                    return error

            return CollectionMutationStatement(
                token=start_token,
                operation="update",
                collection=collection,
                value=value,
                position=key,  # Using the position field to store the key
                position_type="key",
            )

        # Should not reach here
        error_stmt = self._report_error_and_recover(
            template=UNHANDLED_OPERATION, what="collection mutation", operation=operation
        )
        assert isinstance(error_stmt, ErrorStatement)
        return error_stmt

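    # Illustrative note (added): the operation keyword alone selects the branch,
    # so 'Add _4_ to `nums`.' and 'Add "port" to `cfg` with value _8080_.' both
    # enter the "add" arm; the 'with value' tail is what flips the node into
    # key/value (named-list) form.
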
    def _parse_function_call_expression(self) -> Expression:
        """Parse a function call as an expression (for use with 'using' in Set statements).

        Syntax: function_name [with <arguments>] or function_name [where <named arguments>].

        Returns:
            A CallExpression AST node that will be evaluated as an expression.
        """
        assert self._current_token is not None

        # Parse the function name (must be an identifier in backticks)
        if self._current_token and self._current_token.type == TokenType.MISC_IDENT:
            function_name = Identifier(self._current_token, self._current_token.literal)
            call_token = self._current_token
            self._advance_tokens()
        else:
            # Error: expected an identifier for the function name
            result = self._report_error_and_recover(
                template=EXPECTED_FUNCTION_NAME,
                token_type=str(self._current_token.type) if self._current_token else "EOF",
                skip_recovery=True,
                is_expression=True,
            )
            assert isinstance(result, ErrorExpression)
            return result

        # Create the CallExpression
        call_expression = CallExpression(token=call_token, function_name=function_name)

        # Check for arguments
        if self._current_token and self._current_token.type == TokenType.KW_WITH:
            # Positional arguments
            with_token = self._current_token
            self._advance_tokens()
            call_expression.arguments = self._parse_positional_arguments(with_token)
        elif self._current_token and self._current_token.type == TokenType.KW_WHERE:
            # Named arguments
            where_token = self._current_token
            self._advance_tokens()
            call_expression.arguments = self._parse_named_arguments(where_token)

        return call_expression

    def _parse_call_statement(self) -> CallStatement | ErrorStatement:
        """Parse a Use statement.

        Syntax: use <function> [with <arguments>] or use <function> [where <named arguments>].

        Returns:
            A CallStatement AST node.
        """
        assert self._current_token is not None
        assert self._current_token.type == TokenType.KW_USE

        statement_token = self._current_token

        # Move past 'use'
        self._advance_tokens()

        # Parse the function name (must be an identifier in backticks)
        if self._current_token and self._current_token.type == TokenType.MISC_IDENT:
            function_name = Identifier(self._current_token, self._current_token.literal)
            self._advance_tokens()
        else:
            # Record an error for a missing or invalid function name
            error_token = self._current_token or Token(TokenType.MISC_EOF, "", 0, 0)
            self._report_error_and_recover(
                template=EXPECTED_FUNCTION_NAME,
                token_type=str(error_token.type),
                skip_recovery=True,  # Continue parsing to find more errors
            )
            function_name = None

        # Check for the 'with' or 'where' keyword for arguments
        arguments: Arguments | None = None
        if self._current_token and self._current_token.type == TokenType.KW_WITH:
            # 'with' is for positional arguments
            with_token = self._current_token
            self._advance_tokens()  # Move past 'with'

            # Parse positional arguments
            arguments = self._parse_positional_arguments(with_token)

        elif self._current_token and self._current_token.type == TokenType.KW_WHERE:
            # 'where' is for named arguments
            where_token = self._current_token
            self._advance_tokens()  # Move past 'where'

            # Parse named arguments
            arguments = self._parse_named_arguments(where_token)

        # Create the Call statement
        call_statement = CallStatement(statement_token, function_name, arguments)

        # Expect a period at the end
        if self._peek_token and self._peek_token.type == TokenType.PUNCT_PERIOD:
            self._advance_tokens()
        # But if we're already at a period (after error recovery), don't expect another
        elif self._current_token and self._current_token.type != TokenType.PUNCT_PERIOD:
            if error := self._expect_token(TokenType.PUNCT_PERIOD):
                assert isinstance(error, ErrorStatement)
                return error

        return call_statement

2428
|
+
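    # Illustrative examples (hypothetical function names, following the syntax in the
    # docstring above):
    #
    #   Use `send report`.
    #   Use `greet` with _"Alice"_.
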
    def _parse_argument_value(self) -> Expression | None:
        """Parse a single argument value (literal or identifier).

        Returns:
            The parsed expression or None if invalid.
        """
        if not self._current_token:
            return None

        token = self._current_token

        # Parse based on token type
        value: Expression | None = None
        if token.type == TokenType.MISC_IDENT:
            # Identifier
            value = Identifier(token, token.literal)
            self._advance_tokens()
            return value
        elif token.type == TokenType.LIT_WHOLE_NUMBER:
            # Integer literal
            int_value = self._parse_integer_literal()
            self._advance_tokens()
            return int_value
        elif token.type == TokenType.LIT_FLOAT:
            # Float literal
            float_value = self._parse_float_literal()
            self._advance_tokens()
            return float_value
        elif token.type == TokenType.LIT_TEXT:
            # String literal
            str_value = self._parse_string_literal()
            self._advance_tokens()
            return str_value
        elif token.type == TokenType.LIT_URL:
            # URL literal
            url_value = self._parse_url_literal()
            self._advance_tokens()
            return url_value
        elif token.type in (TokenType.LIT_YES, TokenType.LIT_NO):
            # Boolean literal
            bool_value = self._parse_boolean_literal()
            self._advance_tokens()
            return bool_value
        elif token.type == TokenType.KW_EMPTY:
            # Empty literal
            empty_value = self._parse_empty_literal()
            self._advance_tokens()
            return empty_value
        else:
            # Unknown token type for argument
            self._report_error_and_recover(
                template=INVALID_ARGUMENT_VALUE,
                literal=token.literal,
                skip_recovery=True,  # We'll handle advancement manually
            )
            self._advance_tokens()  # Skip the invalid token
            return None

    def _parse_positional_arguments(self, with_token: Token) -> Arguments:
        """Parse positional arguments after 'with' keyword.

        Syntax: with _value1_, _value2_

        Returns:
            An Arguments AST node with positional arguments.
        """
        arguments = Arguments(with_token)

        while self._current_token and self._current_token.type not in (
            TokenType.PUNCT_PERIOD,
            TokenType.MISC_EOF,
        ):
            # Parse argument value
            value = self._parse_argument_value()
            if value:
                arguments.positional.append(value)

            # Check for comma (optional)
            if self._current_token and self._current_token.type == TokenType.PUNCT_COMMA:
                self._advance_tokens()
            # Check for 'and' (optional)
            elif self._current_token and self._current_token.type == TokenType.KW_AND:
                self._advance_tokens()
            # If no comma or 'and', and we're not at the end, check if another argument follows
            elif self._current_token and self._current_token.type not in (
                TokenType.PUNCT_PERIOD,
                TokenType.MISC_EOF,
            ):
                # Check if this looks like another argument (identifier or literal)
                if self._current_token.type in (
                    TokenType.MISC_IDENT,
                    TokenType.LIT_WHOLE_NUMBER,
                    TokenType.LIT_FLOAT,
                    TokenType.LIT_TEXT,
                    TokenType.LIT_YES,
                    TokenType.LIT_NO,
                    TokenType.KW_EMPTY,
                ):
                    # Report error but continue parsing (error recovery)
                    self._report_error_and_recover(
                        template=MISSING_COMMA_BETWEEN_ARGS,
                        skip_recovery=True,  # Continue parsing the next argument
                    )
                    continue
                else:
                    # Not an argument, stop parsing
                    break

        return arguments

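    # Illustrative example (hypothetical snippet): positional arguments may be
    # separated by commas, 'and', or both, per the separator handling in the loop
    # above; a missing separator between two argument-like tokens is reported but
    # recovered from.
    #
    #   Use `plot` with _1_, _2_ and _3_.
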
    def _parse_named_arguments(self, where_token: Token) -> Arguments:
        """Parse named arguments after 'where' keyword.

        Syntax: where `param1` is _value1_, `param2` is _value2_

        Returns:
            An Arguments AST node with named arguments.
        """
        arguments = Arguments(where_token)

        while self._current_token and self._current_token.type not in (
            TokenType.PUNCT_PERIOD,
            TokenType.MISC_EOF,
        ):
            # Parse parameter name (should be an identifier in backticks)
            name_expr: Identifier | None = None
            if self._current_token and self._current_token.type == TokenType.MISC_IDENT:
                name_expr = Identifier(self._current_token, self._current_token.literal)
                self._advance_tokens()
            else:
                # Error: expected identifier
                self._report_error_and_recover(
                    template=EXPECTED_TOKEN,
                    expected_token=TokenType.MISC_IDENT,
                    skip_recovery=True,
                    token="parameter name",
                    got_token_type=self._current_token.type.name if self._current_token else "EOF",
                )
                break

            # Expect 'is' keyword - mypy doesn't realize _advance_tokens() changes _current_token
            assert self._current_token is not None  # Help mypy understand
            if self._current_token.type == TokenType.KW_IS:
                self._advance_tokens()
            else:
                self._report_error_and_recover(
                    template=EXPECTED_TOKEN_AFTER,
                    expected_token=TokenType.KW_IS,
                    skip_recovery=True,
                    expected="'is' keyword",
                    after="parameter name",
                )
                break

            # Parse the value
            value = self._parse_argument_value()

            # Add to named arguments if both name and value are valid
            if name_expr and value:
                arguments.named.append((name_expr, value))

            # Check for comma (optional)
            if self._current_token and self._current_token.type == TokenType.PUNCT_COMMA:
                self._advance_tokens()
            # Check for 'and' (optional)
            elif self._current_token and self._current_token.type == TokenType.KW_AND:
                self._advance_tokens()
            # If no comma or 'and', and we're not at the end, break
            elif self._current_token and self._current_token.type not in (
                TokenType.PUNCT_PERIOD,
                TokenType.MISC_EOF,
            ):
                break

        return arguments

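    # Illustrative example (hypothetical names): each named argument is a backticked
    # parameter name, 'is', then a value, per the docstring syntax above.
    #
    #   Use `resize` where `width` is _800_, `height` is _600_.
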
    def _parse_if_statement(self) -> IfStatement | ErrorStatement:
        """Parse an if statement with block statements.

        Expects: if/when/whenever <condition> [then]: <block> [else/otherwise: <block>]

        Returns:
            An IfStatement AST node.
        """
        assert self._current_token is not None
        if_statement = IfStatement(token=self._current_token)

        # Advance past 'if', 'when', or 'whenever'
        self._advance_tokens()

        # Parse the condition expression
        if_statement.condition = self._parse_expression(Precedence.LOWEST)

        # Check for optional comma before 'then'
        if self._peek_token and self._peek_token.type == TokenType.PUNCT_COMMA:
            self._advance_tokens()  # Skip the comma

        # Check for optional 'then' keyword
        if self._peek_token and self._peek_token.type == TokenType.KW_THEN:
            self._advance_tokens()  # Move to 'then'

        # Expect colon
        if error := self._expect_token(TokenType.PUNCT_COLON, "if condition"):
            assert isinstance(error, ErrorStatement)
            return error

        # Parse the consequence block
        # If we're inside a block, nested if statements should have deeper blocks
        expected_depth = self._block_depth + 1
        if_statement.consequence = self._parse_block_statement(expected_depth)

        # Check if the consequence block is empty - this is an error
        if not if_statement.consequence or len(if_statement.consequence.statements) == 0:
            self._report_error_and_recover(
                template=EMPTY_IF_CONSEQUENCE,
                skip_recovery=True,  # No recovery needed, continue parsing
            )

        # Check for else/otherwise clause
        if self._current_token and self._current_token.type == TokenType.KW_ELSE:
            # Check if next token is colon
            if self._peek_token and self._peek_token.type == TokenType.PUNCT_COLON:
                self._advance_tokens()  # Move past else to colon
                self._advance_tokens()  # Move past colon
            else:
                # No colon after else, return without alternative
                return if_statement

            # Parse the alternative block
            if_statement.alternative = self._parse_block_statement(expected_depth)

            # Check if the alternative block is empty - this is also an error
            if not if_statement.alternative or len(if_statement.alternative.statements) == 0:
                self._report_error_and_recover(
                    template=EMPTY_ELSE_BLOCK,
                    skip_recovery=True,  # No recovery needed, continue parsing
                )
        elif self._block_depth == 0:
            # No else clause and we're at top level (not inside a block)
            # Check if we're at a '>' token that was part of the block we just parsed
            # If so, don't rewind as it would re-parse block content
            if (
                self._current_token
                and self._current_token.type == TokenType.OP_GT
                and if_statement.consequence
                and if_statement.consequence.depth > 0
            ):
                # We're at a '>' that was part of the block, don't rewind
                pass
            elif self._current_token and self._current_token.type != TokenType.MISC_EOF:
                # With streaming, we can't back up tokens
                # The block parsing should have left us in the right position
                pass

        return if_statement

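    # Illustrative example (hypothetical condition and statements): a conditional with
    # '>'-prefixed block bodies, in the shape the docstring above describes.
    #
    #   If `count` is greater than _10_, then:
    #   > Say _"many"_.
    #   Else:
    #   > Say _"few"_.
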
    def _parse_while_statement(self) -> WhileStatement | ErrorStatement:
        """Parse a while loop statement.

        Expects: while <condition>: <block>

        Returns:
            A WhileStatement AST node.
        """
        assert self._current_token is not None
        while_statement = WhileStatement(token=self._current_token)

        # Advance past 'while'
        self._advance_tokens()

        # Parse the condition expression
        while_statement.condition = self._parse_expression(Precedence.LOWEST)

        # Expect colon
        if error := self._expect_token(TokenType.PUNCT_COLON, "while condition"):
            assert isinstance(error, ErrorStatement)
            return error

        # Parse the body block
        expected_depth = self._block_depth + 1
        while_statement.body = self._parse_block_statement(expected_depth)

        # Check if the body block is empty - this is an error
        if not while_statement.body or len(while_statement.body.statements) == 0:
            self._report_error_and_recover(
                template=EMPTY_WHILE_BODY,
                skip_recovery=True,  # No recovery needed, continue parsing
            )

        return while_statement

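    # Illustrative example (hypothetical condition): a while loop with a one-statement
    # body, in the shape the docstring above describes.
    #
    #   While `count` is less than _10_:
    #   > Set `count` to `count` + _1_.
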
    def _parse_for_each_statement(self) -> ForEachStatement | ErrorStatement:
        """Parse a for-each loop statement.

        Expects: for each <item> in <collection>: <block>

        Returns:
            A ForEachStatement AST node.
        """
        assert self._current_token is not None
        for_statement = ForEachStatement(token=self._current_token)

        # Advance past 'for'
        self._advance_tokens()

        # Expect 'each'
        if self._current_token and self._current_token.type != TokenType.KW_EACH:
            if error := self._expect_token(TokenType.KW_EACH, "'for' keyword"):
                assert isinstance(error, ErrorStatement)
                return error
        self._advance_tokens()  # Move past 'each'

        # Parse the loop variable (item)
        for_statement.item = self._parse_identifier_or_keyword_as_identifier()
        if not for_statement.item:
            error_stmt = self._report_error_and_recover(
                template=EXPECTED_IDENTIFIER_AFTER,
                context="'each'",
                skip_recovery=True,
            )
            assert isinstance(error_stmt, ErrorStatement)
            return error_stmt
        self._advance_tokens()

        # Expect 'in'
        if self._current_token and self._current_token.type != TokenType.KW_IN:
            if error := self._expect_token(TokenType.KW_IN, "loop variable"):
                assert isinstance(error, ErrorStatement)
                return error
        self._advance_tokens()  # Move past 'in'

        # Parse the collection expression
        for_statement.collection = self._parse_expression(Precedence.LOWEST)

        # Expect colon
        if error := self._expect_token(TokenType.PUNCT_COLON, "for-each header"):
            assert isinstance(error, ErrorStatement)
            return error

        # Parse the body block
        expected_depth = self._block_depth + 1
        for_statement.body = self._parse_block_statement(expected_depth)

        # Check if the body block is empty - this is an error
        if not for_statement.body or len(for_statement.body.statements) == 0:
            self._report_error_and_recover(
                template=EMPTY_FOR_EACH_BODY,
                skip_recovery=True,  # No recovery needed, continue parsing
            )

        return for_statement

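    # Illustrative example (hypothetical names): a for-each loop over a collection
    # variable, in the shape the docstring above describes.
    #
    #   For each `item` in `shopping list`:
    #   > Say `item`.
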
    def _parse_action_interaction_or_utility(
        self,
    ) -> ActionStatement | InteractionStatement | UtilityStatement | ErrorStatement:
        """Parse an Action, Interaction, or Utility statement.

        Expected format:
        ### **Action**: `name`
        or
        ### **Interaction**: `name`
        or
        ### **Utility**: `name`

        <details>
        <summary>Description</summary>
        > statements
        </details>

        Returns:
            ActionStatement, InteractionStatement, or UtilityStatement node, or ErrorStatement if parsing fails.
        """
        assert self._current_token is not None
        assert self._current_token.type == TokenType.PUNCT_HASH_TRIPLE

        # Save the ### token for the statement
        # hash_token = self._current_token  # Currently unused, but may be needed for error reporting

        # Move past ###
        self._advance_tokens()

        # Expect **Action**, **Interaction**, or **Utility** (wrapped keyword)
        if not self._current_token or self._current_token.type not in (
            TokenType.KW_ACTION,
            TokenType.KW_INTERACTION,
            TokenType.KW_UTILITY,
        ):
            error_stmt = self._report_error_and_recover(
                template=EXPECTED_TOKEN_AFTER, expected="**Action**, **Interaction**, or **Utility**", after="###"
            )
            assert isinstance(error_stmt, ErrorStatement)
            return error_stmt

        statement_type = self._current_token.type
        keyword_token = self._current_token

        # Move past Action/Interaction/Utility keyword
        self._advance_tokens()

        # Expect colon - should be at current position
        if not self._current_token or self._current_token.type != TokenType.PUNCT_COLON:
            error_stmt = self._report_error_and_recover(
                template=EXPECTED_TOKEN_AFTER,
                expected_token=TokenType.PUNCT_COLON,
                expected="':'",
                after="Action/Interaction/Utility",
            )
            assert isinstance(error_stmt, ErrorStatement)
            return error_stmt

        # Move past colon
        self._advance_tokens()

        # Expect backtick-wrapped name
        if not self._current_token or self._current_token.type != TokenType.MISC_IDENT:
            return self._report_error_and_recover(
                template=EXPECTED_TOKEN, expected_token=TokenType.MISC_IDENT, token="identifier in backticks for name"
            )

        name = Identifier(self._current_token, self._current_token.literal)
        self._advance_tokens()

        # Now expect <details> tag - should be at current position
        if not self._current_token or self._current_token.type != TokenType.TAG_DETAILS_START:
            return self._report_error_and_recover(
                template=EXPECTED_TOKEN,
                token="<details> tag",
                got_token_type=self._current_token.type.name if self._current_token else "EOF",
            )

        # Move past <details>
        self._advance_tokens()

        # Check for <summary> tag and extract description
        description = ""
        if self._current_token and self._current_token.type == TokenType.TAG_SUMMARY_START:
            self._advance_tokens()
            # The next token should be a comment with the description
            if self._current_token and self._current_token.type == TokenType.MISC_COMMENT:
                description = self._current_token.literal
                self._advance_tokens()
            # Expect </summary>
            if self._current_token and self._current_token.type == TokenType.TAG_SUMMARY_END:
                self._advance_tokens()

        # Parse the body (block of statements with > prefix)
        body = self._parse_block_statement()

        # Expect </details> tag - should be at current position after block parsing
        if self._current_token and self._current_token.type == TokenType.TAG_DETAILS_END:
            self._advance_tokens()
        else:
            # If we're not at </details>, something went wrong with block parsing
            # Create an error but don't panic recover
            if self._current_token:
                # Check if this is likely a missing depth transition issue
                if self._current_token.type == TokenType.KW_RETURN and self._block_depth > 0:
                    # This looks like a "Give back" statement after nested blocks
                    # The user likely forgot to add a transition line
                    nested_depth = ">" * (self._block_depth + 1)  # The depth they were at (e.g., >>)
                    parent_depth = ">" * self._block_depth  # The depth they need to transition to (e.g., >)
                    self._report_error_and_recover(
                        template=MISSING_DEPTH_TRANSITION,
                        nested_depth=nested_depth,
                        parent_depth=parent_depth,
                        token_type=self._current_token.type.name,
                        skip_recovery=True,  # Continue parsing
                    )
                else:
                    self._report_error_and_recover(
                        template=EXPECTED_DETAILS_CLOSE,
                        token_type=self._current_token.type.name,
                        skip_recovery=True,  # Continue parsing
                    )

        # Check for parameter sections (#### Inputs: and #### Outputs:)
        inputs: list[Parameter] = []
        outputs: list[Output] = []

        # Check if we have #### for parameter sections
        if self._current_token and self._current_token.type == TokenType.PUNCT_HASH_QUAD:
            # Parse parameter sections
            inputs, outputs = self._parse_parameter_sections()

        # Create and return the appropriate statement
        if statement_type == TokenType.KW_ACTION:
            return ActionStatement(
                keyword_token, name, inputs=inputs, outputs=outputs, body=body, description=description
            )
        elif statement_type == TokenType.KW_INTERACTION:
            return InteractionStatement(
                keyword_token, name, inputs=inputs, outputs=outputs, body=body, description=description
            )
        elif statement_type == TokenType.KW_UTILITY:
            return UtilityStatement(
                keyword_token, name, inputs=inputs, outputs=outputs, body=body, description=description
            )
        else:
            # This should never happen since we check for valid types above
            return self._report_error_and_recover(template=UNEXPECTED_STATEMENT, type=statement_type)

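    # Illustrative example (hypothetical name, body, and parameters), assembled from
    # the docstring above and the parameter-section formats documented below:
    #
    #   ### **Action**: `add numbers`
    #
    #   <details>
    #   <summary>Adds two numbers.</summary>
    #
    #   > Give back `a` + `b`.
    #
    #   </details>
    #
    #   #### Inputs:
    #   - `a` **as** Whole Number (required)
    #   - `b` **as** Whole Number (required)
    #
    #   #### Outputs:
    #   - `sum` **as** Whole Number
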
    def _parse_block_statement(self, expected_depth: int = 1) -> BlockStatement:
        """Parse a block of statements marked by '>' symbols.

        A block contains statements that start with one or more '>' symbols.
        The number of '>' symbols determines the depth of the block.
        The block ends when we encounter a statement with fewer '>' symbols
        or a statement without '>' symbols.

        Args:
            expected_depth: The expected depth for this block (number of '>' symbols).
                Defaults to 1 for top-level blocks.

        Returns:
            A BlockStatement AST node.
        """
        assert self._current_token is not None
        block_token = self._current_token
        block = BlockStatement(token=block_token, depth=expected_depth)

        # Track that we're entering a block
        self._block_depth += 1

        # Tell the token buffer we're in a block
        if self._token_buffer:
            self._token_buffer.set_block_context(True)

        # If we're at a colon, it's the start of a block - advance past it
        if self._current_token.type == TokenType.PUNCT_COLON:
            self._advance_tokens()

        # Parse statements in the block
        while self._current_token and self._current_token.type != TokenType.MISC_EOF:
            # Note: With streaming tokens, we can't save/restore positions

            # Count the depth at the start of the current line
            current_depth = 0
            original_line = self._current_token.line if self._current_token else 0

            # Check if we're at '>' tokens
            if self._current_token.type == TokenType.OP_GT:
                # Count '>' tokens only on the current line
                current_line = self._current_token.line
                while (
                    self._current_token
                    and self._current_token.type == TokenType.OP_GT
                    and self._current_token.line == current_line
                ):
                    current_depth += 1
                    self._advance_tokens()

            # Check depth against expected depth
            if current_depth == 0:
                # No '>' means we've exited the block
                break
            elif current_depth < expected_depth:
                # We've exited the block due to lower depth

                # TODO: Fix bug where statements after nested if blocks (e.g., after `> >`)
                # are not parsed as part of the parent block. Currently an empty `>` line
                # is required after the nested block for parsing to continue correctly.
                # Failing example:
                #   > If condition then:
                #   > > Give back value.
                #   > Set `var` to _1_.  # This line may not be parsed correctly
                # Working example:
                #   > If condition then:
                #   > > Give back value.
                #   >
                #   > Set `var` to _1_.  # This line is parsed correctly
                # Both examples should parse correctly.

                # We've already consumed the '>' tokens while counting depth
                # The parent block needs to handle this line's content
                # But first check if this is an empty line (only '>')
                if self._current_token and self._current_token.line != original_line:
                    # Empty line - we consumed all tokens on the line
                    # Just break and let parent continue from next line
                    break
                else:
                    # Not empty - there's content after the '>'
                    # With streaming, we can't back up - the tokens are already consumed
                    # This means nested blocks need special handling
                    break
            elif current_depth > expected_depth:
                # Nested block or error - for now treat as error
                self._report_error_and_recover(
                    template=UNEXPECTED_BLOCK_DEPTH,
                    expected=str(expected_depth),
                    actual=str(current_depth),
                    skip_recovery=True,  # We'll handle recovery manually
                )
                # Skip to next line
                while self._current_token and self._current_token.type not in (
                    TokenType.PUNCT_PERIOD,
                    TokenType.MISC_EOF,
                    TokenType.OP_GT,
                ):
                    self._advance_tokens()
                continue

            # After depth check, check if this was an empty line (just '>' with no content)
            # Empty line is when we counted '>' but are no longer on the same line
            if current_depth > 0 and self._current_token and self._current_token.line != original_line:
                # The line only had '>' markers, skip to next line
                continue

            # Check for tokens that would indicate we've left the block
            if self._current_token and self._current_token.type in (
                TokenType.MISC_EOF,
                TokenType.KW_ELSE,  # 'else' would be outside the block
            ):
                break  # We've exited the block

            # Parse the statement
            statement = self._parse_statement()
            block.statements.append(statement)

            # Skip the period if present
            if self._current_token and self._current_token.type == TokenType.PUNCT_PERIOD:
                self._advance_tokens()

        # Track that we're exiting a block
        self._block_depth -= 1

        # Tell the token buffer we're no longer in a block
        if self._token_buffer:
            self._token_buffer.set_block_context(self._block_depth > 0)

        return block

    def _parse_statement(self) -> Statement:
        """Parse a single statement.

        Determines the statement type based on the current token and
        delegates to the appropriate parsing method.

        Returns:
            A Statement AST node (may be an ErrorStatement if parsing fails).
        """
        assert self._current_token is not None

        stmt_funcs = self._register_statement_functions()
        if self._current_token.type in stmt_funcs:
            return stmt_funcs[self._current_token.type]()
        else:
            return self._parse_expression_statement()

    def _register_infix_funcs(self) -> InfixParseFuncs:
        """Register infix parsing functions for each token type.

        Infix parsing functions handle expressions where an operator appears
        between operands (e.g., "1 + 2", "a * b"). Each function takes the
        left-hand expression as an argument and returns the complete expression.

        The parser uses these functions when it encounters a token in the middle
        of an expression. For example, when parsing "1 + 2", after parsing "1",
        the parser sees "+" and calls the registered infix function for PLUS,
        passing "1" as the left operand.

        Returns:
            Dictionary mapping TokenType to InfixParseFunc callbacks.
            Each callback signature: (left: Expression) -> Optional[Expression]

        Example:
            A minimal registration might look like:
            return {
                TokenType.OP_PLUS: self._parse_infix_expression,
                TokenType.OP_MINUS: self._parse_infix_expression,
                TokenType.OP_MULTIPLY: self._parse_infix_expression,
                TokenType.DELIM_LPAREN: self._parse_call_expression,
            }
        """
        return {
            # Arithmetic operators
            TokenType.OP_PLUS: self._parse_infix_expression,
            TokenType.OP_MINUS: self._parse_infix_expression,
            TokenType.OP_STAR: self._parse_infix_expression,
            TokenType.OP_DIVISION: self._parse_infix_expression,
            TokenType.OP_CARET: self._parse_infix_expression,
            # Comparison operators
            TokenType.OP_EQ: self._parse_infix_expression,
            TokenType.OP_NOT_EQ: self._parse_infix_expression,
            TokenType.OP_STRICT_EQ: self._parse_infix_expression,
            TokenType.OP_STRICT_NOT_EQ: self._parse_infix_expression,
            TokenType.OP_LT: self._parse_infix_expression,
            TokenType.OP_GT: self._parse_infix_expression,
            TokenType.OP_LTE: self._parse_infix_expression,
            TokenType.OP_GTE: self._parse_infix_expression,
            # Logical operators
            TokenType.KW_AND: self._parse_infix_expression,
            TokenType.KW_OR: self._parse_infix_expression,
            # Conditional/ternary expressions
            TokenType.KW_IF: self._parse_conditional_expression,
            # Dictionary extraction operators
            TokenType.OP_THE_NAMES_OF: self._parse_dict_extraction_infix,
            TokenType.OP_THE_CONTENTS_OF: self._parse_dict_extraction_infix,
        }

    def _register_prefix_funcs(self) -> PrefixParseFuncs:
        """Register prefix parsing functions for each token type.

        Prefix parsing functions handle expressions that start with a specific
        token type. This includes literals (numbers, strings), identifiers,
        prefix operators (e.g., "-5", "not true"), and grouped expressions
        (parentheses).

        The parser calls these functions when it encounters a token at the
        beginning of an expression. For example, when parsing "-5", the parser
        sees "-" and calls the registered prefix function for MINUS.

        Returns:
            Dictionary mapping TokenType to PrefixParseFunc callbacks.
            Each callback signature: () -> Optional[Expression]

        Example:
            A minimal registration might look like:
            return {
                TokenType.LIT_IDENTIFIER: self._parse_identifier,
                TokenType.LIT_NUMBER: self._parse_number_literal,
                TokenType.LIT_TEXT: self._parse_string_literal,
                TokenType.OP_MINUS: self._parse_prefix_expression,
                TokenType.KW_NOT: self._parse_prefix_expression,
                TokenType.DELIM_LPAREN: self._parse_grouped_expression,
            }
        """
        return {
            TokenType.MISC_IDENT: self._parse_identifier,
            TokenType.LIT_WHOLE_NUMBER: self._parse_integer_literal,
            TokenType.LIT_FLOAT: self._parse_float_literal,
            TokenType.LIT_TEXT: self._parse_string_literal,
            TokenType.LIT_URL: self._parse_url_literal,
            TokenType.LIT_YES: self._parse_boolean_literal,
            TokenType.LIT_NO: self._parse_boolean_literal,
            TokenType.KW_EMPTY: self._parse_empty_literal,
            TokenType.OP_MINUS: self._parse_prefix_expression,
            TokenType.KW_NEGATION: self._parse_prefix_expression,
            TokenType.DELIM_LPAREN: self._parse_grouped_expression,
            # List access patterns
            TokenType.KW_FIRST: self._parse_ordinal_list_access,
            TokenType.KW_SECOND: self._parse_ordinal_list_access,
            TokenType.KW_THIRD: self._parse_ordinal_list_access,
            TokenType.KW_LAST: self._parse_ordinal_list_access,
            TokenType.KW_ITEM: self._parse_numeric_list_access,
            # Handle 'the' stopword for list access
            TokenType.MISC_STOPWORD: self._parse_stopword_expression,
            # Handle possessive syntax
            TokenType.PUNCT_APOSTROPHE_S: self._parse_possessive_access,
            # Dictionary extraction operators can also be prefix
            TokenType.OP_THE_NAMES_OF: self._parse_dict_extraction_prefix,
            TokenType.OP_THE_CONTENTS_OF: self._parse_dict_extraction_prefix,
        }

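    # Illustrative sketch (hypothetical attribute names, not this parser's actual
    # implementation): the prefix/infix registries above drive a standard Pratt
    # (top-down operator precedence) loop of roughly this shape:
    #
    #   def parse_expression(self, precedence):
    #       prefix = self._prefix_funcs.get(self._current_token.type)
    #       if prefix is None:
    #           return None  # no way to start an expression here
    #       left = prefix()
    #       while precedence < self._peek_precedence():
    #           infix = self._infix_funcs.get(self._peek_token.type)
    #           if infix is None:
    #               return left
    #           self._advance_tokens()
    #           left = infix(left)  # fold the operator into the left operand
    #       return left
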
    @staticmethod
    def _register_postfix_funcs() -> PostfixParseFuncs:
        """Register postfix parsing functions for each token type.

        Postfix parsing functions handle expressions where an operator appears
        after the operand (e.g., "i++", "factorial!", array indexing "arr[0]").
        Each function takes the left-hand expression as an argument and returns
        the complete expression.

        The parser uses these functions when it encounters a token after a
        complete expression. For example, when parsing "i++", after parsing "i",
        the parser sees "++" and calls the registered postfix function for
        INCREMENT, passing "i" as the operand.

        Returns:
            Dictionary mapping TokenType to PostfixParseFunc callbacks.
            Each callback signature: (left: Expression) -> Optional[Expression]

        Example:
            When implemented, might look like:
            return {
                TokenType.OP_INCREMENT: self._parse_postfix_expression,
                TokenType.OP_DECREMENT: self._parse_postfix_expression,
                TokenType.OP_FACTORIAL: self._parse_postfix_expression,
                TokenType.DELIM_LBRACKET: self._parse_index_expression,
                TokenType.PUNCT_QUESTION: self._parse_ternary_expression,
            }
        """
        return {}

    def _register_statement_functions(self) -> dict[TokenType, Callable[[], Statement]]:
        """Register statement parsing functions for each token type."""
        return {
            TokenType.KW_DEFINE: self._parse_define_statement,
            TokenType.KW_SET: self._parse_set_statement,
            TokenType.KW_RETURN: self._parse_return_statement,
            TokenType.KW_IF: self._parse_if_statement,
            TokenType.KW_WHILE: self._parse_while_statement,
            TokenType.KW_FOR: self._parse_for_each_statement,
            TokenType.KW_SAY: self._parse_say_statement,
            TokenType.KW_TELL: self._parse_say_statement,  # Tell is an alias for Say
            TokenType.KW_USE: self._parse_call_statement,
            TokenType.PUNCT_HASH_TRIPLE: self._parse_action_interaction_or_utility,
            TokenType.KW_ADD: self._parse_collection_mutation_statement,
            TokenType.KW_REMOVE: self._parse_collection_mutation_statement,
            TokenType.KW_INSERT: self._parse_collection_mutation_statement,
            TokenType.KW_CLEAR: self._parse_collection_mutation_statement,
            TokenType.KW_UPDATE: self._parse_collection_mutation_statement,
            # Note: KW_EMPTY is not registered here because "empty" as a literal is more common
            # than "Empty `collection`" as a statement. Standalone "empty" will be parsed as an expression.
        }

    def _parse_parameter_sections(self) -> tuple[list[Parameter], list[Output]]:
        """Parse parameter sections (#### Inputs: and #### Outputs:).

        Returns:
            A tuple of (inputs, outputs) - inputs are Parameters, outputs are Outputs.
        """
        inputs: list[Parameter] = []
        outputs: list[Output] = []

        while self._current_token and self._current_token.type == TokenType.PUNCT_HASH_QUAD:
            # Move past ####
            self._advance_tokens()

            # Check if it's Inputs or Outputs
            current = self._current_token
            if current:
                if current.type == TokenType.KW_INPUTS:
                    # Move past "Inputs"
                    self._advance_tokens()

                    # Expect colon
                    colon_token = self._current_token
                    if colon_token and colon_token.type == TokenType.PUNCT_COLON:
                        self._advance_tokens()

                    # Parse input parameters (lines starting with -)
                    inputs = self._parse_parameter_list()

                elif current.type == TokenType.KW_OUTPUTS:
                    # Move past "Outputs"
                    self._advance_tokens()

                    # Expect colon
                    colon_token2 = self._current_token
                    if colon_token2 and colon_token2.type == TokenType.PUNCT_COLON:
                        self._advance_tokens()

                    # Parse output list (different format from inputs)
                    outputs = self._parse_output_list()

                else:
                    # Not a parameter section, break
                    break

        return inputs, outputs

    def _parse_parameter_list(self) -> list[Parameter]:
        """Parse a list of parameters (lines starting with -).

        Expected format:
        - `name` **as** Type (required)
        - `name` **as** Type (optional, default: value)

        Returns:
            List of Parameter objects.
        """
        parameters: list[Parameter] = []

        while self._current_token and self._current_token.type == TokenType.OP_MINUS:
            # Move past -
            self._advance_tokens()

            # Parse single parameter
            param = self._parse_parameter()
            if param:
                parameters.append(param)

        return parameters

    def _parse_parameter(self) -> Parameter | None:
        """Parse a single parameter.

        Expected format:
        `name` **as** Type (required)
        `name` **as** Type (optional, default: value)

        Returns:
            A Parameter object or None if parsing fails.
        """
        if not self._current_token:
            return None

        # Save starting token for error reporting
        start_token = self._current_token

        # Expect identifier in backticks
        if self._current_token.type != TokenType.MISC_IDENT:
            return None

        name = Identifier(self._current_token, self._current_token.literal)
        self._advance_tokens()

        # Expect "as" keyword
        current = self._current_token
        if not current or current.type != TokenType.KW_AS:
            return None
        self._advance_tokens()

        # Parse type name (could be multi-word like "Whole Number")
        type_name = self._parse_type_name()
        if not type_name:
            return None

        # Default values
        is_required = True
        default_value: Expression | None = None

        # Check for (required) or (optional, default: value) or (default: value)
        # TODO: In the future, remove support for explicit required/optional keywords
        # and make it fully implicit based on presence of default value
        paren_token = self._current_token
        if paren_token and paren_token.type == TokenType.DELIM_LPAREN:
            self._advance_tokens()

            status_token = self._current_token
            if status_token:
                if status_token.type == TokenType.KW_OPTIONAL:
                    is_required = False
                    self._advance_tokens()

                    # Check for default value
                    comma_token = self._current_token
                    if comma_token and comma_token.type == TokenType.PUNCT_COMMA:
                        self._advance_tokens()

                        # Expect "default"
                        default_token = self._current_token
                        if default_token and default_token.type == TokenType.KW_DEFAULT:
                            self._advance_tokens()

                            # Expect colon
                            colon_check = self._current_token
                            if colon_check and colon_check.type == TokenType.PUNCT_COLON:
                                self._advance_tokens()

                            # Parse the default value expression
                            default_value = self._parse_expression(Precedence.LOWEST)

                            # After parsing expression, advance past it
                            self._advance_tokens()

                elif status_token.type == TokenType.KW_DEFAULT:
                    # Handle (default: value) without explicit optional/required
                    # Infer that presence of default means optional
                    is_required = False
                    self._advance_tokens()

                    # Expect colon
                    colon_check = self._current_token
                    if colon_check and colon_check.type == TokenType.PUNCT_COLON:
                        self._advance_tokens()

                    # Parse the default value expression
                    default_value = self._parse_expression(Precedence.LOWEST)

                    # After parsing expression, advance past it
                    self._advance_tokens()

                elif status_token.type == TokenType.KW_REQUIRED:
                    is_required = True
                    self._advance_tokens()
                else:
                    # Unknown token, keep as required
                    is_required = True
                    self._advance_tokens()

            # Expect closing paren
            rparen_token = self._current_token
            if rparen_token and rparen_token.type == TokenType.DELIM_RPAREN:
                self._advance_tokens()

        return Parameter(
            token=start_token,
            name=name,
            type_name=type_name,
            is_required=is_required,
            default_value=default_value,
        )

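    # Illustrative examples (hypothetical names; types taken from the type keywords
    # recognized by _is_type_token below), in the formats the docstring above lists:
    #
    #   - `path` **as** Text (required)
    #   - `retries` **as** Whole Number (optional, default: _3_)
    #   - `timeout` **as** Float (default: _1.5_)
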
    def _parse_output_list(self) -> list[Output]:
        """Parse a list of outputs (lines starting with -).

        Expected format for outputs:
        - `name` **as** Type
        - `name` **as** Type (default: value)

        Returns:
            List of Output objects.
        """
        outputs: list[Output] = []

        while self._current_token and self._current_token.type == TokenType.OP_MINUS:
            # Move past -
            self._advance_tokens()

            # Parse single output
            output = self._parse_output()
            if output:
                outputs.append(output)

        return outputs

    def _parse_output(self) -> Output | None:
        """Parse a single output.

        Expected formats:
        - Returns Type (simple format)
        - `name` **as** Type
        - `name` **as** Type (default: value)

        Note: Outputs don't have required/optional, only optional defaults.

        Returns:
            An Output object or None if parsing fails.
        """
        if not self._current_token:
            return None

        # Save starting token for error reporting
        start_token = self._current_token

        # Check for simple "Returns Type" format
        if self._current_token.type == TokenType.MISC_IDENT and self._current_token.literal.lower() == "returns":
            self._advance_tokens()

            # Parse type name
            type_name = self._parse_type_name()
            if type_name:
                # Create a simple output with no specific name
                return Output(
                    token=start_token,
                    name=Identifier(start_token, "return_value"),  # Default name
                    type_name=type_name,
                    default_value=EmptyLiteral(start_token),
                )
            else:
                # Failed to parse the type; with streaming tokens we can't rewind, so give up
                return None

        # Otherwise expect identifier in backticks for named output
        if self._current_token.type != TokenType.MISC_IDENT:
            return None

        name = Identifier(self._current_token, self._current_token.literal)
        self._advance_tokens()

        # Expect "as" keyword
        current = self._current_token
        if not current or current.type != TokenType.KW_AS:
            return None
        self._advance_tokens()

        # Parse type name (could be multi-word like "Whole Number")
        type_name = self._parse_type_name()
        if not type_name:
            return None

        # Default value (optional)
        default_value: Expression | None = None

        # Check for (default: value)
        paren_token = self._current_token
        if paren_token and paren_token.type == TokenType.DELIM_LPAREN:
            self._advance_tokens()

            # Expect "default"
            default_token = self._current_token
            if default_token and default_token.type == TokenType.KW_DEFAULT:
                self._advance_tokens()

                # Expect colon
                colon_check = self._current_token
                if colon_check and colon_check.type == TokenType.PUNCT_COLON:
                    self._advance_tokens()

                # Parse the default value expression
                default_value = self._parse_expression(Precedence.LOWEST)

                # After parsing expression, advance past it
                self._advance_tokens()

            # Expect closing paren
            rparen_token = self._current_token
            if rparen_token and rparen_token.type == TokenType.DELIM_RPAREN:
                self._advance_tokens()

        # If no default value was specified, use Empty as the default
        if default_value is None:
            default_value = EmptyLiteral(start_token)

        return Output(
            token=start_token,
            name=name,
            type_name=type_name,
            default_value=default_value,
        )

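    # Illustrative examples (hypothetical names), in the output formats the docstring
    # above lists:
    #
    #   - Returns Whole Number
    #   - `total` **as** Number (default: _0_)
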
    def _register_variable_definition(self, name: str, type_spec: list[str], line: int, position: int) -> None:
        """Register a variable definition in the symbol table.

        Args:
            name: Variable name
            type_spec: List of allowed types
            line: Line number
            position: Column position
        """
        try:
            self._symbol_table.define(name, type_spec, line, position)
        except NameError as e:
            # The error message contains info about redefinition
            if "already defined" in str(e):
                # Extract the original definition line from the error message
                match = re.search(r"line (\d+)", str(e))
                original_line = int(match.group(1)) if match else line
                self._report_error_and_recover(
                    template=VARIABLE_ALREADY_DEFINED,
                    error_type="name",
                    name=name,
                    original_line=str(original_line),
                    skip_recovery=True,  # No recovery needed for semantic errors
                )
            else:
                # Fallback for other NameError cases
                self._report_error_and_recover(
                    template=NAME_UNDEFINED,
                    error_type="name",
                    name=name,
                    skip_recovery=True,  # No recovery needed for semantic errors
                )

    def _check_variable_defined(self, name: str, line: int, position: int) -> bool:
        """Check if a variable is defined.

        Args:
            name: Variable name
            line: Line number for error reporting
            position: Column position for error reporting

        Returns:
            True if defined, False otherwise
        """
        info = self._symbol_table.lookup(name)
        if not info:
            self._report_error_and_recover(
                template=VARIABLE_NOT_DEFINED,
                error_type="name",
                name=name,
                skip_recovery=True,  # No recovery needed for semantic errors
            )
            return False
        return True

    def _validate_assignment_type(self, variable_name: str, value: Expression, line: int, position: int) -> bool:
        """Validate that an assignment value matches the variable's type.

        Args:
            variable_name: Name of the variable being assigned to
            value: The expression being assigned
            line: Line number for error reporting
            position: Column position for error reporting

        Returns:
            True if type is valid, False otherwise
        """
        # Look up the variable's type specification
        var_info = self._symbol_table.lookup(variable_name)
        if not var_info:
            # Variable not defined - already reported elsewhere
            return False

        # Determine the type of the value being assigned
        value_type = get_type_from_value(value)
        if value_type is None:
            # Can't determine the type - allow the assignment for now,
            # since this might be a function call or complex expression
            return True

        # Check if the value's type is compatible with the variable's type spec
        type_spec = TypeSpec(var_info.type_spec)
        is_compatible, _error_msg = check_type_compatibility(value_type, type_spec)

        if not is_compatible:
            # Create a detailed error message
            from machine_dialect.errors.messages import ASSIGNMENT_TYPE_MISMATCH
            from machine_dialect.type_checking import TYPE_DISPLAY_NAMES

            actual_type_name = TYPE_DISPLAY_NAMES.get(value_type, "unknown")
            self._report_error_and_recover(
                template=ASSIGNMENT_TYPE_MISMATCH,
                error_type="type",  # This is a type error
                variable=variable_name,
                expected_type=str(type_spec),
                actual_type=actual_type_name,
                skip_recovery=True,  # No recovery needed for semantic errors
            )
            return False

        # Mark the variable as initialized on successful type check
        self._symbol_table.mark_initialized(variable_name)
        return True

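    # Illustrative example (hypothetical program fragment): `count` is declared as
    # Whole Number and then assigned Text, so the compatibility check above fails,
    # an assignment-type-mismatch error is reported, and the method returns False.
    #
    #   Define `count` as Whole Number.
    #   Set `count` to _"ten"_.
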
    def _is_type_token(self, token_type: TokenType) -> bool:
        """Check if a token type represents a type keyword.

        Args:
            token_type: The token type to check

        Returns:
            True if it's a type keyword, False otherwise
        """
        return token_type in {
            TokenType.KW_TEXT,
            TokenType.KW_WHOLE_NUMBER,
            TokenType.KW_FLOAT,
            TokenType.KW_NUMBER,
            TokenType.KW_YES_NO,
            TokenType.KW_URL,
            TokenType.KW_DATE,
            TokenType.KW_DATETIME,
            TokenType.KW_TIME,
            TokenType.KW_LIST,
            TokenType.KW_EMPTY,
        }