machine-dialect 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,906 @@
|
|
1
|
+
"""Semantic analyzer for Machine Dialect™.
|
2
|
+
|
3
|
+
This module provides semantic analysis capabilities including type checking,
|
4
|
+
variable usage validation, and scope analysis.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from dataclasses import dataclass
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from machine_dialect.ast import (
|
11
|
+
CollectionAccessExpression,
|
12
|
+
CollectionMutationStatement,
|
13
|
+
DefineStatement,
|
14
|
+
EmptyLiteral,
|
15
|
+
Expression,
|
16
|
+
FloatLiteral,
|
17
|
+
Identifier,
|
18
|
+
InfixExpression,
|
19
|
+
NamedListLiteral,
|
20
|
+
OrderedListLiteral,
|
21
|
+
PrefixExpression,
|
22
|
+
Program,
|
23
|
+
SetStatement,
|
24
|
+
Statement,
|
25
|
+
StringLiteral,
|
26
|
+
UnorderedListLiteral,
|
27
|
+
URLLiteral,
|
28
|
+
WholeNumberLiteral,
|
29
|
+
YesNoLiteral,
|
30
|
+
)
|
31
|
+
from machine_dialect.errors.exceptions import MDException, MDNameError, MDTypeError, MDUninitializedError, MDValueError
|
32
|
+
from machine_dialect.errors.messages import ErrorTemplate
|
33
|
+
from machine_dialect.parser.parser import TYPING_MAP
|
34
|
+
from machine_dialect.parser.symbol_table import SymbolTable
|
35
|
+
from machine_dialect.semantic.error_messages import ErrorMessageGenerator
|
36
|
+
|
37
|
+
|
38
|
+
@dataclass
class TypeInfo:
    """Type information for expressions and variables.

    Attributes:
        type_name: The resolved type name (e.g., "Whole Number", "Text")
        is_literal: Whether this is a literal value
        literal_value: The actual value if it's a literal
    """

    type_name: str
    is_literal: bool = False
    literal_value: Any = None

    def is_compatible_with(self, allowed_types: list[str]) -> bool:
        """Check if this type is compatible with allowed types.

        Args:
            allowed_types: List of allowed type names

        Returns:
            True if compatible, False otherwise
        """
        # "Any" is the dynamic type: it is accepted wherever a type is expected.
        if self.type_name == "Any":
            return True

        # Exact match. This is also the only way "Empty" is accepted: it must be
        # listed explicitly in allowed_types, so no separate "Empty" rule is needed.
        if self.type_name in allowed_types:
            return True

        # "Number" is an umbrella type that accepts both concrete numeric types.
        return "Number" in allowed_types and self.type_name in ("Whole Number", "Float")
|
79
|
+
|
80
|
+
|
81
|
+
class SemanticAnalyzer:
|
82
|
+
"""Performs semantic analysis on the AST.
|
83
|
+
|
84
|
+
Validates:
|
85
|
+
- Variable definitions and usage
|
86
|
+
- Type consistency
|
87
|
+
- Scope rules
|
88
|
+
- Initialization before use
|
89
|
+
"""
|
90
|
+
|
91
|
+
def __init__(self) -> None:
    """Create an analyzer with an empty symbol table and no recorded errors."""
    # Function-context tracking: not inside a function, no return type known.
    self.function_return_type: str | None = None
    self.in_function = False
    # Errors collected during analysis, and the scope used to resolve names.
    self.errors: list[MDException] = []
    self.symbol_table = SymbolTable()
|
97
|
+
|
98
|
+
def analyze(self, program: Program) -> tuple[Program, list[MDException]]:
    """Run semantic analysis over every top-level statement of a program.

    Each call starts from a fresh symbol table and an empty error list, so
    results from a previous call are discarded.

    Args:
        program: The AST program to analyze

    Returns:
        Tuple of (the program that was passed in, list of errors collected)
    """
    self.symbol_table = SymbolTable()
    self.errors = []

    # Visit the top-level statements in source order.
    visit = self._analyze_statement
    for top_level_stmt in program.statements:
        visit(top_level_stmt)

    return program, self.errors
|
115
|
+
|
116
|
+
def _analyze_statement(self, stmt: Statement) -> None:
    """Dispatch a statement to the analysis routine matching its kind.

    The checks run in a fixed order: concrete statement classes first, then
    duck-typed fallbacks for anything carrying an ``expression`` or a
    ``statements`` attribute. Statements matching none of them are ignored.

    Args:
        stmt: Statement to analyze
    """
    from machine_dialect.ast.statements import (
        ActionStatement,
        CallStatement,
        CollectionMutationStatement,
        FunctionStatement,
        IfStatement,
        InteractionStatement,
        ReturnStatement,
        SayStatement,
        UtilityStatement,
    )

    if isinstance(stmt, DefineStatement):
        self._analyze_define_statement(stmt)
        return

    if isinstance(stmt, SetStatement):
        self._analyze_set_statement(stmt)
        return

    if isinstance(stmt, (FunctionStatement, ActionStatement, InteractionStatement, UtilityStatement)):
        self._analyze_function_statement(stmt)
        return

    if isinstance(stmt, IfStatement):
        # Condition first, then each branch that is present.
        if stmt.condition:
            self._analyze_expression(stmt.condition)
        if stmt.consequence:
            self._analyze_statement(stmt.consequence)
        if stmt.alternative:
            self._analyze_statement(stmt.alternative)
        return

    if isinstance(stmt, (SayStatement, ReturnStatement)):
        # Prefer a truthy `expression`; otherwise fall back to `return_value`.
        said = getattr(stmt, "expression", None)
        if said:
            self._analyze_expression(said)
        else:
            returned = getattr(stmt, "return_value", None)
            if returned:
                self._analyze_expression(returned)
        return

    if isinstance(stmt, CallStatement):
        # The callee and the arguments are each analyzed as expressions.
        if stmt.function_name:
            self._analyze_expression(stmt.function_name)
        if stmt.arguments:
            self._analyze_expression(stmt.arguments)
        return

    if isinstance(stmt, CollectionMutationStatement):
        self._analyze_collection_mutation_statement(stmt)
        return

    if hasattr(stmt, "expression"):  # ExpressionStatement
        self._analyze_expression(stmt.expression)
        return

    if hasattr(stmt, "statements"):  # BlockStatement
        # A block gets its own scope for the duration of its statements.
        self.symbol_table = self.symbol_table.enter_scope()
        for inner in stmt.statements:
            self._analyze_statement(inner)
        enclosing = self.symbol_table.exit_scope()
        if enclosing:
            self.symbol_table = enclosing
    # Add more statement types as needed
|
175
|
+
|
176
|
+
def _analyze_define_statement(self, stmt: DefineStatement) -> None:
    """Validate a Define statement and register the variable it declares.

    Steps, each of which stops the analysis of this statement on failure:
    redefinition check in the current scope, validation of every declared
    type name against ``TYPING_MAP``, registration in the symbol table, and
    a compatibility check of the default value (if any) against the declared
    types. A variable with an acceptable default is marked initialized.

    Args:
        stmt: DefineStatement to analyze
    """
    name = stmt.name.value
    line, pos = stmt.token.line, stmt.token.position

    # A second definition in the same scope is an error.
    if self.symbol_table.is_defined_in_current_scope(name):
        previous = self.symbol_table.lookup(name)
        if previous:
            message = ErrorMessageGenerator.redefinition(
                name,
                line,
                pos,
                previous.definition_line,
                previous.definition_pos,
            )
            self.errors.append(MDNameError(message, line, pos))
        return

    # Every declared type must be one of the known type names.
    for declared_type in stmt.type_spec:
        if declared_type in TYPING_MAP.values():
            continue
        message = ErrorMessageGenerator.invalid_type(declared_type, line, pos, list(TYPING_MAP.values()))
        self.errors.append(MDTypeError(message, line, pos))
        return

    # Record the variable; the symbol table signals conflicts with NameError.
    try:
        self.symbol_table.define(name, stmt.type_spec, line, pos)
    except NameError as exc:
        self.errors.append(MDNameError(str(exc), line, pos))
        return

    # Without a default value there is nothing more to check.
    if not stmt.initial_value:
        return

    inferred = self._infer_expression_type(stmt.initial_value)
    if inferred and not inferred.is_compatible_with(stmt.type_spec):
        message = (
            f"Default value type '{inferred.type_name}' is not compatible "
            f"with declared types: {', '.join(stmt.type_spec)}"
        )
        self.errors.append(MDTypeError(message, line, pos))
        return

    # The default value counts as the variable's first assignment.
    self.symbol_table.mark_initialized(name)
|
226
|
+
|
227
|
+
def _analyze_function_statement(self, stmt: Statement) -> None:
    """Analyze a function definition (Action, Interaction, Utility, or Function).

    The function name is registered in the current scope as a "Function"
    symbol (with the first output's type as its return type, when present).
    Parameters and body are then analyzed inside a nested scope. The previous
    scope and function-context flags are restored afterwards, even if the
    body analysis raises.

    Args:
        stmt: Function statement to analyze
    """
    # Get function name
    func_name = stmt.name.value if hasattr(stmt, "name") else None
    if not func_name:
        return

    # Check for redefinition
    if self.symbol_table.is_defined_in_current_scope(func_name):
        existing = self.symbol_table.lookup(func_name)
        if existing:
            error_msg = ErrorMessageGenerator.redefinition(
                func_name,
                stmt.token.line,
                stmt.token.position,
                existing.definition_line,
                existing.definition_pos,
            )
            self.errors.append(MDNameError(error_msg, stmt.token.line, stmt.token.position))
        return

    # Determine return type from outputs.
    # Simplified: only the first output's type is used.
    return_type = None
    outputs = getattr(stmt, "outputs", None)
    if outputs and outputs[0].type_name:
        return_type = outputs[0].type_name

    # Register the function in the symbol table as a variable of type
    # "Function", carrying the return type as an extra attribute.
    try:
        self.symbol_table.define(func_name, ["Function"], stmt.token.line, stmt.token.position)
        func_info = self.symbol_table.lookup(func_name)
        if func_info and return_type:
            func_info.return_type = return_type
        # Mark as initialized since functions are defined with their body
        self.symbol_table.mark_initialized(func_name)
    except NameError as e:
        self.errors.append(MDNameError(str(e), stmt.token.line, stmt.token.position))
        return

    # Enter function context; remember the previous one so nesting works.
    old_in_function = self.in_function
    old_return_type = self.function_return_type
    self.in_function = True
    self.function_return_type = return_type
    self.symbol_table = self.symbol_table.enter_scope()

    try:
        # Parameters (inputs) become initialized local variables of the function scope.
        for param in getattr(stmt, "inputs", None) or []:
            if not (param.name and param.type_name):
                continue
            # Prefer the parameter's own token for positions; fall back to the statement's.
            param_token = param.token if hasattr(param, "token") else stmt.token
            try:
                self.symbol_table.define(
                    param.name.value,
                    [param.type_name],
                    param_token.line,
                    param_token.position,
                )
                # Parameters are considered initialized
                self.symbol_table.mark_initialized(param.name.value)
            except NameError as e:
                # Report the failure (e.g. a duplicate parameter name) instead of
                # discarding it, and keep analyzing the remaining parameters.
                self.errors.append(MDNameError(str(e), param_token.line, param_token.position))

        # Analyze function body
        if hasattr(stmt, "body") and stmt.body:
            self._analyze_statement(stmt.body)
    finally:
        # Exit function scope and restore the enclosing function context.
        parent_table = self.symbol_table.exit_scope()
        if parent_table:
            self.symbol_table = parent_table
        self.in_function = old_in_function
        self.function_return_type = old_return_type
|
311
|
+
|
312
|
+
def _analyze_set_statement(self, stmt: SetStatement) -> None:
    """Analyze a Set statement.

    Reports an undefined-variable error (with similar-name suggestions) when
    the target was never defined, and a type-mismatch error when the assigned
    value's inferred type is not compatible with the variable's declared
    types. On success the variable is marked initialized, the assigned value
    is remembered on the symbol, and for collection literals the element type
    names are recorded in sorted order.

    Args:
        stmt: SetStatement to analyze
    """
    if stmt.name is None:
        return
    var_name = stmt.name.value

    # Check if variable is defined
    var_info = self.symbol_table.lookup(var_name)
    if not var_info:
        # Gather every name visible from the current scope outwards for suggestions.
        all_vars: list[str] = []
        current_table: SymbolTable | None = self.symbol_table
        while current_table:
            all_vars.extend(current_table.symbols.keys())
            current_table = current_table.parent

        # Find similar variables using ErrorMessageGenerator
        similar_vars = ErrorMessageGenerator._find_similar(var_name, all_vars) if all_vars else None

        error_msg = ErrorMessageGenerator.undefined_variable(
            var_name, stmt.token.line, stmt.token.position, similar_vars
        )
        self.errors.append(MDNameError(error_msg, stmt.token.line, stmt.token.position))
        return

    # Stays None when there is no value to infer a type from.
    value_type = None
    if stmt.value:
        # First analyze the expression to check for errors
        self._analyze_expression(stmt.value)

        # Then check type compatibility
        value_type = self._infer_expression_type(stmt.value)
        if value_type and not value_type.is_compatible_with(var_info.type_spec):
            # Include the literal's text in the error message when it is known.
            value_repr = None
            if value_type.is_literal and value_type.literal_value is not None:
                if value_type.type_name == "Text":
                    value_repr = f'"{value_type.literal_value}"'
                else:
                    value_repr = str(value_type.literal_value)

            error_msg = ErrorMessageGenerator.type_mismatch(
                var_name,
                var_info.type_spec,
                value_type.type_name,
                stmt.token.line,
                stmt.token.position,
                value_repr,
            )
            self.errors.append(MDTypeError(error_msg, stmt.token.line, stmt.token.position))
            return

    # Mark variable as initialized and track the assigned value
    self.symbol_table.mark_initialized(var_name)

    if not stmt.value:
        return

    # Store the assigned value for type tracking
    var_info.last_assigned_value = stmt.value

    # If it's a collection literal, track element types
    if (
        value_type
        and value_type.type_name in ["Ordered List", "Unordered List", "Named List"]
        and value_type.is_literal
        and value_type.literal_value
    ):
        literal = value_type.literal_value
        if isinstance(literal, list):
            elements = literal
        elif isinstance(literal, dict):
            elements = literal.values()
        else:
            elements = ()

        element_types = set()
        for element in elements:
            elem_type = self._infer_expression_type(element)
            if elem_type:
                element_types.add(elem_type.type_name)

        if element_types:
            # Sorted so the recorded order does not depend on set iteration
            # order, which varies between runs for strings.
            var_info.inferred_element_types = sorted(element_types)
|
395
|
+
|
396
|
+
def _analyze_collection_mutation_statement(self, stmt: CollectionMutationStatement) -> None:
    """Analyze a collection mutation statement.

    Validates that the operation is appropriate for the collection type and
    analyzes the value and position expressions of the statement.

    Validation rules (each violation appends an ``MDTypeError`` to
    ``self.errors``):

    * ``add`` / ``update`` on a Named List require ``position_type == "key"``.
    * ``remove`` on a Named List requires a ``Text`` key.
    * ``add`` / ``update`` / ``remove`` on a list must not use a key.
    * ``set`` / ``insert`` are not valid for Named Lists.
    * ``clear`` is accepted for every collection type.

    The value and position expressions are analyzed even when the type of
    the collection cannot be determined, so that undefined or uninitialized
    variables used in them are still reported.

    Args:
        stmt: CollectionMutationStatement to analyze
    """
    # Analyze the collection expression to get its type
    collection_type = self._analyze_expression(stmt.collection)

    # Operation/type compatibility can only be validated when the type of the
    # collection is known; the operand expressions are analyzed regardless.
    if collection_type:
        # Check if it's a Named List (dictionary) or array
        is_named_list = collection_type.type_name == "Named List"
        is_array = collection_type.type_name in ("Ordered List", "Unordered List", "List")

        if stmt.operation in ("add", "update", "remove"):
            if is_named_list:
                # Named List operations
                if stmt.operation == "add" and stmt.position_type != "key":
                    self.errors.append(
                        MDTypeError(
                            'Add operation on Named List requires a key. Use: Add "key" to `dict` with value _value_.',
                            stmt.token.line,
                            stmt.token.position,
                        )
                    )
                elif stmt.operation == "update" and stmt.position_type != "key":
                    self.errors.append(
                        MDTypeError(
                            'Update operation on Named List requires a key. Use: Update "key" in `dict` to _value_.',
                            stmt.token.line,
                            stmt.token.position,
                        )
                    )

                # For Named Lists, remove should work with keys (strings)
                if stmt.operation == "remove" and stmt.value:
                    value_type = self._infer_expression_type(stmt.value)
                    # "Any" means the key type is statically unknown, which is
                    # not evidence of a non-string key, so it is not rejected.
                    if value_type and value_type.type_name not in ("Text", "Any"):
                        self.errors.append(
                            MDTypeError(
                                f"Remove from Named List requires a string key. Got {value_type.type_name}.",
                                stmt.token.line,
                                stmt.token.position,
                            )
                        )
            elif is_array:
                # Array operations shouldn't have key type
                if stmt.position_type == "key":
                    self.errors.append(
                        MDTypeError(
                            f"Operation '{stmt.operation}' with key is not valid for "
                            f"{collection_type.type_name}. Keys are only for Named Lists.",
                            stmt.token.line,
                            stmt.token.position,
                        )
                    )
        elif stmt.operation in ("set", "insert"):
            # These operations are only for arrays
            if is_named_list:
                self.errors.append(
                    MDTypeError(
                        f"Operation '{stmt.operation}' is not valid for Named Lists. Use 'Update' instead.",
                        stmt.token.line,
                        stmt.token.position,
                    )
                )
        # The "clear" operation works for all collection types: nothing to validate.

    # Analyze value and position expressions if present
    if stmt.value:
        self._analyze_expression(stmt.value)
    if stmt.position and isinstance(stmt.position, Expression):
        self._analyze_expression(stmt.position)
|
476
|
+
|
477
|
+
def _analyze_expression(self, expr: Expression | None) -> TypeInfo | None:
    """Infer the type of an expression and validate how it uses variables.

    Identifiers are checked against the symbol table: an unknown name is
    recorded as an ``MDNameError`` (with similar-name suggestions drawn from
    the current scope and every parent scope), and a known but uninitialized
    name is recorded as an ``MDUninitializedError``. Collection access
    expressions are forwarded to ``_analyze_collection_access``.

    Args:
        expr: Expression to analyze; a falsy value yields ``None``.

    Returns:
        TypeInfo produced by ``_infer_expression_type`` for the expression,
        or None if it cannot be determined.
    """
    if not expr:
        return None

    inferred = self._infer_expression_type(expr)

    if isinstance(expr, Identifier):
        symbol = self.symbol_table.lookup(expr.value)
        if symbol:
            # Known variable: it must have been given a value before use
            if not symbol.initialized:
                message = ErrorMessageGenerator.uninitialized_use(
                    expr.value, expr.token.line, expr.token.position, symbol.definition_line
                )
                self.errors.append(MDUninitializedError(message, expr.token.line, expr.token.position))
        else:
            # Unknown variable: collect every visible name, innermost scope
            # first, so the error can suggest close matches
            known_names: list[str] = []
            scope: SymbolTable | None = self.symbol_table
            while scope:
                known_names.extend(scope.symbols.keys())
                scope = scope.parent

            suggestions = None
            if known_names:
                suggestions = ErrorMessageGenerator._find_similar(expr.value, known_names)

            message = ErrorMessageGenerator.undefined_variable(
                expr.value, expr.token.line, expr.token.position, suggestions
            )
            self.errors.append(MDNameError(message, expr.token.line, expr.token.position))
    elif isinstance(expr, CollectionAccessExpression):
        # Bounds and type checks for element access
        self._analyze_collection_access(expr)

    return inferred
|
518
|
+
|
519
|
+
def _infer_expression_type(self, expr: Expression) -> TypeInfo | None:
    """Infer the type of an expression.

    Handles literals, identifiers, prefix and infix expressions, conditional
    expressions, call expressions, collection literals and collection access.

    A negated numeric literal (unary ``-``) yields a literal TypeInfo whose
    ``literal_value`` is the negated value, so later index/bounds checks see
    the real (negative) number instead of the operand's positive value.

    Args:
        expr: Expression to type-check

    Returns:
        TypeInfo or None if type cannot be inferred. Collection access whose
        element type cannot be narrowed returns ``TypeInfo("Any")``.
    """
    # Literal types
    if isinstance(expr, WholeNumberLiteral):
        return TypeInfo("Whole Number", is_literal=True, literal_value=expr.value)
    elif isinstance(expr, FloatLiteral):
        return TypeInfo("Float", is_literal=True, literal_value=expr.value)
    elif isinstance(expr, StringLiteral):
        return TypeInfo("Text", is_literal=True, literal_value=expr.value)
    elif isinstance(expr, YesNoLiteral):
        return TypeInfo("Yes/No", is_literal=True, literal_value=expr.value)
    elif isinstance(expr, URLLiteral):
        return TypeInfo("URL", is_literal=True, literal_value=expr.value)
    elif isinstance(expr, EmptyLiteral):
        return TypeInfo("Empty", is_literal=True, literal_value=None)

    # Identifier - look up its type
    elif isinstance(expr, Identifier):
        var_info = self.symbol_table.lookup(expr.value)
        if var_info:
            # For union types, we can't determine exact type statically
            # Return the first type as a best guess
            # NOTE(review): assumes type_spec is never empty - confirm
            return TypeInfo(var_info.type_spec[0])
        return None

    # Prefix expressions
    elif isinstance(expr, PrefixExpression):
        if expr.operator == "-":
            if expr.right:
                right_type = self._infer_expression_type(expr.right)
                if right_type and right_type.type_name in ["Whole Number", "Float", "Number"]:
                    if not right_type.is_literal:
                        return right_type
                    # Negating a literal must negate the tracked value too;
                    # returning the operand's TypeInfo unchanged would make
                    # "-1" look like the literal 1 to bounds checks.
                    operand_value = right_type.literal_value
                    if isinstance(operand_value, (int, float)) and not isinstance(operand_value, bool):
                        return TypeInfo(right_type.type_name, is_literal=True, literal_value=-operand_value)
                    # Literal without a usable numeric value: keep only the type
                    return TypeInfo(right_type.type_name)
        elif expr.operator in ["not", "!"]:
            return TypeInfo("Yes/No")

    # Infix expressions
    elif isinstance(expr, InfixExpression):
        left_type = self._infer_expression_type(expr.left) if expr.left else None
        right_type = self._infer_expression_type(expr.right) if expr.right else None

        # Arithmetic operators
        if expr.operator in ["+", "-", "*", "/", "^", "**"]:
            if left_type and right_type:
                if left_type.type_name == "Float" or right_type.type_name == "Float":
                    return TypeInfo("Float")
                elif left_type.type_name == "Whole Number" and right_type.type_name == "Whole Number":
                    if expr.operator == "/":
                        return TypeInfo("Float")  # Division always returns float
                    return TypeInfo("Whole Number")
            return TypeInfo("Number")  # Generic number type

        # Comparison operators
        elif expr.operator in ["<", ">", "<=", ">=", "==", "!=", "===", "!=="]:
            return TypeInfo("Yes/No")

        # Logical operators
        elif expr.operator in ["and", "or"]:
            return TypeInfo("Yes/No")

        # Bitwise operators
        # NOTE: "^" never reaches this branch because the arithmetic branch
        # above already matches it.
        elif expr.operator in ["|", "&", "^", "<<", ">>"]:
            # Bitwise operators work on integers and return integers
            if left_type and right_type:
                if left_type.type_name == "Whole Number" and right_type.type_name == "Whole Number":
                    return TypeInfo("Whole Number")
            return None

    # Additional expression types for better coverage
    # These are imported here, inside the function, as in the original code
    from machine_dialect.ast.call_expression import CallExpression
    from machine_dialect.ast.expressions import Arguments, ConditionalExpression, ErrorExpression

    # Parenthesized expressions have no dedicated node class; the parser
    # handles them transparently, so nothing is needed for them here.

    # Arguments expression type
    if isinstance(expr, Arguments):
        # Arguments don't have a single type, they're a collection
        return None

    # Conditional expressions (ternary: condition ? true_expr : false_expr)
    if isinstance(expr, ConditionalExpression):
        # Type is the common type of consequence and alternative
        if expr.consequence and expr.alternative:
            cons_type = self._infer_expression_type(expr.consequence)
            alt_type = self._infer_expression_type(expr.alternative)
            if cons_type and alt_type:
                # If both branches have same type, return that type
                if cons_type.type_name == alt_type.type_name:
                    return cons_type
                # If one is Empty, return the other
                if cons_type.type_name == "Empty":
                    return alt_type
                if alt_type.type_name == "Empty":
                    return cons_type
                # If numeric types, return Float as common type
                if cons_type.type_name in ["Whole Number", "Float"] and alt_type.type_name in [
                    "Whole Number",
                    "Float",
                ]:
                    return TypeInfo("Float")
        return None

    # Call expressions - check user-defined and built-in functions
    elif isinstance(expr, CallExpression):
        # Check if it's a user-defined function by looking it up
        if expr.function_name and isinstance(expr.function_name, Identifier):
            func_name = expr.function_name.value

            # Try to find the function in the symbol table
            func_info = self.symbol_table.lookup(func_name)
            if func_info and func_info.return_type:
                # User-defined function with known return type
                return TypeInfo(func_info.return_type)
            elif func_info:
                # Function without return type
                return None

        # TODO: Check if it's a built-in function from runtime/builtins.py
        # Built-ins like 'len' return Whole Number, 'str' returns Text, etc.
        # For now, built-in functions are not tracked in the symbol table

        # Unknown function or complex call expression
        return None

    # List literals - collections
    elif isinstance(expr, UnorderedListLiteral):
        return TypeInfo(
            "Unordered List", is_literal=True, literal_value=expr.elements if hasattr(expr, "elements") else []
        )
    elif isinstance(expr, OrderedListLiteral):
        return TypeInfo(
            "Ordered List", is_literal=True, literal_value=expr.elements if hasattr(expr, "elements") else []
        )
    elif isinstance(expr, NamedListLiteral):
        return TypeInfo(
            "Named List", is_literal=True, literal_value=expr.entries if hasattr(expr, "entries") else {}
        )

    # Collection access
    elif isinstance(expr, CollectionAccessExpression):
        # Lists can contain heterogeneous types, so the element type cannot
        # always be determined statically; narrow it where possible.
        if expr.collection:
            collection_type = self._infer_expression_type(expr.collection)
            if collection_type:
                # If it's a literal collection with known elements
                if collection_type.is_literal and collection_type.literal_value:
                    elements = collection_type.literal_value

                    # For lists, try to determine element type
                    if isinstance(elements, list) and len(elements) > 0:
                        element_type = self._infer_list_element_type(elements, expr.accessor)
                        if element_type:
                            return element_type

                    # For dictionaries (Named Lists)
                    elif isinstance(elements, dict) and len(elements) > 0:
                        # If we know the key, we can determine the value type
                        if expr.accessor:
                            key = None
                            if isinstance(expr.accessor, str):
                                key = expr.accessor
                            elif isinstance(expr.accessor, Expression):
                                accessor_type = self._infer_expression_type(expr.accessor)
                                if accessor_type and accessor_type.is_literal:
                                    key = str(accessor_type.literal_value)

                            if key and key in elements:
                                element_type = self._infer_expression_type(elements[key])
                                if element_type:
                                    return element_type

                # For non-literal collections (e.g., variables holding lists)
                # check what the variable was last set to
                elif isinstance(expr.collection, Identifier):
                    var_info = self.symbol_table.lookup(expr.collection.value)
                    if var_info and var_info.initialized:
                        # Check if we have tracked element types from assignment
                        if var_info.inferred_element_types:
                            # If all elements have the same type, return that type
                            if len(var_info.inferred_element_types) == 1:
                                return TypeInfo(var_info.inferred_element_types[0])
                            # Mixed element types: return Any
                            elif len(var_info.inferred_element_types) > 1:
                                return TypeInfo("Any")

                        # If we have the last assigned value, try to infer from it
                        elif var_info.last_assigned_value and isinstance(var_info.last_assigned_value, Expression):
                            assigned_type = self._infer_expression_type(var_info.last_assigned_value)
                            if assigned_type and assigned_type.is_literal and assigned_type.literal_value:
                                # This is a literal collection, process it
                                elements = assigned_type.literal_value
                                if isinstance(elements, list) and len(elements) > 0:
                                    element_type = self._infer_list_element_type(elements, expr.accessor)
                                    if element_type:
                                        return element_type

        # The exact type is unknown. "Any" is valid because heterogeneous
        # collections are allowed, and it does not trigger type errors.
        return TypeInfo("Any")

    # Error expressions always have unknown type
    elif isinstance(expr, ErrorExpression):
        return None

    return None


def _infer_list_element_type(self, elements: list, accessor: Expression | int | str | None) -> TypeInfo | None:
    """Infer the type of the element selected by ``accessor`` in a literal list.

    The accessor is interpreted as a one-based index when it is an ``int`` or
    an expression whose inferred type is an integer literal. When the indexed
    element cannot be resolved, the list is checked for a single shared
    element type instead.

    Args:
        elements: Element expressions of the literal list.
        accessor: Accessor of the collection access expression.

    Returns:
        TypeInfo of the selected element, a non-literal TypeInfo of the shared
        element type when all typed elements agree, or None otherwise.
    """
    if accessor:
        index = None
        if isinstance(accessor, int):
            index = accessor - 1  # Convert to 0-based
        elif isinstance(accessor, Expression):
            accessor_type = self._infer_expression_type(accessor)
            if accessor_type and accessor_type.is_literal and isinstance(accessor_type.literal_value, int):
                index = accessor_type.literal_value - 1  # Convert to 0-based

        # If we know the index and it's valid
        if index is not None and 0 <= index < len(elements):
            element_type = self._infer_expression_type(elements[index])
            if element_type:
                return element_type

    # Specific element unknown: check if all elements have the same type
    element_types = set()
    for element in elements:
        elem_type = self._infer_expression_type(element)
        if elem_type:
            element_types.add(elem_type.type_name)

    if len(element_types) == 1:
        return TypeInfo(element_types.pop())
    return None
|
804
|
+
|
805
|
+
def _analyze_collection_access(self, expr: CollectionAccessExpression) -> None:
    """Analyze collection access for bounds and type checking.

    Appends to ``self.errors``:

    * ``MDTypeError`` when the accessed expression has a known type that is
      not a collection type. The statically unknown type ``"Any"`` and the
      generic ``"List"`` type are accepted.
    * ``MDValueError`` when the collection is a defined but uninitialized
      list variable, a literal empty list, or when a literal index is zero,
      negative, or beyond the end of a literal list.

    Machine Dialect uses one-based indexing; ordinal accessors ``first``,
    ``second``, ``third`` and ``last`` are mapped to indices before checking.

    Args:
        expr: CollectionAccessExpression to analyze
    """
    collection_type_names = ("Unordered List", "Ordered List", "Named List", "List")

    # First, analyze the collection being accessed
    if expr.collection:
        collection_type = self._infer_expression_type(expr.collection)

        # Check if we're accessing a non-collection. "Any" only means the type
        # could not be narrowed (e.g. nested access), so it is not an error.
        if collection_type and collection_type.type_name not in (*collection_type_names, "Any"):
            error_msg = ErrorTemplate(
                f"Cannot access elements of non-collection type '{collection_type.type_name}'"
            )
            self.errors.append(MDTypeError(error_msg, expr.token.line, expr.token.position))
            return

        # Special case: if collection is an identifier that was defined but never set, it's empty
        if isinstance(expr.collection, Identifier):
            var_info = self.symbol_table.lookup(expr.collection.value)
            if var_info and not var_info.initialized:
                # Variable defined but not initialized means it's empty
                if var_info.type_spec[0] in collection_type_names:
                    error_msg = ErrorTemplate(f"Cannot access elements from empty list '{expr.collection.value}'")
                    self.errors.append(MDValueError(error_msg, expr.token.line, expr.token.position))
                    return

        # If the collection is a literal with known elements, check bounds
        if collection_type and collection_type.is_literal and collection_type.literal_value is not None:
            elements = collection_type.literal_value

            # Check for empty collection access
            if isinstance(elements, list) and len(elements) == 0:
                error_msg = ErrorTemplate("Cannot access elements from an empty list")
                self.errors.append(MDValueError(error_msg, expr.token.line, expr.token.position))
                return

            # Check bounds for numeric/ordinal access.
            # The accessor is tested with "is not None" rather than truthiness:
            # a raw integer accessor of 0 is falsy but is exactly the invalid
            # index the check below must report.
            if expr.access_type in ["numeric", "ordinal"] and expr.accessor is not None:
                # Only call _infer_expression_type if accessor is an Expression
                if isinstance(expr.accessor, Expression):
                    accessor_type = self._infer_expression_type(expr.accessor)
                # For str/int accessors, create a TypeInfo directly
                elif isinstance(expr.accessor, int):
                    accessor_type = TypeInfo("Whole Number", is_literal=True, literal_value=expr.accessor)
                elif isinstance(expr.accessor, str):
                    accessor_type = TypeInfo("Text", is_literal=True, literal_value=expr.accessor)
                else:
                    accessor_type = None

                # Check for zero or negative index
                if accessor_type and accessor_type.is_literal and accessor_type.literal_value is not None:
                    index_value = accessor_type.literal_value

                    # Handle ordinal keywords
                    if expr.access_type == "ordinal" and isinstance(expr.accessor, str | Expression):
                        # For ordinal access, accessor is a string like "first", "second"
                        if isinstance(expr.accessor, str):
                            accessor_str = expr.accessor
                        elif hasattr(expr.accessor, "value"):
                            accessor_str = expr.accessor.value
                        else:
                            accessor_str = str(expr.accessor)

                        if accessor_str == "first":
                            index_value = 1
                        elif accessor_str == "second":
                            index_value = 2
                        elif accessor_str == "third":
                            index_value = 3
                        elif accessor_str == "last":
                            if len(elements) == 0:
                                error_msg = ErrorTemplate("Cannot access 'last' element of an empty list")
                                self.errors.append(
                                    MDValueError(error_msg, expr.token.line, expr.token.position)
                                )
                                return
                            index_value = len(elements)

                    # Check for invalid indices
                    if isinstance(index_value, int | float):
                        if index_value <= 0:
                            error_msg = ErrorTemplate(
                                f"Invalid index {index_value}: Machine Dialect uses one-based "
                                "indexing (indices start at 1)"
                            )
                            self.errors.append(MDValueError(error_msg, expr.token.line, expr.token.position))
                            return
                        elif isinstance(elements, list) and index_value > len(elements):
                            error_msg = ErrorTemplate(
                                f"Index {index_value} is out of bounds for list with {len(elements)} elements"
                            )
                            self.errors.append(MDValueError(error_msg, expr.token.line, expr.token.position))
                            return

    # Also analyze the accessor expression itself if it's an Expression
    if expr.accessor and isinstance(expr.accessor, Expression):
        self._analyze_expression(expr.accessor)
|