machine_dialect-0.1.0a1-py3-none-any.whl
This diff shows the content of a publicly available package version as released to a supported registry. It is provided for informational purposes only and reflects the package exactly as it appears in the public registry; since this is the first release (0.1.0a1), every file is an addition.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
machine_dialect/lexer/tests/test_url_literals.py
@@ -0,0 +1,169 @@
+from machine_dialect.lexer import Lexer, Token, TokenType
+from machine_dialect.lexer.tests.helpers import collect_all_tokens
+
+
+class TestURLLiterals:
+    """Test URL literal detection in the lexer."""
+
+    def _tokenize_no_errors(self, source: str) -> list[Token]:
+        """Helper to tokenize and assert no errors.
+
+        Args:
+            source: The source code to tokenize.
+
+        Returns:
+            The list of tokens.
+        """
+        lexer = Lexer(source)
+        tokens = collect_all_tokens(lexer)
+        return tokens
+
+    def test_http_url_detection(self) -> None:
+        source = '"http://example.com"'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"http://example.com"'
+
+    def test_https_url_detection(self) -> None:
+        source = '"https://www.example.com/path"'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"https://www.example.com/path"'
+
+    def test_ftp_url_detection(self) -> None:
+        source = '"ftp://files.example.com/file.txt"'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"ftp://files.example.com/file.txt"'
+
+    def test_url_with_query_params(self) -> None:
+        source = '"https://api.example.com/data?key=value&foo=bar"'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"https://api.example.com/data?key=value&foo=bar"'
+
+    def test_url_with_fragment(self) -> None:
+        source = '"https://example.com/page#section"'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"https://example.com/page#section"'
+
+    def test_url_with_port(self) -> None:
+        source = '"http://localhost:8080/api"'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"http://localhost:8080/api"'
+
+    def test_non_url_string(self) -> None:
+        source = '"Hello, World!"'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_TEXT
+        assert tokens[0].literal == '"Hello, World!"'
+
+    def test_invalid_url_format(self) -> None:
+        source = '"http://invalid url with spaces"'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_TEXT
+        assert tokens[0].literal == '"http://invalid url with spaces"'
+
+    def test_url_without_scheme(self) -> None:
+        source = '"example.com"'
+        tokens = self._tokenize_no_errors(source)
+
+        # Without scheme, it should be treated as regular text
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_TEXT
+        assert tokens[0].literal == '"example.com"'
+
+    def test_single_quoted_url(self) -> None:
+        source = "'https://example.com'"
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == "'https://example.com'"
+
+    def test_empty_string(self) -> None:
+        source = '""'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_TEXT
+        assert tokens[0].literal == '""'
+
+    def test_multiple_urls_in_source(self) -> None:
+        source = 'Set `url1` to "https://api.example.com" and `url2` to "https://docs.example.com"'
+        tokens = self._tokenize_no_errors(source)
+
+        # Find URL tokens
+        url_tokens = [t for t in tokens if t.type == TokenType.LIT_URL]
+        assert len(url_tokens) == 2
+        assert url_tokens[0].literal == '"https://api.example.com"'
+        assert url_tokens[1].literal == '"https://docs.example.com"'
+
+    def test_url_with_special_characters(self) -> None:
+        source = '"https://example.com/path?q=test+query&id=123#anchor"'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"https://example.com/path?q=test+query&id=123#anchor"'
+
+    def test_mailto_url(self) -> None:
+        source = '"mailto:user@example.com"'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"mailto:user@example.com"'
+
+    def test_data_url(self) -> None:
+        source = '"data:text/plain;base64,SGVsbG8="'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"data:text/plain;base64,SGVsbG8="'
+
+    def test_underscore_wrapped_url(self) -> None:
+        """Test URL detection with underscore wrapping (Machine Dialect™ syntax)."""
+        source = '_"https://example.com"_'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"https://example.com"'
+
+    def test_underscore_wrapped_non_url(self) -> None:
+        """Test that non-URLs with underscores are still text."""
+        source = '_"not a url"_'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_TEXT
+        assert tokens[0].literal == '"not a url"'
+
+    def test_underscore_wrapped_url_with_query(self) -> None:
+        """Test complex URL with underscore wrapping."""
+        source = '_"https://api.example.com/v1/users?id=123&active=true#profile"_'
+        tokens = self._tokenize_no_errors(source)
+
+        assert len(tokens) == 1
+        assert tokens[0].type == TokenType.LIT_URL
+        assert tokens[0].literal == '"https://api.example.com/v1/users?id=123&active=true#profile"'
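The tests above double as a usage reference for the lexer's URL handling: a quoted string is promoted from LIT_TEXT to LIT_URL when its body starts with a URL scheme such as http, https, ftp, mailto, or data, and underscore wrapping is stripped from the reported literal. A minimal sketch of that behavior, using only names that appear in this diff and assuming the wheel is installed:

from machine_dialect.lexer import Lexer, TokenType
from machine_dialect.lexer.tests.helpers import collect_all_tokens

# Expected promotions, taken directly from the test cases above.
cases = [
    ('"https://example.com"', TokenType.LIT_URL),    # scheme present: URL literal
    ('"example.com"', TokenType.LIT_TEXT),           # no scheme: plain text
    ('_"https://example.com"_', TokenType.LIT_URL),  # underscore wrapping stripped
]
for source, expected in cases:
    tokens = collect_all_tokens(Lexer(source))
    assert tokens[0].type == expected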
machine_dialect/lexer/tokens.py
@@ -0,0 +1,487 @@
+from enum import (
+    Enum,
+    auto,
+    unique,
+)
+from typing import (
+    NamedTuple,
+)
+
+from machine_dialect.helpers.stopwords import ENGLISH_STOPWORDS
+
+
+class TokenMetaType(Enum):
+    OP = "operator"
+    DELIM = "delimiter"
+    PUNCT = "punctuation"
+    LIT = "literal"
+    MISC = "misc"
+    KW = "keyword"
+    TAG = "tag"
+
+
+@unique
+class TokenType(Enum):
+    # Operators
+    OP_PLUS = auto()
+    OP_MINUS = auto()
+    OP_STAR = auto()
+    OP_DIVISION = auto()
+    OP_ASSIGN = auto()
+    OP_EQ = auto()
+    OP_NOT_EQ = auto()
+    OP_STRICT_EQ = auto()
+    OP_STRICT_NOT_EQ = auto()
+    OP_LT = auto()
+    OP_LTE = auto()
+    OP_GT = auto()
+    OP_GTE = auto()
+    OP_NEGATION = auto()
+    OP_TWO_STARS = auto()
+    OP_CARET = auto()  # Exponentiation operator ^
+    OP_THE_NAMES_OF = auto()  # Dictionary keys extraction operator
+    OP_THE_CONTENTS_OF = auto()  # Dictionary values extraction operator
+
+    # Delimiters
+    DELIM_LPAREN = auto()
+    DELIM_RPAREN = auto()
+    DELIM_LBRACE = auto()
+    DELIM_RBRACE = auto()
+
+    # Punctuation
+    PUNCT_SEMICOLON = auto()
+    PUNCT_COMMA = auto()
+    PUNCT_PERIOD = auto()
+    PUNCT_COLON = auto()
+    PUNCT_HASH = auto()
+    PUNCT_HASH_DOUBLE = auto()
+    PUNCT_HASH_TRIPLE = auto()
+    PUNCT_HASH_QUAD = auto()
+    PUNCT_BACKSLASH = auto()
+    PUNCT_FRONTMATTER = auto()  # Triple dash (---) for YAML frontmatter
+    PUNCT_DASH = auto()  # Single dash (-) for list markers
+    PUNCT_APOSTROPHE_S = auto()  # Possessive apostrophe-s ('s) for property access
+
+    # Literals
+    LIT_FLOAT = auto()
+    LIT_WHOLE_NUMBER = auto()
+    LIT_NO = auto()
+    LIT_TEXT = auto()
+    LIT_TRIPLE_BACKTICK = auto()
+    LIT_URL = auto()
+    LIT_YES = auto()
+
+    # Special
+    MISC_EOF = auto()
+    MISC_ILLEGAL = auto()
+    MISC_IDENT = auto()
+    MISC_STOPWORD = auto()
+    MISC_COMMENT = auto()
+
+    # Keywords
+    KW_ACTION = auto()
+    KW_ADD = auto()  # For list operations: Add _"item"_ to `list`
+    KW_AND = auto()
+    KW_AS = auto()
+    KW_BEHAVIOR = auto()
+    KW_BLANK = auto()  # For empty collections: Set `list` to blank
+    KW_CLEAR = auto()  # For clearing collections: Clear `dict` or Clear all entries from `dict`
+    KW_CONTENT = auto()  # For named lists: content/contents in name-content pairs
+    KW_DATATYPE = auto()
+    KW_DATE = auto()
+    KW_DATETIME = auto()
+    KW_DEFAULT = auto()
+    KW_DEFINE = auto()
+    KW_EACH = auto()  # For iteration: for each item in list
+    KW_ELSE = auto()
+    KW_EMPTY = auto()
+    KW_ENTRYPOINT = auto()
+    KW_FILTER = auto()
+    KW_FIRST = auto()  # For list access: the first item of
+    KW_FLOAT = auto()
+    KW_FOR = auto()  # For iteration: for each item in list
+    KW_FROM = auto()
+    KW_HAS = auto()  # For named lists: if `dict` has key
+    KW_IF = auto()
+    KW_IN = auto()  # For iteration: for each item in list
+    KW_INPUTS = auto()
+    KW_INSERT = auto()  # For list operations: Insert _"item"_ at position _3_
+    KW_INTERACTION = auto()
+    KW_IS = auto()
+    KW_ITEM = auto()  # For numeric list access: item _5_ of
+    KW_LAST = auto()  # For list access: the last item of
+    KW_LIST = auto()
+    KW_NAME = auto()  # For named lists: name/names in name-content pairs
+    KW_NAMED_LIST = auto()  # Compound type: "named list"
+    KW_NEGATION = auto()
+    KW_NUMBER = auto()
+    KW_OF = auto()  # For list access: item _5_ of `list`
+    KW_OPTIONAL = auto()
+    KW_OR = auto()
+    KW_ORDERED_LIST = auto()  # Compound type: "ordered list"
+    KW_OUTPUTS = auto()
+    KW_PROMPT = auto()
+    KW_REMOVE = auto()  # For list operations: Remove _"item"_ from `list`
+    KW_REQUIRED = auto()
+    KW_RETURN = auto()
+    KW_RULE = auto()
+    KW_SAY = auto()
+    KW_SECOND = auto()  # For list access: the second item of
+    KW_SET = auto()
+    KW_TAKE = auto()
+    KW_TELL = auto()
+    KW_TEMPLATE = auto()
+    KW_TEXT = auto()
+    KW_THEN = auto()
+    KW_THIRD = auto()  # For list access: the third item of
+    KW_TIME = auto()
+    KW_TO = auto()
+    KW_TRAIT = auto()
+    KW_UNORDERED_LIST = auto()  # Compound type: "unordered list"
+    KW_URL = auto()
+    KW_UPDATE = auto()  # For dictionary operations: Update "key" in `dict` to _value_
+    KW_USE = auto()
+    KW_USING = auto()
+    KW_UTILITY = auto()
+    KW_VALUE = auto()  # For dictionary operations: Add "key" to `dict` with value _x_
+    KW_WHERE = auto()
+    KW_WHILE = auto()  # For while loops: While `condition`:
+    KW_WHOLE_NUMBER = auto()
+    KW_WITH = auto()
+    KW_YES_NO = auto()
+
+    # Tags
+    TAG_SUMMARY_START = auto()
+    TAG_SUMMARY_END = auto()
+    TAG_DETAILS_START = auto()
+    TAG_DETAILS_END = auto()
+
+    @property
+    def meta_type(self) -> TokenMetaType:
+        name_str = getattr(self, "name", "")
+        if name_str.startswith("KW_"):
+            return TokenMetaType.KW
+        if name_str.startswith("DELIM_"):
+            return TokenMetaType.DELIM
+        if name_str.startswith("PUNCT_"):
+            return TokenMetaType.PUNCT
+        if name_str.startswith("LIT_"):
+            return TokenMetaType.LIT
+        if name_str.startswith("OP_"):
+            return TokenMetaType.OP
+        if name_str.startswith("TAG_"):
+            return TokenMetaType.TAG
+
+        return TokenMetaType.MISC
+
+
+class Token(NamedTuple):
+    type: TokenType
+    literal: str
+    line: int
+    position: int
+
+    def __str__(self) -> str:
+        return f"Type: {self.type}, Literal: {self.literal}, Line: {self.line}, Position: {self.position}"
+
+
+def is_valid_identifier(literal: str) -> bool:
+    """Check if a string is a valid identifier.
+
+    Valid identifiers:
+    - Start with a letter (a-z, A-Z) or underscore (_)
+    - Followed by any number of letters, digits, underscores, spaces, hyphens, or apostrophes
+    - Cannot be empty
+    - Apostrophes cannot be at the beginning/end of the identifier or any word
+    - Special case: underscore followed by only digits is ILLEGAL (e.g., _42, _123)
+    - Special case: underscore(s) + digits + underscore(s) is ILLEGAL (e.g., _42_, __42__)
+    """
+    if not literal:
+        return False
+
+    # First character must be letter or underscore
+    if not (literal[0].isalpha() or literal[0] == "_"):
+        return False
+
+    # Check for invalid underscore number patterns
+    if literal[0] == "_":
+        # Remove leading underscores and check if the first character is a digit
+        stripped = literal.lstrip("_")
+        if stripped and stripped[0].isdigit():
+            # This is an invalid pattern like _42, __42, _123abc, etc.
+            return False
+
+    # Check apostrophe placement rules
+    if "'" in literal:
+        # Apostrophe cannot be at the beginning or end
+        if literal[0] == "'" or literal[-1] == "'":
+            return False
+
+        # Split by spaces to check each word
+        words = literal.split(" ")
+        for word in words:
+            if word and "'" in word:
+                # Apostrophe cannot be at the beginning or end of any word
+                if word[0] == "'" or word[-1] == "'":
+                    return False
+
+    # Rest can be alphanumeric, underscore, space, hyphen, or apostrophe
+    return all(c.isalnum() or c in ("_", " ", "-", "'") for c in literal[1:])
+
+
+keywords_mapping: dict[str, TokenType] = {
+    # classes methods
+    # Define a **blueprint** called `Person` with action (`walk`)
+    "action": TokenType.KW_ACTION,
+    # List operations: Add _"item"_ to `list`
+    "add": TokenType.KW_ADD,
+    # Clear collections
+    "clear": TokenType.KW_CLEAR,
+    # logic and: true and false
+    "and": TokenType.KW_AND,
+    # Use function:
+    # Use `turn alarm off`.
+    # Use `make noise` where `sound` is _"WEE-OO"_, `volume` is _80_.
+    # TODO: Implement proper 'apply' statement with its own token type (KW_APPLY)
+    # Should support: apply rule `add` with **1** and **5**
+    #                 apply formula `calculate` with `left` = **1** and `right` = **5**
+    # "apply": TokenType.KW_APPLY,  # Reserved for future use
+    "Use": TokenType.KW_USE,
+    # type indicator: set `a` as integer
+    "as": TokenType.KW_AS,
+    # behavior for objects
+    "behavior": TokenType.KW_BEHAVIOR,
+    "behaviors": TokenType.KW_BEHAVIOR,
+    "behaviour": TokenType.KW_BEHAVIOR,
+    "behaviours": TokenType.KW_BEHAVIOR,
+    # blank for empty collections
+    "blank": TokenType.KW_BLANK,
+    # Named lists: content/contents in name-content pairs
+    "content": TokenType.KW_CONTENT,
+    "contents": TokenType.KW_CONTENT,
+    # default value indicator
+    "default": TokenType.KW_DEFAULT,
+    # declare function: define a `sum` as function
+    "define": TokenType.KW_DEFINE,
+    # iteration: for each item in list
+    "each": TokenType.KW_EACH,
+    # else statement
+    "else": TokenType.KW_ELSE,
+    # empty collections (lists, dicts)
+    "empty": TokenType.KW_EMPTY,
+    # entrypoint for execution
+    "entrypoint": TokenType.KW_ENTRYPOINT,
+    # boolean primitive: false
+    "No": TokenType.LIT_NO,
+    # filter mini-programs that act as proxy to decide on AI code execution
+    "filter": TokenType.KW_FILTER,
+    # List access: the first item of
+    "first": TokenType.KW_FIRST,
+    # float typing: set `a` as float | set `a` to float 3.14
+    "Float": TokenType.KW_FLOAT,
+    # iteration: for each item in list
+    "for": TokenType.KW_FOR,
+    # range indicator: from 1 to 10
+    "from": TokenType.KW_FROM,
+    # Named lists: if `dict` has key
+    "has": TokenType.KW_HAS,
+    # if condition: if true
+    "if": TokenType.KW_IF,
+    "when": TokenType.KW_IF,
+    "whenever": TokenType.KW_IF,
+    "while": TokenType.KW_WHILE,
+    # iteration: for each item in list
+    "in": TokenType.KW_IN,
+    # inputs section for parameters
+    "Inputs": TokenType.KW_INPUTS,
+    # List operations: Insert _"item"_ at position _3_
+    "insert": TokenType.KW_INSERT,
+    # interaction for objects
+    "interaction": TokenType.KW_INTERACTION,
+    "interactions": TokenType.KW_INTERACTION,
+    # equal comparator: if `x` is 0
+    "is": TokenType.KW_IS,
+    # Natural language comparison operators
+    # Value equality (==)
+    "is equal to": TokenType.OP_EQ,
+    "equals": TokenType.OP_EQ,
+    "is the same as": TokenType.OP_EQ,
+    # Value inequality (!=)
+    "is not equal to": TokenType.OP_NOT_EQ,
+    "does not equal": TokenType.OP_NOT_EQ,
+    "doesn't equal": TokenType.OP_NOT_EQ,
+    "is different from": TokenType.OP_NOT_EQ,
+    "is not": TokenType.OP_NOT_EQ,
+    "isn't": TokenType.OP_NOT_EQ,
+    # Strict equality (===)
+    "is strictly equal to": TokenType.OP_STRICT_EQ,
+    "is exactly equal to": TokenType.OP_STRICT_EQ,
+    "is identical to": TokenType.OP_STRICT_EQ,
+    # Strict inequality (!==)
+    # TODO: Simplify support for comparisons
+    "is not strictly equal to": TokenType.OP_STRICT_NOT_EQ,
+    "is not exactly equal to": TokenType.OP_STRICT_NOT_EQ,
+    "is not identical to": TokenType.OP_STRICT_NOT_EQ,
+    "is greater than": TokenType.OP_GT,
+    "is more than": TokenType.OP_GT,
+    "is less than": TokenType.OP_LT,
+    "is under": TokenType.OP_LT,
+    "is fewer than": TokenType.OP_LT,
+    "is greater than or equal to": TokenType.OP_GTE,
+    "is at least": TokenType.OP_GTE,
+    "is no less than": TokenType.OP_GTE,
+    "is less than or equal to": TokenType.OP_LTE,
+    "is at most": TokenType.OP_LTE,
+    "is no more than": TokenType.OP_LTE,
+    # List access: item _5_ of
+    "item": TokenType.KW_ITEM,
+    # List access: the last item of
+    "last": TokenType.KW_LAST,
+    # list data type
+    "List": TokenType.KW_LIST,
+    # Named lists: name/names in name-content pairs
+    "name": TokenType.KW_NAME,
+    "names": TokenType.KW_NAME,
+    # logic not: not true
+    "not": TokenType.KW_NEGATION,
+    # numbers
+    "Number": TokenType.KW_NUMBER,
+    # List access: item _5_ of `list`
+    "of": TokenType.KW_OF,
+    # Dictionary extraction operators (multi-word)
+    "the names of": TokenType.OP_THE_NAMES_OF,
+    "the contents of": TokenType.OP_THE_CONTENTS_OF,
+    # optional parameter modifier
+    "optional": TokenType.KW_OPTIONAL,
+    # logic or: true or false
+    "or": TokenType.KW_OR,
+    # else statement
+    "otherwise": TokenType.KW_ELSE,
+    # outputs section for parameters
+    "Outputs": TokenType.KW_OUTPUTS,
+    # prompt for user input or AI
+    "prompt": TokenType.KW_PROMPT,
+    # List operations: Remove _"item"_ from `list`
+    "remove": TokenType.KW_REMOVE,
+    # required parameter modifier
+    "required": TokenType.KW_REQUIRED,
+    # return value.
+    "give back": TokenType.KW_RETURN,
+    "gives back": TokenType.KW_RETURN,
+    # The typical functions: Define a rule called `add` that takes two numbers and returns another number.
+    "rule": TokenType.KW_RULE,
+    # output/display: Say `message`.
+    # TODO: Make 'Say' case-insensitive (currently only accepts capital 'S')
+    "Say": TokenType.KW_SAY,
+    # List access: the second item of
+    "second": TokenType.KW_SECOND,
+    # declare variable: set `a` as integer.
+    "Set": TokenType.KW_SET,
+    # status type
+    "Yes/No": TokenType.KW_YES_NO,
+    # classes' properties:
+    # Define a blueprint called Person with these traits
+    "take": TokenType.KW_TAKE,
+    # Call actions
+    "Tell": TokenType.KW_TELL,
+    # template (equivalent to class in other languages)
+    "template": TokenType.KW_TEMPLATE,
+    # text typing (string)
+    "text": TokenType.KW_TEXT,
+    # separates if statement from block of code: `if true then return x`.
+    "then": TokenType.KW_THEN,
+    # List access: the third item of
+    "third": TokenType.KW_THIRD,
+    # range indicator: from 1 to 10
+    "to": TokenType.KW_TO,
+    # classes properties:
+    # Define a blueprint called Person with these traits
+    "trait": TokenType.KW_TRAIT,
+    # boolean primitive: true
+    "Yes": TokenType.LIT_YES,
+    # using - for capturing function return values in Set statements
+    "using": TokenType.KW_USING,
+    # Utility (equivalent to function in other languages)
+    "Utility": TokenType.KW_UTILITY,
+    # parameters:
+    # tell **alice** to **walk**.
+    # tell **alice** to **walk** with `speed` = `10`.
+    "where": TokenType.KW_WHERE,
+    # Update dictionary entries
+    "update": TokenType.KW_UPDATE,
+    # Value keyword for dictionary operations
+    "value": TokenType.KW_VALUE,
+    "with": TokenType.KW_WITH,
+    # type indicators
+    "URL": TokenType.KW_URL,
+    "Date": TokenType.KW_DATE,
+    "DateTime": TokenType.KW_DATETIME,
+    "Time": TokenType.KW_TIME,
+    "DataType": TokenType.KW_DATATYPE,
+    "Whole Number": TokenType.KW_WHOLE_NUMBER,
+    "Named List": TokenType.KW_NAMED_LIST,
+    "Ordered List": TokenType.KW_ORDERED_LIST,
+    "Unordered List": TokenType.KW_UNORDERED_LIST,
+    # Plural forms map to singular token types
+    "actions": TokenType.KW_ACTION,
+    "Floats": TokenType.KW_FLOAT,
+    "Numbers": TokenType.KW_NUMBER,
+    "takes": TokenType.KW_TAKE,
+    "texts": TokenType.KW_TEXT,
+    "traits": TokenType.KW_TRAIT,
+    "URLs": TokenType.KW_URL,
+    "Dates": TokenType.KW_DATE,
+    "DateTimes": TokenType.KW_DATETIME,
+    "Times": TokenType.KW_TIME,
+}
+
+
+lowercase_keywords_mapping: dict[str, str] = {key.lower(): key for key in keywords_mapping}
+
+
+# Tag tokens mapping (case-insensitive)
+TAG_TOKENS: dict[str, TokenType] = {
+    "<summary>": TokenType.TAG_SUMMARY_START,
+    "</summary>": TokenType.TAG_SUMMARY_END,
+    "<details>": TokenType.TAG_DETAILS_START,
+    "</details>": TokenType.TAG_DETAILS_END,
+}
+
+
+def lookup_tag_token(literal: str) -> tuple[TokenType | None, str]:
+    """Lookup a tag token from the literal.
+
+    Args:
+        literal: The tag literal to lookup (e.g., '<summary>', '</details>')
+
+    Returns:
+        Tuple of (TokenType, canonical_literal) if found, (None, literal) otherwise.
+        Canonical form is always lowercase.
+    """
+    # Convert to lowercase for case-insensitive comparison
+    lowercase_literal = literal.lower()
+
+    if lowercase_literal in TAG_TOKENS:
+        return TAG_TOKENS[lowercase_literal], lowercase_literal
+
+    return None, literal
+
+
+def lookup_token_type(literal: str) -> tuple[TokenType, str]:
+    # First check if it's a keyword (case-insensitive)
+    lowercase_literal = literal.lower()
+    if lowercase_literal in lowercase_keywords_mapping:
+        canonical_form = lowercase_keywords_mapping[lowercase_literal]
+        token_type = keywords_mapping[canonical_form]
+        return token_type, canonical_form
+
+    # Check if it's a stopword (case-insensitive)
+    if lowercase_literal in ENGLISH_STOPWORDS:
+        return TokenType.MISC_STOPWORD, literal
+
+    # Only return MISC_IDENT if it's a valid identifier
+    if is_valid_identifier(literal):
+        return TokenType.MISC_IDENT, literal
+
+    # If not a valid identifier, it's illegal
+    return TokenType.MISC_ILLEGAL, literal
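lookup_token_type above resolves a scanned word in three ordered steps: a case-insensitive keyword lookup that returns the canonical spelling, a stopword check against ENGLISH_STOPWORDS, and finally the is_valid_identifier gate that rejects underscore-digit patterns such as _42_. A minimal sketch of the expected results, derived only from the code in this hunk:

from machine_dialect.lexer.tokens import TokenMetaType, TokenType, lookup_token_type

# Keywords match case-insensitively and come back in canonical spelling.
assert lookup_token_type("SET") == (TokenType.KW_SET, "Set")
assert lookup_token_type("whenever") == (TokenType.KW_IF, "whenever")

# Non-keywords fall through to the identifier rules documented above
# (assuming these phrases do not appear in ENGLISH_STOPWORDS).
assert lookup_token_type("my counter")[0] == TokenType.MISC_IDENT   # spaces are allowed
assert lookup_token_type("_42_")[0] == TokenType.MISC_ILLEGAL       # underscore+digits rejected

# meta_type buckets enum members by their name prefix.
assert TokenType.LIT_URL.meta_type is TokenMetaType.LIT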
machine_dialect/linter/__init__.py
@@ -0,0 +1,10 @@
+"""Machine Dialect™ Linter.
+
+This module provides linting capabilities for Machine Dialect™ code,
+including style checking, error detection, and code quality analysis.
+"""
+
+from .linter import Linter
+from .violations import Violation, ViolationSeverity
+
+__all__ = ["Linter", "Violation", "ViolationSeverity"]
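The linter package re-exports its public names at the top level, so consumers import from machine_dialect.linter directly; a quick check derived from the lines above (the Linter's constructor and methods live in linter/linter.py and are not shown in this diff, so none are assumed here):

import machine_dialect.linter as md_linter

# __all__ above pins the public surface to exactly these three names.
assert sorted(md_linter.__all__) == ["Linter", "Violation", "ViolationSeverity"]

The file list also includes machine_dialect/linter/__main__.py, which suggests the linter can be invoked as `python -m machine_dialect.linter`, though its command-line options are not part of this diff.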