minecraft_datapack_language-15.4.28-py3-none-any.whl → minecraft_datapack_language-15.4.29-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minecraft_datapack_language/__init__.py +17 -2
- minecraft_datapack_language/_version.py +2 -2
- minecraft_datapack_language/ast_nodes.py +87 -59
- minecraft_datapack_language/mdl_compiler.py +470 -0
- minecraft_datapack_language/mdl_errors.py +14 -0
- minecraft_datapack_language/mdl_lexer.py +624 -0
- minecraft_datapack_language/mdl_parser.py +573 -0
- minecraft_datapack_language-15.4.29.dist-info/METADATA +266 -0
- minecraft_datapack_language-15.4.29.dist-info/RECORD +16 -0
- minecraft_datapack_language/cli.py +0 -159
- minecraft_datapack_language/cli_build.py +0 -1292
- minecraft_datapack_language/cli_check.py +0 -155
- minecraft_datapack_language/cli_colors.py +0 -264
- minecraft_datapack_language/cli_help.py +0 -508
- minecraft_datapack_language/cli_new.py +0 -300
- minecraft_datapack_language/cli_utils.py +0 -276
- minecraft_datapack_language/expression_processor.py +0 -352
- minecraft_datapack_language/linter.py +0 -409
- minecraft_datapack_language/mdl_lexer_js.py +0 -754
- minecraft_datapack_language/mdl_parser_js.py +0 -1049
- minecraft_datapack_language/pack.py +0 -758
- minecraft_datapack_language-15.4.28.dist-info/METADATA +0 -1274
- minecraft_datapack_language-15.4.28.dist-info/RECORD +0 -25
- {minecraft_datapack_language-15.4.28.dist-info → minecraft_datapack_language-15.4.29.dist-info}/WHEEL +0 -0
- {minecraft_datapack_language-15.4.28.dist-info → minecraft_datapack_language-15.4.29.dist-info}/entry_points.txt +0 -0
- {minecraft_datapack_language-15.4.28.dist-info → minecraft_datapack_language-15.4.29.dist-info}/licenses/LICENSE +0 -0
- {minecraft_datapack_language-15.4.28.dist-info → minecraft_datapack_language-15.4.29.dist-info}/top_level.txt +0 -0
@@ -1,754 +0,0 @@
-"""
-MDL Lexer - Simplified JavaScript-style syntax with curly braces and semicolons
-Handles basic control structures and number variables only
-"""
-
-import re
-from dataclasses import dataclass
-from typing import List, Optional
-from .mdl_errors import MDLLexerError
-
-
-@dataclass
-class Token:
-    type: str
-    value: str
-    line: int
-    column: int
-
-
-class TokenType:
-    # Keywords
-    PACK = "PACK"
-    NAMESPACE = "NAMESPACE"
-    FUNCTION = "FUNCTION"
-    VAR = "VAR"
-    NUM = "NUM"
-    SCOPE = "SCOPE"
-    IF = "IF"
-    ELSE = "ELSE"
-    ELSE_IF = "ELSE_IF"
-    WHILE = "WHILE"
-    ON_TICK = "ON_TICK"
-    ON_LOAD = "ON_LOAD"
-    TAG = "TAG"
-    ADD = "ADD"
-    RAW = "RAW"
-    RAW_START = "RAW_START"
-    RAW_END = "RAW_END"
-    EXECUTE = "EXECUTE"
-
-    # Registry types
-    RECIPE = "RECIPE"
-    LOOT_TABLE = "LOOT_TABLE"
-    ADVANCEMENT = "ADVANCEMENT"
-    PREDICATE = "PREDICATE"
-    ITEM_MODIFIER = "ITEM_MODIFIER"
-    STRUCTURE = "STRUCTURE"
-
-    # Operators
-    ASSIGN = "ASSIGN"
-    PLUS = "PLUS"
-    MINUS = "MINUS"
-    MULTIPLY = "MULTIPLY"
-    DIVIDE = "DIVIDE"
-    MODULO = "MODULO"
-    EQUAL = "EQUAL"
-    NOT_EQUAL = "NOT_EQUAL"
-    LESS = "LESS"
-    LESS_EQUAL = "LESS_EQUAL"
-    GREATER = "GREATER"
-    GREATER_EQUAL = "GREATER_EQUAL"
-    AND = "AND"
-    OR = "OR"
-
-    # Delimiters
-    SEMICOLON = "SEMICOLON"
-    COMMA = "COMMA"
-    LPAREN = "LPAREN"
-    RPAREN = "RPAREN"
-    LBRACE = "LBRACE"
-    RBRACE = "RBRACE"
-    LBRACKET = "LBRACKET"
-    RBRACKET = "RBRACKET"
-    LANGLE = "LANGLE"  # < for scope syntax
-    RANGLE = "RANGLE"  # > for scope syntax
-    DOT = "DOT"
-    COLON = "COLON"
-
-    # Literals
-    IDENTIFIER = "IDENTIFIER"
-    NUMBER = "NUMBER"
-    STRING = "STRING"
-
-    # Special
-    NEWLINE = "NEWLINE"
-    EOF = "EOF"
-    COMMENT = "COMMENT"
-    COMMAND = "COMMAND"
-    SAY = "SAY"  # Say command
-
-    # Variable substitution
-    VARIABLE_SUB = "VARIABLE_SUB"  # $variable$
-
-
-class MDLLexer:
-    """Lexer for simplified MDL language."""
-
-    def __init__(self, source_file: str = None):
-        self.tokens = []
-        self.current = 0
-        self.start = 0
-        self.line = 1
-        self.column = 1
-        self.source_file = source_file
-        self.in_raw_mode = False
-
-    def lex(self, source: str) -> List[Token]:
-        """Lex the source code into tokens."""
-        self.tokens = []
-        self.current = 0
-        self.start = 0
-        self.line = 1
-        self.column = 1
-        self.in_raw_mode = False
-
-        while self.current < len(source):
-            self.start = self.current
-            self._scan_token(source)
-
-        # Add EOF token
-        self.tokens.append(Token(TokenType.EOF, "", self.line, self.column))
-        return self.tokens
-
-    def _scan_token(self, source: str):
-        """Scan a single token."""
-        char = source[self.current]
-
-        # If in raw mode, scan raw text until we find raw!$
-        if self.in_raw_mode:
-            self._scan_raw_text(source)
-            return
-
-        # Handle whitespace and newlines
-        if char.isspace():
-            if char == '\n':
-                self.line += 1
-                self.column = 1
-            else:
-                self.column += 1
-            self.current += 1
-            return
-
-        # Handle comments
-        if char == '/' and self.current + 1 < len(source) and source[self.current + 1] == '/':
-            self._scan_comment(source)
-            return
-
-        # Handle strings
-        if char in ['"', "'"]:
-            self._scan_string(source, char)
-            return
-
-        # Handle raw block markers and variable substitutions (before numbers to avoid conflicts)
-        if char == '$':
-            # Check if this is a raw block start marker
-            if (self.current + 4 < len(source) and
-                    source[self.current:self.current + 5] == '$!raw'):
-                self._scan_raw_start(source)
-                return
-            else:
-                self._scan_variable_substitution(source)
-                return
-
-        # Handle raw block end marker (only if not in raw mode)
-        if char == 'r' and not self.in_raw_mode:
-            # Check if this is a raw block end marker
-            if (self.current + 4 < len(source) and
-                    source[self.current:self.current + 5] == 'raw!$'):
-                self._scan_raw_end(source)
-                return
-
-        # Handle numbers
-        if char.isdigit():
-            self._scan_number(source)
-            return
-
-        # Handle identifiers and keywords
-        if char.isalpha() or char == '_':
-            # Special handling for 'say' command
-            if (char == 's' and
-                    self.current + 2 < len(source) and
-                    source[self.current:self.current + 3] == 'say'):
-                self._scan_say_command(source)
-                return
-            else:
-                self._scan_identifier(source)
-                return
-
-        # Handle operators and delimiters
-        self._scan_operator_or_delimiter(source)
-
-    def _scan_comment(self, source: str):
-        """Scan a comment."""
-        # Skip the //
-        self.current += 2
-        self.column += 2
-
-        # Scan until end of line or end of source
-        while (self.current < len(source) and
-                source[self.current] != '\n'):
-            self.current += 1
-            self.column += 1
-
-        # Don't add comment tokens to the output
-        # Comments are ignored during parsing
-
-    def _scan_string(self, source: str, quote_char: str):
-        """Scan a string literal."""
-        self.current += 1  # Skip opening quote
-        self.column += 1
-
-        start_column = self.column
-        start_line = self.line
-
-        while (self.current < len(source) and
-                source[self.current] != quote_char):
-            if source[self.current] == '\n':
-                # Unterminated string - report error
-                raise create_lexer_error(
-                    message=f"Unterminated string literal",
-                    file_path=self.source_file,
-                    line=start_line,
-                    column=start_column,
-                    line_content=source[start_line-1:start_line] if start_line <= len(source.split('\n')) else "",
-                    suggestion="Add a closing quote to terminate the string"
-                )
-
-            if source[self.current] == '\\' and self.current + 1 < len(source):
-                # Handle escape sequences
-                self.current += 2
-                self.column += 2
-            else:
-                self.current += 1
-                self.column += 1
-
-        if self.current >= len(source):
-            # Unterminated string at end of file
-            raise create_lexer_error(
-                message=f"Unterminated string literal at end of file",
-                file_path=self.source_file,
-                line=start_line,
-                column=start_column,
-                line_content=source[start_line-1:start_line] if start_line <= len(source.split('\n')) else "",
-                suggestion="Add a closing quote to terminate the string"
-            )
-
-        # Include the closing quote
-        self.current += 1
-        self.column += 1
-
-        text = source[self.start:self.current]
-        self.tokens.append(Token(TokenType.STRING, text, start_line, start_column))
-
-    def _scan_number(self, source: str):
-        """Scan a number literal."""
-        while (self.current < len(source) and
-                source[self.current].isdigit()):
-            self.current += 1
-            self.column += 1
-
-        # Check for decimal point
-        if (self.current < len(source) and
-                source[self.current] == '.' and
-                self.current + 1 < len(source) and
-                source[self.current + 1].isdigit()):
-            self.current += 1  # consume the decimal point
-            self.column += 1
-
-            while (self.current < len(source) and
-                    source[self.current].isdigit()):
-                self.current += 1
-                self.column += 1
-
-        text = source[self.start:self.current]
-        self.tokens.append(Token(TokenType.NUMBER, text, self.line, self.column - len(text)))
-
-    def _scan_identifier(self, source: str):
-        """Scan an identifier or keyword."""
-        while (self.current < len(source) and
-                (source[self.current].isalnum() or source[self.current] == '_')):
-            self.current += 1
-            self.column += 1
-
-        text = source[self.start:self.current]
-
-        # Check if it's a keyword
-        keyword_map = {
-            'pack': TokenType.PACK,
-            'namespace': TokenType.NAMESPACE,
-            'function': TokenType.FUNCTION,
-            'var': TokenType.VAR,
-            'num': TokenType.NUM,
-            'scope': TokenType.SCOPE,
-            'if': TokenType.IF,
-            'else': TokenType.ELSE,
-            'while': TokenType.WHILE,
-            'on_tick': TokenType.ON_TICK,
-            'on_load': TokenType.ON_LOAD,
-            'tag': TokenType.TAG,
-            'add': TokenType.ADD,
-            'raw': TokenType.RAW,
-            'execute': TokenType.EXECUTE,
-            'recipe': TokenType.RECIPE,
-            'loot_table': TokenType.LOOT_TABLE,
-            'advancement': TokenType.ADVANCEMENT,
-            'predicate': TokenType.PREDICATE,
-            'item_modifier': TokenType.ITEM_MODIFIER,
-            'structure': TokenType.STRUCTURE
-        }
-
-        token_type = keyword_map.get(text.lower(), TokenType.IDENTIFIER)
-
-        # Note: Special handling for say and execute commands is done in _scan_token
-        # to avoid duplicate processing
-        self.tokens.append(Token(token_type, text, self.line, self.column - len(text)))
-
-    def _scan_variable_substitution(self, source: str):
-        """Scan variable substitution ($variable$)."""
-        self.current += 1  # Skip the $
-        self.column += 1
-
-        scope_selector = None
-        variable_name = ""
-
-        # Scan the variable name
-        while (self.current < len(source) and
-                (source[self.current].isalnum() or source[self.current] == '_')):
-            variable_name += source[self.current]
-            self.current += 1
-            self.column += 1
-
-        # Validate variable name starts with letter or underscore
-        if variable_name and (variable_name[0].isdigit()):
-            raise create_lexer_error(
-                message=f"Invalid variable name '{variable_name}' - cannot start with a digit",
-                file_path=self.source_file,
-                line=self.line,
-                column=self.column - len(variable_name),
-                line_content=source[self.line-1:self.line] if self.line <= len(source.split('\n')) else "",
-                suggestion="Variable names must start with a letter or underscore"
-            )
-
-        # Check for scope selector after variable name
-        if (self.current < len(source) and
-                source[self.current] == '<'):
-            self.current += 1  # consume <
-            self.column += 1
-
-            scope_start = self.current
-            bracket_count = 1
-
-            # Scan until we find the matching closing >
-            while (self.current < len(source) and bracket_count > 0):
-                if source[self.current] == '<':
-                    bracket_count += 1
-                elif source[self.current] == '>':
-                    bracket_count -= 1
-                self.current += 1
-                self.column += 1
-
-            if bracket_count == 0:
-                # Successfully found closing >
-                scope_selector = source[scope_start:self.current-1]  # Exclude the closing >
-            else:
-                # Unterminated scope selector - report error
-                raise create_lexer_error(
-                    message="Unterminated scope selector in variable substitution",
-                    file_path=self.source_file,
-                    line=self.line,
-                    column=self.column - (self.current - self.start),
-                    suggestion="Add a closing '>' to terminate the scope selector"
-                )
-
-        # Check for closing $
-        if (self.current < len(source) and
-                source[self.current] == '$'):
-            self.current += 1
-            self.column += 1
-
-            # If we have a scope selector, include it in the token
-            if scope_selector:
-                variable_name = f"{variable_name}<{scope_selector}>"
-
-            self.tokens.append(Token(TokenType.VARIABLE_SUB, variable_name, self.line, self.column - len(variable_name) - 2))
-        else:
-            # Not a valid variable substitution - report error
-            raise create_lexer_error(
-                message="Invalid variable substitution - missing closing '$'",
-                file_path=self.source_file,
-                line=self.line,
-                column=self.column - (self.current - self.start),
-                suggestion="Add a closing '$' to complete the variable substitution"
-            )
-
-    def _scan_raw_start(self, source: str):
-        """Scan raw block start marker ($!raw)."""
-        # Check if we're already in raw mode (nested raw blocks are not allowed)
-        if self.in_raw_mode:
-            raise create_lexer_error(
-                message="Nested raw blocks are not allowed",
-                file_path=self.source_file,
-                line=self.line,
-                column=self.column,
-                line_content=source[self.line-1:self.line] if self.line <= len(source.split('\n')) else "",
-                suggestion="Close the current raw block with 'raw!$' before starting a new one"
-            )
-
-        # Consume the $!raw
-        self.current += 5
-        self.column += 5
-
-        text = source[self.start:self.current]
-        self.tokens.append(Token(TokenType.RAW_START, text, self.line, self.column - len(text)))
-
-        # Enter raw mode
-        self.in_raw_mode = True
-
-    def _scan_raw_end(self, source: str):
-        """Scan raw block end marker (raw!$)."""
-        # Consume the raw!$
-        self.current += 5
-        self.column += 5
-
-        text = source[self.start:self.current]
-        self.tokens.append(Token(TokenType.RAW_END, text, self.line, self.column - len(text)))
-
-        # Exit raw mode
-        self.in_raw_mode = False
-
-    def _scan_raw_text(self, source: str):
-        """Scan raw text content between $!raw and raw!$."""
-        content_parts = []
-        raw_start_line = self.line
-        raw_start_column = self.column
-
-        while self.current < len(source):
-            char = source[self.current]
-
-            # Check if we've reached the end of the raw block
-            if (char == 'r' and
-                    self.current + 4 < len(source) and
-                    source[self.current:self.current + 5] == 'raw!$'):
-                # Consume the end marker and exit raw mode
-                self.current += 5
-                self.column += 5
-                self.in_raw_mode = False
-
-                # Add the raw content as a single RAW token
-                content = ''.join(content_parts)
-                if content.strip():  # Only add non-empty content
-                    # Keep newlines for proper command splitting, but trim leading/trailing whitespace
-                    clean_content = content.rstrip().lstrip()
-                    self.tokens.append(Token(TokenType.RAW, clean_content, raw_start_line, raw_start_column))
-
-                # Add the end marker token
-                self.tokens.append(Token(TokenType.RAW_END, 'raw!$', self.line, self.column - 5))
-                return
-
-            # Add character to content
-            content_parts.append(char)
-
-            # Update position
-            if char == '\n':
-                self.line += 1
-                self.column = 1
-            else:
-                self.column += 1
-            self.current += 1
-
-        # Check if we reached the end of source without finding the end marker
-        if self.current >= len(source):
-            # Unterminated raw block - report error
-            raise create_lexer_error(
-                message="Unterminated raw block - missing 'raw!$' end marker",
-                file_path=self.source_file,
-                line=raw_start_line,
-                column=raw_start_column,
-                line_content=source[raw_start_line-1:raw_start_line] if raw_start_line <= len(source.split('\n')) else "",
-                suggestion="Add 'raw!$' to terminate the raw block"
-            )
-
-    def _scan_say_command(self, source: str):
-        """Scan a say command and its content until semicolon."""
-        print(f"DEBUG: _scan_say_command called at position {self.current}, char: {source[self.current:self.current+10]}")
-
-        # Consume 'say'
-        self.current += 3
-        self.column += 3
-
-        say_start_line = self.line
-        say_start_column = self.column
-
-        # Skip whitespace after 'say'
-        while (self.current < len(source) and
-                source[self.current].isspace()):
-            if source[self.current] == '\n':
-                self.line += 1
-                self.column = 1
-            else:
-                self.column += 1
-            self.current += 1
-
-        print(f"DEBUG: After whitespace, position {self.current}, char: {source[self.current:self.current+20]}")
-
-        # Scan content until we find a semicolon, but preserve $variable$ syntax
-        content_parts = []
-        while self.current < len(source):
-            char = source[self.current]
-
-            if char == ';':
-                # Found the end of the say command
-                break
-
-            # Check for variable substitution syntax
-            if char == '$':
-                print(f"DEBUG: Found $ at position {self.current}")
-                # This might be the start of a variable substitution
-                # Look ahead to see if it's a valid variable name
-                temp_current = self.current + 1
-                temp_column = self.column + 1
-                variable_name = ""
-
-                # Scan potential variable name
-                while (temp_current < len(source) and
-                        (source[temp_current].isalnum() or source[temp_current] == '_')):
-                    variable_name += source[temp_current]
-                    temp_current += 1
-
-                print(f"DEBUG: Variable name: '{variable_name}', next char: '{source[temp_current] if temp_current < len(source) else 'EOF'}'")
-
-                # Check if we have a valid variable substitution
-                if (variable_name and
-                        temp_current < len(source) and
-                        source[temp_current] == '$' and
-                        (not variable_name[0].isdigit())):
-                    print(f"DEBUG: Valid variable substitution: ${variable_name}$")
-                    # This is a valid variable substitution, preserve the $variable$ syntax
-                    content_parts.append(char)
-                    content_parts.append(variable_name)
-                    content_parts.append('$')
-
-                    # Update position
-                    self.current = temp_current + 1
-                    self.column = temp_current + 1
-                    continue
-
-            # Add character to content
-            content_parts.append(char)
-
-            # Update position
-            if char == '\n':
-                self.line += 1
-                self.column = 1
-            else:
-                self.column += 1
-            self.current += 1
-
-        if self.current >= len(source):
-            # Unterminated say command - report error
-            raise create_lexer_error(
-                message="Unterminated say command - missing semicolon",
-                file_path=self.source_file,
-                line=say_start_line,
-                column=say_start_column,
-                line_content=source[say_start_line-1:say_start_line] if say_start_line <= len(source.split('\n')) else "",
-                suggestion="Add a semicolon (;) to terminate the say command"
-            )
-
-        # Consume the semicolon and add it as a separate token
-        self.current += 1
-        self.column += 1
-
-        # Create the say command token with content (without semicolon)
-        content = ''.join(content_parts).strip()
-        print(f"DEBUG: Final say command content: '{content}'")
-        self.tokens.append(Token(TokenType.SAY, content, say_start_line, say_start_column))
-
-        # Add the semicolon as a separate token
-        self.tokens.append(Token(TokenType.SEMICOLON, ';', self.line, self.column))
-
-    def _scan_execute_command(self, source: str):
-        """Scan an execute command and its content until semicolon."""
-        # Consume 'execute'
-        self.current += 7
-        self.column += 7
-
-        execute_start_line = self.line
-        execute_start_column = self.column
-
-        # Skip whitespace after 'execute'
-        while (self.current < len(source) and
-                source[self.current].isspace()):
-            if source[self.current] == '\n':
-                self.line += 1
-                self.column = 1
-            else:
-                self.column += 1
-            self.current += 1
-
-        # Scan content until we find a semicolon
-        content_parts = []
-        while self.current < len(source):
-            char = source[self.current]
-
-            if char == ';':
-                # Found the end of the execute command
-                break
-
-            # Add character to content
-            content_parts.append(char)
-
-            # Update position
-            if char == '\n':
-                self.line += 1
-                self.column = 1
-            else:
-                self.column += 1
-            self.current += 1
-
-        if self.current >= len(source):
-            # Unterminated execute command - report error
-            raise create_lexer_error(
-                message="Unterminated execute command - missing semicolon",
-                file_path=self.source_file,
-                line=execute_start_line,
-                column=execute_start_column,
-                line_content=source[execute_start_line-1:execute_start_line] if execute_start_line <= len(source.split('\n')) else "",
-                suggestion="Add a semicolon (;) to terminate the execute command"
-            )
-
-        # Consume the semicolon
-        self.current += 1
-        self.column += 1
-
-        # Create the execute command token with full content
-        content = ''.join(content_parts).strip()
-        full_command = f"execute {content};"
-        self.tokens.append(Token(TokenType.EXECUTE, full_command, execute_start_line, execute_start_column))

-    def _is_inside_control_structure(self, source: str) -> bool:
-        """Check if we're inside a control structure (if/while)."""
-        # Look backwards to see if we're inside a control structure
-        # This is a simplified check - in a more robust implementation,
-        # we would track the parsing context more carefully
-
-        # For now, let's be conservative and only apply special handling
-        # when we're clearly at the top level
-        brace_count = 0
-        for i in range(self.current):
-            if source[i] == '{':
-                brace_count += 1
-            elif source[i] == '}':
-                brace_count -= 1
-
-        # If we're inside braces, we're likely in a control structure
-        return brace_count > 0
-
-    def _scan_operator_or_delimiter(self, source: str):
-        """Scan operators and delimiters."""
-        char = source[self.current]
-        next_char = source[self.current + 1] if self.current + 1 < len(source) else None
-
-        # Two-character operators
-        if next_char:
-            two_char = char + next_char
-            if two_char in ['==', '!=', '<=', '>=', '&&', '||']:
-                self.current += 2
-                self.column += 2
-
-                operator_map = {
-                    '==': TokenType.EQUAL,
-                    '!=': TokenType.NOT_EQUAL,
-                    '<=': TokenType.LESS_EQUAL,
-                    '>=': TokenType.GREATER_EQUAL,
-                    '&&': TokenType.AND,
-                    '||': TokenType.OR,
-                }
-
-                self.tokens.append(Token(operator_map[two_char], two_char, self.line, self.column - 2))
-                return
-
-        # Single-character operators and delimiters
-        self.current += 1
-        self.column += 1
-
-        operator_map = {
-            '=': TokenType.ASSIGN,
-            '+': TokenType.PLUS,
-            '-': TokenType.MINUS,
-            '*': TokenType.MULTIPLY,
-            '/': TokenType.DIVIDE,
-            '%': TokenType.MODULO,
-            '<': TokenType.LANGLE,  # Use LANGLE for scope syntax, handle LESS in context
-            '>': TokenType.RANGLE,  # Use RANGLE for scope syntax, handle GREATER in context
-            ';': TokenType.SEMICOLON,
-            ',': TokenType.COMMA,
-            '(': TokenType.LPAREN,
-            ')': TokenType.RPAREN,
-            '{': TokenType.LBRACE,
-            '}': TokenType.RBRACE,
-            '[': TokenType.LBRACKET,
-            ']': TokenType.RBRACKET,
-            '.': TokenType.DOT,
-            ':': TokenType.COLON,
-            '!': TokenType.RAW,  # Allow exclamation marks in text
-            '?': TokenType.RAW,  # Allow question marks in text
-            '@': TokenType.RAW,  # Allow @ for player selectors
-            '#': TokenType.RAW,  # Allow # for tags
-            '~': TokenType.RAW,  # Allow ~ for relative coordinates
-            '^': TokenType.RAW,  # Allow ^ for relative coordinates
-        }
-
-        if char in operator_map:
-            self.tokens.append(Token(operator_map[char], char, self.line, self.column - 1))
-        else:
-            # Unknown character - report error
-            raise create_lexer_error(
-                message=f"Unexpected character '{char}'",
-                file_path=self.source_file,
-                line=self.line,
-                column=self.column - 1,
-                line_content=source[self.line-1:self.line] if self.line <= len(source.split('\n')) else "",
-                suggestion=f"Remove or replace the unexpected character '{char}'"
-            )
-
-
-def create_lexer_error(message: str, file_path: Optional[str] = None,
-                       line: Optional[int] = None, column: Optional[int] = None,
-                       line_content: Optional[str] = None, suggestion: Optional[str] = None) -> MDLLexerError:
-    """Create a lexer error with common suggestions."""
-    if not suggestion:
-        if "unterminated string" in message.lower():
-            suggestion = "Add a closing quote to terminate the string"
-        elif "unterminated scope selector" in message.lower():
-            suggestion = "Add a closing '>' to terminate the scope selector"
-        elif "missing closing" in message.lower():
-            suggestion = "Add the missing closing character"
-        elif "unexpected character" in message.lower():
-            suggestion = "Remove or replace the unexpected character"
-
-    return MDLLexerError(
-        message=message,
-        file_path=file_path,
-        line=line,
-        column=column,
-        line_content=line_content,
-        suggestion=suggestion
-    )
-
-
-def lex_mdl_js(source: str, source_file: str = None) -> List[Token]:
-    """Lex JavaScript-style MDL source code into tokens."""
-    lexer = MDLLexer(source_file)
-    return lexer.lex(source)