minecraft-datapack-language 15.4.28-py3-none-any.whl → 15.4.30-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. minecraft_datapack_language/__init__.py +23 -2
  2. minecraft_datapack_language/_version.py +2 -2
  3. minecraft_datapack_language/ast_nodes.py +87 -59
  4. minecraft_datapack_language/cli.py +276 -139
  5. minecraft_datapack_language/mdl_compiler.py +470 -0
  6. minecraft_datapack_language/mdl_errors.py +14 -0
  7. minecraft_datapack_language/mdl_lexer.py +624 -0
  8. minecraft_datapack_language/mdl_parser.py +573 -0
  9. minecraft_datapack_language-15.4.30.dist-info/METADATA +266 -0
  10. minecraft_datapack_language-15.4.30.dist-info/RECORD +17 -0
  11. minecraft_datapack_language/cli_build.py +0 -1292
  12. minecraft_datapack_language/cli_check.py +0 -155
  13. minecraft_datapack_language/cli_colors.py +0 -264
  14. minecraft_datapack_language/cli_help.py +0 -508
  15. minecraft_datapack_language/cli_new.py +0 -300
  16. minecraft_datapack_language/cli_utils.py +0 -276
  17. minecraft_datapack_language/expression_processor.py +0 -352
  18. minecraft_datapack_language/linter.py +0 -409
  19. minecraft_datapack_language/mdl_lexer_js.py +0 -754
  20. minecraft_datapack_language/mdl_parser_js.py +0 -1049
  21. minecraft_datapack_language/pack.py +0 -758
  22. minecraft_datapack_language-15.4.28.dist-info/METADATA +0 -1274
  23. minecraft_datapack_language-15.4.28.dist-info/RECORD +0 -25
  24. {minecraft_datapack_language-15.4.28.dist-info → minecraft_datapack_language-15.4.30.dist-info}/WHEEL +0 -0
  25. {minecraft_datapack_language-15.4.28.dist-info → minecraft_datapack_language-15.4.30.dist-info}/entry_points.txt +0 -0
  26. {minecraft_datapack_language-15.4.28.dist-info → minecraft_datapack_language-15.4.30.dist-info}/licenses/LICENSE +0 -0
  27. {minecraft_datapack_language-15.4.28.dist-info → minecraft_datapack_language-15.4.30.dist-info}/top_level.txt +0 -0
minecraft_datapack_language/mdl_lexer_js.py
@@ -1,754 +0,0 @@
- """
- MDL Lexer - Simplified JavaScript-style syntax with curly braces and semicolons
- Handles basic control structures and number variables only
- """
-
- import re
- from dataclasses import dataclass
- from typing import List, Optional
- from .mdl_errors import MDLLexerError
-
-
- @dataclass
- class Token:
-     type: str
-     value: str
-     line: int
-     column: int
-
-
- class TokenType:
-     # Keywords
-     PACK = "PACK"
-     NAMESPACE = "NAMESPACE"
-     FUNCTION = "FUNCTION"
-     VAR = "VAR"
-     NUM = "NUM"
-     SCOPE = "SCOPE"
-     IF = "IF"
-     ELSE = "ELSE"
-     ELSE_IF = "ELSE_IF"
-     WHILE = "WHILE"
-     ON_TICK = "ON_TICK"
-     ON_LOAD = "ON_LOAD"
-     TAG = "TAG"
-     ADD = "ADD"
-     RAW = "RAW"
-     RAW_START = "RAW_START"
-     RAW_END = "RAW_END"
-     EXECUTE = "EXECUTE"
-
-     # Registry types
-     RECIPE = "RECIPE"
-     LOOT_TABLE = "LOOT_TABLE"
-     ADVANCEMENT = "ADVANCEMENT"
-     PREDICATE = "PREDICATE"
-     ITEM_MODIFIER = "ITEM_MODIFIER"
-     STRUCTURE = "STRUCTURE"
-
-     # Operators
-     ASSIGN = "ASSIGN"
-     PLUS = "PLUS"
-     MINUS = "MINUS"
-     MULTIPLY = "MULTIPLY"
-     DIVIDE = "DIVIDE"
-     MODULO = "MODULO"
-     EQUAL = "EQUAL"
-     NOT_EQUAL = "NOT_EQUAL"
-     LESS = "LESS"
-     LESS_EQUAL = "LESS_EQUAL"
-     GREATER = "GREATER"
-     GREATER_EQUAL = "GREATER_EQUAL"
-     AND = "AND"
-     OR = "OR"
-
-     # Delimiters
-     SEMICOLON = "SEMICOLON"
-     COMMA = "COMMA"
-     LPAREN = "LPAREN"
-     RPAREN = "RPAREN"
-     LBRACE = "LBRACE"
-     RBRACE = "RBRACE"
-     LBRACKET = "LBRACKET"
-     RBRACKET = "RBRACKET"
-     LANGLE = "LANGLE"  # < for scope syntax
-     RANGLE = "RANGLE"  # > for scope syntax
-     DOT = "DOT"
-     COLON = "COLON"
-
-     # Literals
-     IDENTIFIER = "IDENTIFIER"
-     NUMBER = "NUMBER"
-     STRING = "STRING"
-
-     # Special
-     NEWLINE = "NEWLINE"
-     EOF = "EOF"
-     COMMENT = "COMMENT"
-     COMMAND = "COMMAND"
-     SAY = "SAY"  # Say command
-
-     # Variable substitution
-     VARIABLE_SUB = "VARIABLE_SUB"  # $variable$
-
-
- class MDLLexer:
-     """Lexer for simplified MDL language."""
-
-     def __init__(self, source_file: str = None):
-         self.tokens = []
-         self.current = 0
-         self.start = 0
-         self.line = 1
-         self.column = 1
-         self.source_file = source_file
-         self.in_raw_mode = False
-
-     def lex(self, source: str) -> List[Token]:
-         """Lex the source code into tokens."""
-         self.tokens = []
-         self.current = 0
-         self.start = 0
-         self.line = 1
-         self.column = 1
-         self.in_raw_mode = False
-
-         while self.current < len(source):
-             self.start = self.current
-             self._scan_token(source)
-
-         # Add EOF token
-         self.tokens.append(Token(TokenType.EOF, "", self.line, self.column))
-         return self.tokens
-
-     def _scan_token(self, source: str):
-         """Scan a single token."""
-         char = source[self.current]
-
-         # If in raw mode, scan raw text until we find raw!$
-         if self.in_raw_mode:
-             self._scan_raw_text(source)
-             return
-
-         # Handle whitespace and newlines
-         if char.isspace():
-             if char == '\n':
-                 self.line += 1
-                 self.column = 1
-             else:
-                 self.column += 1
-             self.current += 1
-             return
-
-         # Handle comments
-         if char == '/' and self.current + 1 < len(source) and source[self.current + 1] == '/':
-             self._scan_comment(source)
-             return
-
-         # Handle strings
-         if char in ['"', "'"]:
-             self._scan_string(source, char)
-             return
-
-         # Handle raw block markers and variable substitutions (before numbers to avoid conflicts)
-         if char == '$':
-             # Check if this is a raw block start marker
-             if (self.current + 4 < len(source) and
-                     source[self.current:self.current + 5] == '$!raw'):
-                 self._scan_raw_start(source)
-                 return
-             else:
-                 self._scan_variable_substitution(source)
-                 return
-
-         # Handle raw block end marker (only if not in raw mode)
-         if char == 'r' and not self.in_raw_mode:
-             # Check if this is a raw block end marker
-             if (self.current + 4 < len(source) and
-                     source[self.current:self.current + 5] == 'raw!$'):
-                 self._scan_raw_end(source)
-                 return
-
-         # Handle numbers
-         if char.isdigit():
-             self._scan_number(source)
-             return
-
-         # Handle identifiers and keywords
-         if char.isalpha() or char == '_':
-             # Special handling for 'say' command
-             if (char == 's' and
-                     self.current + 2 < len(source) and
-                     source[self.current:self.current + 3] == 'say'):
-                 self._scan_say_command(source)
-                 return
-             else:
-                 self._scan_identifier(source)
-                 return
-
-         # Handle operators and delimiters
-         self._scan_operator_or_delimiter(source)
-
-     def _scan_comment(self, source: str):
-         """Scan a comment."""
-         # Skip the //
-         self.current += 2
-         self.column += 2
-
-         # Scan until end of line or end of source
-         while (self.current < len(source) and
-                source[self.current] != '\n'):
-             self.current += 1
-             self.column += 1
-
-         # Don't add comment tokens to the output
-         # Comments are ignored during parsing
-
-     def _scan_string(self, source: str, quote_char: str):
-         """Scan a string literal."""
-         self.current += 1  # Skip opening quote
-         self.column += 1
-
-         start_column = self.column
-         start_line = self.line
-
-         while (self.current < len(source) and
-                source[self.current] != quote_char):
-             if source[self.current] == '\n':
-                 # Unterminated string - report error
-                 raise create_lexer_error(
-                     message=f"Unterminated string literal",
-                     file_path=self.source_file,
-                     line=start_line,
-                     column=start_column,
-                     line_content=source[start_line-1:start_line] if start_line <= len(source.split('\n')) else "",
-                     suggestion="Add a closing quote to terminate the string"
-                 )
-
-             if source[self.current] == '\\' and self.current + 1 < len(source):
-                 # Handle escape sequences
-                 self.current += 2
-                 self.column += 2
-             else:
-                 self.current += 1
-                 self.column += 1
-
-         if self.current >= len(source):
-             # Unterminated string at end of file
-             raise create_lexer_error(
-                 message=f"Unterminated string literal at end of file",
-                 file_path=self.source_file,
-                 line=start_line,
-                 column=start_column,
-                 line_content=source[start_line-1:start_line] if start_line <= len(source.split('\n')) else "",
-                 suggestion="Add a closing quote to terminate the string"
-             )
-
-         # Include the closing quote
-         self.current += 1
-         self.column += 1
-
-         text = source[self.start:self.current]
-         self.tokens.append(Token(TokenType.STRING, text, start_line, start_column))
-
-     def _scan_number(self, source: str):
-         """Scan a number literal."""
-         while (self.current < len(source) and
-                source[self.current].isdigit()):
-             self.current += 1
-             self.column += 1
-
-         # Check for decimal point
-         if (self.current < len(source) and
-                 source[self.current] == '.' and
-                 self.current + 1 < len(source) and
-                 source[self.current + 1].isdigit()):
-             self.current += 1  # consume the decimal point
-             self.column += 1
-
-             while (self.current < len(source) and
-                    source[self.current].isdigit()):
-                 self.current += 1
-                 self.column += 1
-
-         text = source[self.start:self.current]
-         self.tokens.append(Token(TokenType.NUMBER, text, self.line, self.column - len(text)))
-
-     def _scan_identifier(self, source: str):
-         """Scan an identifier or keyword."""
-         while (self.current < len(source) and
-                (source[self.current].isalnum() or source[self.current] == '_')):
-             self.current += 1
-             self.column += 1
-
-         text = source[self.start:self.current]
-
-         # Check if it's a keyword
-         keyword_map = {
-             'pack': TokenType.PACK,
-             'namespace': TokenType.NAMESPACE,
-             'function': TokenType.FUNCTION,
-             'var': TokenType.VAR,
-             'num': TokenType.NUM,
-             'scope': TokenType.SCOPE,
-             'if': TokenType.IF,
-             'else': TokenType.ELSE,
-             'while': TokenType.WHILE,
-             'on_tick': TokenType.ON_TICK,
-             'on_load': TokenType.ON_LOAD,
-             'tag': TokenType.TAG,
-             'add': TokenType.ADD,
-             'raw': TokenType.RAW,
-             'execute': TokenType.EXECUTE,
-             'recipe': TokenType.RECIPE,
-             'loot_table': TokenType.LOOT_TABLE,
-             'advancement': TokenType.ADVANCEMENT,
-             'predicate': TokenType.PREDICATE,
-             'item_modifier': TokenType.ITEM_MODIFIER,
-             'structure': TokenType.STRUCTURE
-         }
-
-         token_type = keyword_map.get(text.lower(), TokenType.IDENTIFIER)
-
-         # Note: Special handling for say and execute commands is done in _scan_token
-         # to avoid duplicate processing
-         self.tokens.append(Token(token_type, text, self.line, self.column - len(text)))
-
-     def _scan_variable_substitution(self, source: str):
-         """Scan variable substitution ($variable$)."""
-         self.current += 1  # Skip the $
-         self.column += 1
-
-         scope_selector = None
-         variable_name = ""
-
-         # Scan the variable name
-         while (self.current < len(source) and
-                (source[self.current].isalnum() or source[self.current] == '_')):
-             variable_name += source[self.current]
-             self.current += 1
-             self.column += 1
-
-         # Validate variable name starts with letter or underscore
-         if variable_name and (variable_name[0].isdigit()):
-             raise create_lexer_error(
-                 message=f"Invalid variable name '{variable_name}' - cannot start with a digit",
-                 file_path=self.source_file,
-                 line=self.line,
-                 column=self.column - len(variable_name),
-                 line_content=source[self.line-1:self.line] if self.line <= len(source.split('\n')) else "",
-                 suggestion="Variable names must start with a letter or underscore"
-             )
-
-         # Check for scope selector after variable name
-         if (self.current < len(source) and
-                 source[self.current] == '<'):
-             self.current += 1  # consume <
-             self.column += 1
-
-             scope_start = self.current
-             bracket_count = 1
-
-             # Scan until we find the matching closing >
-             while (self.current < len(source) and bracket_count > 0):
-                 if source[self.current] == '<':
-                     bracket_count += 1
-                 elif source[self.current] == '>':
-                     bracket_count -= 1
-                 self.current += 1
-                 self.column += 1
-
-             if bracket_count == 0:
-                 # Successfully found closing >
-                 scope_selector = source[scope_start:self.current-1]  # Exclude the closing >
-             else:
-                 # Unterminated scope selector - report error
-                 raise create_lexer_error(
-                     message="Unterminated scope selector in variable substitution",
-                     file_path=self.source_file,
-                     line=self.line,
-                     column=self.column - (self.current - self.start),
-                     suggestion="Add a closing '>' to terminate the scope selector"
-                 )
-
-         # Check for closing $
-         if (self.current < len(source) and
-                 source[self.current] == '$'):
-             self.current += 1
-             self.column += 1
-
-             # If we have a scope selector, include it in the token
-             if scope_selector:
-                 variable_name = f"{variable_name}<{scope_selector}>"
-
-             self.tokens.append(Token(TokenType.VARIABLE_SUB, variable_name, self.line, self.column - len(variable_name) - 2))
-         else:
-             # Not a valid variable substitution - report error
-             raise create_lexer_error(
-                 message="Invalid variable substitution - missing closing '$'",
-                 file_path=self.source_file,
-                 line=self.line,
-                 column=self.column - (self.current - self.start),
-                 suggestion="Add a closing '$' to complete the variable substitution"
-             )
-
-     def _scan_raw_start(self, source: str):
-         """Scan raw block start marker ($!raw)."""
-         # Check if we're already in raw mode (nested raw blocks are not allowed)
-         if self.in_raw_mode:
-             raise create_lexer_error(
-                 message="Nested raw blocks are not allowed",
-                 file_path=self.source_file,
-                 line=self.line,
-                 column=self.column,
-                 line_content=source[self.line-1:self.line] if self.line <= len(source.split('\n')) else "",
-                 suggestion="Close the current raw block with 'raw!$' before starting a new one"
-             )
-
-         # Consume the $!raw
-         self.current += 5
-         self.column += 5
-
-         text = source[self.start:self.current]
-         self.tokens.append(Token(TokenType.RAW_START, text, self.line, self.column - len(text)))
-
-         # Enter raw mode
-         self.in_raw_mode = True
-
-     def _scan_raw_end(self, source: str):
-         """Scan raw block end marker (raw!$)."""
-         # Consume the raw!$
-         self.current += 5
-         self.column += 5
-
-         text = source[self.start:self.current]
-         self.tokens.append(Token(TokenType.RAW_END, text, self.line, self.column - len(text)))
-
-         # Exit raw mode
-         self.in_raw_mode = False
-
-     def _scan_raw_text(self, source: str):
-         """Scan raw text content between $!raw and raw!$."""
-         content_parts = []
-         raw_start_line = self.line
-         raw_start_column = self.column
-
-         while self.current < len(source):
-             char = source[self.current]
-
-             # Check if we've reached the end of the raw block
-             if (char == 'r' and
-                     self.current + 4 < len(source) and
-                     source[self.current:self.current + 5] == 'raw!$'):
-                 # Consume the end marker and exit raw mode
-                 self.current += 5
-                 self.column += 5
-                 self.in_raw_mode = False
-
-                 # Add the raw content as a single RAW token
-                 content = ''.join(content_parts)
-                 if content.strip():  # Only add non-empty content
-                     # Keep newlines for proper command splitting, but trim leading/trailing whitespace
-                     clean_content = content.rstrip().lstrip()
-                     self.tokens.append(Token(TokenType.RAW, clean_content, raw_start_line, raw_start_column))
-
-                 # Add the end marker token
-                 self.tokens.append(Token(TokenType.RAW_END, 'raw!$', self.line, self.column - 5))
-                 return
-
-             # Add character to content
-             content_parts.append(char)
-
-             # Update position
-             if char == '\n':
-                 self.line += 1
-                 self.column = 1
-             else:
-                 self.column += 1
-             self.current += 1
-
-         # Check if we reached the end of source without finding the end marker
-         if self.current >= len(source):
-             # Unterminated raw block - report error
-             raise create_lexer_error(
-                 message="Unterminated raw block - missing 'raw!$' end marker",
-                 file_path=self.source_file,
-                 line=raw_start_line,
-                 column=raw_start_column,
-                 line_content=source[raw_start_line-1:raw_start_line] if raw_start_line <= len(source.split('\n')) else "",
-                 suggestion="Add 'raw!$' to terminate the raw block"
-             )
-
-     def _scan_say_command(self, source: str):
-         """Scan a say command and its content until semicolon."""
-         print(f"DEBUG: _scan_say_command called at position {self.current}, char: {source[self.current:self.current+10]}")
-
-         # Consume 'say'
-         self.current += 3
-         self.column += 3
-
-         say_start_line = self.line
-         say_start_column = self.column
-
-         # Skip whitespace after 'say'
-         while (self.current < len(source) and
-                source[self.current].isspace()):
-             if source[self.current] == '\n':
-                 self.line += 1
-                 self.column = 1
-             else:
-                 self.column += 1
-             self.current += 1
-
-         print(f"DEBUG: After whitespace, position {self.current}, char: {source[self.current:self.current+20]}")
-
-         # Scan content until we find a semicolon, but preserve $variable$ syntax
-         content_parts = []
-         while self.current < len(source):
-             char = source[self.current]
-
-             if char == ';':
-                 # Found the end of the say command
-                 break
-
-             # Check for variable substitution syntax
-             if char == '$':
-                 print(f"DEBUG: Found $ at position {self.current}")
-                 # This might be the start of a variable substitution
-                 # Look ahead to see if it's a valid variable name
-                 temp_current = self.current + 1
-                 temp_column = self.column + 1
-                 variable_name = ""
-
-                 # Scan potential variable name
-                 while (temp_current < len(source) and
-                        (source[temp_current].isalnum() or source[temp_current] == '_')):
-                     variable_name += source[temp_current]
-                     temp_current += 1
-
-                 print(f"DEBUG: Variable name: '{variable_name}', next char: '{source[temp_current] if temp_current < len(source) else 'EOF'}'")
-
-                 # Check if we have a valid variable substitution
-                 if (variable_name and
-                         temp_current < len(source) and
-                         source[temp_current] == '$' and
-                         (not variable_name[0].isdigit())):
-                     print(f"DEBUG: Valid variable substitution: ${variable_name}$")
-                     # This is a valid variable substitution, preserve the $variable$ syntax
-                     content_parts.append(char)
-                     content_parts.append(variable_name)
-                     content_parts.append('$')
-
-                     # Update position
-                     self.current = temp_current + 1
-                     self.column = temp_current + 1
-                     continue
-
-             # Add character to content
-             content_parts.append(char)
-
-             # Update position
-             if char == '\n':
-                 self.line += 1
-                 self.column = 1
-             else:
-                 self.column += 1
-             self.current += 1
-
-         if self.current >= len(source):
-             # Unterminated say command - report error
-             raise create_lexer_error(
-                 message="Unterminated say command - missing semicolon",
-                 file_path=self.source_file,
-                 line=say_start_line,
-                 column=say_start_column,
-                 line_content=source[say_start_line-1:say_start_line] if say_start_line <= len(source.split('\n')) else "",
-                 suggestion="Add a semicolon (;) to terminate the say command"
-             )
-
-         # Consume the semicolon and add it as a separate token
-         self.current += 1
-         self.column += 1
-
-         # Create the say command token with content (without semicolon)
-         content = ''.join(content_parts).strip()
-         print(f"DEBUG: Final say command content: '{content}'")
-         self.tokens.append(Token(TokenType.SAY, content, say_start_line, say_start_column))
-
-         # Add the semicolon as a separate token
-         self.tokens.append(Token(TokenType.SEMICOLON, ';', self.line, self.column))
-
-     def _scan_execute_command(self, source: str):
-         """Scan an execute command and its content until semicolon."""
-         # Consume 'execute'
-         self.current += 7
-         self.column += 7
-
-         execute_start_line = self.line
-         execute_start_column = self.column
-
-         # Skip whitespace after 'execute'
-         while (self.current < len(source) and
-                source[self.current].isspace()):
-             if source[self.current] == '\n':
-                 self.line += 1
-                 self.column = 1
-             else:
-                 self.column += 1
-             self.current += 1
-
-         # Scan content until we find a semicolon
-         content_parts = []
-         while self.current < len(source):
-             char = source[self.current]
-
-             if char == ';':
-                 # Found the end of the execute command
-                 break
-
-             # Add character to content
-             content_parts.append(char)
-
-             # Update position
-             if char == '\n':
-                 self.line += 1
-                 self.column = 1
-             else:
-                 self.column += 1
-             self.current += 1
-
-         if self.current >= len(source):
-             # Unterminated execute command - report error
-             raise create_lexer_error(
-                 message="Unterminated execute command - missing semicolon",
-                 file_path=self.source_file,
-                 line=execute_start_line,
-                 column=execute_start_column,
-                 line_content=source[execute_start_line-1:execute_start_line] if execute_start_line <= len(source.split('\n')) else "",
-                 suggestion="Add a semicolon (;) to terminate the execute command"
-             )
-
-         # Consume the semicolon
-         self.current += 1
-         self.column += 1
-
-         # Create the execute command token with full content
-         content = ''.join(content_parts).strip()
-         full_command = f"execute {content};"
-         self.tokens.append(Token(TokenType.EXECUTE, full_command, execute_start_line, execute_start_column))
-
-     def _is_inside_control_structure(self, source: str) -> bool:
-         """Check if we're inside a control structure (if/while)."""
-         # Look backwards to see if we're inside a control structure
-         # This is a simplified check - in a more robust implementation,
-         # we would track the parsing context more carefully
-
-         # For now, let's be conservative and only apply special handling
-         # when we're clearly at the top level
-         brace_count = 0
-         for i in range(self.current):
-             if source[i] == '{':
-                 brace_count += 1
-             elif source[i] == '}':
-                 brace_count -= 1
-
-         # If we're inside braces, we're likely in a control structure
-         return brace_count > 0
-
-     def _scan_operator_or_delimiter(self, source: str):
-         """Scan operators and delimiters."""
-         char = source[self.current]
-         next_char = source[self.current + 1] if self.current + 1 < len(source) else None
-
-         # Two-character operators
-         if next_char:
-             two_char = char + next_char
-             if two_char in ['==', '!=', '<=', '>=', '&&', '||']:
-                 self.current += 2
-                 self.column += 2
-
-                 operator_map = {
-                     '==': TokenType.EQUAL,
-                     '!=': TokenType.NOT_EQUAL,
-                     '<=': TokenType.LESS_EQUAL,
-                     '>=': TokenType.GREATER_EQUAL,
-                     '&&': TokenType.AND,
-                     '||': TokenType.OR,
-                 }
-
-                 self.tokens.append(Token(operator_map[two_char], two_char, self.line, self.column - 2))
-                 return
-
-         # Single-character operators and delimiters
-         self.current += 1
-         self.column += 1
-
-         operator_map = {
-             '=': TokenType.ASSIGN,
-             '+': TokenType.PLUS,
-             '-': TokenType.MINUS,
-             '*': TokenType.MULTIPLY,
-             '/': TokenType.DIVIDE,
-             '%': TokenType.MODULO,
-             '<': TokenType.LANGLE,  # Use LANGLE for scope syntax, handle LESS in context
-             '>': TokenType.RANGLE,  # Use RANGLE for scope syntax, handle GREATER in context
-             ';': TokenType.SEMICOLON,
-             ',': TokenType.COMMA,
-             '(': TokenType.LPAREN,
-             ')': TokenType.RPAREN,
-             '{': TokenType.LBRACE,
-             '}': TokenType.RBRACE,
-             '[': TokenType.LBRACKET,
-             ']': TokenType.RBRACKET,
-             '.': TokenType.DOT,
-             ':': TokenType.COLON,
-             '!': TokenType.RAW,  # Allow exclamation marks in text
-             '?': TokenType.RAW,  # Allow question marks in text
-             '@': TokenType.RAW,  # Allow @ for player selectors
-             '#': TokenType.RAW,  # Allow # for tags
-             '~': TokenType.RAW,  # Allow ~ for relative coordinates
-             '^': TokenType.RAW,  # Allow ^ for relative coordinates
-         }
-
-         if char in operator_map:
-             self.tokens.append(Token(operator_map[char], char, self.line, self.column - 1))
-         else:
-             # Unknown character - report error
-             raise create_lexer_error(
-                 message=f"Unexpected character '{char}'",
-                 file_path=self.source_file,
-                 line=self.line,
-                 column=self.column - 1,
-                 line_content=source[self.line-1:self.line] if self.line <= len(source.split('\n')) else "",
-                 suggestion=f"Remove or replace the unexpected character '{char}'"
-             )
-
-
- def create_lexer_error(message: str, file_path: Optional[str] = None,
-                        line: Optional[int] = None, column: Optional[int] = None,
-                        line_content: Optional[str] = None, suggestion: Optional[str] = None) -> MDLLexerError:
-     """Create a lexer error with common suggestions."""
-     if not suggestion:
-         if "unterminated string" in message.lower():
-             suggestion = "Add a closing quote to terminate the string"
-         elif "unterminated scope selector" in message.lower():
-             suggestion = "Add a closing '>' to terminate the scope selector"
-         elif "missing closing" in message.lower():
-             suggestion = "Add the missing closing character"
-         elif "unexpected character" in message.lower():
-             suggestion = "Remove or replace the unexpected character"
-
-     return MDLLexerError(
-         message=message,
-         file_path=file_path,
-         line=line,
-         column=column,
-         line_content=line_content,
-         suggestion=suggestion
-     )
-
-
- def lex_mdl_js(source: str, source_file: str = None) -> List[Token]:
-     """Lex JavaScript-style MDL source code into tokens."""
-     lexer = MDLLexer(source_file)
-     return lexer.lex(source)