minecraft-datapack-language 15.4.27__py3-none-any.whl → 15.4.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. minecraft_datapack_language/__init__.py +17 -2
  2. minecraft_datapack_language/_version.py +2 -2
  3. minecraft_datapack_language/ast_nodes.py +87 -59
  4. minecraft_datapack_language/mdl_compiler.py +470 -0
  5. minecraft_datapack_language/mdl_errors.py +14 -0
  6. minecraft_datapack_language/mdl_lexer.py +624 -0
  7. minecraft_datapack_language/mdl_parser.py +573 -0
  8. minecraft_datapack_language-15.4.29.dist-info/METADATA +266 -0
  9. minecraft_datapack_language-15.4.29.dist-info/RECORD +16 -0
  10. minecraft_datapack_language/cli.py +0 -159
  11. minecraft_datapack_language/cli_build.py +0 -1292
  12. minecraft_datapack_language/cli_check.py +0 -155
  13. minecraft_datapack_language/cli_colors.py +0 -264
  14. minecraft_datapack_language/cli_help.py +0 -508
  15. minecraft_datapack_language/cli_new.py +0 -300
  16. minecraft_datapack_language/cli_utils.py +0 -276
  17. minecraft_datapack_language/expression_processor.py +0 -352
  18. minecraft_datapack_language/linter.py +0 -409
  19. minecraft_datapack_language/mdl_lexer_js.py +0 -754
  20. minecraft_datapack_language/mdl_parser_js.py +0 -1049
  21. minecraft_datapack_language/pack.py +0 -758
  22. minecraft_datapack_language-15.4.27.dist-info/METADATA +0 -1274
  23. minecraft_datapack_language-15.4.27.dist-info/RECORD +0 -25
  24. {minecraft_datapack_language-15.4.27.dist-info → minecraft_datapack_language-15.4.29.dist-info}/WHEEL +0 -0
  25. {minecraft_datapack_language-15.4.27.dist-info → minecraft_datapack_language-15.4.29.dist-info}/entry_points.txt +0 -0
  26. {minecraft_datapack_language-15.4.27.dist-info → minecraft_datapack_language-15.4.29.dist-info}/licenses/LICENSE +0 -0
  27. {minecraft_datapack_language-15.4.27.dist-info → minecraft_datapack_language-15.4.29.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,624 @@
1
+ """
2
+ MDL Lexer - Clean, extensible lexer for Minecraft Datapack Language
3
+ Fully supports the language specification defined in language-reference.md
4
+ """
5
+
6
+ import re
7
+ from dataclasses import dataclass
8
+ from typing import List, Optional, Dict, Any
9
+ from .mdl_errors import MDLLexerError
10
+
11
+
12
@dataclass
class Token:
    """A single lexical token: its type tag, raw text, and source position."""

    type: str      # one of the TokenType string constants
    value: str     # the exact source text of the token
    line: int      # 1-based line where the token starts
    column: int    # 1-based column where the token starts

    def __repr__(self) -> str:
        # Compact debug form, e.g. Token(NUMBER, '42', line=3, col=7).
        return "Token({}, '{}', line={}, col={})".format(
            self.type, self.value, self.line, self.column
        )
22
+
23
+
24
class TokenType:
    """All possible token types in the MDL language.

    Values are plain string constants (not an Enum), so tokens stay
    trivially printable and comparable by string equality.  New token
    types can be added here without touching the scanner's dispatch.
    """

    # Keywords (Reserved Words)
    PACK = "PACK"
    NAMESPACE = "NAMESPACE"
    FUNCTION = "FUNCTION"
    VAR = "VAR"
    NUM = "NUM"
    IF = "IF"
    ELSE = "ELSE"
    WHILE = "WHILE"
    ON_LOAD = "ON_LOAD"
    ON_TICK = "ON_TICK"
    EXEC = "EXEC"
    TAG = "TAG"

    # Tag Types (Resource Categories)
    RECIPE = "RECIPE"
    LOOT_TABLE = "LOOT_TABLE"
    ADVANCEMENT = "ADVANCEMENT"
    ITEM_MODIFIER = "ITEM_MODIFIER"
    PREDICATE = "PREDICATE"
    STRUCTURE = "STRUCTURE"

    # Operators
    PLUS = "PLUS"                    # +
    MINUS = "MINUS"                  # -
    MULTIPLY = "MULTIPLY"            # *
    DIVIDE = "DIVIDE"                # /
    ASSIGN = "ASSIGN"                # =
    EQUAL = "EQUAL"                  # ==
    NOT_EQUAL = "NOT_EQUAL"          # !=
    GREATER = "GREATER"              # >
    LESS = "LESS"                    # <
    GREATER_EQUAL = "GREATER_EQUAL"  # >=
    LESS_EQUAL = "LESS_EQUAL"        # <=

    # Delimiters
    SEMICOLON = "SEMICOLON"  # ;
    COMMA = "COMMA"          # ,
    COLON = "COLON"          # :

    # Brackets and Braces
    LPAREN = "LPAREN"      # (
    RPAREN = "RPAREN"      # )
    LBRACE = "LBRACE"      # {
    RBRACE = "RBRACE"      # }
    LBRACKET = "LBRACKET"  # [
    RBRACKET = "RBRACKET"  # ]
    LANGLE = "LANGLE"      # < (for scope syntax)
    RANGLE = "RANGLE"      # > (for scope syntax)

    # Special Tokens
    DOLLAR = "DOLLAR"            # $ (variable substitution)
    QUOTE = "QUOTE"              # " (string literal delimiter)
    EXCLAMATION = "EXCLAMATION"  # ! (for raw blocks)
    RANGE = "RANGE"              # .. (range operator)

    # Literals
    IDENTIFIER = "IDENTIFIER"  # Variable names, function names, etc.
    NUMBER = "NUMBER"          # Numbers (integers and floats)

    # Special
    NEWLINE = "NEWLINE"
    EOF = "EOF"
    COMMENT = "COMMENT"          # Comments (ignored during parsing)
    RAW_CONTENT = "RAW_CONTENT"  # Raw content inside raw blocks
92
+
93
+
94
class MDLLexer:
    """
    Clean, extensible lexer for the MDL language.

    Converts MDL source text into a flat list of ``Token`` objects via
    single-pass, character-at-a-time scanning.  The lexer is stateful but
    reusable: ``lex()`` resets all state before each run.  One mode flag
    (``in_raw_mode``) switches the scanner between normal tokenization and
    verbatim capture of ``$!raw ... raw!$`` block content.

    Features:
    - Full support for all language constructs defined in the spec
    - Clean, readable code structure
    - Easy to extend with new token types
    - Comprehensive error handling
    - Efficient tokenization with minimal memory usage
    """

    def __init__(self, source_file: Optional[str] = None):
        # source_file is used only for error reporting (file path in messages).
        self.source_file = source_file
        self.reset()

    def reset(self) -> None:
        """Reset the lexer state so the instance can be reused."""
        self.tokens: List[Token] = []   # accumulated output tokens
        self.current: int = 0           # index of the next unconsumed character
        self.start: int = 0             # start index of the token being scanned
        self.line: int = 1              # 1-based current line
        self.column: int = 1            # 1-based current column
        self.in_raw_mode: bool = False  # True while inside a $!raw ... raw!$ block
        self.source: str = ""

    def lex(self, source: str) -> List[Token]:
        """
        Lex the source code into tokens.

        Args:
            source: The source code string to tokenize

        Returns:
            List of Token objects representing the source code,
            always terminated by a single EOF token.

        Raises:
            MDLLexerError: If there's a lexical error in the source code
        """
        self.reset()
        self.source = source

        while self.current < len(source):
            # Each scan starts a fresh token; helpers read self.start to
            # slice the token text back out of self.source.
            self.start = self.current
            self._scan_token()

        # Add EOF token
        self.tokens.append(Token(TokenType.EOF, "", self.line, self.column))
        return self.tokens

    def _scan_token(self) -> None:
        """Scan a single token from the source (dispatch on the next char)."""
        if self.current >= len(self.source):
            return

        char = self.source[self.current]

        # Raw mode bypasses all normal rules: capture verbatim until raw!$.
        if self.in_raw_mode:
            self._scan_raw_text()
            return

        # Handle whitespace and newlines (consumed, no token emitted)
        if char.isspace():
            self._scan_whitespace()
            return

        # Handle comments (consumed, no token emitted)
        if char == '/' and self._peek(1) == '/':
            self._scan_single_line_comment()
            return

        if char == '/' and self._peek(1) == '*':
            self._scan_multi_line_comment()
            return

        # Handle strings (quotes)
        if char == '"':
            self._scan_string()
            return

        # Handle raw block markers ($!raw) — must be checked before the
        # plain '$' variable-substitution case below.
        if char == '$' and self._peek(1) == '!' and self._peek(2) == 'r':
            if self._peek(3) == 'a' and self._peek(4) == 'w':
                self._scan_raw_block_start()
                return

        # Handle variable substitution ($name<scope>$)
        if char == '$':
            self._scan_variable_substitution()
            return

        # Handle numbers
        if char.isdigit():
            self._scan_number()
            return

        # Handle identifiers and keywords
        if char.isalpha() or char == '_':
            self._scan_identifier()
            return

        # Handle @ selectors (like @s, @a, @e[type=armor_stand])
        if char == '@':
            self._scan_selector()
            return

        # Handle scope selectors (<@s>, <@a[team=red]>, etc.)
        if char == '<':
            # Check if this is a scope selector (followed by @ or identifier)
            # NOTE(review): this means a comparison written without spaces,
            # e.g. `x<y`, is lexed as a scope selector rather than LESS —
            # confirm the grammar guarantees this cannot occur.
            if (self.current + 1 < len(self.source) and
                (self.source[self.current + 1] == '@' or
                 self.source[self.current + 1].isalpha() or
                 self.source[self.current + 1] == '_')):
                self._scan_scope_selector()
                return
            # Otherwise, treat as LESS operator (handled by _scan_operator_or_delimiter)

        # Handle operators and delimiters
        self._scan_operator_or_delimiter()

    def _scan_whitespace(self) -> None:
        """Consume a run of whitespace, updating line/column bookkeeping."""
        while (self.current < len(self.source) and
               self.source[self.current].isspace()):
            char = self.source[self.current]
            if char == '\n':
                self.line += 1
                self.column = 1
            else:
                self.column += 1
            self.current += 1

    def _scan_single_line_comment(self) -> None:
        """Scan a single-line comment (// ...)."""
        # Skip //
        self.current += 2
        self.column += 2

        # Scan until end of line or end of source; the trailing newline is
        # left for _scan_whitespace so line counting stays in one place.
        while (self.current < len(self.source) and
               self.source[self.current] != '\n'):
            self.current += 1
            self.column += 1

        # Comments are ignored - no token generated

    def _scan_multi_line_comment(self) -> None:
        """Scan a multi-line comment (/* ... */)."""
        # Skip /*
        self.current += 2
        self.column += 2

        # Scan until we find */
        while (self.current < len(self.source) - 1):
            if (self.source[self.current] == '*' and
                self.source[self.current + 1] == '/'):
                self.current += 2
                self.column += 2
                return

            if self.source[self.current] == '\n':
                self.line += 1
                self.column = 1
            else:
                self.column += 1
            self.current += 1

        # Unterminated comment (loop only exits without return on EOF)
        self._error("Unterminated multi-line comment", "Add */ to close the comment")

    def _scan_string(self) -> None:
        """Scan a string literal (quoted text).

        Emits three tokens: QUOTE, IDENTIFIER (the content, escapes kept
        verbatim), QUOTE.  Strings may not span lines.
        """
        # Skip opening quote
        self.current += 1
        self.column += 1

        start_line = self.line
        start_column = self.column

        # Scan until closing quote
        while (self.current < len(self.source) and
               self.source[self.current] != '"'):
            if self.source[self.current] == '\n':
                self._error("Unterminated string literal", "Add a closing quote")

            if self.source[self.current] == '\\' and self.current + 1 < len(self.source):
                # Handle escape sequences: skip the backslash and the
                # escaped character together (so \" does not end the string).
                self.current += 2
                self.column += 2
            else:
                self.current += 1
                self.column += 1

        if self.current >= len(self.source):
            self._error("Unterminated string literal at end of file", "Add a closing quote")

        # Include closing quote
        self.current += 1
        self.column += 1

        # Generate QUOTE token for the opening quote
        self.tokens.append(Token(TokenType.QUOTE, '"', start_line, start_column))

        # Generate IDENTIFIER token for the string content
        # (self.start still points at the opening quote set by lex()).
        string_content = self.source[self.start + 1:self.current - 1]
        self.tokens.append(Token(TokenType.IDENTIFIER, string_content, start_line, start_column + 1))

        # Generate QUOTE token for the closing quote
        self.tokens.append(Token(TokenType.QUOTE, '"', self.line, self.column - 1))

    def _scan_raw_block_start(self) -> None:
        """Scan the start of a raw block ($!raw) and enter raw mode."""
        # Consume $!raw
        self.current += 5
        self.column += 5

        # Generate tokens: $ ! raw
        self.tokens.append(Token(TokenType.DOLLAR, "$", self.line, self.column - 5))
        self.tokens.append(Token(TokenType.EXCLAMATION, "!", self.line, self.column - 4))
        self.tokens.append(Token(TokenType.IDENTIFIER, "raw", self.line, self.column - 3))

        self.in_raw_mode = True

    def _scan_raw_text(self) -> None:
        """Scan raw text inside a raw block, up to the closing raw!$."""
        # Remember where the raw content starts
        content_start = self.current

        # Consume all characters until we find raw!$
        while self.current < len(self.source) - 4:
            if (self.source[self.current:self.current + 5] == 'raw!$'):
                # Found the end marker - extract the content
                raw_content = self.source[content_start:self.current]

                # Generate a single RAW_CONTENT token with all the content
                # NOTE(review): the recorded line/column here is the END of
                # the content, not its start — confirm error reporting
                # downstream does not rely on it being the start position.
                self.tokens.append(Token(TokenType.RAW_CONTENT, raw_content, self.line, self.column))

                # Consume the end marker and exit raw mode
                self.current += 5
                self.column += 5
                self.in_raw_mode = False

                # Generate tokens for the end marker: raw ! $
                self.tokens.append(Token(TokenType.IDENTIFIER, "raw", self.line, self.column - 5))
                self.tokens.append(Token(TokenType.EXCLAMATION, "!", self.line, self.column - 2))
                self.tokens.append(Token(TokenType.DOLLAR, "$", self.line, self.column - 1))
                return

            if self.source[self.current] == '\n':
                self.line += 1
                self.column = 1
            else:
                self.column += 1
            self.current += 1

        # If we didn't find the end marker, it's an error
        if self.current >= len(self.source) - 4:
            self._error("Unterminated raw block", "Add 'raw!$' to close the raw block")

    def _scan_variable_substitution(self) -> None:
        """Scan variable substitution ($variable<scope>$)."""
        # Skip opening $
        self.current += 1
        self.column += 1

        # Generate DOLLAR token
        self.tokens.append(Token(TokenType.DOLLAR, "$", self.line, self.column - 1))

        # Scan variable name (start from current position, not from start)
        # NOTE(review): _scan_identifier applies the keyword map, so a name
        # that collides with a keyword (e.g. $if$) emits an IF token, not
        # IDENTIFIER — confirm the parser expects this inside substitutions.
        self.start = self.current
        self._scan_identifier()

        # Check for scope selector
        if (self.current < len(self.source) and
            self.source[self.current] == '<'):
            self._scan_scope_selector()

        # Check for closing $
        if (self.current < len(self.source) and
            self.source[self.current] == '$'):
            self.current += 1
            self.column += 1
            self.tokens.append(Token(TokenType.DOLLAR, "$", self.line, self.column - 1))
        else:
            self._error("Unterminated variable substitution", "Add $ to close the variable substitution")

    def _scan_selector(self) -> None:
        """Scan a selector (@s, @a, @e[type=armor_stand], etc.)."""
        # Consume @ (no token is emitted for the @ itself)
        self.current += 1
        self.column += 1

        # Scan selector identifier
        self._scan_identifier()

        # Check for bracket parameters
        if (self.current < len(self.source) and
            self.source[self.current] == '['):
            self._scan_selector_parameters()

    def _scan_selector_parameters(self) -> None:
        """Scan selector parameters in brackets.

        The whole bracket body is emitted as one IDENTIFIER token between
        LBRACKET/RBRACKET; nested brackets are balanced, not tokenized.
        """
        # Consume [
        self.current += 1
        self.column += 1

        # Generate LBRACKET token
        self.tokens.append(Token(TokenType.LBRACKET, "[", self.line, self.column - 1))

        # Remember where the parameters start (after the opening [)
        param_start = self.current

        # Scan until we find the matching ]
        bracket_count = 1
        while (self.current < len(self.source) and bracket_count > 0):
            if self.source[self.current] == '[':
                bracket_count += 1
            elif self.source[self.current] == ']':
                bracket_count -= 1

            if bracket_count > 0:
                if self.source[self.current] == '\n':
                    self.line += 1
                    self.column = 1
                else:
                    self.column += 1
                self.current += 1

        if bracket_count == 0:
            # Successfully found closing ]
            # Generate IDENTIFIER token for the entire parameter content
            # NOTE(review): column arithmetic assumes the parameters are on
            # one line; multi-line parameters yield a misleading column.
            param_content = self.source[param_start:self.current]
            self.tokens.append(Token(TokenType.IDENTIFIER, param_content, self.line, self.column - len(param_content)))

            # Generate RBRACKET token
            self.current += 1
            self.column += 1
            self.tokens.append(Token(TokenType.RBRACKET, "]", self.line, self.column - 1))
        else:
            # Unterminated selector parameters
            self._error("Unterminated selector parameters", "Add ] to close the selector parameters")

    def _scan_scope_selector(self) -> None:
        """Scan a scope selector (<@s>, <@a[team=red]>, etc.)."""
        # Consume <
        self.current += 1
        self.column += 1

        # Generate LANGLE token
        self.tokens.append(Token(TokenType.LANGLE, "<", self.line, self.column - 1))

        # Scan selector content - this could be @s, @a[team=red], etc.
        if (self.current < len(self.source) and
            self.source[self.current] == '@'):
            # Handle @ selector - start from current position
            self.start = self.current
            self._scan_selector()
        else:
            # Handle other identifier - start from current position
            self.start = self.current
            self._scan_identifier()

        # Consume >
        if (self.current < len(self.source) and
            self.source[self.current] == '>'):
            self.current += 1
            self.column += 1
            self.tokens.append(Token(TokenType.RANGLE, ">", self.line, self.column - 1))
        else:
            self._error("Unterminated scope selector", "Add > to close the scope selector")

    def _scan_number(self) -> None:
        """Scan a number literal (integer or float)."""
        # Scan integer part
        while (self.current < len(self.source) and
               self.source[self.current].isdigit()):
            self.current += 1
            self.column += 1

        # Check for decimal point — only consumed when followed by a digit,
        # so `1..5` lexes as NUMBER(1) RANGE NUMBER(5), not a malformed float.
        if (self.current < len(self.source) and
            self.source[self.current] == '.' and
            self.current + 1 < len(self.source) and
            self.source[self.current + 1].isdigit()):
            self.current += 1  # consume decimal point
            self.column += 1

            # Scan fractional part
            while (self.current < len(self.source) and
                   self.source[self.current].isdigit()):
                self.current += 1
                self.column += 1

        number_text = self.source[self.start:self.current]
        self.tokens.append(Token(TokenType.NUMBER, number_text, self.line, self.column - len(number_text)))

    def _scan_identifier(self) -> None:
        """Scan an identifier or keyword (slice begins at self.start)."""
        # Scan identifier characters
        while (self.current < len(self.source) and
               (self.source[self.current].isalnum() or
                self.source[self.current] == '_')):
            self.current += 1
            self.column += 1

        identifier_text = self.source[self.start:self.current]

        # Check if it's a keyword
        token_type = self._get_keyword_type(identifier_text)

        self.tokens.append(Token(token_type, identifier_text, self.line, self.column - len(identifier_text)))

    def _scan_operator_or_delimiter(self) -> None:
        """Scan operators and delimiters (two-char forms take precedence)."""
        char = self.source[self.current]

        # Handle two-character operators first
        if self.current + 1 < len(self.source):
            two_char = self.source[self.current:self.current + 2]

            if two_char in ['==', '!=', '>=', '<=', '..']:
                self.current += 2
                self.column += 2

                token_type = {
                    '==': TokenType.EQUAL,
                    '!=': TokenType.NOT_EQUAL,
                    '>=': TokenType.GREATER_EQUAL,
                    '<=': TokenType.LESS_EQUAL,
                    '..': TokenType.RANGE
                }[two_char]

                self.tokens.append(Token(token_type, two_char, self.line, self.column - 2))
                return

        # Handle single-character operators and delimiters
        token_map = {
            '+': TokenType.PLUS,
            '-': TokenType.MINUS,
            '*': TokenType.MULTIPLY,
            '/': TokenType.DIVIDE,
            '=': TokenType.ASSIGN,
            '>': TokenType.GREATER,
            '<': TokenType.LESS,
            ';': TokenType.SEMICOLON,
            ',': TokenType.COMMA,
            ':': TokenType.COLON,
            '(': TokenType.LPAREN,
            ')': TokenType.RPAREN,
            '{': TokenType.LBRACE,
            '}': TokenType.RBRACE,
            '[': TokenType.LBRACKET,
            ']': TokenType.RBRACKET
        }

        if char in token_map:
            self.current += 1
            self.column += 1
            self.tokens.append(Token(token_map[char], char, self.line, self.column - 1))
        else:
            # Unknown character
            self._error(f"Unknown character '{char}'", f"Remove or replace the character '{char}'")

    def _get_keyword_type(self, text: str) -> str:
        """Get the token type for a keyword; case-insensitive lookup,
        falling back to IDENTIFIER for non-keywords."""
        keyword_map = {
            # Keywords
            'pack': TokenType.PACK,
            'namespace': TokenType.NAMESPACE,
            'function': TokenType.FUNCTION,
            'var': TokenType.VAR,
            'num': TokenType.NUM,
            'if': TokenType.IF,
            'else': TokenType.ELSE,
            'while': TokenType.WHILE,
            'on_load': TokenType.ON_LOAD,
            'on_tick': TokenType.ON_TICK,
            'exec': TokenType.EXEC,
            'tag': TokenType.TAG,

            # Tag types
            'recipe': TokenType.RECIPE,
            'loot_table': TokenType.LOOT_TABLE,
            'advancement': TokenType.ADVANCEMENT,
            'item_modifier': TokenType.ITEM_MODIFIER,
            'predicate': TokenType.PREDICATE,
            'structure': TokenType.STRUCTURE
        }

        return keyword_map.get(text.lower(), TokenType.IDENTIFIER)

    def _peek(self, offset: int) -> Optional[str]:
        """Peek ahead in the source without consuming characters.

        Returns None past end-of-source, so comparisons against a char
        simply fail instead of raising.
        """
        if self.current + offset < len(self.source):
            return self.source[self.current + offset]
        return None

    def _error(self, message: str, suggestion: str) -> None:
        """Raise a lexer error with context information (never returns)."""
        # Get the current line content for better error reporting
        lines = self.source.split('\n')
        line_content = ""
        if self.line - 1 < len(lines):
            line_content = lines[self.line - 1]

        raise MDLLexerError(
            message=message,
            file_path=self.source_file,
            line=self.line,
            column=self.column,
            line_content=line_content,
            suggestion=suggestion
        )

    def get_token_summary(self) -> Dict[str, Any]:
        """Get a summary of the tokenization results.

        Returns a dict with the total token count (including EOF), a
        per-type count excluding EOF, and the number of lines processed.
        """
        token_counts = {}
        for token in self.tokens:
            if token.type != TokenType.EOF:
                token_counts[token.type] = token_counts.get(token.type, 0) + 1

        return {
            'total_tokens': len(self.tokens),
            'token_counts': token_counts,
            'lines_processed': self.line
        }
+ }