dbrownell-parserlib 0.4.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dbrownell-parserlib
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: Functionality useful when creating parsers.
5
5
  Author: Dave Brownell
6
6
  Author-email: Dave Brownell <github@DavidBrownell.com>
@@ -10,6 +10,10 @@ Classifier: Operating System :: Microsoft :: Windows
10
10
  Classifier: Operating System :: POSIX :: Linux
11
11
  Classifier: Programming Language :: Python
12
12
  Classifier: Programming Language :: Python :: 3.14
13
+ Requires-Dist: antlr-denter>=1.3.1
14
+ Requires-Dist: antlr4-python3-runtime>=4.13.2
15
+ Requires-Dist: dbrownell-common>=0.16.8
16
+ Requires-Dist: plum>=2.9.0
13
17
  Requires-Python: >=3.14
14
18
  Project-URL: Homepage, https://github.com/davidbrownell/dbrownell_ParserLib
15
19
  Project-URL: Documentation, https://github.com/davidbrownell/dbrownell_ParserLib
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dbrownell-parserlib"
3
- version = "0.4.0"
3
+ version = "0.6.0"
4
4
  # ^^^^^
5
5
  # Wheel names will be generated according to this value. Do not manually modify this value; instead
6
6
  # update it according to committed changes by running this command from the root of the repository:
@@ -13,7 +13,12 @@ authors = [
13
13
  { name = "Dave Brownell", email = "github@DavidBrownell.com" }
14
14
  ]
15
15
  requires-python = ">= 3.14"
16
- dependencies = []
16
+ dependencies = [
17
+ "antlr-denter>=1.3.1",
18
+ "antlr4-python3-runtime>=4.13.2",
19
+ "dbrownell-common>=0.16.8",
20
+ "plum>=2.9.0",
21
+ ]
17
22
  classifiers = [
18
23
  "Operating System :: MacOS",
19
24
  "Operating System :: Microsoft :: Windows",
@@ -90,5 +95,8 @@ max-args = 10
90
95
  max-branches = 20
91
96
  max-returns = 20
92
97
 
98
+ [tool.ty.rules]
99
+ useless-overload-body = "ignore" # This rule is not compatible with plum overloads
100
+
93
101
  [tool.uv.build-backend]
94
102
  module-name = "dbrownell_ParserLib"
@@ -2,12 +2,18 @@
2
2
 
3
3
  from importlib.metadata import version
4
4
 
5
+ from dbrownell_ParserLib.antlr import AntlrParser, AntlrParserException, BuildAntlrGrammar, CreateAntlrParser
5
6
  from dbrownell_ParserLib.errors import Error, PythonError
6
7
  from dbrownell_ParserLib.location import Location
7
8
  from dbrownell_ParserLib.region import Region
8
9
 
10
+
9
11
  # ----------------------------------------------------------------------
10
12
  __all__ = [
13
+ "AntlrParser",
14
+ "AntlrParserException",
15
+ "BuildAntlrGrammar",
16
+ "CreateAntlrParser",
11
17
  "Error",
12
18
  "Location",
13
19
  "PythonError",
@@ -0,0 +1,12 @@
1
+ # noqa: D104
2
+ from dbrownell_ParserLib.antlr.build_antlr_grammar import BuildAntlrGrammar
3
+ from dbrownell_ParserLib.antlr.antlr_parser import AntlrParser, AntlrParserException, CreateAntlrParser
4
+
5
+
6
+ # ----------------------------------------------------------------------
7
+ __all__ = [
8
+ "AntlrParser",
9
+ "AntlrParserException",
10
+ "BuildAntlrGrammar",
11
+ "CreateAntlrParser",
12
+ ]
@@ -0,0 +1,503 @@
1
+ """Contains functionality used when parsing with ANTLR parsers."""
2
+
3
+ import os
4
+ import threading
5
+
6
+ from abc import ABC, abstractmethod
7
+ from collections.abc import Callable # noqa: TC003
8
+ from dataclasses import dataclass, field
9
+ from datetime import datetime, UTC
10
+ from pathlib import Path
11
+ from typing import cast, Protocol
12
+
13
+ import antlr4
14
+
15
+ from dbrownell_Common.ContextlibEx import ExitStack
16
+ from dbrownell_Common import ExecuteTasks
17
+ from dbrownell_Common.Streams.DoneManager import DoneManager # noqa: TC002
18
+ from plum import dispatch, overload
19
+
20
+ from dbrownell_ParserLib.errors import Error, PythonError
21
+ from dbrownell_ParserLib.region import Location, Region
22
+
23
+
24
+ # ----------------------------------------------------------------------
25
+ # |
26
+ # | Public Types
27
+ # |
28
+ # ----------------------------------------------------------------------
29
class AntlrParserException(Exception):  # noqa: N818
    """Raised when ANTLR parsing, or a visitor processing the AST, fails.

    The `Error` describing the failure is stored on the `error` attribute; its
    string form is used as the exception message.
    """

    # ----------------------------------------------------------------------
    def __init__(self, error: Error) -> None:
        message = str(error)
        super().__init__(message)

        # Keep the structured error so that callers can report it without re-parsing the message.
        self.error = error
37
+
38
+
39
+ # ----------------------------------------------------------------------
40
class AntlrParser(ABC):
    """Interface implemented by the parser objects that `CreateAntlrParser` returns.

    Instances are callable. The call is overloaded on the type of its second
    positional argument: a mapping of workspaces, a list of files, or a single file.
    """

    # ----------------------------------------------------------------------
    @overload
    @abstractmethod
    def __call__(
        self,
        dm: DoneManager,
        workspaces: dict[Path, dict[Path, Callable[[], tuple[str, datetime]]]],
        supported_file_extensions: set[str] | None,
        *,
        single_threaded: bool = False,
        quiet: bool = False,
    ) -> dict[Path, dict[Path, Error | antlr4.ParseTreeVisitor]]:
        """Parse translation units that are spread across several workspaces.

        Includes (if any) are allowed from each of the workspaces.

        Args:
            dm: `DoneManager` for the operation.
            workspaces: Maps each workspace root to its translation units. The inner
                mapping is keyed by the path of a translation unit relative to the
                workspace root; each value is a function that returns the text and
                last modified time of that translation unit.
            supported_file_extensions: Set of file extensions, or `None`.
            single_threaded: Keyword-only flag; defaults to `False`.
            quiet: Keyword-only flag; defaults to `False`.

        Returns:
            For each workspace root, a mapping from the relative path of a translation
            unit to either an `Error` or an `antlr4.ParseTreeVisitor`.
        """

    # ----------------------------------------------------------------------
    @overload
    @abstractmethod
    def __call__(  # noqa: F811
        self,
        dm: DoneManager,
        filenames: list[Path],
        supported_file_extensions: set[str] | None,
        *,
        single_threaded: bool = False,
        quiet: bool = False,
    ) -> dict[Path, Error | antlr4.ParseTreeVisitor]:
        """Parse a list of files that live in one directory or its descendants.

        Includes (if any) are allowed from the same root directory.

        Returns:
            A mapping from path to either an `Error` or an `antlr4.ParseTreeVisitor`.
        """

    # ----------------------------------------------------------------------
    @overload
    @abstractmethod
    def __call__(  # noqa: F811
        self,
        dm: DoneManager,
        filename: Path,
        supported_file_extensions: set[str] | None,
        *,
        single_threaded: bool = False,
        quiet: bool = False,
    ) -> dict[Path, Error | antlr4.ParseTreeVisitor]:
        """Parse one file.

        Includes (if any) are allowed from the same directory as the file.

        Returns:
            A mapping from path to either an `Error` or an `antlr4.ParseTreeVisitor`.
        """

    # ----------------------------------------------------------------------
    @dispatch
    @abstractmethod
    def __call__(self, *args, **kwargs):  # noqa: ANN204, F811
        """Parse one or more translation units across one or more workspaces."""
104
+
105
+
106
+ # ----------------------------------------------------------------------
107
class CreateVisitorFuncType(Protocol):
    """Structural type of the visitor factory that is passed to `CreateAntlrParser`.

    A conforming callable accepts the path of the file being parsed, a progress
    callback that takes an `int`, and the keyword-only flag `is_included_file`;
    it returns the `antlr4.ParseTreeVisitor` that will walk the parsed tree.
    """

    # ----------------------------------------------------------------------
    def __call__(  # noqa: D102
        self,
        filename: Path,
        on_progress_func: Callable[[int], None],
        *,
        is_included_file: bool,
    ) -> antlr4.ParseTreeVisitor:
        # Protocol member: the signature is the contract, there is no implementation.
        ...
117
+
118
+
119
+ # ----------------------------------------------------------------------
120
+ # |
121
+ # | Public Functions
122
+ # |
123
+ # ----------------------------------------------------------------------
124
def CreateAntlrParser(
    antlr_lexer: type[antlr4.Lexer],
    antlr_parser: type[antlr4.Parser],
    create_visitor_func: CreateVisitorFuncType,
    get_ast_func: Callable[[antlr4.Parser], antlr4.ParserRuleContext],
    custom_lexer_init_func: Callable[[antlr4.Lexer], None] | None = None,
    custom_parser_init_func: Callable[[antlr4.Parser], None] | None = None,
) -> AntlrParser:
    """Create an `AntlrParser` on top of lexer and parser classes generated by ANTLR.

    Args:
        antlr_lexer: Lexer class; instantiated with an `antlr4.InputStream`.
        antlr_parser: Parser class; instantiated with an `antlr4.CommonTokenStream`.
        create_visitor_func: Factory for the visitor that is applied to the parsed tree.
        get_ast_func: Given the parser instance, returns the root `antlr4.ParserRuleContext`.
        custom_lexer_init_func: Optional hook invoked with every newly created lexer.
        custom_parser_init_func: Optional hook invoked with every newly created parser.

    Returns:
        The parser object.
    """

    # ----------------------------------------------------------------------
    def NewLexer(input_stream: antlr4.InputStream) -> antlr4.Lexer:
        new_lexer = antlr_lexer(input_stream)

        if custom_lexer_init_func is None:
            return new_lexer

        custom_lexer_init_func(new_lexer)
        return new_lexer

    # ----------------------------------------------------------------------
    def NewParser(token_stream: antlr4.CommonTokenStream) -> antlr4.Parser:
        new_parser = antlr_parser(token_stream)

        if custom_parser_init_func is None:
            return new_parser

        custom_parser_init_func(new_parser)
        return new_parser

    # ----------------------------------------------------------------------

    return _AntlrParser(NewLexer, NewParser, create_visitor_func, get_ast_func)
160
+
161
+
162
+ # ----------------------------------------------------------------------
163
+ # |
164
+ # | Private Types
165
+ # |
166
+ # ----------------------------------------------------------------------
167
class _AntlrParser(AntlrParser):
    """Concrete `AntlrParser` driven by lexer/parser/visitor factory callables.

    The workspace overload of `__call__` does the work; the list-of-files and
    single-file overloads translate their input and delegate to it.
    """

    # ----------------------------------------------------------------------
    # |  Public Methods
    def __init__(
        self,
        create_lexer_func: Callable[[antlr4.InputStream], antlr4.Lexer],
        create_parser_func: Callable[[antlr4.CommonTokenStream], antlr4.Parser],
        create_visitor_func: CreateVisitorFuncType,
        get_ast_func: Callable[[antlr4.Parser], antlr4.ParserRuleContext],
    ) -> None:
        """Store the factories that are used to lex, parse, and visit each translation unit."""

        self._create_lexer_func = create_lexer_func
        self._create_parser_func = create_parser_func
        self._create_visitor_func = create_visitor_func
        self._get_ast_func = get_ast_func

        # Set for the duration of a workspace parse and reset to None afterwards; the
        # tasks created by `_ExecuteTask` use it to locate the slot for their result.
        self._execute_state: _AntlrParser._ExecuteState | None = None

    # ----------------------------------------------------------------------
    @overload
    def __call__(  # noqa: C901
        self,
        dm: DoneManager,
        workspaces: dict[Path, dict[Path, Callable[[], tuple[str, datetime]]]],
        supported_file_extensions: set[str] | None,
        *,
        single_threaded: bool = False,
        quiet: bool = False,
    ) -> dict[Path, dict[Path, Error | antlr4.ParseTreeVisitor]]:
        """Parse every translation unit of every workspace.

        Args:
            dm: `DoneManager` handed to the task executor; its `result` is set to the
                number of translation units that produced an `Error`.
            workspaces: Maps a workspace root to its translation units (relative path ->
                function returning the content and last modified time).
            supported_file_extensions: Currently unused.
            single_threaded: When True, the executor is limited to one thread.
            quiet: Forwarded to the executor.

        Returns:
            Results keyed by the *resolved* workspace root and then by translation unit.

        Raises:
            ValueError: Two roots resolve to the same path, or one root is nested in another.
        """

        _ = supported_file_extensions  # Currently unused

        # Create the workspace infos for all of the workspaces. At the same time, ensure that
        # there aren't any roots that are nested within other roots.
        all_roots: set[Path] = set()

        # ----------------------------------------------------------------------
        def CheckRoot(path: Path) -> None:
            for root in all_roots:
                if path == root:
                    msg = f"The path '{path.as_posix()}' is duplicated as a root."
                    raise ValueError(msg)

                # Each relationship is tested exactly once; unrelated roots fall through
                # to the next candidate.
                if path.is_relative_to(root):
                    nested, container = path, root
                elif root.is_relative_to(path):
                    nested, container = root, path
                else:
                    continue

                msg = f"The path '{nested.as_posix()}' is nested within the root '{container.as_posix()}'."
                raise ValueError(msg)

            all_roots.add(path)

        # ----------------------------------------------------------------------

        workspace_infos: dict[Path, _AntlrParser._WorkspaceInfo] = {}

        for workspace_name in workspaces:
            workspace_path = workspace_name.resolve()

            CheckRoot(workspace_path)
            workspace_infos[workspace_path] = _AntlrParser._WorkspaceInfo()

        # ----------------------------------------------------------------------
        def OnExit() -> None:
            self._execute_state = None

        # ----------------------------------------------------------------------

        with ExitStack(OnExit):
            with ExecuteTasks.YieldQueueExecutor(
                dm,
                "Parsing...",
                quiet=quiet,
                max_num_threads=1 if single_threaded else None,
            ) as enqueue_func:
                self._execute_state = _AntlrParser._ExecuteState(enqueue_func, workspace_infos)

                # NOTE(review): with a single workspace the description is the relative `Path`
                # itself, while the multi-workspace form is a `str` -- confirm that the
                # executor accepts both.
                if len(workspace_infos) == 1:
                    create_description_func = lambda w, t: t  # noqa: ARG005, E731
                else:
                    create_description_func = lambda w, t: (w / t).as_posix()  # noqa: E731

                # `workspace_infos` was filled by iterating `workspaces`, so both have the
                # same order and length (enforced by `strict=True`).
                for (workspace_path, workspace_info), translation_units in zip(
                    workspace_infos.items(), workspaces.values(), strict=True
                ):
                    with workspace_info.results_lock:
                        for translation_unit, content_func in translation_units.items():
                            # ----------------------------------------------------------------------
                            def ThisExecuteTask(
                                on_simple_status_func: Callable[[str], None],  # noqa: ARG001
                                # Bound as defaults so that each task keeps the values of
                                # its own loop iteration.
                                workspace_path: Path = workspace_path,
                                translation_unit: Path = translation_unit,
                                content_func: Callable[[], tuple[str, datetime]] = content_func,
                            ) -> tuple[int, ExecuteTasks.YieldQueueExecutorTypes.ExecuteFuncType]:
                                return self._ExecuteTask(
                                    workspace_path,
                                    translation_unit,
                                    content_func,
                                    is_included_file=False,
                                )

                            # ----------------------------------------------------------------------

                            # Reserve the slot; the task replaces None with its result.
                            workspace_info.results[translation_unit] = None

                            enqueue_func(
                                create_description_func(workspace_path, translation_unit),
                                ThisExecuteTask,
                            )

        # Capture the results
        results: dict[Path, dict[Path, Error | antlr4.ParseTreeVisitor]] = {}
        errors: list[Error] = []

        for workspace_path, workspace_info in workspace_infos.items():
            these_results: dict[Path, Error | antlr4.ParseTreeVisitor] = {}

            for translation_unit, translation_unit_result in workspace_info.results.items():
                # Slots that were never filled are left out of the results.
                if translation_unit_result is None:
                    continue

                these_results[translation_unit] = translation_unit_result

                if isinstance(translation_unit_result, Error):
                    errors.append(translation_unit_result)

            results[workspace_path] = these_results

        dm.result = len(errors)

        return results

    # ----------------------------------------------------------------------
    @overload
    def __call__(  # noqa: F811
        self,
        dm: DoneManager,
        filenames: list[Path],
        supported_file_extensions: set[str] | None,
        *,
        single_threaded: bool = False,
        quiet: bool = False,
    ) -> dict[Path, Error | antlr4.ParseTreeVisitor]:
        """Parse a list of files by treating their common directory as a single workspace.

        Returns:
            Results keyed by the path of each file relative to the derived workspace root.

        Raises:
            ValueError: `filenames` is empty, or the files do not share a common root.
        """

        if not filenames:
            msg = "Invalid filenames"
            raise ValueError(msg)

        workspace_root: Path | None = None
        translation_units: dict[Path, Callable[[], tuple[str, datetime]]] = {}

        if len(filenames) == 1:
            # The common path of a single file would be the file itself, so use its parent.
            workspace_root = filenames[0].parent
            translation_units[Path(filenames[0].name)] = lambda filename=filenames[0]: _LoadFile(filename)
        else:
            potential_root = os.path.commonpath(filenames)
            if not potential_root:
                msg = "Filenames must share a common root"
                raise ValueError(msg)

            workspace_root = Path(potential_root)

            for filename in filenames:
                translation_units[filename.relative_to(workspace_root)] = lambda filename=filename: _LoadFile(
                    filename
                )

        assert workspace_root is not None
        assert translation_units

        result = self(
            dm,
            {workspace_root: translation_units},
            supported_file_extensions,
            single_threaded=single_threaded,
            quiet=quiet,
        )

        # A single workspace was provided, so a single workspace comes back.
        assert len(result) == 1
        return next(iter(result.values()))

    # ----------------------------------------------------------------------
    @overload
    def __call__(  # noqa: F811
        self,
        dm: DoneManager,
        filename: Path,
        supported_file_extensions: set[str] | None,
        *,
        single_threaded: bool = False,
        quiet: bool = False,
    ) -> dict[Path, Error | antlr4.ParseTreeVisitor]:
        """Parse a single file by delegating to the list-of-files overload."""

        return self(
            dm,
            [filename],
            supported_file_extensions,
            single_threaded=single_threaded,
            quiet=quiet,
        )

    # ----------------------------------------------------------------------
    @dispatch
    def __call__(self, *args, **kwargs):  # noqa: ANN204, F811
        pass  # pragma: no cover

    # ----------------------------------------------------------------------
    # |  Private Types
    @dataclass(frozen=True)
    class _WorkspaceInfo:
        # Result per translation unit; None marks a slot that has not been filled yet.
        results: dict[Path, None | Error | antlr4.ParseTreeVisitor] = field(init=False, default_factory=dict)
        # Held while `results` is populated with empty slots and while a task stores its result.
        results_lock: threading.Lock = field(init=False, default_factory=threading.Lock)

    # ----------------------------------------------------------------------
    @dataclass(frozen=True)
    class _ExecuteState:
        enqueue_func: ExecuteTasks.YieldQueueExecutorTypes.EnqueueFuncType
        workspace_infos: dict[Path, _AntlrParser._WorkspaceInfo]

    # ----------------------------------------------------------------------
    # |  Private Methods
    def _ExecuteTask(
        self,
        workspace_path: Path,
        translation_unit: Path,
        content_func: Callable[[], tuple[str, datetime]],
        *,
        is_included_file: bool,
    ) -> tuple[int, ExecuteTasks.YieldQueueExecutorTypes.ExecuteFuncType]:
        """Load a translation unit and create the function that parses it.

        Returns:
            The number of lines in the content and the function that lexes, parses, and
            visits that content. The function stores its outcome (the visitor, or an
            `Error` when something went wrong) in the slot reserved for the translation unit.
        """

        content, last_modified_time = content_func()

        _ = last_modified_time  # Currently unused

        # ----------------------------------------------------------------------
        def Execute(
            status: ExecuteTasks.Status,
        ) -> str | None:
            result: None | Error | antlr4.ParseTreeVisitor = None

            # ----------------------------------------------------------------------
            def OnExit() -> None:
                assert result is not None
                assert self._execute_state is not None

                workspace_info = self._execute_state.workspace_infos[workspace_path]

                with workspace_info.results_lock:
                    assert workspace_info.results[translation_unit] is None
                    workspace_info.results[translation_unit] = result

            # ----------------------------------------------------------------------

            with ExitStack(OnExit):
                try:
                    fullpath = workspace_path / translation_unit
                    error_listener = _ErrorListener(fullpath)

                    antlr_stream = antlr4.InputStream(content)

                    lexer = self._create_lexer_func(antlr_stream)

                    # Replace the listeners that are installed by default with the one that raises.
                    lexer.removeErrorListeners()
                    lexer.addErrorListener(error_listener)

                    tokens = antlr4.CommonTokenStream(lexer)
                    tokens.fill()

                    parser = self._create_parser_func(tokens)

                    parser.removeErrorListeners()
                    parser.addErrorListener(error_listener)

                    visitor = self._create_visitor_func(
                        fullpath,
                        lambda line: cast(None, status.OnProgress(line, None)),
                        is_included_file=is_included_file,
                    )

                    ast = self._get_ast_func(parser)
                    ast.accept(visitor)

                    result = visitor

                except AntlrParserException as ex:
                    result = ex.error
                except Exception as ex:
                    # Anything unexpected is reported as an error for this translation
                    # unit rather than propagated.
                    result = PythonError.Create(ex)

        # ----------------------------------------------------------------------

        # Same value as `len(content.split("\n"))` without building the intermediate list.
        return content.count("\n") + 1, Execute
462
+
463
+
464
+ # ----------------------------------------------------------------------
465
class _ErrorListener(antlr4.DiagnosticErrorListener):
    """Error listener that raises an `AntlrParserException` for every reported syntax error."""

    # ----------------------------------------------------------------------
    def __init__(self, source: Path, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # Path recorded in the `Region` of every error that is raised.
        self._source = source

    # ----------------------------------------------------------------------
    def syntaxError(
        self,
        recognizer: antlr4.Parser,  # noqa: ARG002
        offendingSymbol: antlr4.Token,  # noqa: ARG002, N803
        line: int,
        column: int,
        msg: str,
        e: antlr4.RecognitionException | None,
    ) -> None:
        """Raise an `AntlrParserException` located at `line`, `column + 1` of the source."""

        # NOTE(review): the `+ 1` presumably converts a zero-based column into a one-based
        # `Location` column -- confirm against the runtime.
        position = Location(line, column + 1)

        # The region starts and ends at the same position.
        failure = AntlrParserException(Error(msg, Region(self._source, position, position)))

        if e is not None:
            raise failure from e

        raise failure
490
+
491
+
492
+ # ----------------------------------------------------------------------
493
+ # |
494
+ # | Private Functions
495
+ # |
496
+ # ----------------------------------------------------------------------
497
+ def _LoadFile(filename: Path) -> tuple[str, datetime]:
498
+ """Load the contents of a file and return it along with its last modified time."""
499
+
500
+ text = filename.read_text(encoding="utf-8")
501
+ last_modified_time = datetime.fromtimestamp(filename.stat().st_mtime, tz=UTC)
502
+
503
+ return text, last_modified_time
@@ -0,0 +1,45 @@
1
+ # noqa: D104
2
+ from pathlib import Path
3
+ from typing import TYPE_CHECKING
4
+
5
+ from dbrownell_Common import SubprocessEx
6
+
7
+ if TYPE_CHECKING:
8
+ from dbrownell_Common.Streams.DoneManager import DoneManager
9
+
10
+
11
+ # ----------------------------------------------------------------------
12
def BuildAntlrGrammar(
    dm: DoneManager,
    antlr_grammar_filename: Path,
    output_dir: Path,
    *,
    create_init_file: bool = True,
    create_gitignore_file: bool = True,
) -> None:
    """Generate Python3 parser code (visitor, no listener) from an ANTLR grammar.

    Java must be available on the command line; the ANTLR jar is expected to sit next
    to this module.

    Args:
        dm: `DoneManager` under which the generation step is reported.
        antlr_grammar_filename: Grammar file handed to ANTLR.
        output_dir: Directory passed to ANTLR as its output location.
        create_init_file: Create an empty `__init__.py` in `output_dir` if none exists.
        create_gitignore_file: Create a `.gitignore` containing `*` in `output_dir` if
            none exists.

    Neither file is created when ANTLR exits with a non-zero result.
    """

    with dm.Nested(f"Generating '{antlr_grammar_filename.name}'...") as generate_dm:
        jar_filename = Path(__file__).with_name("antlr-4.13.2-complete.jar")
        assert jar_filename.is_file(), jar_filename

        command_line = " ".join(
            [
                "java",
                f'-jar "{jar_filename}"',
                "-Dlanguage=Python3",
                f'-o "{output_dir}"',
                "-no-listener",
                "-visitor",
                f'"{antlr_grammar_filename}"',
            ],
        )

        generate_dm.WriteVerbose(f"Command line: {command_line}\n\n")

        with generate_dm.YieldStream() as stream:
            exit_code = SubprocessEx.Stream(command_line, stream)
            generate_dm.result = exit_code

            if exit_code != 0:
                return

    # Existing files are left untouched.
    if create_init_file and not (init_filename := output_dir / "__init__.py").is_file():
        init_filename.touch()

    if create_gitignore_file and not (gitignore_filename := output_dir / ".gitignore").is_file():
        gitignore_filename.write_text("*\n")