mkdocstrings-matlab 0.3.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,654 @@
1
+ # %%
2
+ from collections import OrderedDict
3
+ from typing import Any
4
+
5
+ from tree_sitter import Language, Parser, Node
6
+ import tree_sitter_matlab as tsmatlab
7
+
8
+ from pathlib import Path
9
+
10
+ import charset_normalizer
11
+
12
+ from mkdocstrings_handlers.matlab.models import (
13
+ AccessEnum,
14
+ Class,
15
+ Classfolder,
16
+ Docstring,
17
+ Function,
18
+ MatlabMixin,
19
+ Parameters,
20
+ Parameter,
21
+ Property,
22
+ Script,
23
+ )
24
+ from mkdocstrings_handlers.matlab.enums import ParameterKind
25
+
26
+
27
# Public API of this module.
__all__ = ["FileParser"]


# Tree-sitter language object and parser for MATLAB. Both are created once at
# import time; FileParser.parse uses the module-level PARSER.
LANGUAGE = Language(tsmatlab.language())

PARSER = Parser(LANGUAGE)

# File-level query. Captures leading comments as @header, an optional function
# definition as @function and an optional class definition as @class.
FILE_QUERY = LANGUAGE.query("""(source_file
    (comment)* @header .
    (function_definition)? @function .
    (class_definition)? @class
)
""")


# Function-level query. Captures output identifiers (@output), the optional
# "set." / "get." prefix (@setter / @getter), the function name (@name), input
# identifiers (@input), comments following the signature (@docstring) and
# arguments blocks (@arguments).
FUNCTION_QUERY = LANGUAGE.query("""(function_definition .
    ("function") .
    (function_output .
        [
            (identifier) @output
            (multioutput_variable .
                ((identifier) @output (",")?)+
            )
        ]
    )? .
    [
        ("set.") @setter
        ("get.") @getter
    ]? .
    (identifier) @name .
    (function_arguments .
        ((identifier) @input (",")?)*
    )? .
    (comment)* @docstring .
    (arguments_statement)* @arguments
)""")


# Arguments-block query. Captures the identifiers inside the block's attribute
# list (@attributes) and each declared entry (@arguments). Note that the
# pattern requires at least one (property) node to match.
ARGUMENTS_QUERY = LANGUAGE.query("""(arguments_statement .
    ("arguments") .
    (attributes
        (identifier) @attributes
    )? .
    ("\\n")? .
    (property)+ @arguments
)""")


# Single property / argument declaration query. Captures the name (@name), the
# leading identifier of a dotted name (@options), and the optional dimensions,
# class identifier, validators, default value nodes and trailing comments.
PROPERTY_QUERY = LANGUAGE.query("""(property .
    [
        (identifier) @name
        (property_name
            (identifier) @options .
            (".") .
            (identifier) @name
        )
    ] .
    (dimensions)? @dimensions .
    (identifier)? @class .
    (validation_functions)? @validators .
    (default_value
        ("=") .
        _+ @default
    )? .
    (comment)* @comment
)""")


# Single attribute query. Captures the attribute name (@name) and, when an
# "=" is present, the nodes that follow it (@value).
ATTRIBUTE_QUERY = LANGUAGE.query("""(attribute
    (identifier) @name .
    (
        ("=") .
        _+ @value
    )?
)""")


# Class-level query. Captures class attributes (@attributes), the class name
# (@name), superclass names (@bases), comments after the header (@docstring)
# and the methods / properties / enumeration blocks.
CLASS_QUERY = LANGUAGE.query("""("classdef" .
    (attributes
        (attribute) @attributes
    )? .
    (identifier) @name ?
    (superclasses
        (property_name)+ @bases
    )? .
    (comment)* @docstring .
    ("\\n")? .
    [
        (comment)
        (methods) @methods
        (properties) @properties
        (enumeration) @enumeration
    ]*
)""")


# Methods-block query. Captures the block's attributes (@attributes) and the
# function definitions it contains (@methods).
METHODS_QUERY = LANGUAGE.query("""("methods" .
    (attributes
        (attribute) @attributes
    )? .
    ("\\n")? .
    (function_definition)* @methods
)""")

# Properties-block query. Captures the block's attributes (@attributes) and
# the property declarations it contains (@properties).
PROPERTIES_QUERY = LANGUAGE.query("""("properties" .
    (attributes
        (attribute) @attributes
    )? .
    ("\\n")? .
    (property)* @properties
)""")
138
+
139
+
140
+ def _strtobool(value: str) -> bool:
141
+ """
142
+ Convert a string representation of truth to boolean.
143
+
144
+ Args:
145
+ value (str): The string to convert. Expected values are "true", "1" for True, and any other value for False.
146
+
147
+ Returns:
148
+ bool: True if the input string is "true" or "1" (case insensitive), otherwise False.
149
+ """
150
+ if value.lower() in ["true", "1"]:
151
+ return True
152
+ else:
153
+ return False
154
+
155
+
156
+ def _dedent(lines: list[str]) -> list[str]:
157
+ """
158
+ Remove the common leading whitespace from each line in the given list of lines.
159
+
160
+ Args:
161
+ lines (list[str]): A list of strings where each string represents a line of text.
162
+
163
+ Returns:
164
+ list[str]: A list of strings with the common leading whitespace removed from each line.
165
+ """
166
+ indents = [len(line) - len(line.lstrip()) for line in lines if line.strip()]
167
+ indent = min(indents)
168
+ if indent == 0:
169
+ return lines
170
+ else:
171
+ return [line[indent:] if line.strip() else line for line in lines]
172
+
173
+
174
class FileParser:
    """
    Parse a MATLAB source file into a documentation model using Tree-sitter.

    Attributes:
        filepath (Path): The path to the MATLAB file.
        encoding (str): The detected encoding of the file, "utf-8" when detection fails.
        content (str): The decoded content of the file.

    Methods:
        parse(**kwargs) -> MatlabMixin: Parses the MATLAB file and returns a MatlabMixin.
    """

    # Class attributes that are forwarded to the Class / Classfolder model.
    _CLASS_FLAGS = ("Sealed", "Abstract", "Hidden")
    # Attributes collected from a properties block.
    _PROPERTY_FLAGS = (
        "AbortSet",
        "Abstract",
        "Constant",
        "Dependent",
        "GetObservable",
        "Hidden",
        "NonCopyable",
        "SetObservable",
        "Transient",
        "WeakHandle",
    )
    # Attributes of a methods block that are forwarded to each Function model.
    _METHOD_FLAGS = ("Abstract", "Hidden", "Sealed", "Static")
    # Attributes whose value is converted to an AccessEnum member.
    _ACCESS_KEYS = ("GetAccess", "SetAccess")
    # Attribute values that are passed to AccessEnum as-is.
    _ACCESS_LEVELS = ("public", "protected", "private", "immutable")

    def __init__(self, filepath: Path):
        """
        Initialize the object with the given file path.

        The file is read once; the encoding is detected from those same bytes
        so that detection and later decoding always see identical content.

        Args:
            filepath (Path): The path to the file to be processed.
        """
        self.filepath: Path = filepath
        with open(filepath, "rb") as f:
            self._content: bytes = f.read()
        result = charset_normalizer.from_bytes(self._content).best()
        self.encoding: str = result.encoding if result else "utf-8"

    @property
    def content(self) -> str:
        """
        Property that decodes and returns the content using the specified encoding.

        Returns:
            str: The decoded content.
        """
        return self._content.decode(self.encoding)

    def parse(self, **kwargs) -> MatlabMixin:
        """
        Parse the content of the file and return a MatlabMixin.

        The file is modelled as a function when FILE_QUERY captures a function,
        as a class when it captures a class, and as a Script otherwise. When
        the resulting model has no docstring, the leading comments of the file
        are used instead.

        Args:
            **kwargs: Additional keyword arguments to pass to the parsing methods.

        Returns:
            MatlabMixin: An instance of MatlabMixin representing the parsed content.

        Raises:
            ValueError: If the file could not be parsed.
        """
        tree = PARSER.parse(self._content)
        cursor = tree.walk()

        if cursor.node is None:
            raise ValueError(f"The file {self.filepath} could not be parsed.")
        captures = FILE_QUERY.captures(cursor.node)

        if "function" in captures:
            model = self._parse_function(captures["function"][0], **kwargs)
        elif "class" in captures:
            model = self._parse_class(captures["class"][0], **kwargs)
        else:
            model = Script(self.filepath.stem, filepath=self.filepath, **kwargs)

        if not model.docstring:
            model.docstring = self._comment_docstring(
                captures.get("header", None), parent=model
            )

        return model

    def _to_access(self, value: Any) -> AccessEnum:
        """Convert an attribute value to an AccessEnum, falling back to private."""
        if value in self._ACCESS_LEVELS:
            return AccessEnum(value)
        return AccessEnum.private

    def _parse_class(self, node: Node, **kwargs) -> Class:
        """
        Parse a class node and return a Class or Classfolder model.

        This method processes a class node with CLASS_QUERY and extracts its
        bases, docstring, attributes, properties, and methods. A Classfolder is
        built when the name of the file's parent directory starts with "@",
        otherwise a Class.

        Args:
            node (Node): The class node to parse.
            **kwargs: Additional keyword arguments to pass to the Class or Classfolder model.

        Returns:
            Class: The parsed Class or Classfolder model.
        """
        # Keep the caller's kwargs untouched: class attributes are added to
        # `kwargs` below but must not leak into properties and methods.
        saved_kwargs = dict(kwargs)
        captures = CLASS_QUERY.captures(node)

        bases = self._decode_from_capture(captures, "bases")
        docstring = self._comment_docstring(captures.get("docstring", None))

        for attribute_node in captures.get("attributes", []):
            key, value = self._parse_attribute(attribute_node)
            if key in self._CLASS_FLAGS:
                kwargs[key] = value

        # `startswith` is safe when the parent stem is empty, e.g. for a
        # relative path such as Path("foo.m") whose parent is Path(".").
        in_class_folder = self.filepath.parent.stem.startswith("@")
        model_type = Classfolder if in_class_folder else Class
        model = model_type(
            self.filepath.stem,
            lineno=node.range.start_point.row + 1,
            endlineno=node.range.end_point.row + 1,
            bases=bases,
            docstring=docstring,
            filepath=self.filepath,
            **kwargs,
        )

        for properties_node in captures.get("properties", []):
            block_captures = PROPERTIES_QUERY.captures(properties_node)
            # NOTE(review): these block attributes are collected but are not
            # passed to Property below -- confirm whether the Property model
            # accepts them before forwarding.
            property_kwargs = dict(saved_kwargs)
            for attribute_node in block_captures.get("attributes", []):
                key, value = self._parse_attribute(attribute_node)
                if key in self._PROPERTY_FLAGS:
                    property_kwargs[key] = value
                elif key in self._ACCESS_KEYS:
                    property_kwargs[key] = self._to_access(value)

            for property_node in block_captures.get("properties", []):
                property_captures = PROPERTY_QUERY.captures(property_node)

                prop = Property(
                    self._first_from_capture(property_captures, "name"),
                    annotation=self._first_from_capture(property_captures, "class"),
                    value=self._decode_from_capture(property_captures, "default"),
                    docstring=self._comment_docstring(
                        property_captures.get("comment", None)
                    ),
                    parent=model,
                )
                model.members[prop.name] = prop

        for methods_node in captures.get("methods", []):
            block_captures = METHODS_QUERY.captures(methods_node)
            method_kwargs = dict(saved_kwargs)
            for attribute_node in block_captures.get("attributes", []):
                key, value = self._parse_attribute(attribute_node)
                if key in self._METHOD_FLAGS:
                    method_kwargs[key] = value
                elif key in self._ACCESS_KEYS:
                    method_kwargs[key] = self._to_access(value)

            for method_node in block_captures.get("methods", []):
                method = self._parse_function(
                    method_node, method=True, parent=model, **method_kwargs
                )
                if (
                    method.name != self.filepath.stem
                    and not method.static
                    and method.parameters
                ):
                    # Remove self from first method argument
                    method.parameters._params = method.parameters._params[1:]
                if method._is_getter and method.name in model.members:
                    prop = model.members[method.name]
                    if isinstance(prop, Property):
                        prop.getter = method
                    else:
                        # This can be either an error or that it is a getter in an inherited class
                        pass
                elif method._is_setter and method.name in model.members:
                    prop = model.members[method.name]
                    if isinstance(prop, Property):
                        prop.setter = method
                    else:
                        # This can be either an error or that it is a setter in an inherited class
                        pass
                else:
                    model.members[method.name] = method

        return model

    def _parse_attribute(self, node: Node) -> tuple[str, Any]:
        """
        Parse an attribute from a given node.

        Args:
            node (Node): The node to parse the attribute from.

        Returns:
            tuple[str, Any]: A tuple containing the attribute key and its value.
                The value is `True` if no value is specified,
                otherwise it is the parsed value which can be a boolean or a string.
        """
        captures = ATTRIBUTE_QUERY.captures(node)

        key = self._first_from_capture(captures, "name")
        if "value" not in captures:
            # A bare attribute name (no "= value") counts as enabled.
            value = True
        elif captures["value"][0].type == "boolean":
            value = _strtobool(self._first_from_capture(captures, "value"))
        else:
            value = self._first_from_capture(captures, "value")

        return (key, value)

    def _parse_function(self, node: Node, method: bool = False, **kwargs) -> Function:
        """
        Parse a function node and return a Function model.

        Inputs and outputs from the signature start as positional-only
        parameters. Each arguments block then refines them with a kind, class
        annotation, default value and docstring.

        Args:
            node (Node): The node representing the function in the syntax tree.
            method (bool, optional): Whether the function is a method. When False
                the function is named after the file stem. Defaults to False.
            **kwargs: Additional keyword arguments to pass to the Function model.

        Returns:
            Function: The parsed function model.

        Raises:
            IndexError: If FUNCTION_QUERY yields no match for the node.
        """
        captures: dict = FUNCTION_QUERY.matches(node)[0][1]

        parameters: dict = OrderedDict(
            (input_name, Parameter(input_name, kind=ParameterKind.positional_only))
            for input_name in self._decode_from_capture(captures, "input")
        )
        returns: dict = OrderedDict(
            (output_name, Parameter(output_name, kind=ParameterKind.positional_only))
            for output_name in self._decode_from_capture(captures, "output")
        )

        if method:
            name = self._first_from_capture(captures, "name")
        else:
            name = self.filepath.stem

        model = Function(
            name,
            lineno=node.range.start_point.row + 1,
            endlineno=node.range.end_point.row + 1,
            filepath=self.filepath,
            docstring=self._comment_docstring(captures.get("docstring", None)),
            getter="getter" in captures,
            setter="setter" in captures,
            **kwargs,
        )

        for arguments_node in captures.get("arguments", []):
            arguments = ARGUMENTS_QUERY.captures(arguments_node)
            attributes = self._decode_from_capture(arguments, "attributes")
            # A block describes inputs unless it is marked Output only.
            is_input = "Input" in attributes or "Output" not in attributes

            # ARGUMENTS_QUERY needs at least one entry to match, so an empty
            # arguments block produces no "arguments" capture at all.
            for argument_node in arguments.get("arguments", []):
                argument = PROPERTY_QUERY.captures(argument_node)
                argument_name = self._first_from_capture(argument, "name")

                if "options" in argument:
                    # `options.name` declares a name-value argument: it replaces
                    # the `options` input with a keyword-only parameter.
                    options_name = self._first_from_capture(argument, "options")
                    parameters.pop(options_name, None)
                    parameter = parameters[argument_name] = Parameter(
                        argument_name, kind=ParameterKind.keyword_only
                    )
                else:
                    if is_input:
                        parameter = parameters.get(
                            argument_name, Parameter(argument_name)
                        )
                    else:
                        parameter = returns.get(
                            argument_name, Parameter(argument_name)
                        )

                    if "default" in argument:
                        parameter.kind = ParameterKind.optional
                    else:
                        parameter.kind = ParameterKind.positional_only

                annotation = self._first_from_capture(argument, "class")
                if annotation:
                    parameter.annotation = annotation

                default = self._first_from_capture(argument, "default")
                if default:
                    parameter.default = default

                docstring = self._comment_docstring(
                    argument.get("comment", None), parent=model
                )
                if docstring:
                    parameter.docstring = docstring

        model.parameters = Parameters(*parameters.values())
        model.returns = Parameters(*returns.values()) if returns else None

        return model

    def _decode(self, node: Node) -> str:
        """
        Decode the text of a given node using the specified encoding.

        Args:
            node (Node): The node whose text needs to be decoded.

        Returns:
            str: The decoded text of the node. If the node or its text is None, returns an empty string.
        """
        if node is None or node.text is None:
            return ""
        return node.text.decode(self.encoding)

    def _decode_from_capture(
        self, capture: dict[str, list[Node]], key: str
    ) -> list[str]:
        """
        Decode elements from a capture dictionary based on a specified key.

        Args:
            capture (dict[str, list[Node]]): A dictionary where the keys are strings and the values are lists of Node objects.
            key (str): The key to look for in the capture dictionary.

        Returns:
            list[str]: The decoded text of every node stored under the key, or an
                empty list when the key is absent.
        """
        return [self._decode(element) for element in capture.get(key, [])]

    def _first_from_capture(self, capture: dict[str, list[Node]], key: str) -> str:
        """
        Retrieve the first decoded string from a capture dictionary for a given key.

        Args:
            capture (dict[str, list[Node]]): A dictionary where the key is a string and the value is a list of Node objects.
            key (str): The key to look up in the capture dictionary.

        Returns:
            str: The first decoded string if available, otherwise an empty string.
        """
        decoded = self._decode_from_capture(capture, key)
        return decoded[0] if decoded else ""

    def _comment_docstring(
        self, nodes: list[Node] | Node | None, parent: MatlabMixin | None = None
    ) -> Docstring | None:
        """
        Extract and process a docstring from given nodes.

        This method processes nodes to extract a docstring, handling different
        comment styles and blocks. It supports both single-line and multi-line
        comments, as well as special comment blocks delimited by `%{` and `%}`.
        Lines containing `--8<--` are skipped.

        Args:
            nodes (list[Node] | Node | None): The nodes from which to extract the docstring.
            parent (MatlabMixin | None, optional): The parent MatlabMixin. Defaults to None.

        Returns:
            Docstring | None: The extracted and processed docstring, or None if
                `nodes` is None or an empty list.

        Raises:
            LookupError: If a line does not start with a comment character.
        """
        if nodes is None:
            return None

        if isinstance(nodes, list):
            if not nodes:
                return None
            lineno = nodes[0].range.start_point.row + 1
            endlineno = nodes[-1].range.end_point.row + 1
            lines = iter(
                [
                    line
                    for node in nodes
                    for line in self._decode(node).splitlines()
                ]
            )
        else:
            lineno = nodes.range.start_point.row + 1
            endlineno = nodes.range.end_point.row + 1
            lines = iter(self._decode(nodes).splitlines())

        def dedent_block(block: list[str]) -> list[str]:
            # The common indentation is measured on non-blank lines, so only
            # dedent when at least one exists; otherwise keep the block as is.
            if any(entry.strip() for entry in block):
                return _dedent(block)
            return block

        docstring: list[str] = []
        uncommented: list[str] = []

        # The inner block-comment loop advances the same iterator with next().
        for raw_line in lines:
            line = raw_line.lstrip()

            if "--8<--" in line:
                continue

            if line.startswith(("%{", "%%")):
                # Flush the pending run of single-line comments first so the
                # docstring keeps the source order.
                if uncommented:
                    docstring += dedent_block(uncommented)
                    uncommented = []
                if line.startswith("%%"):
                    docstring.append(line[2:].lstrip())
                    continue

                comment_block = []
                line = line[2:]
                while "%}" not in line:
                    comment_block.append(line)
                    try:
                        line = next(lines)
                    except StopIteration:
                        # Unterminated block: keep what was collected so far.
                        break
                else:
                    last_line = line[: line.index("%}")]
                    if last_line:
                        comment_block.append(last_line)
                # The block is empty for "%{%}"; a one-line "%{ text %}" block
                # has a first line but nothing left to dedent.
                if comment_block:
                    docstring.append(comment_block[0])
                    docstring += dedent_block(comment_block[1:])

            elif line.startswith("%"):
                uncommented.append(line[1:])
            else:
                raise LookupError(
                    f"Comment line does not start with '%': {line!r}"
                )

        if uncommented:
            docstring += dedent_block(uncommented)

        return Docstring(
            "\n".join(docstring),
            lineno=lineno,
            endlineno=endlineno,
            parent=parent,
        )