mkdocstrings-matlab 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,654 @@
1
+ # %%
2
+ from collections import OrderedDict
3
+ from typing import Any
4
+
5
+ from tree_sitter import Language, Parser, Node
6
+ import tree_sitter_matlab as tsmatlab
7
+
8
+ from pathlib import Path
9
+
10
+ import charset_normalizer
11
+
12
+ from mkdocstrings_handlers.matlab.models import (
13
+ AccessEnum,
14
+ Class,
15
+ Classfolder,
16
+ Docstring,
17
+ Function,
18
+ MatlabMixin,
19
+ Parameters,
20
+ Parameter,
21
+ Property,
22
+ Script,
23
+ )
24
+ from mkdocstrings_handlers.matlab.enums import ParameterKind
25
+
26
+
27
+ __all__ = ["FileParser"]
28
+
29
+
30
# Tree-sitter grammar handle and parser for MATLAB, created once at import time
# and shared by every FileParser instance.
LANGUAGE = Language(tsmatlab.language())

PARSER = Parser(LANGUAGE)

# File level: leading comments (@header), then an optional function definition
# (@function), then an optional class definition (@class). FileParser.parse
# checks for @function first, then @class, and otherwise builds a Script.
FILE_QUERY = LANGUAGE.query("""(source_file
    (comment)* @header .
    (function_definition)? @function .
    (class_definition)? @class
)
""")


# Function signature: output names (@output), an optional "set." / "get."
# accessor prefix (@setter / @getter), the function name (@name), input names
# (@input), the comments following the signature (@docstring) and any
# `arguments` blocks (@arguments).
FUNCTION_QUERY = LANGUAGE.query("""(function_definition .
    ("function") .
    (function_output .
        [
            (identifier) @output
            (multioutput_variable .
                ((identifier) @output (",")?)+
            )
        ]
    )? .
    [
        ("set.") @setter
        ("get.") @getter
    ]? .
    (identifier) @name .
    (function_arguments .
        ((identifier) @input (",")?)*
    )? .
    (comment)* @docstring .
    (arguments_statement)* @arguments
)""")


# One `arguments` block: the identifiers inside its attribute list
# (@attributes; _parse_function looks for "Input" / "Output" among them) and
# its entries (@arguments), which are `property` nodes.
ARGUMENTS_QUERY = LANGUAGE.query("""(arguments_statement .
    ("arguments") .
    (attributes
        (identifier) @attributes
    )? .
    ("\\n")? .
    (property)+ @arguments
)""")


# One `property` node. Used for class properties (_parse_class) and for
# `arguments` block entries (_parse_function). A dotted name `options.name`
# yields both @options and @name.
PROPERTY_QUERY = LANGUAGE.query("""(property .
    [
        (identifier) @name
        (property_name
            (identifier) @options .
            (".") .
            (identifier) @name
        )
    ] .
    (dimensions)? @dimensions .
    (identifier)? @class .
    (validation_functions)? @validators .
    (default_value
        ("=") .
        _+ @default
    )? .
    (comment)* @comment
)""")


# One attribute inside an attribute list: its name (@name) and, when written
# as `Name = value`, the value nodes (@value).
ATTRIBUTE_QUERY = LANGUAGE.query("""(attribute
    (identifier) @name .
    (
        ("=") .
        _+ @value
    )?
)""")


# Class definition: attributes (@attributes), class name (@name), superclasses
# (@bases), the comments following the header (@docstring) and the member
# blocks (@methods, @properties, @enumeration).
# NOTE(review): `(identifier) @name ?` puts the quantifier after the capture and
# has no `.` anchor before `(superclasses ...)`, unlike the neighbouring
# patterns - presumably an optional identifier; confirm this is intended.
CLASS_QUERY = LANGUAGE.query("""("classdef" .
    (attributes
        (attribute) @attributes
    )? .
    (identifier) @name ?
    (superclasses
        (property_name)+ @bases
    )? .
    (comment)* @docstring .
    ("\\n")? .
    [
        (comment)
        (methods) @methods
        (properties) @properties
        (enumeration) @enumeration
    ]*
)""")


# One `methods` block: its attributes (@attributes) and the function
# definitions it contains (@methods).
METHODS_QUERY = LANGUAGE.query("""("methods" .
    (attributes
        (attribute) @attributes
    )? .
    ("\\n")? .
    (function_definition)* @methods
)""")

# One `properties` block: its attributes (@attributes) and the property
# entries it contains (@properties).
PROPERTIES_QUERY = LANGUAGE.query("""("properties" .
    (attributes
        (attribute) @attributes
    )? .
    ("\\n")? .
    (property)* @properties
)""")
138
+
139
+
140
+ def _strtobool(value: str) -> bool:
141
+ """
142
+ Convert a string representation of truth to boolean.
143
+
144
+ Args:
145
+ value (str): The string to convert. Expected values are "true", "1" for True, and any other value for False.
146
+
147
+ Returns:
148
+ bool: True if the input string is "true" or "1" (case insensitive), otherwise False.
149
+ """
150
+ if value.lower() in ["true", "1"]:
151
+ return True
152
+ else:
153
+ return False
154
+
155
+
156
+ def _dedent(lines: list[str]) -> list[str]:
157
+ """
158
+ Remove the common leading whitespace from each line in the given list of lines.
159
+
160
+ Args:
161
+ lines (list[str]): A list of strings where each string represents a line of text.
162
+
163
+ Returns:
164
+ list[str]: A list of strings with the common leading whitespace removed from each line.
165
+ """
166
+ indents = [len(line) - len(line.lstrip()) for line in lines if line.strip()]
167
+ indent = min(indents)
168
+ if indent == 0:
169
+ return lines
170
+ else:
171
+ return [line[indent:] if line.strip() else line for line in lines]
172
+
173
+
174
class FileParser(object):
    """
    A class to parse MATLAB files using Tree-sitter.

    Attributes:
        filepath (Path): The path to the MATLAB file.
        encoding (str): The encoding detected for the file content, or "utf-8"
            when detection yields no result.
        content (str): The decoded content of the file.

    Methods:
        parse(**kwargs) -> MatlabMixin: Parses the MATLAB file and returns a MatlabMixin.
    """

    # Boolean-like attribute names that are copied verbatim into the keyword
    # arguments collected for each kind of block.
    _CLASS_FLAGS = ("Sealed", "Abstract", "Hidden")
    _PROPERTY_FLAGS = (
        "AbortSet",
        "Abstract",
        "Constant",
        "Dependent",
        "GetObservable",
        "Hidden",
        "NonCopyable",
        "SetObservable",
        "Transient",
        "WeakHandle",
    )
    _METHOD_FLAGS = ("Abstract", "Hidden", "Sealed", "Static")
    # Attribute names whose value is an access level, and the levels that are
    # converted with AccessEnum(value).
    _ACCESS_KEYS = ("GetAccess", "SetAccess")
    _ACCESS_LEVELS = ("public", "protected", "private", "immutable")

    def __init__(self, filepath: Path):
        """
        Initialize the object with the given file path.

        The file is read once; the encoding is detected from those same bytes.

        Args:
            filepath (Path): The path to the file to be processed.
        """
        self.filepath: Path = filepath
        with open(filepath, "rb") as f:
            self._content: bytes = f.read()
        # Detect from the bytes already in memory instead of reading the file
        # a second time.
        result = charset_normalizer.from_bytes(self._content).best()
        self.encoding: str = result.encoding if result else "utf-8"

    @property
    def content(self) -> str:
        """
        Property that decodes and returns the content using the detected encoding.

        Returns:
            str: The decoded content.
        """
        return self._content.decode(self.encoding)

    def parse(self, **kwargs) -> MatlabMixin:
        """
        Parse the content of the file and return a MatlabMixin.

        The raw bytes are parsed with Tree-sitter. A captured function
        definition takes precedence over a class definition; when neither is
        captured a Script model is created. If the resulting model has no
        docstring, the comments at the top of the file are used instead.

        Args:
            **kwargs: Additional keyword arguments to pass to the parsing methods.

        Returns:
            MatlabMixin: An instance of MatlabMixin representing the parsed content.

        Raises:
            ValueError: If the file could not be parsed.
        """
        tree = PARSER.parse(self._content)
        cursor = tree.walk()

        if cursor.node is None:
            raise ValueError(f"The file {self.filepath} could not be parsed.")
        captures = FILE_QUERY.captures(cursor.node)

        if "function" in captures:
            model = self._parse_function(captures["function"][0], **kwargs)
        elif "class" in captures:
            model = self._parse_class(captures["class"][0], **kwargs)
        else:
            model = Script(self.filepath.stem, filepath=self.filepath, **kwargs)

        if not model.docstring:
            model.docstring = self._comment_docstring(
                captures.get("header", None), parent=model
            )

        return model

    def _attribute_kwargs(
        self, attribute_nodes: list[Node], base_kwargs: dict, flag_names: tuple
    ) -> dict:
        """
        Build keyword arguments from the attributes of a properties/methods block.

        Args:
            attribute_nodes (list[Node]): The attribute nodes of the block.
            base_kwargs (dict): Keyword arguments to start from; a copy is made.
            flag_names (tuple): Attribute names whose parsed value is stored as is.

        Returns:
            dict: A copy of `base_kwargs` extended with the recognised attributes.
                "GetAccess"/"SetAccess" values are converted to AccessEnum; any
                value outside the known access levels becomes AccessEnum.private.
        """
        collected = dict(base_kwargs)
        for attribute_node in attribute_nodes:
            key, value = self._parse_attribute(attribute_node)
            if key in flag_names:
                collected[key] = value
            elif key in self._ACCESS_KEYS:
                if value in self._ACCESS_LEVELS:
                    collected[key] = AccessEnum(value)
                else:
                    collected[key] = AccessEnum.private
        return collected

    def _parse_class(self, node: Node, **kwargs) -> Class:
        """
        Parse a class node and return a Class or Classfolder model.

        This method processes a class node with CLASS_QUERY and extracts its
        bases, docstring, attributes, properties, and methods. A Classfolder is
        built when the name of the file's parent directory starts with "@",
        otherwise a Class.

        Args:
            node (Node): The class node to parse.
            **kwargs: Additional keyword arguments to pass to the Class or Classfolder model.

        Returns:
            Class: The parsed Class or Classfolder model.
        """
        # Snapshot the caller's keyword arguments before the class-level
        # attributes are merged in: members start from this copy, not from the
        # class's own Sealed/Abstract/Hidden values.
        saved_kwargs = dict(kwargs)
        captures = CLASS_QUERY.captures(node)

        bases = self._decode_from_capture(captures, "bases")
        docstring = self._comment_docstring(captures.get("docstring", None))

        for attribute_node in captures.get("attributes", []):
            key, value = self._parse_attribute(attribute_node)
            if key in self._CLASS_FLAGS:
                kwargs[key] = value

        # `startswith` instead of `[0]`: the parent stem is empty for a bare
        # relative path such as Path("Foo.m"), where indexing would raise.
        if self.filepath.parent.stem.startswith("@"):
            model_type = Classfolder
        else:
            model_type = Class
        model = model_type(
            self.filepath.stem,
            lineno=node.range.start_point.row + 1,
            endlineno=node.range.end_point.row + 1,
            bases=bases,
            docstring=docstring,
            filepath=self.filepath,
            **kwargs,
        )

        for properties_node in captures.get("properties", []):
            block_captures = PROPERTIES_QUERY.captures(properties_node)
            # NOTE(review): the block attributes are collected but never
            # forwarded to Property below, so they currently have no effect.
            # Presumably they were meant to be passed on - confirm which
            # keyword arguments Property accepts before wiring them through.
            property_kwargs = self._attribute_kwargs(  # noqa: F841
                block_captures.get("attributes", []),
                saved_kwargs,
                self._PROPERTY_FLAGS,
            )
            for property_node in block_captures.get("properties", []):
                entry = PROPERTY_QUERY.captures(property_node)

                prop = Property(
                    self._first_from_capture(entry, "name"),
                    annotation=self._first_from_capture(entry, "class"),
                    value=self._decode_from_capture(entry, "default"),
                    docstring=self._comment_docstring(entry.get("comment", None)),
                    parent=model,
                )
                model.members[prop.name] = prop

        for methods_node in captures.get("methods", []):
            block_captures = METHODS_QUERY.captures(methods_node)
            # NOTE(review): only "GetAccess"/"SetAccess" are treated as access
            # attributes here; MATLAB methods blocks use "Access", which is
            # ignored - verify against the Function model before changing.
            method_kwargs = self._attribute_kwargs(
                block_captures.get("attributes", []),
                saved_kwargs,
                self._METHOD_FLAGS,
            )
            for method_node in block_captures.get("methods", []):
                method = self._parse_function(
                    method_node, method=True, parent=model, **method_kwargs
                )
                if (
                    method.name != self.filepath.stem
                    and not method.static
                    and method.parameters
                ):
                    # Remove self from first method argument
                    method.parameters._params = method.parameters._params[1:]

                if method._is_getter and method.name in model.members:
                    prop = model.members[method.name]
                    # Otherwise: either an error, or a getter for a member that
                    # is not a Property of this class; it is skipped.
                    if isinstance(prop, Property):
                        prop.getter = method
                elif method._is_setter and method.name in model.members:
                    prop = model.members[method.name]
                    # Same reasoning as for getters above.
                    if isinstance(prop, Property):
                        prop.setter = method
                else:
                    model.members[method.name] = method

        return model

    def _parse_attribute(self, node: Node) -> tuple[str, Any]:
        """
        Parse an attribute from a given node.

        Args:
            node (Node): The node to parse the attribute from.

        Returns:
            tuple[str, Any]: A tuple containing the attribute key and its value.
                The value is `True` if no value is specified, a boolean when
                the first value node has type "boolean", otherwise the decoded
                text of the first value node.
        """
        captures = ATTRIBUTE_QUERY.captures(node)

        key = self._first_from_capture(captures, "name")
        if "value" not in captures:
            value = True
        elif captures["value"][0].type == "boolean":
            value = _strtobool(self._first_from_capture(captures, "value"))
        else:
            value = self._first_from_capture(captures, "value")

        return (key, value)

    def _parse_function(self, node: Node, method: bool = False, **kwargs) -> Function:
        """
        Parse a function node and return a Function model.

        Inputs and outputs from the signature start as positional-only
        parameters and are then refined from the function's `arguments`
        blocks (kind, annotation, default and docstring).

        Args:
            node (Node): The node representing the function in the syntax tree.
            method (bool, optional): Whether the function is a method. When False
                the model is named after the file instead of the captured name.
                Defaults to False.
            **kwargs: Additional keyword arguments to pass to the Function model.

        Returns:
            Function: The parsed function model.

        Raises:
            IndexError: If FUNCTION_QUERY yields no match for the node.
        """
        captures: dict = FUNCTION_QUERY.matches(node)[0][1]

        parameters: dict = OrderedDict(
            (input_name, Parameter(input_name, kind=ParameterKind.positional_only))
            for input_name in self._decode_from_capture(captures, "input")
        )
        returns: dict = OrderedDict(
            (output_name, Parameter(output_name, kind=ParameterKind.positional_only))
            for output_name in self._decode_from_capture(captures, "output")
        )

        if method:
            name = self._first_from_capture(captures, "name")
        else:
            name = self.filepath.stem

        model = Function(
            name,
            lineno=node.range.start_point.row + 1,
            endlineno=node.range.end_point.row + 1,
            filepath=self.filepath,
            docstring=self._comment_docstring(captures.get("docstring", None)),
            getter="getter" in captures,
            setter="setter" in captures,
            **kwargs,
        )

        for arguments_node in captures.get("arguments", []):
            arguments = ARGUMENTS_QUERY.captures(arguments_node)
            attributes = self._decode_from_capture(arguments, "attributes")
            # A block describes inputs unless it is marked Output only.
            is_input = "Input" in attributes or "Output" not in attributes

            # `.get`: the captures are empty when the query does not match the
            # block (it requires at least one entry).
            for argument_node in arguments.get("arguments", []):
                argument = PROPERTY_QUERY.captures(argument_node)
                argument_name = self._first_from_capture(argument, "name")

                if "options" in argument:
                    # `options.name` entry: the options struct itself is
                    # replaced by one keyword-only parameter per field.
                    options_name = self._first_from_capture(argument, "options")
                    parameters.pop(options_name, None)
                    parameter = parameters[argument_name] = Parameter(
                        argument_name, kind=ParameterKind.keyword_only
                    )
                else:
                    # Names missing from the signature get a throwaway
                    # Parameter that is not added to the model.
                    declared = parameters if is_input else returns
                    parameter = declared.get(argument_name, Parameter(argument_name))

                    if "default" in argument:
                        parameter.kind = ParameterKind.optional
                    else:
                        parameter.kind = ParameterKind.positional_only

                annotation = self._first_from_capture(argument, "class")
                if annotation:
                    parameter.annotation = annotation

                default = self._first_from_capture(argument, "default")
                if default:
                    parameter.default = default

                docstring = self._comment_docstring(
                    argument.get("comment", None), parent=model
                )
                if docstring:
                    parameter.docstring = docstring

        model.parameters = Parameters(*parameters.values())
        model.returns = Parameters(*returns.values()) if returns else None

        return model

    def _decode(self, node: Node) -> str:
        """
        Decode the text of a given node using the file's encoding.

        Args:
            node (Node): The node whose text needs to be decoded.

        Returns:
            str: The decoded text of the node. If the node or its text is None, returns an empty string.
        """
        if node is None or node.text is None:
            return ""
        return node.text.decode(self.encoding)

    def _decode_from_capture(
        self, capture: dict[str, list[Node]], key: str
    ) -> list[str]:
        """
        Decode elements from a capture dictionary based on a specified key.

        Args:
            capture (dict[str, list[Node]]): A dictionary where the keys are strings and the values are lists of Node objects.
            key (str): The key to look for in the capture dictionary.

        Returns:
            list[str]: The decoded text of every node stored under `key`, or an
                empty list when the key is absent.
        """
        return [self._decode(element) for element in capture.get(key, [])]

    def _first_from_capture(self, capture: dict[str, list[Node]], key: str) -> str:
        """
        Retrieve the first decoded string from a capture dictionary for a given key.

        Args:
            capture (dict[str, list[Node]]): A dictionary where the key is a string and the value is a list of Node objects.
            key (str): The key to look up in the capture dictionary.

        Returns:
            str: The first decoded string if available, otherwise an empty string.
        """
        decoded = self._decode_from_capture(capture, key)
        return decoded[0] if decoded else ""

    def _dedent_block(self, lines: list[str]) -> list[str]:
        """
        Dedent a run of lines, tolerating runs without any non-blank line.

        Args:
            lines (list[str]): The lines to dedent.

        Returns:
            list[str]: The result of `_dedent`, or a plain copy of `lines` when
                every line is blank or the list is empty.
        """
        # Guarded here so that empty/blank-only runs never reach the `min()`
        # over an empty sequence inside `_dedent`.
        if not any(line.strip() for line in lines):
            return list(lines)
        return _dedent(lines)

    def _uncomment_lines(self, raw_lines: list[str]) -> list[str]:
        """
        Strip MATLAB comment markers from the given source lines.

        Handles `%` line comments, `%%` section titles and `%{ ... %}` block
        comments. Lines containing the snippet marker `--8<--` are skipped.

        Args:
            raw_lines (list[str]): The comment lines as found in the source.

        Returns:
            list[str]: The docstring lines without comment markers, with each
                run of lines dedented.

        Raises:
            LookupError: If a line does not start with a comment character.
        """
        docstring: list[str] = []
        uncommented: list[str] = []
        lines = iter(raw_lines)

        for raw_line in lines:
            line = raw_line.lstrip()

            if "--8<--" in line:
                continue

            if line.startswith(("%{", "%%")):
                # A title or block comment ends the pending run of `%` lines.
                if uncommented:
                    docstring += self._dedent_block(uncommented)
                    uncommented = []
                if line.startswith("%%"):
                    docstring.append(line[2:].lstrip())
                    continue

                comment_block = []
                line = line[2:]
                # Consume lines from the shared iterator until the closing
                # `%}`; these lines keep their indentation for dedenting.
                while "%}" not in line:
                    comment_block.append(line)
                    try:
                        line = next(lines)
                    except StopIteration:
                        break
                else:
                    last_line = line[: line.index("%}")]
                    if last_line:
                        comment_block.append(last_line)
                # An empty block (`%{%}`) contributes nothing.
                if comment_block:
                    docstring.append(comment_block[0])
                    docstring += self._dedent_block(comment_block[1:])

            elif line.startswith("%"):
                uncommented.append(line[1:])
            else:
                raise LookupError(
                    f"Expected a comment line starting with '%', got {raw_line!r}."
                )

        if uncommented:
            docstring += self._dedent_block(uncommented)

        return docstring

    def _comment_docstring(
        self, nodes: list[Node] | Node | None, parent: MatlabMixin | None = None
    ) -> Docstring | None:
        """
        Extract and process a docstring from given nodes.

        The text of the nodes is split into lines, comment markers are removed
        (single-line comments, `%%` titles and `%{` ... `%}` blocks) and the
        result is joined with newlines. The line range spans from the start of
        the first node to the end of the last one.

        Args:
            nodes (list[Node] | Node | None): The nodes from which to extract the docstring.
            parent (MatlabMixin | None, optional): The parent MatlabMixin. Defaults to None.

        Returns:
            Docstring | None: The extracted and processed docstring, or None if
                `nodes` is None or an empty list.

        Raises:
            LookupError: If a line does not start with a comment character.
        """
        if nodes is None:
            return None

        if isinstance(nodes, list):
            if not nodes:
                return None
            first_node, last_node = nodes[0], nodes[-1]
            raw_lines = [
                line for node in nodes for line in self._decode(node).splitlines()
            ]
        else:
            first_node = last_node = nodes
            raw_lines = self._decode(nodes).splitlines()

        return Docstring(
            "\n".join(self._uncomment_lines(raw_lines)),
            lineno=first_node.range.start_point.row + 1,
            endlineno=last_node.range.end_point.row + 1,
            parent=parent,
        )