docstring-tailor 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docstring_tailor/__init__.py +1 -0
- docstring_tailor/cli_config.py +25 -0
- docstring_tailor/constants.py +50 -0
- docstring_tailor/docstring_visitor.py +269 -0
- docstring_tailor/main.py +107 -0
- docstring_tailor/multi_line_docstring_formatter.py +474 -0
- docstring_tailor/utils.py +70 -0
- docstring_tailor-0.1.0.dist-info/METADATA +153 -0
- docstring_tailor-0.1.0.dist-info/RECORD +12 -0
- docstring_tailor-0.1.0.dist-info/WHEEL +4 -0
- docstring_tailor-0.1.0.dist-info/entry_points.txt +2 -0
- docstring_tailor-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
""""""
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""CLI configuration constants and allowed values for docstring_tailor."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
# Initial argument that specifies the file(s) and/or folder(s)
|
|
6
|
+
DEFAULT_PATHS: list[str] = ["src"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Argument: '--style'
|
|
10
|
+
class DocstringStyle(str, Enum):
    """Docstring styles that can be named through the '--style' argument.

    Members also subclass str, so each member compares equal to its plain string value.
    """

    # Member names are lower-case and identical to their values.
    google = "google"
    numpy = "numpy"
    sphinx = "sphinx"
    epydoc = "epydoc"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Subset of DocstringStyle members that are currently supported.
SUPPORTED_STYLES: set[DocstringStyle] = {DocstringStyle.google}
# Style used when no style is specified.
DEFAULT_STYLE: DocstringStyle = DocstringStyle.google
|
|
21
|
+
|
|
22
|
+
# Argument: '--line-length'
|
|
23
|
+
# Inclusive bounds for the line length, followed by the value used when none is given.
LINE_LENGTH_MIN: int = 30
LINE_LENGTH_MAX: int = 300
LINE_LENGTH_DEFAULT: int = 100
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Module for storing project constants."""
|
|
2
|
+
|
|
3
|
+
# Encoding for reading .py files.
ENCODING: str = "utf-8"

# Docstring delimiters.
DOCSTRING_DELIMITER: str = '"""'
DOCSTRING_DELIMITER_LENGTH: int = len(DOCSTRING_DELIMITER)

# Google-style docstring section keywords, grouped by the kind of body they carry.
GOOGLE_PLAIN_SECTIONS: frozenset[str] = frozenset(
    {
        "Note",
        "Notes",
        "References",
        "See Also",
        "Todo",
        "Warning",
        "Warnings",
    }
)
GOOGLE_ITEM_SECTIONS: frozenset[str] = frozenset(
    {
        "Args",
        "Arguments",
        "Attributes",
        "Raises",
        "Returns",
        "Yields",
    }
)
GOOGLE_CODE_SECTIONS: frozenset[str] = frozenset({"Example", "Examples"})
# Union of every Google section keyword above.
GOOGLE_SECTION_HEADERS: frozenset[str] = (
    GOOGLE_PLAIN_SECTIONS | GOOGLE_ITEM_SECTIONS | GOOGLE_CODE_SECTIONS
)

# NumPy-style docstring section keywords.
NUMPY_ITEM_SECTIONS: frozenset[str] = frozenset(
    {
        "Attributes",
        "Methods",
        "Other Parameters",
        "Parameters",
        "Raises",
        "Receives",
        "Returns",
        "Yields",
    }
)
NUMPY_PLAIN_SECTIONS: frozenset[str] = frozenset(
    {"Examples", "Notes", "References", "See Also"}
)
NUMPY_SECTION_HEADERS: frozenset[str] = NUMPY_ITEM_SECTIONS | NUMPY_PLAIN_SECTIONS

# Sphinx/reST-style docstring directive markers.
# Directive-based rather than section-based — no section headers in the Google/NumPy sense.
SPHINX_ITEM_DIRECTIVES: frozenset[str] = frozenset(
    {":param", ":raises", ":returns", ":rtype", ":type"}
)
SPHINX_PLAIN_DIRECTIVES: frozenset[str] = frozenset(
    {".. example::", ".. note::", ".. seealso::", ".. warning::"}
)
SPHINX_DIRECTIVES: frozenset[str] = SPHINX_ITEM_DIRECTIVES | SPHINX_PLAIN_DIRECTIVES

# Epydoc-style docstring tag markers.
# Tag-based rather than section-based — no section headers in the Google/NumPy sense.
EPYDOC_ITEM_TAGS: frozenset[str] = frozenset(
    {"@param", "@raise", "@return", "@rtype", "@type"}
)
EPYDOC_PLAIN_TAGS: frozenset[str] = frozenset({"@note", "@warning"})
EPYDOC_TAGS: frozenset[str] = EPYDOC_ITEM_TAGS | EPYDOC_PLAIN_TAGS
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""Module providing functionality to format Python docstrings"""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
import libcst as cst
|
|
6
|
+
|
|
7
|
+
from docstring_tailor.constants import (
|
|
8
|
+
DOCSTRING_DELIMITER,
|
|
9
|
+
DOCSTRING_DELIMITER_LENGTH,
|
|
10
|
+
)
|
|
11
|
+
from docstring_tailor.multi_line_docstring_formatter import MultiLineDocstringFormatter
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DocstringVisitor(cst.CSTTransformer):
|
|
15
|
+
"""A transformer for traversing and formatting docstrings.
|
|
16
|
+
|
|
17
|
+
Subclasses libcst's CSTTransformer, which implements the visitor pattern over a Concrete Syntax
|
|
18
|
+
Tree (CST). When tree.visit(DocstringVisitor()) is called, libcst traverses every node in the
|
|
19
|
+
tree and automatically dispatches to the corresponding visit_* or leave_* method on this class
|
|
20
|
+
if one exists. Method names are derived directly from the CST node class names — for example,
|
|
21
|
+
visit_IndentedBlock is called for every cst.IndentedBlock node encountered. visit_* methods are
|
|
22
|
+
called on entry into a node (pre-order) and leave_* methods are called on exit (post-order).
|
|
23
|
+
leave_* methods receive both the original and updated node, and their return value replaces the
|
|
24
|
+
node in the reconstructed tree.
|
|
25
|
+
|
|
26
|
+
The visit_* methods in this class (visit_Module, visit_IndentedBlock) are used exclusively to
|
|
27
|
+
track indentation state as the tree is traversed, not to modify it. The leave_IndentedBlock is
|
|
28
|
+
also used to track indentation. Modifications to the tree happen only in the
|
|
29
|
+
leave_SimpleStatementLine method.
|
|
30
|
+
|
|
31
|
+
Workflow: The entry point for all transformations is leave_SimpleStatementLine, called for every
|
|
32
|
+
simple statement encountered. It delegates to _is_docstring to determine whether the statement
|
|
33
|
+
is a docstring. If not, the node is returned unchanged via the super() call. If it is, the node
|
|
34
|
+
is passed to _format_docstring, which extracts the raw string content and passes it to
|
|
35
|
+
_build_docstring. From there, two paths are possible: if the content fits on one line and
|
|
36
|
+
contains no deliberate paragraph breaks, _build_one_line_docstring is called; otherwise
|
|
37
|
+
_build_multi_line_docstring is called, which delegates to the MultiLineDocstringFormatter class
|
|
38
|
+
for the more complex multi-line formatting logic.
|
|
39
|
+
|
|
40
|
+
Attributes:
|
|
41
|
+
_line_length (int): Maximum characters per line including indentation and triple double
|
|
42
|
+
quotes.
|
|
43
|
+
_current_indent (str): The accumulated indentation string at the current nesting level,
|
|
44
|
+
updated as the tree is traversed.
|
|
45
|
+
_indent_unit (str): The indentation unit string used in the source file, captured from the
|
|
46
|
+
module node on entry. Initialised to four spaces as a safety placeholder.
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
def __init__(self, line_length: int) -> None:
    """Initialises the DocstringVisitor.

    Sets up the indentation tracker used to correctly format multi-line docstrings at any
    nesting level. The initial value of _indent_unit is a four-space placeholder; visit_Module
    replaces it with the module's own indentation string.

    Args:
        line_length (int): Maximum characters per line including indentation and triple double
            quotes.
    """
    # Run the libcst base-class initialiser so any state it sets up exists on this instance.
    super().__init__()

    self._line_length = line_length
    # No indentation has been accumulated before the tree is entered.
    self._current_indent = ""
    # Four spaces, spelled as a repetition so the width cannot be lost to whitespace collapsing.
    self._indent_unit = " " * 4
|
|
63
|
+
|
|
64
|
+
def visit_Module(self, node: cst.Module) -> None:
    """Records the indentation unit of the module being traversed.

    Replaces the placeholder stored in _indent_unit by __init__ with the default_indent value
    reported by the module node.

    Args:
        node (cst.Module): The root module node.
    """
    module_indent = node.default_indent
    self._indent_unit = module_indent
|
|
75
|
+
|
|
76
|
+
def visit_IndentedBlock(self, node: cst.IndentedBlock) -> None:
    """Deepens the tracked indentation by one unit on entry into an indented block.

    The node itself is not inspected; every block is assumed to add exactly one _indent_unit to
    the indentation of its enclosing scope.

    Args:
        node (cst.IndentedBlock): The indented block node being visited.
    """
    self._current_indent = self._current_indent + self._indent_unit
|
|
87
|
+
|
|
88
|
+
def leave_IndentedBlock(
    self,
    original_node: cst.IndentedBlock,
    updated_node: cst.IndentedBlock,
) -> cst.IndentedBlock:
    """Removes one indent unit from the tracked indentation on exit from an indented block.

    Mirror image of visit_IndentedBlock: the trailing characters that were appended on entry
    are sliced off again so _current_indent matches the enclosing scope.

    Args:
        original_node (cst.IndentedBlock): The original indented block node.
        updated_node (cst.IndentedBlock): The updated indented block node.

    Returns:
        updated_node (cst.IndentedBlock): The updated node, unchanged.
    """
    unit_width = len(self._indent_unit)
    self._current_indent = self._current_indent[:-unit_width]
    return updated_node
|
|
109
|
+
|
|
110
|
+
def _build_multi_line_docstring(self, content: str) -> str:
    """Builds a raw multi-line docstring from the stripped content.

    All section handling is delegated to MultiLineDocstringFormatter, which is constructed with
    the visitor's current line length and indentation state. This method only wraps the
    formatter's output in the delimiters, placing the closing delimiter on its own line at the
    current indentation.

    Args:
        content (str): The stripped docstring content, excluding the triple quote delimiters.

    Returns:
        multi_line_docstring (str): The formatted multi-line docstring including the triple
            quote delimiters.
    """
    formatter = MultiLineDocstringFormatter(
        line_length=self._line_length,
        current_indent=self._current_indent,
        indent_unit=self._indent_unit,
    )
    body = formatter.format(content=content)

    return f"{DOCSTRING_DELIMITER}{body}\n{self._current_indent}{DOCSTRING_DELIMITER}"
|
|
144
|
+
|
|
145
|
+
def _build_one_line_docstring(self, content: str) -> str:
    """Builds a raw one-line docstring from the normalized content.

    The content is wrapped directly in the triple quote delimiters. When the content ends with
    a double quote, or with an odd number of backslashes, a single space is inserted before the
    closing delimiter: without it the last character would merge with, or escape, the closing
    quotes and the resulting literal would not be valid Python. In that case the result is one
    character longer than the bare content plus delimiters.

    Args:
        content (str): The normalized stripped docstring content, excluding the triple quote
            delimiters.

    Returns:
        one_line_docstring (str): The formatted one-line docstring including the triple quote
            delimiters.
    """
    quote_char = DOCSTRING_DELIMITER[0]
    trailing_backslashes = len(content) - len(content.rstrip("\\"))
    # An even run of backslashes is a sequence of escaped backslashes and is harmless; an odd
    # run leaves one backslash that would escape the first closing quote.
    needs_padding = content.endswith(quote_char) or trailing_backslashes % 2 == 1
    padding = " " if needs_padding else ""

    one_line_docstring = DOCSTRING_DELIMITER + content + padding + DOCSTRING_DELIMITER

    return one_line_docstring
|
|
159
|
+
|
|
160
|
+
def _build_docstring(self, content: str) -> str:
    """Builds the formatted docstring from the stripped content.

    A one-line docstring is produced only when the content contains no blank line (a deliberate
    paragraph break) and its whitespace-collapsed form, together with the current indentation
    and both delimiters, does not exceed the line length. Every other case is handed to
    _build_multi_line_docstring with the original, un-collapsed content.

    Args:
        content (str): The stripped docstring content, excluding the triple quote delimiters.

    Returns:
        docstring (str): The formatted docstring including the triple quote delimiters.
    """
    has_paragraph_break = re.search(r"\n\s*\n", content) is not None
    collapsed = re.sub(r"\s+", " ", content.strip())
    one_line_width = (
        len(self._current_indent) + len(collapsed) + 2 * DOCSTRING_DELIMITER_LENGTH
    )

    if has_paragraph_break or one_line_width > self._line_length:
        return self._build_multi_line_docstring(content=content)

    return self._build_one_line_docstring(content=collapsed)
|
|
188
|
+
|
|
189
|
+
def _format_docstring(
    self, node: cst.SimpleStatementLine
) -> cst.SimpleStatementLine:
    """Replaces the docstring literal on the given statement with its formatted version.

    Reads the raw literal from the first (and only) expression of the statement, slices off the
    delimiters, strips surrounding whitespace, and passes the remainder to _build_docstring.
    The result is wrapped in a new cst.SimpleString that takes the place of the old value.

    Args:
        node (cst.SimpleStatementLine): A CST node containing a docstring.

    Returns:
        updated_node (cst.SimpleStatementLine): The updated node with the formatted docstring.
    """
    statement = node.body[0]
    quoted = statement.value.value  # type: ignore
    inner = quoted[DOCSTRING_DELIMITER_LENGTH:-DOCSTRING_DELIMITER_LENGTH].strip()

    rebuilt = self._build_docstring(content=inner)

    new_statement = statement.with_changes(value=cst.SimpleString(rebuilt))
    return node.with_changes(body=(new_statement,))
|
|
223
|
+
|
|
224
|
+
def _is_docstring(self, node: cst.SimpleStatementLine) -> bool:
    """Determines if a given node is a docstring.

    A statement qualifies when it holds exactly one expression, that expression is a simple
    string, and the string's source text starts with the triple double quote delimiter. The
    position of the statement within its enclosing scope is not checked.

    Args:
        node (cst.SimpleStatementLine): A node in the CST representing a statement.

    Returns:
        is_docstring (bool): True if the node is a docstring, False otherwise.
    """
    if len(node.body) != 1:
        return False

    statement = node.body[0]
    if not isinstance(statement, cst.Expr):
        return False

    literal = statement.value
    if not isinstance(literal, cst.SimpleString):
        return False

    return literal.value.startswith(DOCSTRING_DELIMITER)
|
|
244
|
+
|
|
245
|
+
def leave_SimpleStatementLine(
    self,
    original_node: cst.SimpleStatementLine,
    updated_node: cst.SimpleStatementLine,
) -> (
    cst.BaseStatement | cst.FlattenSentinel[cst.BaseStatement] | cst.RemovalSentinel
):
    """Formats the statement if it is a docstring and forwards it to the base implementation.

    Statements for which _is_docstring returns False are forwarded untouched; the others are
    first rewritten by _format_docstring.

    Args:
        original_node (cst.SimpleStatementLine): The original CST node.
        updated_node (cst.SimpleStatementLine): The updated CST node.

    Returns:
        node (cst.BaseStatement): The final statement after transformation.
    """
    result_node = (
        self._format_docstring(node=updated_node)
        if self._is_docstring(node=updated_node)
        else updated_node
    )

    return super().leave_SimpleStatementLine(original_node, result_node)
|
docstring_tailor/main.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""Main module"""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Annotated
|
|
5
|
+
|
|
6
|
+
import libcst as cst
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from docstring_tailor.cli_config import (
|
|
10
|
+
DEFAULT_PATHS,
|
|
11
|
+
DEFAULT_STYLE,
|
|
12
|
+
LINE_LENGTH_DEFAULT,
|
|
13
|
+
LINE_LENGTH_MAX,
|
|
14
|
+
LINE_LENGTH_MIN,
|
|
15
|
+
SUPPORTED_STYLES,
|
|
16
|
+
DocstringStyle,
|
|
17
|
+
)
|
|
18
|
+
from docstring_tailor.constants import ENCODING
|
|
19
|
+
from docstring_tailor.docstring_visitor import DocstringVisitor
|
|
20
|
+
from docstring_tailor.utils import collect_python_files, load_config, validate_paths
|
|
21
|
+
|
|
22
|
+
# Typer application object; commands are registered on it with @app.command().
app: typer.Typer = typer.Typer()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@app.command()
def main(
    paths: Annotated[
        list[Path] | None,
        typer.Argument(help="Files or directories to process. Defaults to 'src/'."),
    ] = None,
    style: Annotated[
        DocstringStyle | None,
        typer.Option("--style", help="Docstring style to format to."),
    ] = None,
    line_length: Annotated[
        int | None,
        typer.Option(
            "--line-length",
            help=f"Maximum line length. Must be between {LINE_LENGTH_MIN} and {LINE_LENGTH_MAX}.",
            min=LINE_LENGTH_MIN,
            max=LINE_LENGTH_MAX,
        ),
    ] = None,
) -> None:
    """Formats Python docstrings in the given files or directories to the specified style.

    Processes all .py files found at the provided paths, reformatting their docstrings in place.
    Directories are searched recursively. A file is only written back when formatting actually
    changed its content.

    Values taken from the configuration file are validated the same way as their command-line
    counterparts: an unknown style or an out-of-range line length is reported on stderr and the
    command exits with code 1.

    Args:
        paths (list[Path] | None): Files or directories to process. Defaults to 'src/'.
        style (DocstringStyle | None): The docstring style to format to.
        line_length (int | None): The maximum line length to wrap docstrings to.

    Raises:
        typer.Exit: With code 1 when the resolved style is unknown or not yet supported, or when
            the configured line length is invalid.
    """
    # Resolve configuration with priority: CLI argument > config file > built-in default.
    # NOTE(review): load_config is assumed to return a dict-like object — confirm in utils.
    file_config = load_config()
    resolved_paths = paths or [Path(p) for p in DEFAULT_PATHS]

    if style is not None:
        resolved_style = style
    else:
        configured_style = file_config.get("style", DEFAULT_STYLE.value)
        try:
            resolved_style = DocstringStyle(configured_style)
        except ValueError:
            # Typer validates '--style' itself; a config value needs the same check here.
            typer.echo(
                f"Unknown style '{configured_style}' in configuration. "
                f"Valid styles: {', '.join(s.value for s in DocstringStyle)}.",
                err=True,
            )
            raise typer.Exit(code=1) from None

    if line_length is not None:
        resolved_line_length = line_length
    else:
        resolved_line_length = file_config.get("line-length", LINE_LENGTH_DEFAULT)
        # The min/max on the Typer option only guard the CLI value, so re-check the config one.
        is_valid_length = (
            isinstance(resolved_line_length, int)
            and not isinstance(resolved_line_length, bool)
            and LINE_LENGTH_MIN <= resolved_line_length <= LINE_LENGTH_MAX
        )
        if not is_valid_length:
            typer.echo(
                f"Invalid line-length {resolved_line_length!r} in configuration. "
                f"Must be an integer between {LINE_LENGTH_MIN} and {LINE_LENGTH_MAX}.",
                err=True,
            )
            raise typer.Exit(code=1)

    if resolved_style not in SUPPORTED_STYLES:
        typer.echo(
            f"Style '{resolved_style.value}' is not yet supported. "
            f"Currently supported: {', '.join(s.value for s in SUPPORTED_STYLES)}.",
            err=True,
        )
        raise typer.Exit(code=1)

    validate_paths(paths=resolved_paths)
    python_files = collect_python_files(paths=resolved_paths)

    for file_path in python_files:
        input_data = file_path.read_text(encoding=ENCODING)
        input_tree = cst.parse_module(source=input_data)
        # A fresh visitor per file so indentation state never leaks between files.
        modified_tree = input_tree.visit(
            DocstringVisitor(line_length=resolved_line_length)
        )
        output_data = modified_tree.code
        # Skip the write when nothing changed to leave the file's timestamp untouched.
        if output_data != input_data:
            file_path.write_text(output_data, encoding=ENCODING)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# Run the Typer application only when this module is executed as a script.
if __name__ == "__main__":
    app()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
"""TODO:
|
|
89
|
+
|
|
90
|
+
- Currently, this code has been written specifically for the 'Google' docstring format. Fine for
|
|
91
|
+
now, but the end state goal is to have the functionality that the user can specify the style in the
|
|
92
|
+
pyproject.toml and that everything formats correctly to that style. The reading from pyproject.toml
|
|
93
|
+
is already there, the biggest effort is in reworking multi_line_docstring_formatter a bit to make it
|
|
94
|
+
work for all styles.
|
|
95
|
+
|
|
96
|
+
- Check all the parameters in the docstringformatter package to see which ones I also want.
|
|
97
|
+
|
|
98
|
+
- Implement feature that you can display the diff in terminal, instead of immediately formatting and
|
|
99
|
+
overwriting the .py files.
|
|
100
|
+
|
|
101
|
+
- Testing. Write more tests. Get to 100% code coverage. Redesign the structure of the 'tests/'
|
|
102
|
+
folder. All tests will be roughly the same, they have an input .py file from the 'raw' folder, which
|
|
103
|
+
will be formatted, and then it should be equal to the content of one of the files in the 'formatted'
|
|
104
|
+
folder. This means we can define a mapping that states which files in the raw folder should be
|
|
105
|
+
identical after formatting to a certain file in the formatted folder. In this way you may only need
|
|
106
|
+
one test function that just iterates over this mapping.
|
|
107
|
+
"""
|
|
@@ -0,0 +1,474 @@
|
|
|
1
|
+
"""Module for MultiLineDocstringFormatter."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import textwrap
|
|
5
|
+
from collections import namedtuple
|
|
6
|
+
|
|
7
|
+
from docstring_tailor.constants import (
|
|
8
|
+
DOCSTRING_DELIMITER_LENGTH,
|
|
9
|
+
GOOGLE_CODE_SECTIONS,
|
|
10
|
+
GOOGLE_ITEM_SECTIONS,
|
|
11
|
+
GOOGLE_PLAIN_SECTIONS,
|
|
12
|
+
GOOGLE_SECTION_HEADERS,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Lightweight record for one docstring section: its header name and its body text.
Section = namedtuple("Section", "name body")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MultiLineDocstringFormatter:
|
|
19
|
+
"""Formats the content sections of a docstring into the Google Docstring format.
|
|
20
|
+
|
|
21
|
+
The formatting pipeline starts in format(), which delegates to _split_content() to divide the
|
|
22
|
+
docstring into a preamble and a list of named sections. The preamble — everything before the
|
|
23
|
+
first section header — is split on double newlines and each paragraph formatted independently.
|
|
24
|
+
Named sections are dispatched to a dedicated formatter based on their type: item sections (Args,
|
|
25
|
+
Returns, etc.), plain sections (Note, etc.), or code sections (Examples). Code sections are
|
|
26
|
+
preserved verbatim since they contain doctest-format code that must not be wrapped or modified.
|
|
27
|
+
|
|
28
|
+
Attributes:
|
|
29
|
+
_line_length (int): Maximum characters per line including indentation and triple double
|
|
30
|
+
quotes.
|
|
31
|
+
_current_indent (str): The accumulated indentation string at the current nesting level,
|
|
32
|
+
updated as the tree is traversed.
|
|
33
|
+
_indent_unit (str): The indentation unit string used in the source file, captured from the
|
|
34
|
+
module node on entry. Initialised to four spaces as a safety placeholder.
|
|
35
|
+
_indent_length (int): The length of the `_indent_unit`, which is the same as the number of
|
|
36
|
+
spaces used for a single indentation.
|
|
37
|
+
_paragraph_separator (str): Paragraphs are separated using two '\\n' characters and the current
|
|
38
|
+
indentation. Examples of paragraphs are the 'Args' section, or the 'Returns' section.
|
|
39
|
+
_line_separator (str): Lines are separated using a single '\\n' character and the current
|
|
40
|
+
indentation. When formatting, this is used for moving something to the next line.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
def __init__(self, line_length: int, current_indent: str, indent_unit: str) -> None:
|
|
44
|
+
"""Initialises the MultiLineDocstringFormatter.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
line_length (int): Maximum characters per line including indentation.
|
|
48
|
+
current_indent (str): The accumulated indentation string at the current nesting level.
|
|
49
|
+
indent_unit (str): The indentation unit string used in the source file.
|
|
50
|
+
"""
|
|
51
|
+
self._line_length = line_length
|
|
52
|
+
self._current_indent = current_indent
|
|
53
|
+
self._indent_unit = indent_unit
|
|
54
|
+
self._indent_length = len(self._indent_unit)
|
|
55
|
+
|
|
56
|
+
self._paragraph_separator = "\n\n" + self._current_indent
|
|
57
|
+
self._line_separator = "\n" + self._current_indent
|
|
58
|
+
|
|
59
|
+
def _format_plain_paragraph(self, paragraph: str) -> str:
|
|
60
|
+
"""Formats a plain text paragraph within an indented section body.
|
|
61
|
+
|
|
62
|
+
Used for plain sections such as Note and Warning where the body is indented one level beyond
|
|
63
|
+
the section header. All lines including continuations are at the same indentation level.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
paragraph (str): A plain text paragraph.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
formatted_plain_paragraph (str): The wrapped paragraph string.
|
|
70
|
+
"""
|
|
71
|
+
normalized = re.sub(r"\s+", " ", paragraph.strip())
|
|
72
|
+
wrap_width = self._line_length - len(self._current_indent) - self._indent_length
|
|
73
|
+
lines = textwrap.wrap(normalized, width=wrap_width)
|
|
74
|
+
|
|
75
|
+
line_separator_indented = self._line_separator + self._indent_unit
|
|
76
|
+
formatted_plain_paragraph = line_separator_indented.join(lines)
|
|
77
|
+
|
|
78
|
+
return formatted_plain_paragraph
|
|
79
|
+
|
|
80
|
+
def _format_plain_section(self, section_name: str, section_body: str) -> str:
|
|
81
|
+
"""Formats a plain text section such as Note or Warning.
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
section_name (str): The section header name, e.g. 'Note'.
|
|
85
|
+
section_body (str): The section content, excluding the header line.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
formatted_plain_section (str): The formatted section string.
|
|
89
|
+
"""
|
|
90
|
+
formatted_content = self._format_plain_paragraph(paragraph=section_body)
|
|
91
|
+
formatted_plain_section = (
|
|
92
|
+
section_name
|
|
93
|
+
+ ":\n"
|
|
94
|
+
+ self._current_indent
|
|
95
|
+
+ self._indent_unit
|
|
96
|
+
+ formatted_content
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
return formatted_plain_section
|
|
100
|
+
|
|
101
|
+
def _format_item(self, item_text: str) -> str:
|
|
102
|
+
"""Formats a single labelled item, wrapping its description if it exceeds the line length.
|
|
103
|
+
|
|
104
|
+
Continuation lines are indented one additional level beyond the item's base indent by
|
|
105
|
+
passing a subsequent_indent to textwrap, which is then preserved when the lines are joined
|
|
106
|
+
with the item separator.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
item_text (str): The full stripped item string, e.g. 'name (str): Description.'.
|
|
110
|
+
|
|
111
|
+
Returns:
|
|
112
|
+
formatted (str): The formatted item string with correct indentation.
|
|
113
|
+
"""
|
|
114
|
+
wrap_width = self._line_length - len(self._current_indent) - self._indent_length
|
|
115
|
+
lines = textwrap.wrap(
|
|
116
|
+
item_text.strip(), width=wrap_width, subsequent_indent=self._indent_unit
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
line_separator = "\n" + self._current_indent + self._indent_unit
|
|
120
|
+
formatted = line_separator.join(lines)
|
|
121
|
+
|
|
122
|
+
return formatted
|
|
123
|
+
|
|
124
|
+
def _parse_items(self, section_content: str) -> list[str]:
|
|
125
|
+
"""Groups lines from a section body into individual item strings.
|
|
126
|
+
|
|
127
|
+
Detects item boundaries by indentation level. A new item starts whenever a line returns to
|
|
128
|
+
the minimum indentation level found in the section. Continuation lines at a deeper
|
|
129
|
+
indentation are joined to the current item.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
section_content (str): The section body, excluding the header line.
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
items (list[str]): A list of stripped item strings, one per labelled item.
|
|
136
|
+
"""
|
|
137
|
+
lines = [line for line in section_content.split("\n") if line.strip()]
|
|
138
|
+
|
|
139
|
+
if not lines:
|
|
140
|
+
return []
|
|
141
|
+
|
|
142
|
+
base_indent = min(len(line) - len(line.lstrip()) for line in lines)
|
|
143
|
+
|
|
144
|
+
items: list[str] = []
|
|
145
|
+
current_item_lines: list[str] = []
|
|
146
|
+
for line in lines:
|
|
147
|
+
indent = len(line) - len(line.lstrip())
|
|
148
|
+
if indent == base_indent and current_item_lines:
|
|
149
|
+
items.append(" ".join(l.strip() for l in current_item_lines))
|
|
150
|
+
current_item_lines = [line]
|
|
151
|
+
else:
|
|
152
|
+
current_item_lines.append(line)
|
|
153
|
+
|
|
154
|
+
if current_item_lines:
|
|
155
|
+
items.append(" ".join(l.strip() for l in current_item_lines))
|
|
156
|
+
|
|
157
|
+
return items
|
|
158
|
+
|
|
159
|
+
def _format_items(self, section_content: str) -> str:
|
|
160
|
+
"""Formats the body of an item section by parsing and formatting each item.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
section_content (str): The section body, excluding the header line.
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
formatted (str): The formatted items joined with the correct indentation.
|
|
167
|
+
"""
|
|
168
|
+
item_texts = self._parse_items(section_content=section_content)
|
|
169
|
+
formatted_items = [
|
|
170
|
+
self._format_item(item_text=item_text) for item_text in item_texts
|
|
171
|
+
]
|
|
172
|
+
|
|
173
|
+
item_separator = self._line_separator + self._indent_unit
|
|
174
|
+
formatted_items = item_separator.join(formatted_items)
|
|
175
|
+
|
|
176
|
+
return formatted_items
|
|
177
|
+
|
|
178
|
+
def _format_item_section(self, section_name: str, section_body: str) -> str:
|
|
179
|
+
"""Formats a named section whose body consists of labelled items.
|
|
180
|
+
|
|
181
|
+
Formats each item independently and reassembles the section with the header on the first
|
|
182
|
+
line.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
section_name (str): The section header name, e.g. 'Args' or 'Returns'.
|
|
186
|
+
section_body (str): The section content, excluding the header line.
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
formatted (str): The formatted section string.
|
|
190
|
+
"""
|
|
191
|
+
formatted_items = self._format_items(section_content=section_body)
|
|
192
|
+
formatted_item_section = (
|
|
193
|
+
section_name
|
|
194
|
+
+ ":\n"
|
|
195
|
+
+ self._current_indent
|
|
196
|
+
+ self._indent_unit
|
|
197
|
+
+ formatted_items
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
return formatted_item_section
|
|
201
|
+
|
|
202
|
+
def _format_code_chunk(self, chunk: str) -> str:
|
|
203
|
+
"""Formats a single verbatim code block by stripping original indentation and re-indenting.
|
|
204
|
+
|
|
205
|
+
Preserves the content exactly as written, only adjusting the leading indentation to match
|
|
206
|
+
the current nesting level. Blank lines within the block are preserved.
|
|
207
|
+
|
|
208
|
+
Args:
|
|
209
|
+
chunk (str): A single code block string, delimited by double newlines in the caller.
|
|
210
|
+
|
|
211
|
+
Returns:
|
|
212
|
+
formatted_code_chunk (str): The re-indented code block with no leading prefix on the
|
|
213
|
+
first line, since the prefix is added by the outer join in _format_code_section.
|
|
214
|
+
"""
|
|
215
|
+
lines = chunk.split("\n")
|
|
216
|
+
non_empty_lines = [line for line in lines if line.strip()]
|
|
217
|
+
|
|
218
|
+
if not non_empty_lines:
|
|
219
|
+
return ""
|
|
220
|
+
|
|
221
|
+
base_indent = min(len(line) - len(line.lstrip()) for line in non_empty_lines)
|
|
222
|
+
|
|
223
|
+
formatted_lines: list[str] = []
|
|
224
|
+
for line in lines:
|
|
225
|
+
if line.strip():
|
|
226
|
+
formatted_lines.append(line[base_indent:])
|
|
227
|
+
else:
|
|
228
|
+
formatted_lines.append("")
|
|
229
|
+
|
|
230
|
+
while formatted_lines and not formatted_lines[0]:
|
|
231
|
+
formatted_lines.pop(0)
|
|
232
|
+
while formatted_lines and not formatted_lines[-1]:
|
|
233
|
+
formatted_lines.pop()
|
|
234
|
+
|
|
235
|
+
line_separator = self._line_separator + self._indent_unit
|
|
236
|
+
formatted_code_chunk = line_separator.join(formatted_lines)
|
|
237
|
+
|
|
238
|
+
return formatted_code_chunk
|
|
239
|
+
|
|
240
|
+
def _format_code_section(self, section_name: str, section_body: str) -> str:
|
|
241
|
+
"""Formats a code section such as Examples, preserving code verbatim and wrapping plain
|
|
242
|
+
text.
|
|
243
|
+
|
|
244
|
+
Splits the section body on double newlines into chunks. A chunk is treated as a code block
|
|
245
|
+
if its first non-empty line starts with '>>>'; otherwise it is treated as plain text and
|
|
246
|
+
formatted with _format_plain_paragraph. This distinction cannot be made perfectly — program
|
|
247
|
+
output that follows a blank line is indistinguishable from plain text — but the convention
|
|
248
|
+
that plain text between code blocks does not start with '>>>' covers all practical cases.
|
|
249
|
+
|
|
250
|
+
Args:
|
|
251
|
+
section_name (str): The section header name, e.g. 'Examples'.
|
|
252
|
+
section_body (str): The section content, excluding the header line.
|
|
253
|
+
|
|
254
|
+
Returns:
|
|
255
|
+
formatted_code_section (str): The formatted section string with verbatim code blocks and
|
|
256
|
+
wrapped plain text.
|
|
257
|
+
"""
|
|
258
|
+
chunks = re.split(r"\n\s*\n", section_body)
|
|
259
|
+
formatted_chunks: list[str] = []
|
|
260
|
+
|
|
261
|
+
for chunk in chunks:
|
|
262
|
+
if not chunk.strip():
|
|
263
|
+
continue
|
|
264
|
+
|
|
265
|
+
first_content_line = next(
|
|
266
|
+
(line.strip() for line in chunk.split("\n") if line.strip()), ""
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
if first_content_line.startswith(">>>"):
|
|
270
|
+
formatted_chunks.append(self._format_code_chunk(chunk=chunk))
|
|
271
|
+
else:
|
|
272
|
+
formatted_chunks.append(self._format_plain_paragraph(paragraph=chunk))
|
|
273
|
+
|
|
274
|
+
if not formatted_chunks:
|
|
275
|
+
return section_name + ":"
|
|
276
|
+
|
|
277
|
+
chunk_separator = self._paragraph_separator + self._indent_unit
|
|
278
|
+
content = chunk_separator.join(formatted_chunks)
|
|
279
|
+
|
|
280
|
+
formatted_code_section = (
|
|
281
|
+
section_name + ":\n" + self._current_indent + self._indent_unit + content
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
return formatted_code_section
|
|
285
|
+
|
|
286
|
+
def _format_section(self, section_name: str, section_body: str) -> str:
    """Picks the formatter that matches the section type and applies it.

    Args:
        section_name (str): The section header name, e.g. 'Args' or 'Examples'.
        section_body (str): The section content, excluding the header line.

    Returns:
        (str): The formatted section string.

    Raises:
        ValueError: If section_name is not in any of the known Google section groups.
    """
    if section_name in GOOGLE_PLAIN_SECTIONS:
        formatter = self._format_plain_section
    elif section_name in GOOGLE_ITEM_SECTIONS:
        formatter = self._format_item_section
    elif section_name in GOOGLE_CODE_SECTIONS:
        formatter = self._format_code_section
    else:
        raise ValueError(f"Unsupported section_name: {section_name}")

    return formatter(section_name=section_name, section_body=section_body)
|
|
310
|
+
|
|
311
|
+
def _format_middle_paragraph(self, paragraph: str) -> str:
|
|
312
|
+
"""Formats a plain text paragraph that appears between the opening paragraph and a section.
|
|
313
|
+
|
|
314
|
+
Unlike the opening paragraph, the first line here starts at the full current indentation
|
|
315
|
+
level rather than after the triple quotes, so the full line width is available.
|
|
316
|
+
|
|
317
|
+
Args:
|
|
318
|
+
paragraph (str): A plain text paragraph.
|
|
319
|
+
|
|
320
|
+
Returns:
|
|
321
|
+
formatted (str): The wrapped paragraph string.
|
|
322
|
+
"""
|
|
323
|
+
normalized = re.sub(r"\s+", " ", paragraph.strip())
|
|
324
|
+
wrap_width = self._line_length - len(self._current_indent)
|
|
325
|
+
lines = textwrap.wrap(normalized, width=wrap_width)
|
|
326
|
+
|
|
327
|
+
formatted_paragraph = self._line_separator.join(lines)
|
|
328
|
+
|
|
329
|
+
return formatted_paragraph
|
|
330
|
+
|
|
331
|
+
def _format_opening_paragraph(self, paragraph: str) -> str:
    """Wraps the first paragraph of a docstring, which begins right after the opening quotes.

    A run of spaces as long as the docstring delimiter is used as the initial indent, so the
    first line is wrapped as if the delimiter were already on it. That placeholder is cut off
    again before returning, because the caller prepends the real delimiter.

    Args:
        paragraph (str): The first plain text paragraph.

    Returns:
        formatted_paragraph (str): The wrapped lines joined with the line separator.
    """
    wrapper = textwrap.TextWrapper(
        width=self._line_length - len(self._current_indent),
        initial_indent=" " * DOCSTRING_DELIMITER_LENGTH,
        subsequent_indent="",
    )
    wrapped = wrapper.wrap(re.sub(r"\s+", " ", paragraph.strip()))

    if wrapped:
        # Drop the placeholder that stood in for the opening delimiter.
        wrapped[0] = wrapped[0][DOCSTRING_DELIMITER_LENGTH:]

    return self._line_separator.join(wrapped)
|
|
360
|
+
|
|
361
|
+
def _format_preamble(self, preamble: str) -> str:
|
|
362
|
+
"""Formats the preamble — the content before the first named section header.
|
|
363
|
+
|
|
364
|
+
Splits on double newlines to separate paragraphs. The first paragraph is formatted with
|
|
365
|
+
_format_opening_paragraph to account for the triple quotes consuming space on the first
|
|
366
|
+
line. Subsequent paragraphs are formatted with _format_middle_paragraph at full line width.
|
|
367
|
+
|
|
368
|
+
Args:
|
|
369
|
+
preamble (str): The preamble content string.
|
|
370
|
+
|
|
371
|
+
Returns:
|
|
372
|
+
formatted_preamble (str): The formatted preamble string.
|
|
373
|
+
"""
|
|
374
|
+
paragraphs = re.split(r"\n\s*\n", preamble.strip())
|
|
375
|
+
formatted_paragraphs: list[str] = []
|
|
376
|
+
|
|
377
|
+
for i, paragraph in enumerate(paragraphs):
|
|
378
|
+
if not paragraph.strip():
|
|
379
|
+
continue
|
|
380
|
+
if i == 0:
|
|
381
|
+
formatted_paragraphs.append(
|
|
382
|
+
self._format_opening_paragraph(paragraph=paragraph)
|
|
383
|
+
)
|
|
384
|
+
else:
|
|
385
|
+
formatted_paragraphs.append(
|
|
386
|
+
self._format_middle_paragraph(paragraph=paragraph)
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
formatted_preamble = self._paragraph_separator.join(formatted_paragraphs)
|
|
390
|
+
|
|
391
|
+
return formatted_preamble
|
|
392
|
+
|
|
393
|
+
def _split_content(self, content: str) -> tuple[str, list[Section]]:
    """Splits docstring content into a preamble and a list of named sections.

    Each line is tested against the headers in GOOGLE_SECTION_HEADERS; a header line is the
    header name followed by a colon, with optional surrounding whitespace. Everything before
    the first header is the preamble. Each header owns the lines after it up to the next
    header (or the end of the content). Longer header names are tried first so that a shorter
    name can never win a partial match.

    Args:
        content (str): The stripped docstring content, excluding the triple double quote
            delimiters.

    Returns:
        result (tuple[str, list[Section]]): The preamble string and the sections in order of
            appearance. When no header is found, the whole content is returned as the preamble
            together with an empty list.
    """
    rows = content.split("\n")

    alternatives = "|".join(
        re.escape(name)
        for name in sorted(GOOGLE_SECTION_HEADERS, key=len, reverse=True)
    )
    header_re = re.compile(r"^\s*(" + alternatives + r"):\s*$")

    found: list[tuple[int, str]] = [
        (index, hit.group(1))
        for index, row in enumerate(rows)
        if (hit := header_re.match(row))
    ]

    if not found:
        return content, []

    # A section ends where the next header starts; the last one runs to the end.
    stops = [index for index, _ in found[1:]] + [len(rows)]
    sections: list[Section] = [
        Section(name=title, body="\n".join(rows[begin + 1 : stop]))
        for (begin, title), stop in zip(found, stops)
    ]

    return "\n".join(rows[: found[0][0]]), sections
|
|
443
|
+
|
|
444
|
+
def format(self, content: str) -> str:
    """Formats the full docstring content into the Google docstring format.

    The content is first split into a preamble and named sections by looking for section
    headers, so blank lines inside a section (such as Examples) are not taken for paragraph
    breaks of the preamble. Each part is formatted on its own and the results are joined with
    the paragraph separator.

    Args:
        content (str): The stripped docstring content, excluding the triple quote delimiters.

    Returns:
        formatted_sections (str): The fully formatted docstring content.
    """
    preamble, sections = self._split_content(content=content)

    # A blank preamble contributes nothing, so it is left out of the joined result.
    pieces: list[str] = (
        [self._format_preamble(preamble=preamble)] if preamble.strip() else []
    )
    pieces.extend(
        self._format_section(section_name=part.name, section_body=part.body)
        for part in sections
    )

    return self._paragraph_separator.join(pieces)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Module containing various utility functions."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_config() -> dict:
|
|
9
|
+
"""Loads configuration from docstring_tailor.toml or pyproject.toml.
|
|
10
|
+
|
|
11
|
+
Walks up from the current directory. docstring_tailor.toml takes priority over pyproject.toml if
|
|
12
|
+
both exist at the same level. Stops at the first file found containing docstring_tailor
|
|
13
|
+
configuration.
|
|
14
|
+
|
|
15
|
+
Returns:
|
|
16
|
+
config (dict): Configuration settings, or an empty dict if none found.
|
|
17
|
+
"""
|
|
18
|
+
import tomllib
|
|
19
|
+
|
|
20
|
+
for directory in [Path.cwd(), *Path.cwd().parents]:
|
|
21
|
+
tailor_config = directory / "docstring_tailor.toml"
|
|
22
|
+
if tailor_config.exists():
|
|
23
|
+
with open(tailor_config, "rb") as file:
|
|
24
|
+
return tomllib.load(file)
|
|
25
|
+
|
|
26
|
+
pyproject = directory / "pyproject.toml"
|
|
27
|
+
if pyproject.exists():
|
|
28
|
+
with open(pyproject, "rb") as file:
|
|
29
|
+
data = tomllib.load(file)
|
|
30
|
+
tool_config = data.get("tool", {}).get("docstring_tailor", {})
|
|
31
|
+
if tool_config:
|
|
32
|
+
return tool_config
|
|
33
|
+
|
|
34
|
+
return {}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def collect_python_files(paths: list[Path]) -> list[Path]:
    """Gathers Python files from a mix of file and directory paths.

    A directory contributes every '*.py' file found below it, at any depth. Any path that is
    not a directory is added as given, without checking its suffix.

    Args:
        paths (list[Path]): A list of file and/or directory paths to search.

    Returns:
        python_files (list[Path]): A flat list of the collected paths, in input order.
    """
    collected: list[Path] = []

    for entry in paths:
        collected += entry.rglob("*.py") if entry.is_dir() else [entry]

    return collected
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def validate_paths(paths: list[Path]) -> None:
    """Validates that all provided paths exist on the filesystem.

    Every missing path is reported before exiting, so the user can fix them all in one go.

    Args:
        paths (list[Path]): A list of file and/or directory paths to validate.

    Raises:
        typer.Exit: With exit code 1 if any path does not exist.
    """
    missing = [path for path in paths if not path.exists()]
    if not missing:
        return

    for path in missing:
        # Diagnostics belong on stderr so they never mix with regular output on stdout.
        typer.echo(f"Error: path '{path}' does not exist.", err=True)

    raise typer.Exit(code=1)
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: docstring-tailor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Automatic formatting of Python docstrings according to PEP 257
|
|
5
|
+
Author-email: Auke Bruinsma <afbruinsma@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Auke Bruinsma
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Python: >=3.11
|
|
29
|
+
Requires-Dist: libcst>=1.8.6
|
|
30
|
+
Requires-Dist: logging>=0.4.9.6
|
|
31
|
+
Requires-Dist: typer>=0.26.4
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# Docstring Tailor 🪡
|
|
35
|
+
|
|
36
|
+
Automatic formatting of Python docstrings according to PEP 257 and a predefined maximum number of characters per line.
|
|
37
|
+
|
|
38
|
+
## Docstring conventions according to PEP 257
|
|
39
|
+
|
|
40
|
+
See https://peps.python.org/pep-0257/ for the complete document.
|
|
41
|
+
|
|
42
|
+
**What is a docstring?** A docstring is a string literal that occurs as the first statement in a module, function, class, or method definition. Such a docstring becomes the `__doc__` special attribute of that object.
|
|
43
|
+
|
|
44
|
+
## Different types and forms of docstrings
|
|
45
|
+
|
|
46
|
+
The PEP document makes distinctions between different types of docstrings. For some docstring properties, the convention depends on the kind of object it belongs to. For module docstrings, it can also further depend on the type of python file. Therefore, a distinction is made between these **types** of docstrings:
|
|
47
|
+
|
|
48
|
+
**Four types of docstrings**:
|
|
49
|
+
1. Module docstrings
|
|
50
|
+
- Module docstrings for scripts (stand-alone program)
|
|
51
|
+
- Module docstrings for `__init__.py` files
|
|
52
|
+
- Module docstrings for 'normal' modules.
|
|
53
|
+
2. Class docstrings
|
|
54
|
+
3. Method or function docstrings
|
|
55
|
+
|
|
56
|
+
Besides the types, docstrings can occur in two different **forms**.
|
|
57
|
+
|
|
58
|
+
**Two forms of docstrings**:
|
|
59
|
+
1. One-line docstrings
|
|
60
|
+
2. Multi-line docstrings
|
|
61
|
+
|
|
62
|
+
### Conventions for one-line docstrings
|
|
63
|
+
1. Triple quotes are used even though the string fits on one line. This makes it easy to later expand it.
|
|
64
|
+
2. The closing quotes are on the same line as the opening quotes. This looks better for one-liners.
|
|
65
|
+
3. There’s no blank line either before or after the docstring, except when the type of docstring is a class docstring. Then there should be a blank line after the docstring.
|
|
66
|
+
4. The docstring is a phrase ending in a period. It prescribes the function or method’s effect as a command (“Do this”, “Return that”), not as a description; e.g. don’t write “Returns the pathname …”.
|
|
67
|
+
5. The one-line docstring should NOT be a “signature” reiterating the function/method parameters (which can be obtained by introspection).
|
|
68
|
+
|
|
69
|
+
### Conventions for multi-line docstrings
|
|
70
|
+
1. Multi-line docstrings consist of a summary line just like a one-line docstring, followed by a blank line, followed by a more elaborate description. The summary line may be used by automatic indexing tools; it is important that it fits on one line and is separated from the rest of the docstring by a blank line.
|
|
71
|
+
2. The summary line may be on the same line as the opening quotes or on the next line.
|
|
72
|
+
3. The entire docstring is indented the same as the quotes at its first line.
|
|
73
|
+
4. Insert a blank line after all docstrings (one-line or multi-line) that document a class.
|
|
74
|
+
5. Blank lines should be removed from the beginning and end of the docstring.
|
|
75
|
+
|
|
76
|
+
#### Module docstrings
|
|
77
|
+
|
|
78
|
+
5. The docstring for a module should generally list the classes, exceptions and functions (and any other objects) that are exported by the module, with a one-line summary of each. (These summaries generally give less detail than the summary line in the object’s docstring.)
|
|
79
|
+
6. The docstring of a script (a stand-alone program) should be usable as its “usage” message, printed when the script is invoked with incorrect or missing arguments (or perhaps with a “-h” option, for “help”)
|
|
80
|
+
7. The docstring for a package (i.e., the docstring of the package’s `__init__.py` module) should also list the modules and subpackages exported by the package.
|
|
81
|
+
|
|
82
|
+
#### Function or method docstrings
|
|
83
|
+
|
|
84
|
+
8. The docstring for a function or method should summarize its behavior and document its arguments, return value(s), side effects, exceptions raised, and restrictions on when it can be called (all if applicable). Optional arguments should be indicated. It should be documented whether keyword arguments are part of the interface.
|
|
85
|
+
|
|
86
|
+
#### Class docstrings
|
|
87
|
+
|
|
88
|
+
9. The docstring for a class should summarize its behavior and list the public methods and instance variables. If the class is intended to be subclassed, and has an additional interface for subclasses, this interface should be listed separately (in the docstring). The class constructor should be documented in the docstring for its __init__ method. Individual methods should be documented by their own docstring.
|
|
89
|
+
10. If a class subclasses another class and its behavior is mostly inherited from that class, its docstring should mention this and summarize the differences.
|
|
90
|
+
|
|
91
|
+
## Formatting operations applied by this tool
|
|
92
|
+
|
|
93
|
+
**How does this tool detect docstrings?**: All string literals that start with triple double quotes (""") are recognized as docstrings and will potentially be formatted.
|
|
94
|
+
|
|
95
|
+
**One-line docstrings**
|
|
96
|
+
- Regarding the 5 conventions mentioned above for one-line docstrings, only convention number 2 is enforced (The closing quotes are on the same line as the opening quotes).
|
|
97
|
+
- It is the user's responsibility to adhere to convention number 1, since the triple quotes are used by this tool to detect the docstring. The same goes for conventions number 3, 4 and 5.
|
|
98
|
+
|
|
99
|
+
- Besides the conventions mentioned above, the docstring is formatted according to a pre-defined maximum number of characters per line (**line length**). This means:
|
|
100
|
+
- If a docstring is spread out over multiple lines, but it could fit on one line, it will be converted to a one-line docstring.
|
|
101
|
+
- If a docstring exceeds the line length, it will be converted to a multi-line docstring.
|
|
102
|
+
|
|
103
|
+
**Multi-line docstrings**
|
|
104
|
+
- Regarding the conventions for multi-line docstrings mentioned above, number 2 is applied. This means if the summary line is on the next line, it will be enforced on the same line as the opening triple double quotes.
|
|
105
|
+
- Convention number 5 is also enforced, blank lines at the start and end of the docstring are removed.
|
|
106
|
+
- Most of the other conventions are about the content of the docstring, which are not checked by this tool.
|
|
107
|
+
|
|
108
|
+
Next to that, the layout of the docstring is preserved. For now the focus is on the 'Google' type docstring format, which in its most basic form, looks like this.
|
|
109
|
+
|
|
110
|
+
```
|
|
111
|
+
def function_with_pep484_type_annotations(param1: int, param2: str) -> bool:
|
|
112
|
+
"""Example function with PEP 484 type annotations.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
param1: The first parameter.
|
|
116
|
+
param2: The second parameter.
|
|
117
|
+
|
|
118
|
+
Returns:
|
|
119
|
+
The return value. True for success, False otherwise.
|
|
120
|
+
"""
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Or it can look like this:
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
def example_generator(n):
|
|
127
|
+
"""Generators have a ``Yields`` section instead of a ``Returns`` section.
|
|
128
|
+
|
|
129
|
+
Args:
|
|
130
|
+
n (int): The upper limit of the range to generate, from 0 to `n` - 1.
|
|
131
|
+
|
|
132
|
+
Yields:
|
|
133
|
+
int: The next number in the range of 0 to `n` - 1.
|
|
134
|
+
|
|
135
|
+
Examples:
|
|
136
|
+
Examples should be written in doctest format, and should illustrate how
|
|
137
|
+
to use the function.
|
|
138
|
+
|
|
139
|
+
>>> print([i for i in example_generator(4)])
|
|
140
|
+
[0, 1, 2, 3]
|
|
141
|
+
|
|
142
|
+
"""
|
|
143
|
+
for i in range(n):
|
|
144
|
+
yield i
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Maintaining this structure, while enforcing a maximum number of characters per line, means the following rules have to be implemented in the formatting logic.
|
|
148
|
+
|
|
149
|
+
- If the user starts a new line, even though there is still space on the current line for the next word, it should be corrected and the word should be moved to the current line.
|
|
150
|
+
- If the line is too long, the part that exceeds the line limit should be moved to the next line.
|
|
151
|
+
- If the user uses two '\n' values, it means this was a deliberate choice (for example, starting the 'Args' section) and this should be preserved.
|
|
152
|
+
- Single '\n' characters in for example the 'Args' section should be preserved, as each parameter should start on a new line.
|
|
153
|
+
- If the docstring contains an 'Example' or 'Examples' section, the indentation for the code part and the code itself (indicated with '>>>' and '...') should be untouched, as this code should be able to be interpreted.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
docstring_tailor/__init__.py,sha256=IjHRV0k2DNwvFrEHebmsXiBvmITE8nQUnsR07h9tVkU,7
|
|
2
|
+
docstring_tailor/cli_config.py,sha256=Y1ZtKqmj6jsOK4nOwed_KBHH_k_mHStEqiNwv7fRTlQ,551
|
|
3
|
+
docstring_tailor/constants.py,sha256=jzWpfAr3-QRuBuy7zGfWAH8Sf-VbN0D7r8hUeebjlMY,1770
|
|
4
|
+
docstring_tailor/docstring_visitor.py,sha256=DXSVx-NaJsgrj-NaqEPsFjXGgb5K84oxzBBK5yxRMZE,11796
|
|
5
|
+
docstring_tailor/main.py,sha256=qOEtufWJ_62BKNEeWrDjd1JmHphxvAL6_Ase_6YfscY,3971
|
|
6
|
+
docstring_tailor/multi_line_docstring_formatter.py,sha256=89OkyyQpc0g2Vzlr8DKnqzMT4fAZh00uxG2KHTW1068,19024
|
|
7
|
+
docstring_tailor/utils.py,sha256=kCzUpzIuDtRFcc0_H58yC8_haoGvDsPGh4n7ZIVre5I,2136
|
|
8
|
+
docstring_tailor-0.1.0.dist-info/METADATA,sha256=4YUGnXRNJRcSQj9ZWBQ6i-AgGWsIqfMmtN4hPBI11uc,9359
|
|
9
|
+
docstring_tailor-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
10
|
+
docstring_tailor-0.1.0.dist-info/entry_points.txt,sha256=krQaPqsMrevpU_ZeNs5-a01fxWDRLPkBeF2VySB394M,63
|
|
11
|
+
docstring_tailor-0.1.0.dist-info/licenses/LICENSE,sha256=o3AOsIM_IeJ8-bPxcB5DBbCA3EtsC5zo4HAnB4b-Kao,1070
|
|
12
|
+
docstring_tailor-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Auke Bruinsma
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|