format-docstring 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- format_docstring/__init__.py +5 -0
- format_docstring/base_fixer.py +70 -0
- format_docstring/config.py +211 -0
- format_docstring/docstring_rewriter.py +314 -0
- format_docstring/line_wrap_google.py +7 -0
- format_docstring/line_wrap_numpy.py +387 -0
- format_docstring/line_wrap_utils.py +781 -0
- format_docstring/main_jupyter.py +165 -0
- format_docstring/main_py.py +125 -0
- format_docstring-0.1.0.dist-info/METADATA +311 -0
- format_docstring-0.1.0.dist-info/RECORD +15 -0
- format_docstring-0.1.0.dist-info/WHEEL +5 -0
- format_docstring-0.1.0.dist-info/entry_points.txt +3 -0
- format_docstring-0.1.0.dist-info/licenses/LICENSE +21 -0
- format_docstring-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from format_docstring.line_wrap_utils import (
|
|
6
|
+
add_leading_indent,
|
|
7
|
+
collect_to_temp_output,
|
|
8
|
+
finalize_lines,
|
|
9
|
+
process_temp_output,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def wrap_docstring_numpy(
        docstring: str,
        line_length: int,
        leading_indent: int | None = None,
) -> str:
    """Wrap NumPy-style docstrings with light parsing rules.

    Rules implemented (conservative):
    - Do not wrap section headings or their underline lines.
    - In "Parameters" (and similar) sections, do not wrap signature lines
      like ``name : type, default=...``; wrap indented description lines
      only.  Signature lines get their colon spacing and default-value
      notation normalised.
    - In "Returns"/"Yields"/"Raises" sections, treat the first-level lines
      (either ``name : type`` or just ``type``) as signatures and do not
      wrap them; wrap their indented descriptions.
    - In the "Examples" section, do not wrap lines starting with ``>>> ``.
    - Do not wrap any lines inside fenced code blocks (``` ... ```).
    - Outside these special cases, lines are handed to the wrapping helpers
      from ``line_wrap_utils``.

    Parameters
    ----------
    docstring : str
        The docstring content to wrap.
    line_length : int
        Target width, forwarded to ``process_temp_output``.
    leading_indent : int | None, default=None
        Indentation (in spaces) of the docstring's parent.  A line whose
        indentation does not exceed this value counts as "top level" inside
        a section.  When ``None``, the indentation of the current section
        heading is used instead, so that the comparison never involves
        ``None``.

    Returns
    -------
    str
        The wrapped docstring as produced by ``finalize_lines``; the input
        (after ``add_leading_indent``) is returned as-is when it contains
        no lines.
    """
    # Pre-processing: if caller provides indentation context (i.e., the
    # indentation level of the docstring's parent), and the docstring body
    # doesn't begin with a newline followed by that many spaces, prepend it.
    # This helps place the closing quotes on their own indented line later.
    docstring_: str = add_leading_indent(docstring, leading_indent)

    lines: list[str] = docstring_.splitlines()
    if not lines:
        return docstring_

    # Section names (lowercase) grouped by how their bodies are treated.
    SECTION_PARAMS = {
        'parameters',
        'parameter',  # tolerate typo
        'other parameters',
        'other parameter',  # tolerate typo
        'attributes',
        'attribute',  # tolerate typo
    }
    SECTION_RETURNS = {
        'returns',
        'return',  # tolerate typo
        'yields',
        'yield',  # tolerate typo
        'raises',
        'raise',  # tolerate typo
    }
    SECTION_EXAMPLES = {'examples'}

    temp_out: list[str | list[str]] = []
    in_code_fence: bool = False
    current_section: str = ''
    in_examples: bool = False
    # Indentation of the most recent section heading.  Serves as the
    # top-level threshold when the caller did not pass ``leading_indent``.
    section_indent: int = 0

    i: int = 0
    while i < len(lines):
        line: str = lines[i]

        if line == '':
            temp_out.append(line)
            i += 1
            continue

        stripped: str = line.lstrip(' ')
        indent_length: int = len(line) - len(stripped)

        # Detect code fence start/end first; always preserve fence lines
        if stripped.startswith('```'):
            in_code_fence = not in_code_fence
            temp_out.append(line)
            i += 1
            continue

        # Detect and pass-through section headings with underline
        if not in_code_fence:
            heading: str | None = _get_section_heading_title(lines, i)
            if heading:
                current_section = heading
                in_examples = heading in SECTION_EXAMPLES
                section_indent = indent_length
                temp_out.append(line)
                temp_out.append(lines[i + 1])
                i += 2  # heading and underline are consumed together
                continue

        # Inside fenced code blocks: pass through unchanged
        if in_code_fence:
            temp_out.append(line)
            i += 1
            continue

        # In Examples, skip wrapping for REPL lines
        if in_examples and stripped.startswith('>>> '):
            temp_out.append(line)
            i += 1
            continue

        # A line is "top level" when it is indented no deeper than the
        # docstring itself.  Comparing against ``None`` would raise a
        # TypeError, hence the fallback to the heading's indentation.
        top_level_indent: int = (
            section_indent if leading_indent is None else leading_indent
        )
        is_top_level: bool = indent_length <= top_level_indent

        section_lower_case: str = current_section.lower()

        # Parameters-like sections
        if section_lower_case in SECTION_PARAMS:
            if line.strip() == '':
                temp_out.append(line)
                i += 1
                continue

            # Only treat as a signature if it appears at the top level of
            # the section.  This prevents mis-detecting description lines
            # that happen to contain a colon (e.g., tables, examples,
            # notes) as new parameter signatures.
            if _is_param_signature(line) and is_top_level:
                fixed_line = _fix_colon_spacing(line)
                fixed_line = _standardize_default_value(fixed_line)
                temp_out.append(fixed_line)
                i += 1
                continue

            # Description lines (typically indented): wrap if too long
            collect_to_temp_output(temp_out, line)
            i += 1
            continue

        # Returns/Yields/Raises sections
        if section_lower_case in SECTION_RETURNS:
            if line.strip() == '':
                temp_out.append(line)
                i += 1
                continue

            # Treat top-level lines as signatures
            if is_top_level:
                temp_out.append(line)
                i += 1
                continue

            collect_to_temp_output(temp_out, line)
            i += 1
            continue

        # Examples or any other section
        collect_to_temp_output(temp_out, line)
        i += 1

    out: list[str] = process_temp_output(temp_out, width=line_length)
    return finalize_lines(out, leading_indent)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _is_hyphen_underline(s: str) -> bool:
    """Tell whether ``s`` is made up solely of hyphens (two or more).

    Whitespace at either end is stripped before checking. This is a
    deliberately relaxed test for NumPy-style section underlines, such as
    the dashed line placed beneath "Parameters".

    Parameters
    ----------
    s : str
        The candidate underline line.

    Returns
    -------
    bool
        ``True`` when the stripped text has length >= 2 and every character
        is ``-``; ``False`` otherwise.

    Examples
    --------
    >>> _is_hyphen_underline('---')
    True
    >>> _is_hyphen_underline('  ----  ')
    True
    >>> _is_hyphen_underline('--')
    True
    >>> _is_hyphen_underline('-')
    False
    >>> _is_hyphen_underline(' - - ')
    False

    """
    core = s.strip()
    if len(core) < 2:
        return False

    # Every character is a hyphen exactly when the hyphen count equals the
    # total length.
    return core.count('-') == len(core)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _get_section_heading_title(lines: list[str], idx: int) -> str | None:
    """Return the lowercased heading at ``lines[idx]`` if it is underlined.

    A heading is recognised when ``lines[idx]`` contains non-blank text and
    the line right after it, ``lines[idx + 1]``, is accepted by
    ``_is_hyphen_underline`` (a hyphen-only line of two or more hyphens,
    surrounding whitespace ignored).

    Parameters
    ----------
    lines : list[str]
        All lines of the docstring being processed.
    idx : int
        Index of the candidate heading line.

    Returns
    -------
    str | None
        The stripped, lowercased heading text, or ``None`` when there is no
        following line, the candidate is blank, or the following line is
        not a hyphen underline.
    """
    underline_idx = idx + 1
    if underline_idx >= len(lines):
        # No room for an underline after the candidate line.
        return None

    heading_text = lines[idx].strip()
    if heading_text and _is_hyphen_underline(lines[underline_idx]):
        return heading_text.lower()

    return None
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# Building blocks for the parameter-signature regex.
START = r'[A-Za-z_]'  # First character of an identifier
CONT = r'[A-Za-z0-9_]'  # Any later character of an identifier

# One parameter name: an identifier optionally preceded by up to two
# asterisks (covers ``*args`` and ``**kwargs``).
_STARRED_NAME = rf'\*{{0,2}}{START}{CONT}*'

# Precompiled regex for NumPy parameter signatures.
# Resulting pattern:
#   ^\s*\*{0,2}IDENTIFIER(?:\s*,\s*\*{0,2}IDENTIFIER)*\s*:\s*.*$
# Reading left to right:
# - ^\s*                        optional leading whitespace
# - \*{0,2}IDENTIFIER           the first (possibly starred) name
# - (?:\s*,\s*\*{0,2}IDENTIFIER)*   any number of further comma-separated names
# - \s*:\s*                     the colon, with optional whitespace around it
# - .*$                         the annotation, which may be empty
_PARAM_SIGNATURE_RE = re.compile(
    rf'^\s*{_STARRED_NAME}(?:\s*,\s*{_STARRED_NAME})*\s*:\s*.*$'
)


def _is_param_signature(text: str) -> bool:
    r"""Decide whether ``text`` has the shape of a NumPy parameter signature.

    The check is a single match against the precompiled
    ``_PARAM_SIGNATURE_RE``.  The pattern is intentionally permissive about
    the annotation (anything, or nothing, may follow the colon) but strict
    about the names before it: they must be identifiers, optionally starred,
    separated by commas.

    Accepted (examples)
    -------------------
    - ``name : type``
    - ``name: type`` (no space before the colon)
    - ``alpha, beta : list[str] | None`` (several names)
    - ``abc :`` or ``abc:`` (nothing after the colon)
    - ``*args : Any``
    - ``**kwargs : dict[str, Any]``
    - ``*args, **kwargs : Any``
    - Any of the above with leading indentation

    Rejected (examples)
    -------------------
    - Lines that contain no colon
    - Name parts that are not identifiers or comma-separated identifiers
      (e.g. ``1name : int``, ``alpha, beta gamma : int``)

    Parameters
    ----------
    text : str
        A single docstring line.

    Returns
    -------
    bool
        ``True`` if the line matches the signature pattern.
    """
    return _PARAM_SIGNATURE_RE.match(text) is not None
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _fix_colon_spacing(line: str) -> str:
    """Fix spacing around the first colon of a parameter signature line.

    The text before the first colon has its trailing whitespace removed and
    the text after it has its leading whitespace removed; the two are then
    joined with ``' : '``.  When nothing follows the colon (e.g. ``abc:``)
    the result ends with ``' :'`` so that no trailing whitespace is
    introduced.  Intended for lines already recognised as parameter
    signatures by ``_is_param_signature()``; later colons (for instance
    inside a type annotation or default value) are left untouched.

    Parameters
    ----------
    line : str
        The line to fix

    Returns
    -------
    str
        The line with corrected colon spacing, or the line unchanged if it
        contains no colon

    Examples
    --------
    >>> _fix_colon_spacing('arg1: dict[str, list[str]]')
    'arg1 : dict[str, list[str]]'
    >>> _fix_colon_spacing('arg1 : dict[str, list[str]]')
    'arg1 : dict[str, list[str]]'
    >>> _fix_colon_spacing('    arg1:dict[str, list[str]]')
    '    arg1 : dict[str, list[str]]'
    >>> _fix_colon_spacing('abc:')
    'abc :'
    """
    # Split on the first colon only; ``separator`` is empty if none exists.
    names, separator, annotation = line.partition(':')
    if not separator:
        return line

    names = names.rstrip()
    annotation = annotation.lstrip()

    if not annotation:
        # Empty annotation: avoid emitting a dangling space after the colon.
        return f'{names} :'

    return f'{names} : {annotation}'
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# Precompiled regex for default value standardization (colon format)
# Pattern: ^(.*?)(?:,\s*|\s+)default\s*:\s*(.+)$
# Matches formats like "default:XXX" or "default: XXX"
_DEFAULT_COLON_RE = re.compile(
    r'^(.*?)'  # Everything before default (non-greedy)
    r'(?:,\s*|\s+)'  # Either comma+spaces or just spaces
    r'default'  # The word "default"
    r'\s*:\s*'  # Colon with optional spaces
    r'(.+)$',  # The default value
    re.IGNORECASE,
)

# Precompiled regex for default value standardization (space format)
# Pattern: ^(.*?)(?:,\s*|\s+)default\s+(?:is\s+)?(.+)$
# Matches formats like "default XXX" or "default is XXX"
_DEFAULT_SPACE_RE = re.compile(
    r'^(.*?)'  # Everything before default (non-greedy)
    r'(?:,\s*|\s+)'  # Either comma+spaces or just spaces
    r'default'  # The word "default"
    r'\s+'  # Required space after "default"
    r'(?:is\s+)?'  # Optional "is "
    r'(.+)$',  # The default value
    re.IGNORECASE,
)


def _standardize_default_value(line: str) -> str:
    """Standardize default value declarations in parameter signatures.

    Converts various formats of default value specifications to the standard
    `, default=XXX` format. Handles formats like:
    - ` default XXX`
    - `, default XXX`
    - `, default is XXX`
    - ` default is XXX`
    - ` default:XXX`
    - ` default: XXX`
    - `, default:XXX`
    - `, default: XXX`
    - `, default = XXX` (the stray ``=`` is absorbed rather than doubled)

    Only the annotation part of the signature (the text after the first
    colon) is searched, so a parameter that is itself named ``default``
    (``default : int`` or ``a, default : int``) is never mistaken for a
    default-value declaration.  A line without any colon is searched as a
    whole.

    Parameters
    ----------
    line : str
        The parameter signature line to standardize

    Returns
    -------
    str
        The line with standardized default value format, or the line
        unchanged when no default-value declaration is recognised

    Examples
    --------
    >>> _standardize_default_value('arg : int, default 10')
    'arg : int, default=10'
    >>> _standardize_default_value('arg : str, default is "hello"')
    'arg : str, default="hello"'
    >>> _standardize_default_value('arg : bool, default: True')
    'arg : bool, default=True'
    >>> _standardize_default_value('    default : int')
    '    default : int'
    >>> _standardize_default_value('arg : int, default = 5')
    'arg : int, default=5'
    """
    # Separate the name list from the annotation at the first colon.
    names, separator, annotation = line.partition(':')
    if not separator:
        # No name/type separator at all: search the whole line.
        names, annotation = '', line

    # Check colon format first to avoid matching colons in space-based
    # pattern, then fall back to the space-separated format.
    match = _DEFAULT_COLON_RE.match(annotation)
    if match is None:
        match = _DEFAULT_SPACE_RE.match(annotation)
    if match is None:
        return line

    before = match.group(1).rstrip()
    default_value = match.group(2).strip()

    # "default = X" is captured by the space pattern with the "=" as part
    # of the value; drop it so the result is not "default== X".
    if default_value.startswith('='):
        default_value = default_value[1:].lstrip()
        if not default_value:
            return line

    return f'{names}{separator}{before}, default={default_value}'
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def handle_single_line_docstring_that_is_a_bit_too_long(
        whole_docstring_literal: str | None,
        docstring_content: str,
        docstring_starting_col: int,
        docstring_ending_col: int,
        line_length: int = 79,
) -> str | None:
    """
    Handle single-line docstring that's a bit too long: the docstring content
    is not long enough to be wrapped, but with the leading and ending quotes
    (6 quotes in total) the whole line exceeds length limit.

    In that case the literal is re-laid out over three lines: the opening
    quotes (with any string prefix), the content, and the closing quotes,
    the last two indented by ``docstring_starting_col`` spaces.

    Parameters
    ----------
    whole_docstring_literal : str | None
        The docstring literal including its quotes (and string prefix, if
        any).
    docstring_content : str
        The text between the quotes.  It must occur verbatim inside
        ``whole_docstring_literal`` for the literal to be split.
    docstring_starting_col : int
        Column at which the literal starts; used as the indentation width
        of the new lines.
    docstring_ending_col : int
        Column at which the literal ends; compared against ``line_length``.
    line_length : int, default=79
        Length limit.

    Returns
    -------
    str | None
        ``None`` when ``whole_docstring_literal`` is ``None``.  The literal
        unchanged when it is multi-line, when it ends before
        ``line_length``, or when it cannot be split safely (empty content,
        or content not found verbatim in the literal).  Otherwise the
        three-line form described above.
    """
    if whole_docstring_literal is None:
        return None

    if '\n' in whole_docstring_literal:  # multi-line: do not handle
        return whole_docstring_literal

    # NOTE(review): ``>=`` treats a literal ending exactly at ``line_length``
    # as too long; whether that is intended depends on how the caller
    # defines ``docstring_ending_col`` -- confirm.
    if docstring_ending_col < line_length:  # fits within the limit
        return whole_docstring_literal

    if not docstring_content:
        # An empty separator cannot be searched for; nothing to re-lay out.
        return whole_docstring_literal

    # Split around the LAST occurrence so that a string prefix which happens
    # to equal the content (e.g. ``r"""r"""``) stays in the prefix.
    prefix, found, postfix = whole_docstring_literal.rpartition(
        docstring_content
    )
    if not found:
        # Content is not present verbatim (e.g. the literal uses escape
        # sequences); leave the literal untouched rather than corrupt it.
        return whole_docstring_literal

    indent: str = ' ' * docstring_starting_col
    return f'{prefix}\n{indent}{docstring_content}\n{indent}{postfix}'
|