format-docstring 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,387 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from format_docstring.line_wrap_utils import (
6
+ add_leading_indent,
7
+ collect_to_temp_output,
8
+ finalize_lines,
9
+ process_temp_output,
10
+ )
11
+
12
+
13
def wrap_docstring_numpy(
        docstring: str,
        line_length: int,
        leading_indent: int | None = None,
) -> str:
    """Wrap NumPy-style docstrings with light parsing rules.

    Rules implemented (conservative):
    - Do not wrap section headings or their underline lines.
    - In "Parameters" (and similar) sections, do not wrap signature lines
      like ``name : type, default=...``; only normalize their colon spacing
      and default-value notation.  Other lines are queued for wrapping.
    - In "Returns"/"Yields"/"Raises" sections, treat the top-level lines
      (either ``name : type`` or just ``type``) as signatures and do not wrap
      them; deeper-indented lines are queued for wrapping.
    - In the "Examples" section, do not wrap lines starting with ``>>> ``.
    - Do not wrap any lines inside fenced code blocks (``` ... ```).
    - Every other line is handed to ``collect_to_temp_output`` and the
      actual wrapping is done by ``process_temp_output``.

    Parameters
    ----------
    docstring : str
        The docstring content to wrap.
    line_length : int
        Passed to ``process_temp_output`` as the wrapping width.
    leading_indent : int | None, default=None
        Indentation (in spaces) of the docstring's parent.  It is forwarded
        to ``add_leading_indent`` and ``finalize_lines`` and, when given,
        defines the maximum indentation of a "top-level" (signature) line.
        When ``None``, the indentation of the current section heading is
        used for that purpose instead.

    Returns
    -------
    str
        The result of ``finalize_lines`` on the processed lines, or the
        pre-processed docstring unchanged if it contains no lines.
    """
    # Pre-processing: let the helper prepend the indentation context so the
    # closing quotes can later be placed on their own indented line.
    docstring_: str = add_leading_indent(docstring, leading_indent)

    lines: list[str] = docstring_.splitlines()
    if not lines:
        return docstring_

    # Section titles (lowercase) grouped by how their content is treated
    SECTION_PARAMS = {
        'parameters',
        'parameter',  # tolerate typo
        'other parameters',
        'other parameter',  # tolerate typo
        'attributes',
        'attribute',  # tolerate typo
    }
    SECTION_RETURNS = {
        'returns',
        'return',  # tolerate typo
        'yields',
        'yield',  # tolerate typo
        'raises',
        'raise',  # tolerate typo
    }
    SECTION_EXAMPLES = {'examples'}

    temp_out: list[str | list[str]] = []
    in_code_fence: bool = False
    current_section: str = ''
    in_examples: bool = False
    # Indentation of the most recent section heading.  Used as the
    # "top level" reference when the caller gave no ``leading_indent``;
    # comparing an int with ``None`` would raise ``TypeError``.
    section_indent: int = 0

    i: int = 0
    while i < len(lines):
        line: str = lines[i]

        if line == '':
            temp_out.append(line)
            i += 1
            continue

        stripped: str = line.lstrip(' ')
        indent_length: int = len(line) - len(stripped)

        # Detect code fence start/end first; always preserve fence lines
        if stripped.startswith('```'):
            in_code_fence = not in_code_fence
            temp_out.append(line)
            i += 1
            continue

        # Inside fenced code blocks: pass through unchanged
        if in_code_fence:
            temp_out.append(line)
            i += 1
            continue

        # Detect and pass through section headings together with their
        # underline (two lines consumed at once)
        heading: str | None = _get_section_heading_title(lines, i)
        if heading:
            current_section = heading
            section_indent = indent_length
            in_examples = heading in SECTION_EXAMPLES
            temp_out.append(line)
            temp_out.append(lines[i + 1])
            i += 2
            continue

        # In Examples, skip wrapping for REPL lines
        if in_examples and stripped.startswith('>>> '):
            temp_out.append(line)
            i += 1
            continue

        section_lower_case: str = current_section.lower()
        top_level_indent: int = (
            section_indent if leading_indent is None else leading_indent
        )

        # Parameters-like sections
        if section_lower_case in SECTION_PARAMS:
            if line.strip() == '':
                temp_out.append(line)
                i += 1
                continue

            # Only treat a line as a signature if it sits at the top level of
            # the section (indentation <= ``top_level_indent``).  This
            # prevents mis-detecting description lines that happen to contain
            # a colon (e.g., tables, examples, notes) as new signatures.
            if indent_length <= top_level_indent and _is_param_signature(line):
                fixed_line = _fix_colon_spacing(line)
                fixed_line = _standardize_default_value(fixed_line)
                temp_out.append(fixed_line)
                i += 1
                continue

            # Description lines (typically indented): candidates for wrapping
            collect_to_temp_output(temp_out, line)
            i += 1
            continue

        # Returns/Yields/Raises sections
        if section_lower_case in SECTION_RETURNS:
            if line.strip() == '':
                temp_out.append(line)
                i += 1
                continue

            # Treat top-level lines as signatures: keep them verbatim
            if indent_length <= top_level_indent:
                temp_out.append(line)
                i += 1
                continue

            collect_to_temp_output(temp_out, line)
            i += 1
            continue

        # Examples or any other section
        collect_to_temp_output(temp_out, line)
        i += 1

    out: list[str] = process_temp_output(temp_out, width=line_length)
    return finalize_lines(out, leading_indent)
155
+
156
+
157
def _is_hyphen_underline(s: str) -> bool:
    """Tell whether ``s`` is a run of two or more hyphens and nothing else.

    Whitespace on either side of the run is stripped before checking.  This
    is a relaxed detector for NumPy-style section underlines such as the
    line beneath "Parameters".

    Parameters
    ----------
    s : str
        The candidate underline line.

    Returns
    -------
    bool
        ``True`` if, after stripping, the text has at least two characters
        and every one of them is ``-``.

    Examples
    --------
    >>> _is_hyphen_underline('---')
    True
    >>> _is_hyphen_underline('  ----  ')
    True
    >>> _is_hyphen_underline('-')
    False
    >>> _is_hyphen_underline(' - - ')
    False
    """
    core = s.strip()
    if len(core) < 2:
        return False

    # Every character is a hyphen exactly when the hyphen count equals the
    # length of the stripped text.
    return core.count('-') == len(core)
177
+
178
+
179
def _get_section_heading_title(lines: list[str], idx: int) -> str | None:
    """Return the lowercased section title at ``idx`` if it is underlined.

    ``lines[idx]`` must hold a non-blank title and ``lines[idx + 1]`` must be
    a hyphen-only underline as recognized by ``_is_hyphen_underline`` (two
    or more hyphens, surrounding whitespace ignored).

    Parameters
    ----------
    lines : list[str]
        All lines of the docstring.
    idx : int
        Index of the candidate title line.

    Returns
    -------
    str | None
        The stripped, lowercased title when the pattern matches; ``None``
        when there is no following line, the title is blank, or the next
        line is not an underline.
    """
    underline_idx = idx + 1
    if underline_idx >= len(lines):
        # The title would be the last line: nothing can underline it
        return None

    title = lines[idx].strip()
    if title and _is_hyphen_underline(lines[underline_idx]):
        return title.lower()

    return None
198
+
199
+
200
# Building blocks of the parameter-signature regex
START = r'[A-Za-z_]'  # first character of an identifier
CONT = r'[A-Za-z0-9_]'  # any subsequent character of an identifier

# Precompiled regex recognizing NumPy parameter signature lines.
# Assembled pattern:
#   ^\s*\*{0,2}IDENTIFIER(?:\s*,\s*\*{0,2}IDENTIFIER)*\s*:\s*.*$
# where IDENTIFIER is [A-Za-z_][A-Za-z0-9_]*.  The three pieces below are
# joined by implicit string concatenation.
_PARAM_SIGNATURE_RE = re.compile(
    # Optional leading whitespace, up to two asterisks (``*args`` /
    # ``**kwargs``), then the first identifier
    rf'^\s*\*{{0,2}}{START}{CONT}*'
    # Zero or more further ", identifier" items, each optionally starred
    rf'(?:\s*,\s*\*{{0,2}}{START}{CONT}*)*'
    # A colon with optional surrounding whitespace, then anything (or
    # nothing) as the right-hand side
    r'\s*:\s*.*$'
)
216
+
217
+
218
def _is_param_signature(text: str) -> bool:
    r"""Report whether ``text`` has the shape of a NumPy parameter signature.

    The decision is made by matching the whole line against the precompiled
    ``_PARAM_SIGNATURE_RE``: one or more comma-separated identifiers (each
    optionally prefixed by ``*`` or ``**``), followed by a colon and an
    arbitrary, possibly empty, right-hand side.

    Accepted (examples)
    -------------------
    - ``name : type``
    - ``name: type`` (no space before the colon)
    - ``alpha, beta : list[str] | None`` (several names)
    - ``abc :`` or ``abc:`` (nothing after the colon)
    - ``*args : Any`` and ``**kwargs : dict[str, Any]``
    - ``*args, **kwargs : Any``
    - Any of the above with leading indentation

    Rejected (examples)
    -------------------
    - Lines that contain no colon
    - Left-hand sides that are not identifiers or comma-separated
      identifiers (e.g. ``1name : int``, ``alpha, beta gamma : int``)

    Parameters
    ----------
    text : str
        A single docstring line.

    Returns
    -------
    bool
        ``True`` when the line matches the signature pattern.
    """
    return _PARAM_SIGNATURE_RE.match(text) is not None
244
+
245
+
246
def _fix_colon_spacing(line: str) -> str:
    """Normalize the spacing around the first colon of a signature line.

    The text before the first colon loses its trailing whitespace and the
    text after it loses its leading whitespace; the two are then joined by
    ``' : '``.  Leading indentation of the line is preserved.  If nothing
    follows the colon (an empty annotation such as ``abc:``), the result
    ends with ``' :'`` so that no trailing whitespace is produced.

    Parameters
    ----------
    line : str
        The line to fix.  Intended for lines already recognized as
        parameter signatures; this function itself only looks for a colon.

    Returns
    -------
    str
        The line with corrected colon spacing, or ``line`` unchanged when
        it contains no colon.

    Examples
    --------
    >>> _fix_colon_spacing('arg1: dict[str, list[str]]')
    'arg1 : dict[str, list[str]]'
    >>> _fix_colon_spacing('arg1 : dict[str, list[str]]')
    'arg1 : dict[str, list[str]]'
    >>> _fix_colon_spacing('    arg1:dict[str, list[str]]')
    '    arg1 : dict[str, list[str]]'
    >>> _fix_colon_spacing('abc:')
    'abc :'
    """
    # Only the FIRST colon separates names from the annotation; later
    # colons (e.g. inside the type or a default value) are left untouched.
    names, colon, annotation = line.partition(':')
    if not colon:
        return line

    names = names.rstrip()
    annotation = annotation.lstrip()
    if not annotation:
        # Empty annotation: avoid emitting a dangling space after the colon
        return names + ' :'

    return names + ' : ' + annotation
283
+
284
+
285
# Precompiled regex for default value standardization (colon format)
# Pattern: ^(.*?)(?:,\s*|\s+)default\s*:\s*(.+)$
# Matches formats like "default:XXX" or "default: XXX"
_DEFAULT_COLON_RE = re.compile(
    r'^(.*?)'  # Everything before default (non-greedy)
    r'(?:,\s*|\s+)'  # Either comma+spaces or just spaces
    r'default'  # The word "default"
    r'\s*:\s*'  # Colon with optional spaces
    r'(.+)$',  # The default value
    re.IGNORECASE,
)

# Precompiled regex for default value standardization (equals format)
# Pattern: ^(.*?)(?:,\s*|\s+)default\s*=\s*(.+)$
# Matches formats like "default=XXX" or "default = XXX"
_DEFAULT_EQUALS_RE = re.compile(
    r'^(.*?)'  # Everything before default (non-greedy)
    r'(?:,\s*|\s+)'  # Either comma+spaces or just spaces
    r'default'  # The word "default"
    r'\s*=\s*'  # Equals sign with optional spaces
    r'(.+)$',  # The default value
    re.IGNORECASE,
)

# Precompiled regex for default value standardization (space format)
# Pattern: ^(.*?)(?:,\s*|\s+)default\s+(?:is\s+)?(.+)$
# Matches formats like "default XXX" or "default is XXX"
_DEFAULT_SPACE_RE = re.compile(
    r'^(.*?)'  # Everything before default (non-greedy)
    r'(?:,\s*|\s+)'  # Either comma+spaces or just spaces
    r'default'  # The word "default"
    r'\s+'  # Required space after "default"
    r'(?:is\s+)?'  # Optional "is "
    r'(.+)$',  # The default value
    re.IGNORECASE,
)


def _standardize_default_value(line: str) -> str:
    """Standardize default value declarations in parameter signatures.

    Converts various formats of default value specifications to the standard
    `, default=XXX` format. Handles formats like:
    - ` default XXX`
    - `, default XXX`
    - `, default is XXX`
    - ` default is XXX`
    - ` default:XXX`
    - ` default: XXX`
    - `, default:XXX`
    - `, default: XXX`
    - ` default = XXX`
    - `, default = XXX`

    Only the text after the first colon of the line (the annotation part of
    ``name : type``) is searched, so a parameter that is itself named
    ``default`` is never mistaken for a default-value declaration.  A line
    without any colon is searched as a whole.

    Parameters
    ----------
    line : str
        The parameter signature line to standardize

    Returns
    -------
    str
        The line with standardized default value format, or ``line``
        unchanged if no default-value declaration is found

    Examples
    --------
    >>> _standardize_default_value('arg : int, default 10')
    'arg : int, default=10'
    >>> _standardize_default_value('arg : str, default is "hello"')
    'arg : str, default="hello"'
    >>> _standardize_default_value('arg : bool, default: True')
    'arg : bool, default=True'
    >>> _standardize_default_value('arg : int, default = 10')
    'arg : int, default=10'
    >>> _standardize_default_value('    default : int')
    '    default : int'
    """
    # Keep the "names :" head out of the search; see the docstring.
    names, colon, annotation = line.partition(':')
    if colon:
        head, target = names + colon, annotation
    else:
        head, target = '', line

    # Order matters: the colon and equals forms must be tried before the
    # space form, otherwise the ":" or "=" would end up inside the value.
    for pattern in (_DEFAULT_COLON_RE, _DEFAULT_EQUALS_RE, _DEFAULT_SPACE_RE):
        match = pattern.match(target)
        if match:
            before = match.group(1).rstrip()
            default_value = match.group(2).strip()
            return f'{head}{before}, default={default_value}'

    return line
359
+
360
+
361
def handle_single_line_docstring_that_is_a_bit_too_long(
        whole_docstring_literal: str | None,
        docstring_content: str,
        docstring_starting_col: int,
        docstring_ending_col: int,
        line_length: int = 79,
) -> str | None:
    """Move an over-long single-line docstring's content onto its own line.

    Handles the case where the docstring content is not long enough to be
    wrapped, but together with the opening and closing quotes the whole
    line exceeds the length limit.  The literal is then rewritten as three
    lines: the opening part, the content, and the closing part, with the
    last two indented by ``docstring_starting_col`` spaces.

    Parameters
    ----------
    whole_docstring_literal : str | None
        The docstring literal including its quotes.
    docstring_content : str
        The text between the quotes.  It is located inside the literal by
        plain substring search.
    docstring_starting_col : int
        Number of spaces used to indent the content and the closing part.
    docstring_ending_col : int
        Column at which the literal ends; compared against ``line_length``.
    line_length : int, default=79
        The line length limit.

    Returns
    -------
    str | None
        ``None`` if the literal is ``None``.  The literal unchanged if it is
        multi-line, if it does not reach the limit, or if the content is
        empty or cannot be found verbatim in the literal.  Otherwise the
        three-line form described above.
    """
    if whole_docstring_literal is None:
        return None

    if '\n' in whole_docstring_literal:  # multi-line: do not handle
        return whole_docstring_literal

    if docstring_ending_col < line_length:  # fits within the limit
        return whole_docstring_literal

    # ``str.split('')`` raises ValueError, and splitting on text that is not
    # present would make prefix and postfix both equal the whole literal
    # (duplicating it in the output).  In either case there is nothing
    # sensible to re-flow, so leave the literal alone.
    if (
            not docstring_content
            or docstring_content not in whole_docstring_literal
    ):
        return whole_docstring_literal

    parts: list[str] = whole_docstring_literal.split(docstring_content)
    prefix: str = parts[0]  # opening quotes (plus any string prefix)
    postfix: str = parts[-1]  # closing quotes
    indent: str = ' ' * docstring_starting_col
    return f'{prefix}\n{indent}{docstring_content}\n{indent}{postfix}'