docstring-to-text 0.0.2__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docstring_to_text/__init__.py +161 -0
- docstring_to_text/__package_meta.py +1 -1
- docstring_to_text-1.0.1.dist-info/METADATA +74 -0
- docstring_to_text-1.0.1.dist-info/RECORD +6 -0
- docstring_to_text-0.0.2.dist-info/METADATA +0 -18
- docstring_to_text-0.0.2.dist-info/RECORD +0 -6
- {docstring_to_text-0.0.2.dist-info → docstring_to_text-1.0.1.dist-info}/WHEEL +0 -0
- {docstring_to_text-0.0.2.dist-info → docstring_to_text-1.0.1.dist-info}/licenses/LICENSE.md +0 -0
docstring_to_text/__init__.py
CHANGED
@@ -1,6 +1,167 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
"""
|
3
|
+
A simple pip package converting docstrings into clean text (proper paragraphs and indents).
|
3
4
|
"""
|
4
5
|
|
6
|
+
import typing as _t
|
7
|
+
|
8
|
+
from inspect import cleandoc, getdoc
|
9
|
+
import re as _re
|
10
|
+
|
5
11
|
from .__package_meta import VERSION
|
6
12
|
from .__package_meta import VERSION as __version__
|
13
|
+
|
14
|
+
# TODO:
|
15
|
+
# - lists
|
16
|
+
# formatted with indents
|
17
|
+
# in all lines except for the first one
|
18
|
+
#
|
19
|
+
# Also...
|
20
|
+
# - preserve indents
|
21
|
+
# - of the entire list
|
22
|
+
#
|
23
|
+
# And...
|
24
|
+
# - ensure
|
25
|
+
# that
|
26
|
+
# - it all works
|
27
|
+
# with nested lists
|
28
|
+
|
29
|
+
|
30
|
+
# Leading indent that contains spaces: optional tabs, one or more spaces, optional tabs.
# Groups: (tabs before the spaces, the spaces, tabs after the spaces, rest of the line).
_re_indent_match = _re.compile(r"(\t*)( +)(\t*)(.*?)$").match

# Leading indent made of tabs only. Groups: (tabs, rest of the line).
_re_tab_indent_match = _re.compile(r"(\t+)(.*?)$").match

# Start of a bulleted/numbered list item: optional leading whitespace, a marker,
# then at least one whitespace character separating the marker from the item text.
_re_list_line_match = _re.compile(
    r"(\s*)("
    r"[-*•]+"  # a run of bullet characters
    r"|"
    r"[a-zA-Z]\s*[.)]"  # a single letter followed by '.' or ')'
    r"|"
    # One or more digits followed by '.' or ')'. The quantifier must sit outside
    # the character class: "[0-9+]" would match only a single digit (or a literal '+'),
    # so items like "10." would not be recognized.
    r"[0-9]+\s*[.)]"
    r")\s+"
).match
|
41
|
+
|
42
|
+
|
43
|
+
def _recover_tab_indents(line: str, tab_size: int):
    """
    Rebuild a tab-based indent for a line whose leading indent contains spaces.

    The leading indent is consumed piece by piece with ``_re_indent_match``
    (optional tabs, spaces, optional tabs) and measured in tabs:

    - each tab counts as one;
    - a run of spaces counts as ``len(spaces) / tab_size``;
    - if tabs follow the spaces, the spaces' share is truncated to whole tabs.

    The total is rounded to the nearest whole number of tabs (a half-tab rounds up).
    If it amounts to less than half a tab, the line is returned with that indent removed.
    A line on which the pattern never matches is returned unchanged.
    """
    assert bool(line) and isinstance(line, str)

    total_tabs = 0.0
    remainder = line

    while True:
        found = _re_indent_match(remainder)
        if not found:
            break

        leading_tabs, space_run, trailing_tabs, remainder = found.groups()
        # The tiny epsilon guards against float error just below a whole/half tab.
        tabs_from_spaces = float(len(space_run)) / tab_size + 0.00001
        if trailing_tabs:
            # Spaces followed by tabs: the fractional part of the space indent is dropped.
            tabs_from_spaces = int(tabs_from_spaces)
        total_tabs += len(leading_tabs) + tabs_from_spaces + len(trailing_tabs)

    if total_tabs < 0.5:
        return remainder

    return '\t' * int(total_tabs + 0.50001) + remainder
|
65
|
+
|
66
|
+
|
67
|
+
def _join_paragraph_and_format_tabs(paragraph: _t.List[str], tab_size: int):
    """
    Merge the chunks of one "continuous" paragraph (no empty lines inside) into output lines.

    Each chunk gets its tab indent recovered first. Consecutive chunks sharing the same
    indent are joined with single spaces into one line, prefixed with that many tabs.
    A new output line is started whenever the indent changes or a chunk looks like
    a bulleted/numbered list item.

    This is a generator: it yields one string per resulting block.
    """
    block_indent = 0
    block_parts: _t.List[str] = []

    for raw_chunk in paragraph:
        text = _recover_tab_indents(raw_chunk, tab_size)

        # Split the (now tab-only) indent off the text and measure it.
        tab_match = _re_tab_indent_match(text)
        if tab_match:
            tabs, text = tab_match.groups()
            indent = len(tabs)
        else:
            indent = 0

        is_list_item = bool(_re_list_line_match(text))
        if indent != block_indent or is_list_item:
            # A different indent or a list item: the previous block (if any) is complete.
            if block_parts:
                yield '\t' * block_indent + ' '.join(block_parts)
            block_parts = [text]
            block_indent = indent
        else:
            block_parts.append(text)

    # Flush whatever is still pending after the last chunk.
    if block_parts:
        yield '\t' * block_indent + ' '.join(block_parts)
|
105
|
+
|
106
|
+
|
107
|
+
def _formatted_paragraphs_gen(doc: str, tab_size: int):
    """
    Generator turning a docstring into printable output lines:

    - Non-empty lines are collected into continuous paragraphs, which are then joined
      (with tabs recovered at the beginning of lines) by ``_join_paragraph_and_format_tabs()``.
    - The first empty line right after a paragraph only terminates it and is skipped.
    - Every other empty line is preserved as an empty output line.

    Nothing is yielded for an empty or whitespace-only ``doc``.
    ``tab_size`` is converted to ``int`` and clamped to a minimum of 1.
    """
    text = str(doc) if doc else ''
    if not text.strip():
        return

    tab_width = max(int(tab_size), 1)
    collected: _t.List[str] = []

    for raw_line in text.splitlines():
        stripped = raw_line.rstrip()
        if stripped:
            collected.append(stripped)
        elif collected:
            # First empty line after a paragraph: emit the paragraph, swallow the empty line.
            yield from _join_paragraph_and_format_tabs(collected, tab_width)
            collected = []
        else:
            # Inside a chain of empty lines with nothing pending: keep this one.
            yield ''

    # The last paragraph has no terminating empty line - emit it after the loop.
    if collected:
        yield from _join_paragraph_and_format_tabs(collected, tab_width)
|
145
|
+
|
146
|
+
|
147
|
+
def format_docstring(doc: str, tab_size: int = 8) -> str:
    """
    Convert a pre-cleaned-up docstring (tabs turned into spaces, newlines mid-sentence)
    into printable text:
    - new lines in the middle of a paragraph become spaces...
    - ... but blocks with different indents stay on separate lines.

    The docstring is expected to be cleaned up beforehand, e.g.:
    - format_docstring(inspect.cleandoc(__doc__))
    - format_docstring(inspect.getdoc(class_or_function))

    Returns the formatted lines joined with newline characters.
    """
    output_lines = _formatted_paragraphs_gen(doc, tab_size)
    return '\n'.join(output_lines)
|
159
|
+
|
160
|
+
|
161
|
+
def format_object_docstring(_obj, tab_size: int = 8) -> str:
    """
    Retrieve the docstring of ``_obj`` via ``inspect.getdoc()`` and pass it through ``format_docstring()``.

    Returns an empty string when no (non-empty) docstring is found.
    """
    raw_doc = getdoc(_obj)
    # noinspection PyArgumentList
    return format_docstring(raw_doc, tab_size=tab_size) if raw_doc else ''
|
@@ -0,0 +1,74 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: docstring-to-text
|
3
|
+
Version: 1.0.1
|
4
|
+
Summary: A simple pip package converting docstrings into clean text (proper paragraphs and indents)
|
5
|
+
Project-URL: Source Code, https://github.com/Lex-DRL/Py-docstring-to-text
|
6
|
+
Project-URL: Issues, https://github.com/Lex-DRL/Py-docstring-to-text/issues
|
7
|
+
Author: Lex Darlog (Lex-DRL)
|
8
|
+
License-Expression: MPL-2.0
|
9
|
+
License-File: LICENSE.md
|
10
|
+
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
|
11
|
+
Classifier: Operating System :: OS Independent
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
13
|
+
Requires-Python: >=3.7
|
14
|
+
Description-Content-Type: text/markdown
|
15
|
+
|
16
|
+
# docstring-to-text
|
17
|
+
|
18
|
+
A simple pip package converting docstrings into clean text (proper paragraphs and indents).
|
19
|
+
|
20
|
+
For example, here's a class docstring:
|
21
|
+
```python
|
22
|
+
class MyClass:
|
23
|
+
"""
|
24
|
+
Here's a class.
|
25
|
+
|
26
|
+
|
27
|
+
It has sphinx-like paragraphs, which can
|
28
|
+
span multiple lines. Any modern IDE would
|
29
|
+
display them as a single line, that wraps
|
30
|
+
the given width.
|
31
|
+
|
32
|
+
You can't just remove all the new lines
|
33
|
+
in the entire string, because you want
|
34
|
+
to preserve paragraphs themselves.
|
35
|
+
|
36
|
+
Also, when it comes to lists:
|
37
|
+
- You probably want to separate items
|
38
|
+
with new lines.
|
39
|
+
- However, you don't want to preserve
|
40
|
+
lines inside each item.
|
41
|
+
|
42
|
+
And...
|
43
|
+
* ... you might need various bullet
|
44
|
+
characters.
|
45
|
+
• Including unicode ones.
|
46
|
+
|
47
|
+
And don't forget that the list still needs
|
48
|
+
to be separated from the following text.
|
49
|
+
"""
|
50
|
+
...
|
51
|
+
```
|
52
|
+
|
53
|
+
With this package, you could do:
|
54
|
+
```python
|
55
|
+
from docstring_to_text import *
|
56
|
+
|
57
|
+
clean_text = format_docstring(cleandoc(MyClass.__doc__))
|
58
|
+
clean_text = format_object_docstring(MyClass)
|
59
|
+
```
|
60
|
+
|
61
|
+
Then, the resulting string would be:
|
62
|
+
```text
|
63
|
+
Here's a class.
|
64
|
+
|
65
|
+
It has sphinx-like paragraphs, which can span multiple lines. Any modern IDE would display them as a single line, that wraps the given width.
|
66
|
+
You can't just remove all the new lines in the entire string, because you want to preserve paragraphs themselves.
|
67
|
+
Also, when it comes to lists:
|
68
|
+
- You probably want to separate items with new lines.
|
69
|
+
- However, you don't want to preserve lines inside each item.
|
70
|
+
And...
|
71
|
+
* ... you might need various bullet characters.
|
72
|
+
• Including unicode ones.
|
73
|
+
And don't forget that the list still needs to be separated from the following text.
|
74
|
+
```
|
@@ -0,0 +1,6 @@
|
|
1
|
+
docstring_to_text/__init__.py,sha256=tkoGbf8ZJv70Nf2osZSn4DoHwSZKK2nvRREpvaVHank,4798
|
2
|
+
docstring_to_text/__package_meta.py,sha256=AkzYkNZ1txGq77pbjfo4hVXLrvLpeJOHmfoOc0AZgiY,37
|
3
|
+
docstring_to_text-1.0.1.dist-info/METADATA,sha256=JrCksCfeZ8SLMwzZ9GlM-olZCQWLfHY41q6JKHUp5ME,2297
|
4
|
+
docstring_to_text-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
5
|
+
docstring_to_text-1.0.1.dist-info/licenses/LICENSE.md,sha256=quBniO1Sk1aqYUzwRO5adu_nnqOKV_4UVU4JVfnWiS0,15373
|
6
|
+
docstring_to_text-1.0.1.dist-info/RECORD,,
|
@@ -1,18 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.4
|
2
|
-
Name: docstring-to-text
|
3
|
-
Version: 0.0.2
|
4
|
-
Summary: A simple pip package converting docstrings into clean text (proper paragraphs and indents)
|
5
|
-
Project-URL: Source Code, https://github.com/Lex-DRL/Py-docstring-to-text
|
6
|
-
Project-URL: Issues, https://github.com/Lex-DRL/Py-docstring-to-text/issues
|
7
|
-
Author: Lex Darlog (Lex-DRL)
|
8
|
-
License-Expression: MPL-2.0
|
9
|
-
License-File: LICENSE.md
|
10
|
-
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
|
11
|
-
Classifier: Operating System :: OS Independent
|
12
|
-
Classifier: Programming Language :: Python :: 3
|
13
|
-
Requires-Python: >=3.7
|
14
|
-
Description-Content-Type: text/markdown
|
15
|
-
|
16
|
-
# docstring-to-text
|
17
|
-
|
18
|
-
A simple pip package converting docstrings into clean text (proper paragraphs and indents)
|
@@ -1,6 +0,0 @@
|
|
1
|
-
docstring_to_text/__init__.py,sha256=r9ETN77GhVqKnpdhzIxGeQhmTXHbTUfKwts3pOlE_y0,114
|
2
|
-
docstring_to_text/__package_meta.py,sha256=i_52QZglLR8ZztQCVGw9QLINyps_PTE--w17tzng68c,37
|
3
|
-
docstring_to_text-0.0.2.dist-info/METADATA,sha256=OhUTqYir_0ri7ispW04X3y5MmWDr8PDa9lv6S0HVehs,740
|
4
|
-
docstring_to_text-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
5
|
-
docstring_to_text-0.0.2.dist-info/licenses/LICENSE.md,sha256=quBniO1Sk1aqYUzwRO5adu_nnqOKV_4UVU4JVfnWiS0,15373
|
6
|
-
docstring_to_text-0.0.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|