docstring-to-text 0.0.2__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,167 @@
1
1
  # encoding: utf-8
2
2
  """
3
+ A simple pip package converting docstrings into clean text (proper paragraphs and indents).
3
4
  """
4
5
 
6
+ import typing as _t
7
+
8
+ from inspect import cleandoc, getdoc
9
+ import re as _re
10
+
5
11
  from .__package_meta import VERSION
6
12
  from .__package_meta import VERSION as __version__
13
+
14
+ # TODO:
15
+ # - lists
16
+ # formatted with indents
17
+ # in all lines except for the first one
18
+ #
19
+ # Also...
20
+ # - preserve indents
21
+ # - of the entire list
22
+ #
23
+ # And...
24
+ # - ensure
25
+ # that
26
+ # - it all works
27
+ # with nested lists
28
+
29
+
30
# Leading indent containing at least one space. Groups:
# (1) tabs before the spaces, (2) the run of spaces, (3) tabs after the spaces, (4) the rest of the line.
_re_indent_match = _re.compile(r"(\t*)( +)(\t*)(.*?)$").match

# Leading indent made of tabs only. Groups: (1) the tabs, (2) the rest of the line.
_re_tab_indent_match = _re.compile(r"(\t+)(.*?)$").match

# Start of a bulleted/numbered list item. Groups: (1) leading whitespace, (2) the item marker.
# The marker must be followed by at least one whitespace character.
_re_list_line_match = _re.compile(
	r"(\s*)("
	r"[-*•]+"  # one or more bullet characters: "-", "*", "•"
	r"|"
	r"[a-zA-Z]\s*[.)]"  # a single latin letter followed by "." or ")": "a.", "B)"
	r"|"
	# One or more digits followed by "." or ")": "1.", "10)".
	# The quantifier has to be outside the character class: "[0-9+]" would match
	# only a single digit (or a literal "+"), so multi-digit items were missed.
	r"[0-9]+\s*[.)]"
	r")\s+"
).match
41
+
42
+
43
def _recover_tab_indents(line: str, tab_size: int):
	"""
	Replace a leading indent that contains spaces with an equivalent number of tabs.

	Every ``tab_size`` spaces count as one tab. The accumulated (possibly fractional)
	indent is rounded to the nearest whole tab, with halves rounded up.
	An indent below half a tab is dropped entirely.

	:param line: a non-empty line of text.
	:param tab_size: how many spaces make up one tab.
	:return: the line with its leading indent rewritten as tabs.
	"""
	assert bool(line) and isinstance(line, str)

	total_tabs = 0.0
	remainder = line

	# The indent may alternate between tabs and spaces several times, so keep
	# peeling space-containing indent prefixes off until none is left.
	while True:
		found = _re_indent_match(remainder)
		if not found:
			break
		leading_tabs, space_run, trailing_tabs, remainder = found.groups()

		# The tiny epsilon keeps exact multiples from falling just below the integer.
		from_spaces = float(len(space_run)) / tab_size + 0.00001
		if trailing_tabs:
			# Spaces followed by tabs: the fractional part of the spaces is
			# not preserved, only whole tabs are counted.
			from_spaces = int(from_spaces)

		total_tabs += len(leading_tabs) + from_spaces + len(trailing_tabs)

	if total_tabs < 0.5:
		return remainder

	return '\t' * int(total_tabs + 0.50001) + remainder
65
+
66
+
67
def _join_paragraph_and_format_tabs(paragraph: _t.List[str], tab_size: int):
	"""
	Merge the lines of one "continuous" paragraph (no empty lines inside) into output lines.

	Each chunk first gets its indent recovered as tabs. Consecutive chunks sharing
	the same tab indent are joined with single spaces into one line. A new output
	line is started whenever the indent changes or a chunk looks like the start
	of a bulleted/numbered list item.

	:param paragraph: non-empty source lines that form the paragraph.
	:param tab_size: how many spaces make up one tab.
	:return: generator yielding each joined block, prefixed with its tab indent.
	"""
	block_indent = 0
	block_parts: _t.List[str] = []

	for raw_chunk in paragraph:
		text = _recover_tab_indents(raw_chunk, tab_size)

		# Split the tab indent (if any) off the text, remembering its depth.
		indent_match = _re_tab_indent_match(text)
		if indent_match:
			tabs, text = indent_match.groups()
			indent = len(tabs)
		else:
			indent = 0

		# A different indent or a list-item marker ends the block collected so far.
		starts_new_block = indent != block_indent or bool(_re_list_line_match(text))
		if not starts_new_block:
			block_parts.append(text)
			continue

		if block_parts:
			yield "{}{}".format('\t' * block_indent, ' '.join(block_parts))
		block_parts = [text]
		block_indent = indent

	# Flush whatever is left after the last chunk.
	if block_parts:
		yield "{}{}".format('\t' * block_indent, ' '.join(block_parts))
105
+
106
+
107
def _formatted_paragraphs_gen(doc: str, tab_size: int):
	"""
	Generator that splits a docstring into lines and turns them into printable output lines:
		- Non-empty lines are collected into continuous paragraphs and joined.
		- The empty line which terminates a paragraph is dropped...
		- ... while any further empty lines in the same run are preserved.
		- Tabs at the beginning of lines are recovered (``inspect.cleandoc()`` converts them into spaces).

	Nothing is yielded for an empty or whitespace-only ``doc``.
	``tab_size`` is converted to ``int`` and clamped to be at least 1.
	"""
	if not doc:
		return
	doc = str(doc)
	if not doc.strip():
		return

	tab_size = max(int(tab_size), 1)
	paragraph_lines: _t.List[str] = []

	for raw_line in doc.splitlines():
		stripped = raw_line.rstrip()
		if stripped:
			paragraph_lines.append(stripped)
		elif paragraph_lines:
			# First empty line after a paragraph: emit the paragraph and
			# swallow the empty line itself.
			yield from _join_paragraph_and_format_tabs(paragraph_lines, tab_size)
			paragraph_lines = []
		else:
			# An empty line with no pending paragraph: keep it.
			yield ''

	# The last paragraph has no terminating empty line - emit it after the loop.
	if paragraph_lines:
		yield from _join_paragraph_and_format_tabs(paragraph_lines, tab_size)
145
+
146
+
147
def format_docstring(doc: str, tab_size: int = 8) -> str:
	"""
	Convert a pre-cleaned-up docstring (tabs turned into spaces, newlines mid-sentence)
	into printable text:
		- new lines in the middle of a paragraph are replaced with spaces...
		- ... while indented blocks are still kept as separate lines.

	The docstring must be cleaned up before it's passed here - i.e., call one of:
		- format_docstring(inspect.cleandoc(__doc__))
		- format_docstring(inspect.getdoc(class_or_function))

	:param doc: the pre-cleaned-up docstring.
	:param tab_size: how many spaces make up one tab when recovering indents.
	:return: the formatted text, with output lines joined by a new-line character.
	"""
	formatted_lines = _formatted_paragraphs_gen(doc, tab_size)
	return '\n'.join(formatted_lines)
159
+
160
+
161
def format_object_docstring(_obj, tab_size: int = 8) -> str:
	"""
	Look up the docstring of the given object with ``inspect.getdoc()``
	and format it with ``format_docstring()``.

	:return: the formatted text, or an empty string if the object has no docstring.
	"""
	doc = getdoc(_obj)
	# noinspection PyArgumentList
	return format_docstring(doc, tab_size=tab_size) if doc else ''
@@ -1,3 +1,3 @@
1
1
  # encoding: utf-8
2
2
 
3
- VERSION = "0.0.2"
3
+ VERSION = "1.0.1"
@@ -0,0 +1,74 @@
1
+ Metadata-Version: 2.4
2
+ Name: docstring-to-text
3
+ Version: 1.0.1
4
+ Summary: A simple pip package converting docstrings into clean text (proper paragraphs and indents)
5
+ Project-URL: Source Code, https://github.com/Lex-DRL/Py-docstring-to-text
6
+ Project-URL: Issues, https://github.com/Lex-DRL/Py-docstring-to-text/issues
7
+ Author: Lex Darlog (Lex-DRL)
8
+ License-Expression: MPL-2.0
9
+ License-File: LICENSE.md
10
+ Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Requires-Python: >=3.7
14
+ Description-Content-Type: text/markdown
15
+
16
+ # docstring-to-text
17
+
18
+ A simple pip package converting docstrings into clean text (proper paragraphs and indents).
19
+
20
+ For example, here's a class docstring:
21
+ ```python
22
+ class MyClass:
23
+ """
24
+ Here's a class.
25
+
26
+
27
+ It has sphinx-like paragraphs, which can
28
+ span multiple lines. Any modern IDE would
29
+ display them as a single line, that wraps
30
+ the given width.
31
+
32
+ You can't just remove all the new lines
33
+ in the entire string, because you want
34
+ to preserve paragraphs themselves.
35
+
36
+ Also, when it comes to lists:
37
+ - You probably want to separate items
38
+ with new lines.
39
+ - However, you don't want to preserve
40
+ lines inside each item.
41
+
42
+ And...
43
+ * ... you might need various bullet
44
+ characters.
45
+ • Including unicode ones.
46
+
47
+ And don't forget that the list still needs
48
+ to be separated from the following text.
49
+ """
50
+ ...
51
+ ```
52
+
53
+ With this package, you could do either of the following (both calls produce the same result):
54
+ ```python
55
+ from docstring_to_text import *
56
+
57
+ clean_text = format_docstring(cleandoc(MyClass.__doc__))
58
+ clean_text = format_object_docstring(MyClass)
59
+ ```
60
+
61
+ Then, the resulting string would be:
62
+ ```text
63
+ Here's a class.
64
+
65
+ It has sphinx-like paragraphs, which can span multiple lines. Any modern IDE would display them as a single line, that wraps the given width.
66
+ You can't just remove all the new lines in the entire string, because you want to preserve paragraphs themselves.
67
+ Also, when it comes to lists:
68
+ - You probably want to separate items with new lines.
69
+ - However, you don't want to preserve lines inside each item.
70
+ And...
71
+ * ... you might need various bullet characters.
72
+ • Including unicode ones.
73
+ And don't forget that the list still needs to be separated from the following text.
74
+ ```
@@ -0,0 +1,6 @@
1
+ docstring_to_text/__init__.py,sha256=tkoGbf8ZJv70Nf2osZSn4DoHwSZKK2nvRREpvaVHank,4798
2
+ docstring_to_text/__package_meta.py,sha256=AkzYkNZ1txGq77pbjfo4hVXLrvLpeJOHmfoOc0AZgiY,37
3
+ docstring_to_text-1.0.1.dist-info/METADATA,sha256=JrCksCfeZ8SLMwzZ9GlM-olZCQWLfHY41q6JKHUp5ME,2297
4
+ docstring_to_text-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
+ docstring_to_text-1.0.1.dist-info/licenses/LICENSE.md,sha256=quBniO1Sk1aqYUzwRO5adu_nnqOKV_4UVU4JVfnWiS0,15373
6
+ docstring_to_text-1.0.1.dist-info/RECORD,,
@@ -1,18 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: docstring-to-text
3
- Version: 0.0.2
4
- Summary: A simple pip package converting docstrings into clean text (proper paragraphs and indents)
5
- Project-URL: Source Code, https://github.com/Lex-DRL/Py-docstring-to-text
6
- Project-URL: Issues, https://github.com/Lex-DRL/Py-docstring-to-text/issues
7
- Author: Lex Darlog (Lex-DRL)
8
- License-Expression: MPL-2.0
9
- License-File: LICENSE.md
10
- Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
11
- Classifier: Operating System :: OS Independent
12
- Classifier: Programming Language :: Python :: 3
13
- Requires-Python: >=3.7
14
- Description-Content-Type: text/markdown
15
-
16
- # docstring-to-text
17
-
18
- A simple pip package converting docstrings into clean text (proper paragraphs and indents)
@@ -1,6 +0,0 @@
1
- docstring_to_text/__init__.py,sha256=r9ETN77GhVqKnpdhzIxGeQhmTXHbTUfKwts3pOlE_y0,114
2
- docstring_to_text/__package_meta.py,sha256=i_52QZglLR8ZztQCVGw9QLINyps_PTE--w17tzng68c,37
3
- docstring_to_text-0.0.2.dist-info/METADATA,sha256=OhUTqYir_0ri7ispW04X3y5MmWDr8PDa9lv6S0HVehs,740
4
- docstring_to_text-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
- docstring_to_text-0.0.2.dist-info/licenses/LICENSE.md,sha256=quBniO1Sk1aqYUzwRO5adu_nnqOKV_4UVU4JVfnWiS0,15373
6
- docstring_to_text-0.0.2.dist-info/RECORD,,