docstring-to-text 0.0.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,152 @@
1
1
  # encoding: utf-8
2
2
  """
3
+ A simple pip package converting docstrings into clean text (proper paragraphs and indents).
3
4
  """
4
5
 
6
+ import typing as _t
7
+
8
+ from inspect import cleandoc, getdoc
9
+ import re as _re
10
+
5
11
  from .__package_meta import VERSION
6
12
  from .__package_meta import VERSION as __version__
13
+
14
+
15
# Leading indent that contains at least one space. Groups:
# (tabs before the spaces, the run of spaces, tabs after the spaces, rest of the line).
_re_indent_match = _re.compile(r"(\t*)( +)(\t*)(.*?)$").match

# Leading indent made of tabs only. Groups: (the tabs, rest of the line).
_re_tab_indent_match = _re.compile(r"(\t+)(.*?)$").match

# Start of a bulleted/numbered list item: optional whitespace, a list marker,
# then at least one whitespace character separating the marker from the item text.
_re_list_line_match = _re.compile(
    r"(\s*)("
    r"[-*•]+"  # one or more bullet characters
    r"|"
    r"[a-zA-Z]\s*[.)]"  # a single letter followed by '.' or ')'
    r"|"
    # One or more digits followed by '.' or ')'.
    # The quantifier has to sit outside the character class: the former "[0-9+]"
    # matched exactly one character (a digit or a literal '+'), so multi-digit
    # markers such as "10." were not recognised as list items.
    r"[0-9]+\s*[.)]"
    r")\s+"
).match
26
+
27
+
28
def _recover_tab_indents(line: str, tab_size: int):
    """
    Rebuild a tab-only leading indent for ``line``.

    The leading whitespace is consumed piece by piece with ``_re_indent_match``
    (optional tabs, a run of spaces, optional tabs). Each run of spaces is converted
    to a fractional number of tabs using ``tab_size``; real tabs count as one each.

    The accumulated amount is rounded to the nearest whole tab, with exact halves
    rounded up. If the total is below half a tab, the text is returned with
    the consumed indent removed and no tabs added.

    A line whose indent contains no spaces at all is returned unchanged.
    """
    assert line and isinstance(line, str)

    total_tabs = 0.0
    rest = line
    while True:
        found = _re_indent_match(rest)
        if not found:
            break
        tabs_before, space_run, tabs_after, rest = found.groups()
        # The tiny epsilon pushes exact fractions (e.g. precisely half a tab) upwards.
        from_spaces = float(len(space_run)) / tab_size + 0.00001
        if tabs_after:
            # Spaces followed by tabs: the fractional part of the spaces-indent
            # is not preserved, only whole tabs are kept.
            from_spaces = int(from_spaces)
        total_tabs += len(tabs_before) + from_spaces + len(tabs_after)

    if total_tabs < 0.5:
        return rest

    return '\t' * int(total_tabs + 0.50001) + rest
50
+
51
+
52
def _join_paragraph_and_format_tabs(paragraph: _t.List[str], tab_size: int):
    """
    Turn one "continuous" paragraph (a list of non-empty lines with no blank lines
    between them) into output lines.

    Every chunk first gets its indent converted to tabs. Consecutive chunks sharing
    the same indent are glued together with single spaces into one line, prefixed
    with that many tabs. A new output line is started whenever the indent changes
    or the chunk looks like a bulleted/numbered list item.

    This is a generator: it yields one string per resulting block.
    """
    block_indent = 0
    block_parts: _t.List[str] = []

    for raw_chunk in paragraph:
        text = _recover_tab_indents(raw_chunk, tab_size)

        # Measure the tab indent and strip it from the text.
        indent_found = _re_tab_indent_match(text)
        if indent_found:
            leading_tabs, text = indent_found.groups()
            indent = len(leading_tabs)
        else:
            indent = 0

        # A different indent or a list item means the previous block is over.
        starts_new_block = indent != block_indent or bool(_re_list_line_match(text))
        if not starts_new_block:
            block_parts.append(text)
            continue

        if block_parts:
            yield '\t' * block_indent + ' '.join(block_parts)
        block_parts = [text]
        block_indent = indent

    # Whatever is still pending forms the last block.
    if block_parts:
        yield '\t' * block_indent + ' '.join(block_parts)
90
+
91
+
92
def _formatted_paragraphs_gen(doc: str, tab_size: int):
    """
    Generator that splits a docstring into lines and yields the printable output lines:
    - Runs of non-empty lines are collected into paragraphs and emitted as joined blocks.
    - The empty line that terminates a paragraph is dropped...
    - ... while every further empty line in the same run is preserved as ``''``.
    - Tabs are recovered at the beginning of lines (``inspect.cleandoc()`` converts them into spaces).

    Nothing is yielded for an empty or whitespace-only ``doc``.
    ``tab_size`` is converted to ``int`` and clamped to be at least 1.
    """
    text = str(doc) if doc else ''
    if not text.strip():
        return

    tab_width = max(int(tab_size), 1)
    gathered: _t.List[str] = []

    for raw_line in text.splitlines():
        stripped = raw_line.rstrip()
        if stripped:
            gathered.append(stripped)
        elif gathered:
            # First empty line after a paragraph: emit the paragraph, swallow the empty line.
            yield from _join_paragraph_and_format_tabs(gathered, tab_width)
            gathered = []
        else:
            # An empty line with no pending paragraph is kept as-is.
            yield ''

    # The trailing paragraph has no empty line to terminate it.
    if gathered:
        yield from _join_paragraph_and_format_tabs(gathered, tab_width)
130
+
131
+
132
def format_docstring(doc: str, tab_size: int = 8) -> str:
    """
    Convert a pre-cleaned-up docstring (tabs turned into spaces, newlines mid-sentence)
    into printable text:
    - new lines in the middle of a paragraph become spaces...
    - ... but blocks with a different indent stay on their own lines.

    The docstring is expected to be cleaned up beforehand, e.g.:
    - format_docstring(inspect.cleandoc(__doc__))
    - format_docstring(inspect.getdoc(class_or_function))

    ``tab_size`` is the number of spaces treated as one tab when recovering indents.
    The output lines are joined with ``'\\n'`` and returned as a single string.
    """
    output_lines = _formatted_paragraphs_gen(doc, tab_size)
    return '\n'.join(output_lines)
144
+
145
+
146
def format_object_docstring(_obj, tab_size: int = 8) -> str:
    """
    Look up the docstring of ``_obj`` with ``inspect.getdoc()`` and pass it through
    ``format_docstring()``. Returns an empty string when there is no docstring.
    """
    doc = getdoc(_obj)
    # noinspection PyArgumentList
    return format_docstring(doc, tab_size=tab_size) if doc else ''
@@ -1,3 +1,3 @@
1
1
  # encoding: utf-8
2
2
 
3
- VERSION = "0.0.2"
3
+ VERSION = "1.0.0"
@@ -0,0 +1,71 @@
1
+ Metadata-Version: 2.4
2
+ Name: docstring-to-text
3
+ Version: 1.0.0
4
+ Summary: A simple pip package converting docstrings into clean text (proper paragraphs and indents)
5
+ Project-URL: Source Code, https://github.com/Lex-DRL/Py-docstring-to-text
6
+ Project-URL: Issues, https://github.com/Lex-DRL/Py-docstring-to-text/issues
7
+ Author: Lex Darlog (Lex-DRL)
8
+ License-Expression: MPL-2.0
9
+ License-File: LICENSE.md
10
+ Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Requires-Python: >=3.7
14
+ Description-Content-Type: text/markdown
15
+
16
+ # docstring-to-text
17
+
18
+ A simple pip package converting docstrings into clean text (proper paragraphs and indents).
19
+
20
+ For example, here's a class docstring:
21
+ ```python
22
+ class MyClass:
23
+ """
24
+ Here's a class.
25
+
26
+
27
+ It has sphinx-like paragraphs, which can
28
+ span multiple lines. Any modern IDE would
29
+ display them as a single line, that wraps
30
+ the given width.
31
+
32
+ You can't just remove all the new lines
33
+ in the entire string, because you want
34
+ to preserve paragraphs themselves.
35
+
36
+ Also, when it comes to lists:
37
+ - You probably want to separate items
38
+ with new lines.
39
+ - However, you don't want to preserve
40
+ lines inside each item.
41
+ * And you might need various bullet
42
+ characters.
43
+ • Including unicode ones.
44
+
45
+ And don't forget that the list still needs
46
+ to be separated from the following text.
47
+ """
48
+ ...
49
+ ```
50
+
51
+ With this package, you could do:
52
+ ```python
53
+ from docstring_to_text import *
54
+
55
+ clean_text = format_docstring(cleandoc(MyClass.__doc__))
56
+ clean_text = format_object_docstring(MyClass)
57
+ ```
58
+
59
+ Then, the resulting string would be:
60
+ ```text
61
+ Here's a class.
62
+
63
+ It has sphinx-like paragraphs, which can span multiple lines. Any modern IDE would display them as a single line, that wraps the given width.
64
+ You can't just remove all the new lines in the entire string, because you want to preserve paragraphs themselves.
65
+ Also, when it comes to lists:
66
+ - You probably want to separate items with new lines.
67
+ - However, you don't want to preserve lines inside each item.
68
+ * And you might need various bullet characters.
69
+ • Including unicode ones.
70
+ And don't forget that the list still needs to be separated from the following text.
71
+ ```
@@ -0,0 +1,6 @@
1
+ docstring_to_text/__init__.py,sha256=2MJG9bHcgcFFLi5inNMPrX5jK0d5HVyQbQa7UCV1qK4,4568
2
+ docstring_to_text/__package_meta.py,sha256=oj-ODmFPhzd3LWRxF98TKGvRXngna0diWblm3bsLhKQ,37
3
+ docstring_to_text-1.0.0.dist-info/METADATA,sha256=Bl4mySaWiEnpNqTKnsP4m80J0x6KFzdrsHqVCG4Ou7Q,2276
4
+ docstring_to_text-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
+ docstring_to_text-1.0.0.dist-info/licenses/LICENSE.md,sha256=quBniO1Sk1aqYUzwRO5adu_nnqOKV_4UVU4JVfnWiS0,15373
6
+ docstring_to_text-1.0.0.dist-info/RECORD,,
@@ -1,18 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: docstring-to-text
3
- Version: 0.0.2
4
- Summary: A simple pip package converting docstrings into clean text (proper paragraphs and indents)
5
- Project-URL: Source Code, https://github.com/Lex-DRL/Py-docstring-to-text
6
- Project-URL: Issues, https://github.com/Lex-DRL/Py-docstring-to-text/issues
7
- Author: Lex Darlog (Lex-DRL)
8
- License-Expression: MPL-2.0
9
- License-File: LICENSE.md
10
- Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
11
- Classifier: Operating System :: OS Independent
12
- Classifier: Programming Language :: Python :: 3
13
- Requires-Python: >=3.7
14
- Description-Content-Type: text/markdown
15
-
16
- # docstring-to-text
17
-
18
- A simple pip package converting docstrings into clean text (proper paragraphs and indents)
@@ -1,6 +0,0 @@
1
- docstring_to_text/__init__.py,sha256=r9ETN77GhVqKnpdhzIxGeQhmTXHbTUfKwts3pOlE_y0,114
2
- docstring_to_text/__package_meta.py,sha256=i_52QZglLR8ZztQCVGw9QLINyps_PTE--w17tzng68c,37
3
- docstring_to_text-0.0.2.dist-info/METADATA,sha256=OhUTqYir_0ri7ispW04X3y5MmWDr8PDa9lv6S0HVehs,740
4
- docstring_to_text-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
- docstring_to_text-0.0.2.dist-info/licenses/LICENSE.md,sha256=quBniO1Sk1aqYUzwRO5adu_nnqOKV_4UVU4JVfnWiS0,15373
6
- docstring_to_text-0.0.2.dist-info/RECORD,,