micropython-stubber 1.20.5__py3-none-any.whl → 1.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {micropython_stubber-1.20.5.dist-info → micropython_stubber-1.23.0.dist-info}/LICENSE +30 -30
- {micropython_stubber-1.20.5.dist-info → micropython_stubber-1.23.0.dist-info}/METADATA +1 -1
- micropython_stubber-1.23.0.dist-info/RECORD +159 -0
- mpflash/README.md +184 -184
- mpflash/libusb_flash.ipynb +203 -203
- mpflash/mpflash/add_firmware.py +98 -98
- mpflash/mpflash/ask_input.py +236 -236
- mpflash/mpflash/bootloader/__init__.py +37 -36
- mpflash/mpflash/bootloader/manual.py +102 -102
- mpflash/mpflash/bootloader/micropython.py +10 -10
- mpflash/mpflash/bootloader/touch1200.py +45 -45
- mpflash/mpflash/cli_download.py +129 -129
- mpflash/mpflash/cli_flash.py +219 -219
- mpflash/mpflash/cli_group.py +98 -98
- mpflash/mpflash/cli_list.py +81 -81
- mpflash/mpflash/cli_main.py +41 -41
- mpflash/mpflash/common.py +164 -164
- mpflash/mpflash/config.py +43 -47
- mpflash/mpflash/connected.py +74 -74
- mpflash/mpflash/download.py +360 -360
- mpflash/mpflash/downloaded.py +130 -129
- mpflash/mpflash/errors.py +9 -9
- mpflash/mpflash/flash.py +55 -52
- mpflash/mpflash/flash_esp.py +59 -59
- mpflash/mpflash/flash_stm32.py +18 -24
- mpflash/mpflash/flash_stm32_cube.py +111 -111
- mpflash/mpflash/flash_stm32_dfu.py +104 -101
- mpflash/mpflash/flash_uf2.py +89 -67
- mpflash/mpflash/flash_uf2_boardid.py +15 -15
- mpflash/mpflash/flash_uf2_linux.py +129 -123
- mpflash/mpflash/flash_uf2_macos.py +37 -34
- mpflash/mpflash/flash_uf2_windows.py +38 -34
- mpflash/mpflash/list.py +89 -89
- mpflash/mpflash/logger.py +41 -41
- mpflash/mpflash/mpboard_id/__init__.py +93 -93
- mpflash/mpflash/mpboard_id/add_boards.py +255 -255
- mpflash/mpflash/mpboard_id/board.py +37 -37
- mpflash/mpflash/mpboard_id/board_id.py +86 -86
- mpflash/mpflash/mpboard_id/store.py +43 -43
- mpflash/mpflash/mpremoteboard/__init__.py +226 -221
- mpflash/mpflash/mpremoteboard/mpy_fw_info.py +141 -141
- mpflash/mpflash/mpremoteboard/runner.py +140 -140
- mpflash/mpflash/uf2disk.py +12 -12
- mpflash/mpflash/vendor/basicgit.py +288 -288
- mpflash/mpflash/vendor/click_aliases.py +91 -91
- mpflash/mpflash/vendor/dfu.py +165 -165
- mpflash/mpflash/vendor/pydfu.py +605 -605
- mpflash/mpflash/vendor/readme.md +2 -2
- mpflash/mpflash/vendor/versions.py +119 -117
- mpflash/mpflash/worklist.py +171 -170
- mpflash/poetry.lock +1588 -1588
- mpflash/pyproject.toml +64 -60
- mpflash/stm32_udev_rules.md +62 -62
- stubber/__init__.py +3 -3
- stubber/basicgit.py +294 -288
- stubber/board/board_info.csv +193 -193
- stubber/board/boot.py +34 -34
- stubber/board/createstubs.py +986 -986
- stubber/board/createstubs_db.py +825 -825
- stubber/board/createstubs_db_min.py +331 -331
- stubber/board/createstubs_db_mpy.mpy +0 -0
- stubber/board/createstubs_lvgl.py +741 -741
- stubber/board/createstubs_lvgl_min.py +741 -741
- stubber/board/createstubs_mem.py +766 -766
- stubber/board/createstubs_mem_min.py +306 -306
- stubber/board/createstubs_mem_mpy.mpy +0 -0
- stubber/board/createstubs_min.py +294 -294
- stubber/board/createstubs_mpy.mpy +0 -0
- stubber/board/fw_info.py +141 -141
- stubber/board/info.py +183 -183
- stubber/board/main.py +19 -19
- stubber/board/modulelist.txt +247 -247
- stubber/board/pyrightconfig.json +34 -34
- stubber/bulk/mcu_stubber.py +454 -454
- stubber/codemod/_partials/__init__.py +48 -48
- stubber/codemod/_partials/db_main.py +147 -147
- stubber/codemod/_partials/lvgl_main.py +77 -77
- stubber/codemod/_partials/modules_reader.py +80 -80
- stubber/codemod/add_comment.py +53 -53
- stubber/codemod/add_method.py +65 -65
- stubber/codemod/board.py +317 -317
- stubber/codemod/enrich.py +145 -145
- stubber/codemod/merge_docstub.py +284 -284
- stubber/codemod/modify_list.py +54 -54
- stubber/codemod/utils.py +57 -57
- stubber/commands/build_cmd.py +94 -94
- stubber/commands/cli.py +55 -51
- stubber/commands/clone_cmd.py +77 -66
- stubber/commands/config_cmd.py +29 -29
- stubber/commands/enrich_folder_cmd.py +71 -70
- stubber/commands/get_core_cmd.py +71 -69
- stubber/commands/get_docstubs_cmd.py +89 -87
- stubber/commands/get_frozen_cmd.py +114 -112
- stubber/commands/get_mcu_cmd.py +61 -56
- stubber/commands/merge_cmd.py +67 -66
- stubber/commands/publish_cmd.py +119 -119
- stubber/commands/stub_cmd.py +31 -30
- stubber/commands/switch_cmd.py +62 -54
- stubber/commands/variants_cmd.py +49 -48
- stubber/cst_transformer.py +178 -178
- stubber/data/board_info.csv +193 -193
- stubber/data/board_info.json +1729 -1729
- stubber/data/micropython_tags.csv +15 -15
- stubber/data/requirements-core-micropython.txt +38 -38
- stubber/data/requirements-core-pycopy.txt +39 -39
- stubber/downloader.py +36 -36
- stubber/freeze/common.py +68 -68
- stubber/freeze/freeze_folder.py +69 -69
- stubber/freeze/freeze_manifest_2.py +113 -113
- stubber/freeze/get_frozen.py +127 -127
- stubber/get_cpython.py +101 -101
- stubber/get_lobo.py +59 -59
- stubber/minify.py +418 -418
- stubber/publish/bump.py +86 -86
- stubber/publish/candidates.py +262 -262
- stubber/publish/database.py +18 -18
- stubber/publish/defaults.py +45 -45
- stubber/publish/enums.py +24 -24
- stubber/publish/helpers.py +29 -29
- stubber/publish/merge_docstubs.py +130 -130
- stubber/publish/missing_class_methods.py +49 -49
- stubber/publish/package.py +146 -146
- stubber/publish/pathnames.py +51 -51
- stubber/publish/publish.py +120 -120
- stubber/publish/pypi.py +38 -38
- stubber/publish/stubpackage.py +1029 -1029
- stubber/rst/__init__.py +9 -9
- stubber/rst/classsort.py +77 -77
- stubber/rst/lookup.py +530 -530
- stubber/rst/output_dict.py +401 -401
- stubber/rst/reader.py +822 -822
- stubber/rst/report_return.py +69 -69
- stubber/rst/rst_utils.py +540 -540
- stubber/stubber.py +38 -38
- stubber/stubs_from_docs.py +90 -90
- stubber/tools/manifestfile.py +655 -610
- stubber/tools/readme.md +7 -6
- stubber/update_fallback.py +117 -117
- stubber/update_module_list.py +123 -123
- stubber/utils/__init__.py +5 -5
- stubber/utils/config.py +127 -127
- stubber/utils/makeversionhdr.py +54 -54
- stubber/utils/manifest.py +92 -92
- stubber/utils/post.py +79 -79
- stubber/utils/repos.py +157 -154
- stubber/utils/stubmaker.py +139 -139
- stubber/utils/typed_config_toml.py +77 -77
- stubber/utils/versions.py +128 -120
- stubber/variants.py +106 -106
- micropython_stubber-1.20.5.dist-info/RECORD +0 -159
- {micropython_stubber-1.20.5.dist-info → micropython_stubber-1.23.0.dist-info}/WHEEL +0 -0
- {micropython_stubber-1.20.5.dist-info → micropython_stubber-1.23.0.dist-info}/entry_points.txt +0 -0
stubber/rst/rst_utils.py
CHANGED
@@ -1,540 +1,540 @@
|
|
1
|
-
"""
|
2
|
-
Work in Progress
|
3
|
-
----------------
|
4
|
-
|
5
|
-
Tries to determine the return type by parsing the docstring and the function signature
|
6
|
-
- if the signature contains a return type --> <something> then that is returned
|
7
|
-
- check a lookup dictionary of type overrides,
|
8
|
-
if the functionnae is listed, then use the override
|
9
|
-
- use re to find phrases such as:
|
10
|
-
- 'Returns ..... '
|
11
|
-
- 'Gets ..... '
|
12
|
-
- docstring is joined without newlines to simplify parsing
|
13
|
-
- then parses the docstring to find references to known types and give then a rating though a hand coded model ()
|
14
|
-
- builds a list return type candidates
|
15
|
-
- selects the highest ranking candidate
|
16
|
-
- the default Type is 'Any'
|
17
|
-
|
18
|
-
|
19
|
-
to do:
|
20
|
-
|
21
|
-
- regex :
|
22
|
-
- 'With no arguments the frequency in Hz is returned.'
|
23
|
-
- 'Get or set' --> indicates overloaded/optional return Union[None|...]
|
24
|
-
- add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
|
25
|
-
|
26
|
-
- regex :
|
27
|
-
- 'With no arguments the frequency in Hz is returned.'
|
28
|
-
- 'Get or set' --> indicates overloaded/optional return Union[None|...]
|
29
|
-
- add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
|
30
|
-
|
31
|
-
- try if an Azure Machine Learning works as well
|
32
|
-
https://docs.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources
|
33
|
-
-
|
34
|
-
"""
|
35
|
-
# ref: https://regex101.com/codegen?language=python
|
36
|
-
# https://regex101.com/r/Ni8g2z/2
|
37
|
-
|
38
|
-
import re
|
39
|
-
from typing import Dict, List, Optional, Union
|
40
|
-
|
41
|
-
from loguru import logger as log
|
42
|
-
|
43
|
-
from .lookup import LOOKUP_LIST, NONE_VERBS, TYPING_IMPORT
|
44
|
-
|
45
|
-
# These are shown to import
|
46
|
-
__all__ = [
|
47
|
-
"simple_candidates",
|
48
|
-
"compound_candidates",
|
49
|
-
"object_candidates",
|
50
|
-
"distill_return",
|
51
|
-
"return_type_from_context",
|
52
|
-
"_type_from_context", # For testing only
|
53
|
-
"TYPING_IMPORT",
|
54
|
-
]
|
55
|
-
|
56
|
-
|
57
|
-
# logging
|
58
|
-
# # log = logging.getLogger(__name__)
|
59
|
-
|
60
|
-
|
61
|
-
# --------------------------------------
|
62
|
-
# Confidence levels
|
63
|
-
# these heuristics are based a significant amout of manual testing,
|
64
|
-
# and not based on any statistical analysis
|
65
|
-
|
66
|
-
C_DEFAULT = 0 # Any , the default for all
|
67
|
-
C_NONE = 0.1 + C_DEFAULT # better than the default Any
|
68
|
-
C_BASE = 0.1 + C_NONE # the Base if a return type has been found
|
69
|
-
|
70
|
-
C_STR_NAMES = 0.3
|
71
|
-
|
72
|
-
C_GENERIC = 0.6
|
73
|
-
C_DICT = C_GENERIC
|
74
|
-
C_TUPLE = C_GENERIC
|
75
|
-
C_LIST = C_GENERIC
|
76
|
-
C_BOOL = C_GENERIC
|
77
|
-
C_FLOAT = C_GENERIC
|
78
|
-
C_STR = C_GENERIC
|
79
|
-
|
80
|
-
# tehere is a bit of logic - but mostly empirical
|
81
|
-
C_NONE_RETURN = C_GENERIC
|
82
|
-
C_OBJECTS = 0.01 + C_GENERIC
|
83
|
-
|
84
|
-
C_BYTES = 0.01 + C_GENERIC
|
85
|
-
C_BYTEARRAY = 0.03 + C_GENERIC
|
86
|
-
C_INT = 0.03 + C_GENERIC
|
87
|
-
C_UINT = 0.04 + C_GENERIC
|
88
|
-
C_ITERATOR = 0.4 + C_GENERIC
|
89
|
-
C_GENERATOR = 0.4 + C_GENERIC
|
90
|
-
|
91
|
-
C_INT_SIZES = 0.5 + C_GENERIC # better match than bytes and bytearray or object
|
92
|
-
C_INT_LIKE = 0.5 + C_GENERIC
|
93
|
-
|
94
|
-
C_LOOKUP = C_GENERIC + 1
|
95
|
-
|
96
|
-
# --------------------------------------
|
97
|
-
# Weights of the different Lookups
|
98
|
-
WEIGHT_LOOPUPS = 3.0 # Lookup list weight factor
|
99
|
-
WEIGHT_RETURN_VAL = 3.0 # Lookup list weight factor
|
100
|
-
WEIGHT_RETURNS = 1.8 # for Docstring returns
|
101
|
-
WEIGHT_GETS = 1.5 # For docstring Gets
|
102
|
-
|
103
|
-
# --------------------------------------
|
104
|
-
|
105
|
-
# base has a confidence that is quite low, but better than rubbish
|
106
|
-
BASE = {"type": "Incomplete", "confidence": C_BASE, "match": None}
|
107
|
-
|
108
|
-
# --------------------------------------
|
109
|
-
# Regexes
|
110
|
-
# --------------------------------------
|
111
|
-
|
112
|
-
# all regex matches stop at end of sentence:: . ! ? : ;
|
113
|
-
# Look for "Return Value: xxxx"
|
114
|
-
RE_RETURN_VALUE = r"Return value\s?:\s?(?P<return>[^.!?:;]*)"
|
115
|
-
# Look for Returns , but no 'Information'
|
116
|
-
RE_RETURN = r"Return(?:s?,?|(?:ing)?)\s(?!information)(?P<return>[^.!?:;]*)"
|
117
|
-
# Look for gets
|
118
|
-
RE_GETS = r"Gets?\s(?P<return>[^.!?:;]*)"
|
119
|
-
|
120
|
-
# --------------------------------------
|
121
|
-
# Regex for Literals
|
122
|
-
# --------------------------------------
|
123
|
-
RE_LIT_AS_A = r"as a\s?(?P<return>[^.!?:;]*)"
|
124
|
-
RE_LIT_SENTENCE = r"\s?(?P<return>[^.!?:;]*)"
|
125
|
-
|
126
|
-
|
127
|
-
def dist_rate(i: int) -> float:
|
128
|
-
""""""
|
129
|
-
max_len = 150 # must occur in the first 150 chars
|
130
|
-
return max((max_len - i), 1) / max_len
|
131
|
-
|
132
|
-
|
133
|
-
WORD_TERMINATORS = ".,!;:?"
|
134
|
-
|
135
|
-
|
136
|
-
def simple_candidates(
|
137
|
-
type: str,
|
138
|
-
match_string: str,
|
139
|
-
keywords: List[str],
|
140
|
-
rate: float = 0.5,
|
141
|
-
exclude: Optional[List[str]] = None,
|
142
|
-
):
|
143
|
-
"""
|
144
|
-
find and rate possible types and confidence weighting for simple types.
|
145
|
-
Case sensitive
|
146
|
-
"""
|
147
|
-
if exclude is None:
|
148
|
-
exclude = []
|
149
|
-
candidates = []
|
150
|
-
if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
|
151
|
-
# quick bailout , there are no matches, or there is an exclude
|
152
|
-
return []
|
153
|
-
|
154
|
-
# word matching
|
155
|
-
match_words = [w.strip(WORD_TERMINATORS) for w in match_string.split()]
|
156
|
-
# kw = single word -
|
157
|
-
for kw in keywords:
|
158
|
-
i = match_string.find(kw)
|
159
|
-
if " " not in kw and kw not in match_words or " " in kw and i < 0:
|
160
|
-
continue
|
161
|
-
# Assume unsigned are int
|
162
|
-
result = BASE.copy()
|
163
|
-
result["type"] = type
|
164
|
-
result["confidence"] = rate * dist_rate(i) # OK
|
165
|
-
log.trace(f" - found '{kw}' at position {i} with rating {dist_rate(i)}")
|
166
|
-
candidates.append(result)
|
167
|
-
return candidates
|
168
|
-
|
169
|
-
|
170
|
-
def compound_candidates(
|
171
|
-
type: str,
|
172
|
-
match_string: str,
|
173
|
-
keywords: List[str],
|
174
|
-
rate: float = 0.85,
|
175
|
-
exclude: Optional[List[str]] = None,
|
176
|
-
):
|
177
|
-
"""
|
178
|
-
find and rate possible types and confidence weighting for compound types that can have a subscription.
|
179
|
-
Case sensitive
|
180
|
-
"""
|
181
|
-
if exclude is None:
|
182
|
-
exclude = []
|
183
|
-
candidates = []
|
184
|
-
if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
|
185
|
-
# quick bailout , there are no matches, or there is an exclude
|
186
|
-
return []
|
187
|
-
|
188
|
-
# word matching
|
189
|
-
match_words = [w.strip(WORD_TERMINATORS) for w in match_string.split()]
|
190
|
-
# kw = single word -
|
191
|
-
for kw in keywords:
|
192
|
-
i = match_string.find(kw)
|
193
|
-
if " " not in kw and kw not in match_words or " " in kw and i < 0:
|
194
|
-
continue
|
195
|
-
# List / Dict / Generator of Any / Tuple /
|
196
|
-
sub = None
|
197
|
-
result = BASE.copy()
|
198
|
-
confidence = rate
|
199
|
-
for element in ("tuple", "string", "unsigned", "int"):
|
200
|
-
if element in match_string.casefold():
|
201
|
-
j = match_string.find(element)
|
202
|
-
if i == j:
|
203
|
-
# do not match on the same main and sub
|
204
|
-
continue
|
205
|
-
confidence += 0.10 # boost as we have a subtype
|
206
|
-
if element == "string":
|
207
|
-
sub = "str"
|
208
|
-
break
|
209
|
-
elif element == "tuple":
|
210
|
-
sub = "Tuple"
|
211
|
-
break
|
212
|
-
elif element == "unsigned":
|
213
|
-
sub = "int"
|
214
|
-
break
|
215
|
-
else:
|
216
|
-
sub = element
|
217
|
-
result["type"] = f"{type}[{sub}]" if sub else f"{type}"
|
218
|
-
confidence = confidence * dist_rate(i) # distance weighting
|
219
|
-
result["confidence"] = confidence
|
220
|
-
log.trace(
|
221
|
-
f" - found '{kw}' at position {i} with confidence {confidence} rating {dist_rate(i)}"
|
222
|
-
)
|
223
|
-
|
224
|
-
candidates.append(result)
|
225
|
-
return candidates
|
226
|
-
|
227
|
-
|
228
|
-
def object_candidates(match_string: str, rate: float = 0.81, exclude: Optional[List[str]] = None):
|
229
|
-
"""
|
230
|
-
find and rate possible types and confidence weighting for Object types.
|
231
|
-
Case sensitive
|
232
|
-
Exclude defaults to ["IRQ"]
|
233
|
-
"""
|
234
|
-
# defaults
|
235
|
-
if exclude is None:
|
236
|
-
exclude = ["IRQ"]
|
237
|
-
candidates = []
|
238
|
-
keywords = [
|
239
|
-
"Object",
|
240
|
-
"object",
|
241
|
-
] # Q&D
|
242
|
-
|
243
|
-
if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
|
244
|
-
# quick bailout , there are no matches, or there is an exclude
|
245
|
-
return []
|
246
|
-
for kw in keywords:
|
247
|
-
i = match_string.find(kw)
|
248
|
-
if i < 0:
|
249
|
-
continue
|
250
|
-
# List / Dict / Generator of Any / Tuple /
|
251
|
-
confidence = rate
|
252
|
-
|
253
|
-
# did the word actually occur, or is it just a partial
|
254
|
-
words = match_string.split(" ") # Return <multiple words object>
|
255
|
-
if kw in words:
|
256
|
-
pos = words.index(kw)
|
257
|
-
obj = "Incomplete" if pos == 0 else words[pos - 1]
|
258
|
-
if obj in ("stream-like", "file"):
|
259
|
-
obj = "IO" # needs from typing import IO
|
260
|
-
elif obj == "callback":
|
261
|
-
obj = "Callable[..., Incomplete]" # requires additional 'from typing import Callable'
|
262
|
-
else:
|
263
|
-
# clean
|
264
|
-
obj = re.sub(r"[^a-z.A-Z0-9]", "", obj)
|
265
|
-
result = BASE.copy()
|
266
|
-
result["type"] = obj
|
267
|
-
if obj in ["an", "any"]: # "Return an / any object"
|
268
|
-
result["type"] = "Incomplete"
|
269
|
-
confidence += 0.10 # abstract , but very good
|
270
|
-
elif obj[0].islower():
|
271
|
-
confidence -= 0.20 # not so good
|
272
|
-
result["confidence"] = confidence * dist_rate(i)
|
273
|
-
candidates.append(result)
|
274
|
-
return candidates
|
275
|
-
|
276
|
-
|
277
|
-
def has_none_verb(docstr: str) -> List:
|
278
|
-
"returns a None result if the docstring starts with a verb that indicates None"
|
279
|
-
docstr = docstr.strip().casefold()
|
280
|
-
if not any(docstr.startswith(kw.casefold()) for kw in NONE_VERBS):
|
281
|
-
return []
|
282
|
-
result = BASE.copy()
|
283
|
-
result["type"] = "None"
|
284
|
-
result["confidence"] = C_NONE # better than the default Any
|
285
|
-
return [result]
|
286
|
-
|
287
|
-
|
288
|
-
def distill_return(return_text: str) -> List[Dict]:
|
289
|
-
"""Find return type and confidence.
|
290
|
-
Returns a list of possible types and confidence weighting.
|
291
|
-
{
|
292
|
-
|
293
|
-
type :str # the return type
|
294
|
-
confidence: float # the confidence between 0.0 and 1
|
295
|
-
match: Optional[str] # for debugging : the reason the match was made
|
296
|
-
|
297
|
-
}
|
298
|
-
|
299
|
-
"""
|
300
|
-
candidates = [BASE] # Default to the base , which is 'Any'
|
301
|
-
|
302
|
-
# clean up match_string
|
303
|
-
match_string = return_text.strip().rstrip(".")
|
304
|
-
match_string = match_string.replace("`", "")
|
305
|
-
|
306
|
-
candidates += compound_candidates("Generator", match_string, ["generator"], C_GENERATOR)
|
307
|
-
candidates += compound_candidates("Iterator", match_string, ["iterator"], C_ITERATOR)
|
308
|
-
candidates += compound_candidates(
|
309
|
-
"List", match_string, ["a list of", "list of", "an array"], C_LIST
|
310
|
-
)
|
311
|
-
|
312
|
-
candidates += simple_candidates(
|
313
|
-
"Dict", match_string, ["a dictionary", "dict", "Dictionary"], C_DICT
|
314
|
-
)
|
315
|
-
candidates += simple_candidates(
|
316
|
-
"Tuple",
|
317
|
-
match_string,
|
318
|
-
[
|
319
|
-
"tuple",
|
320
|
-
"a pair",
|
321
|
-
"1-tuple",
|
322
|
-
"2-tuple",
|
323
|
-
"3-tuple",
|
324
|
-
"4-tuple",
|
325
|
-
"5-tuple",
|
326
|
-
"6-tuple",
|
327
|
-
"7-tuple",
|
328
|
-
"8-tuple",
|
329
|
-
"9-tuple",
|
330
|
-
],
|
331
|
-
C_TUPLE,
|
332
|
-
)
|
333
|
-
|
334
|
-
candidates += simple_candidates(
|
335
|
-
"int", match_string, ["unsigned integer", "unsigned int", "unsigned"], C_UINT
|
336
|
-
)
|
337
|
-
|
338
|
-
candidates += simple_candidates(
|
339
|
-
"int",
|
340
|
-
match_string,
|
341
|
-
[
|
342
|
-
"number",
|
343
|
-
"integer",
|
344
|
-
"count",
|
345
|
-
"int",
|
346
|
-
"0 or 1",
|
347
|
-
],
|
348
|
-
C_INT,
|
349
|
-
)
|
350
|
-
|
351
|
-
# good but nor perfect indicators of integers
|
352
|
-
# better match than bytes and bytearray or object
|
353
|
-
candidates += simple_candidates(
|
354
|
-
"int",
|
355
|
-
match_string,
|
356
|
-
[
|
357
|
-
"length",
|
358
|
-
"total size",
|
359
|
-
"size of",
|
360
|
-
"the index",
|
361
|
-
"number of",
|
362
|
-
"address of",
|
363
|
-
"the duration",
|
364
|
-
],
|
365
|
-
C_INT_SIZES,
|
366
|
-
)
|
367
|
-
|
368
|
-
candidates += simple_candidates("int", match_string, [], C_INT_SIZES)
|
369
|
-
|
370
|
-
# Assume numbers are signed int
|
371
|
-
candidates += simple_candidates(
|
372
|
-
"int",
|
373
|
-
match_string,
|
374
|
-
[
|
375
|
-
"index",
|
376
|
-
"**signed** value",
|
377
|
-
"seconds",
|
378
|
-
"nanoseconds",
|
379
|
-
"millisecond",
|
380
|
-
"offset",
|
381
|
-
],
|
382
|
-
C_INT_LIKE,
|
383
|
-
)
|
384
|
-
|
385
|
-
# better match than bytes
|
386
|
-
candidates += simple_candidates("bytearray", match_string, ["bytearray"], C_BYTEARRAY)
|
387
|
-
|
388
|
-
# OK, better than just string
|
389
|
-
candidates += simple_candidates("bytes", match_string, ["bytes", "byte string"], C_BYTES)
|
390
|
-
|
391
|
-
candidates += simple_candidates(
|
392
|
-
"bool", match_string, ["boolean", "bool", "True", "False"], C_BOOL
|
393
|
-
)
|
394
|
-
candidates += simple_candidates(
|
395
|
-
"float",
|
396
|
-
match_string,
|
397
|
-
[
|
398
|
-
"float",
|
399
|
-
"logarithm",
|
400
|
-
"sine",
|
401
|
-
"cosine",
|
402
|
-
"tangent",
|
403
|
-
"exponential",
|
404
|
-
"complex number",
|
405
|
-
"phase",
|
406
|
-
"ratio of",
|
407
|
-
],
|
408
|
-
C_FLOAT,
|
409
|
-
)
|
410
|
-
|
411
|
-
candidates += simple_candidates(
|
412
|
-
"str", match_string, ["string", "(sub)string", "sub-string", "substring"], C_STR
|
413
|
-
)
|
414
|
-
|
415
|
-
candidates += simple_candidates("str", match_string, ["name", "names"], C_STR_NAMES)
|
416
|
-
## "? contains 'None if there is no' --> Union[Null, xxx]"
|
417
|
-
candidates += simple_candidates(
|
418
|
-
"None",
|
419
|
-
match_string,
|
420
|
-
["``None``", "None"],
|
421
|
-
C_NONE_RETURN,
|
422
|
-
exclude=["previous value", "if there is no"],
|
423
|
-
)
|
424
|
-
|
425
|
-
candidates += object_candidates(match_string, C_OBJECTS)
|
426
|
-
|
427
|
-
return candidates
|
428
|
-
|
429
|
-
|
430
|
-
def return_type_from_context(
|
431
|
-
*, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
|
432
|
-
):
|
433
|
-
try:
|
434
|
-
return str(
|
435
|
-
_type_from_context(
|
436
|
-
module=module, signature=signature, docstring=docstring, literal=literal
|
437
|
-
)["type"]
|
438
|
-
)
|
439
|
-
except Exception:
|
440
|
-
return "Incomplete"
|
441
|
-
|
442
|
-
|
443
|
-
def _type_from_context(
|
444
|
-
*, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
|
445
|
-
): # -> Dict[str , Union[str,float]]:
|
446
|
-
"""Determine the return type of a function or method based on:
|
447
|
-
- the function signature
|
448
|
-
- the terminology used in the docstring
|
449
|
-
|
450
|
-
Logic:
|
451
|
-
- if the signature contains a return type --> <something> then that is returned
|
452
|
-
- use re to find phrases such as:
|
453
|
-
|
454
|
-
- 'Returns ..... '
|
455
|
-
- 'Gets ..... '
|
456
|
-
|
457
|
-
- docstring is joined without newlines to simplify parsing
|
458
|
-
- then parses the docstring to find references to known types and give then a rating though a hand coded model ()
|
459
|
-
- builds a list return type candidates
|
460
|
-
- selects the highest ranking candidate
|
461
|
-
- the default Type is 'Any'
|
462
|
-
"""
|
463
|
-
|
464
|
-
if isinstance(docstring, list):
|
465
|
-
# join with space to avoid ending at a newline
|
466
|
-
docstring = " ".join(docstring)
|
467
|
-
|
468
|
-
# give the regex that searches for returns a 0.2 boost as that is bound to be more relevant
|
469
|
-
|
470
|
-
weighted_regex = (
|
471
|
-
[
|
472
|
-
(RE_LIT_AS_A, 1.0),
|
473
|
-
(RE_LIT_SENTENCE, 2.0),
|
474
|
-
]
|
475
|
-
if literal
|
476
|
-
else [
|
477
|
-
(RE_RETURN_VALUE, WEIGHT_RETURN_VAL),
|
478
|
-
(RE_RETURN, WEIGHT_RETURNS),
|
479
|
-
(RE_GETS, WEIGHT_GETS),
|
480
|
-
# (reads_regex, 1.0),
|
481
|
-
]
|
482
|
-
)
|
483
|
-
# only the function name without the leading module
|
484
|
-
function_re = re.compile(r"[\w|.]+(?=\()")
|
485
|
-
|
486
|
-
# matches: List[re.Match] = []
|
487
|
-
candidates: List[Dict] = [{"match": "default", "type": "Incomplete", "confidence": 0}]
|
488
|
-
|
489
|
-
# if the signature contains a return type , then use that and do nothing else.
|
490
|
-
if "->" in signature:
|
491
|
-
sig_type = signature.split("->")[-1].strip(": ")
|
492
|
-
return {"type": sig_type, "confidence": WEIGHT_LOOPUPS, "match": signature}
|
493
|
-
|
494
|
-
# ------------------------------------------------------
|
495
|
-
# lookup returns that cannot be found based on the docstring from the lookup list
|
496
|
-
try:
|
497
|
-
function_name = function_re.findall(signature)[0]
|
498
|
-
except IndexError:
|
499
|
-
function_name = signature.strip().strip(":()")
|
500
|
-
|
501
|
-
function_name = ".".join((module, function_name))
|
502
|
-
|
503
|
-
if function_name in LOOKUP_LIST.keys():
|
504
|
-
sig_type = LOOKUP_LIST[function_name][0]
|
505
|
-
return {
|
506
|
-
"type": sig_type,
|
507
|
-
"confidence": C_LOOKUP * WEIGHT_LOOPUPS,
|
508
|
-
"match": function_name,
|
509
|
-
}
|
510
|
-
# ------------------------------------------------------
|
511
|
-
# parse the docstring for simple start verbs,
|
512
|
-
# and add them as a candidate
|
513
|
-
candidates += has_none_verb(docstring)
|
514
|
-
|
515
|
-
# ------------------------------------------------------
|
516
|
-
# parse the docstring for the regexes and weigh the results accordingly
|
517
|
-
for weighted in weighted_regex:
|
518
|
-
match_iter = re.finditer(weighted[0], docstring, re.MULTILINE | re.IGNORECASE)
|
519
|
-
for match in match_iter:
|
520
|
-
# matches.append(match)
|
521
|
-
distilled = distill_return(match.group("return"))
|
522
|
-
for item in distilled:
|
523
|
-
candidate = {
|
524
|
-
"match": match,
|
525
|
-
"type": item["type"],
|
526
|
-
"confidence": item["confidence"] * weighted[1], # add search boost
|
527
|
-
}
|
528
|
-
candidates.append(candidate)
|
529
|
-
# Sort
|
530
|
-
candidates = sorted(candidates, key=lambda x: x["confidence"], reverse=True)
|
531
|
-
best = candidates[0] # best candidate
|
532
|
-
|
533
|
-
# ref: https://docs.python.org/3/library/typing.html#typing.Coroutine
|
534
|
-
# Coroutine[YieldType, SendType, ReturnType]
|
535
|
-
# todo: sanity check against actual code .....
|
536
|
-
if "This is a coroutine" in docstring and "Coroutine" not in str(best["type"]): # type: ignore
|
537
|
-
best["type"] = f"Coroutine[{best['type']}, Any, Any]"
|
538
|
-
|
539
|
-
# return the best candidate, or Any
|
540
|
-
return best # best candidate
|
1
|
+
"""
|
2
|
+
Work in Progress
|
3
|
+
----------------
|
4
|
+
|
5
|
+
Tries to determine the return type by parsing the docstring and the function signature
|
6
|
+
- if the signature contains a return type --> <something> then that is returned
|
7
|
+
- check a lookup dictionary of type overrides,
|
8
|
+
if the functionnae is listed, then use the override
|
9
|
+
- use re to find phrases such as:
|
10
|
+
- 'Returns ..... '
|
11
|
+
- 'Gets ..... '
|
12
|
+
- docstring is joined without newlines to simplify parsing
|
13
|
+
- then parses the docstring to find references to known types and give then a rating though a hand coded model ()
|
14
|
+
- builds a list return type candidates
|
15
|
+
- selects the highest ranking candidate
|
16
|
+
- the default Type is 'Any'
|
17
|
+
|
18
|
+
|
19
|
+
to do:
|
20
|
+
|
21
|
+
- regex :
|
22
|
+
- 'With no arguments the frequency in Hz is returned.'
|
23
|
+
- 'Get or set' --> indicates overloaded/optional return Union[None|...]
|
24
|
+
- add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
|
25
|
+
|
26
|
+
- regex :
|
27
|
+
- 'With no arguments the frequency in Hz is returned.'
|
28
|
+
- 'Get or set' --> indicates overloaded/optional return Union[None|...]
|
29
|
+
- add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
|
30
|
+
|
31
|
+
- try if an Azure Machine Learning works as well
|
32
|
+
https://docs.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources
|
33
|
+
-
|
34
|
+
"""
|
35
|
+
# ref: https://regex101.com/codegen?language=python
|
36
|
+
# https://regex101.com/r/Ni8g2z/2
|
37
|
+
|
38
|
+
import re
|
39
|
+
from typing import Dict, List, Optional, Union
|
40
|
+
|
41
|
+
from loguru import logger as log
|
42
|
+
|
43
|
+
from .lookup import LOOKUP_LIST, NONE_VERBS, TYPING_IMPORT
|
44
|
+
|
45
|
+
# These are shown to import
|
46
|
+
__all__ = [
|
47
|
+
"simple_candidates",
|
48
|
+
"compound_candidates",
|
49
|
+
"object_candidates",
|
50
|
+
"distill_return",
|
51
|
+
"return_type_from_context",
|
52
|
+
"_type_from_context", # For testing only
|
53
|
+
"TYPING_IMPORT",
|
54
|
+
]
|
55
|
+
|
56
|
+
|
57
|
+
# logging
|
58
|
+
# # log = logging.getLogger(__name__)
|
59
|
+
|
60
|
+
|
61
|
+
# --------------------------------------
|
62
|
+
# Confidence levels
|
63
|
+
# these heuristics are based a significant amout of manual testing,
|
64
|
+
# and not based on any statistical analysis
|
65
|
+
|
66
|
+
C_DEFAULT = 0 # Any , the default for all
|
67
|
+
C_NONE = 0.1 + C_DEFAULT # better than the default Any
|
68
|
+
C_BASE = 0.1 + C_NONE # the Base if a return type has been found
|
69
|
+
|
70
|
+
C_STR_NAMES = 0.3
|
71
|
+
|
72
|
+
C_GENERIC = 0.6
|
73
|
+
C_DICT = C_GENERIC
|
74
|
+
C_TUPLE = C_GENERIC
|
75
|
+
C_LIST = C_GENERIC
|
76
|
+
C_BOOL = C_GENERIC
|
77
|
+
C_FLOAT = C_GENERIC
|
78
|
+
C_STR = C_GENERIC
|
79
|
+
|
80
|
+
# tehere is a bit of logic - but mostly empirical
|
81
|
+
C_NONE_RETURN = C_GENERIC
|
82
|
+
C_OBJECTS = 0.01 + C_GENERIC
|
83
|
+
|
84
|
+
C_BYTES = 0.01 + C_GENERIC
|
85
|
+
C_BYTEARRAY = 0.03 + C_GENERIC
|
86
|
+
C_INT = 0.03 + C_GENERIC
|
87
|
+
C_UINT = 0.04 + C_GENERIC
|
88
|
+
C_ITERATOR = 0.4 + C_GENERIC
|
89
|
+
C_GENERATOR = 0.4 + C_GENERIC
|
90
|
+
|
91
|
+
C_INT_SIZES = 0.5 + C_GENERIC # better match than bytes and bytearray or object
|
92
|
+
C_INT_LIKE = 0.5 + C_GENERIC
|
93
|
+
|
94
|
+
C_LOOKUP = C_GENERIC + 1
|
95
|
+
|
96
|
+
# --------------------------------------
|
97
|
+
# Weights of the different Lookups
|
98
|
+
WEIGHT_LOOPUPS = 3.0 # Lookup list weight factor
|
99
|
+
WEIGHT_RETURN_VAL = 3.0 # Lookup list weight factor
|
100
|
+
WEIGHT_RETURNS = 1.8 # for Docstring returns
|
101
|
+
WEIGHT_GETS = 1.5 # For docstring Gets
|
102
|
+
|
103
|
+
# --------------------------------------
|
104
|
+
|
105
|
+
# base has a confidence that is quite low, but better than rubbish
|
106
|
+
BASE = {"type": "Incomplete", "confidence": C_BASE, "match": None}
|
107
|
+
|
108
|
+
# --------------------------------------
|
109
|
+
# Regexes
|
110
|
+
# --------------------------------------
|
111
|
+
|
112
|
+
# all regex matches stop at end of sentence:: . ! ? : ;
|
113
|
+
# Look for "Return Value: xxxx"
|
114
|
+
RE_RETURN_VALUE = r"Return value\s?:\s?(?P<return>[^.!?:;]*)"
|
115
|
+
# Look for Returns , but no 'Information'
|
116
|
+
RE_RETURN = r"Return(?:s?,?|(?:ing)?)\s(?!information)(?P<return>[^.!?:;]*)"
|
117
|
+
# Look for gets
|
118
|
+
RE_GETS = r"Gets?\s(?P<return>[^.!?:;]*)"
|
119
|
+
|
120
|
+
# --------------------------------------
|
121
|
+
# Regex for Literals
|
122
|
+
# --------------------------------------
|
123
|
+
RE_LIT_AS_A = r"as a\s?(?P<return>[^.!?:;]*)"
|
124
|
+
RE_LIT_SENTENCE = r"\s?(?P<return>[^.!?:;]*)"
|
125
|
+
|
126
|
+
|
127
|
+
def dist_rate(i: int) -> float:
|
128
|
+
""""""
|
129
|
+
max_len = 150 # must occur in the first 150 chars
|
130
|
+
return max((max_len - i), 1) / max_len
|
131
|
+
|
132
|
+
|
133
|
+
WORD_TERMINATORS = ".,!;:?"
|
134
|
+
|
135
|
+
|
136
|
+
def simple_candidates(
    type: str,
    match_string: str,
    keywords: List[str],
    rate: float = 0.5,
    exclude: Optional[List[str]] = None,
):
    """
    Rate ``type`` as a possible return type, once for every keyword found in ``match_string``.

    Matching is case sensitive.
    - a single-word keyword must occur as a whole word (surrounding punctuation is ignored)
    - a keyword containing a space only needs to occur as a substring
    - if any ``exclude`` text occurs in ``match_string`` nothing is returned at all

    Returns a list of candidate dicts (copies of BASE) in which the confidence is
    ``rate`` scaled down by the position of the keyword in the text.
    """
    excluded = [] if exclude is None else exclude
    # quick bailout , there are no matches, or there is an exclude
    if not any(kw in match_string for kw in keywords):
        return []
    if any(ex in match_string for ex in excluded):
        return []

    # the individual words of the text, without leading / trailing punctuation
    words = {w.strip(WORD_TERMINATORS) for w in match_string.split()}

    found = []
    for keyword in keywords:
        position = match_string.find(keyword)
        if " " in keyword:
            matched = position >= 0  # multi-word keyword : substring match
        else:
            matched = keyword in words  # single word keyword : whole-word match
        if not matched:
            continue
        rating = dist_rate(position)
        candidate = BASE.copy()
        candidate["type"] = type
        candidate["confidence"] = rate * rating
        log.trace(f" - found '{keyword}' at position {position} with rating {rating}")
        found.append(candidate)
    return found
|
168
|
+
|
169
|
+
|
170
|
+
def compound_candidates(
    type: str,
    match_string: str,
    keywords: List[str],
    rate: float = 0.85,
    exclude: Optional[List[str]] = None,
):
    """
    Rate ``type`` as a possible return type for compound types that can have a subscription,
    such as ``List[int]`` or ``Iterator[str]``.

    Keyword matching is case sensitive and follows the same rules as for simple types:
    single-word keywords must occur as a whole word, keywords with a space as a substring.
    When the text also mentions a tuple, string, unsigned or int (checked case-insensitively,
    in that order) the first one found becomes the subscript and the confidence gets a 0.10 boost.

    Returns a list of candidate dicts (copies of BASE), with the confidence scaled down
    by the position of the keyword in the text.
    """
    excluded = [] if exclude is None else exclude
    # quick bailout , there are no matches, or there is an exclude
    if not any(kw in match_string for kw in keywords):
        return []
    if any(ex in match_string for ex in excluded):
        return []

    # word in the text -> subscript to use, in order of priority
    subtypes = (("tuple", "Tuple"), ("string", "str"), ("unsigned", "int"), ("int", "int"))
    folded = match_string.casefold()
    words = {w.strip(WORD_TERMINATORS) for w in match_string.split()}

    found = []
    for keyword in keywords:
        position = match_string.find(keyword)
        matched = position >= 0 if " " in keyword else keyword in words
        if not matched:
            continue
        # List / Dict / Generator of Any / Tuple /
        # a subtype found at the very position of the keyword is the keyword itself, so skip it
        subtype = next(
            (
                name
                for word, name in subtypes
                if word in folded and match_string.find(word) != position
            ),
            None,
        )
        confidence = rate
        if subtype is not None:
            confidence += 0.10  # boost as we have a subtype
        rating = dist_rate(position)
        confidence = confidence * rating  # distance weighting
        candidate = BASE.copy()
        candidate["type"] = f"{type}[{subtype}]" if subtype else f"{type}"
        candidate["confidence"] = confidence
        log.trace(
            f" - found '{keyword}' at position {position} with confidence {confidence} rating {rating}"
        )
        found.append(candidate)
    return found
|
226
|
+
|
227
|
+
|
228
|
+
def object_candidates(match_string: str, rate: float = 0.81, exclude: Optional[List[str]] = None):
    """
    find and rate possible types and confidence weighting for Object types.
    Case sensitive
    Exclude defaults to ["IRQ"]

    The type is taken from the word directly in front of 'object' / 'Object':
    - 'stream-like' or 'file' becomes IO
    - 'callback' becomes Callable[..., Incomplete]
    - 'an' / 'any' becomes Incomplete, with a confidence boost
    - any other word is stripped of everything but letters, digits and dots;
      a lowercase result is rated lower
    - when there is no usable word in front (the keyword is the first word, or the
      preceding word is empty or only punctuation) the type is Incomplete

    Returns a list of candidate dicts (copies of BASE), with the confidence scaled down
    by the position of the keyword in the text.
    """
    # defaults
    if exclude is None:
        exclude = ["IRQ"]
    candidates = []
    keywords = [
        "Object",
        "object",
    ]  # Q&D

    if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
        # quick bailout , there are no matches, or there is an exclude
        return []

    # split on single spaces only, so consecutive spaces yield empty words
    words = match_string.split(" ")  # Return <multiple words object>
    for kw in keywords:
        i = match_string.find(kw)
        if i < 0:
            continue
        # did the word actually occur, or is it just a partial
        if kw not in words:
            continue
        confidence = rate
        pos = words.index(kw)
        obj = "Incomplete" if pos == 0 else words[pos - 1]
        if obj in ("stream-like", "file"):
            obj = "IO"  # needs from typing import IO
        elif obj == "callback":
            obj = "Callable[..., Incomplete]"  # requires additional 'from typing import Callable'
        else:
            # clean ; nothing may be left (double space, '**', '(' ...), in which case
            # there is no type name to use : treat it the same as a leading keyword
            obj = re.sub(r"[^a-z.A-Z0-9]", "", obj) or "Incomplete"
        result = BASE.copy()
        result["type"] = obj
        if obj in ["an", "any"]:  # "Return an / any object"
            result["type"] = "Incomplete"
            confidence += 0.10  # abstract , but very good
        elif obj[0].islower():
            confidence -= 0.20  # not so good
        result["confidence"] = confidence * dist_rate(i)
        candidates.append(result)
    return candidates
|
275
|
+
|
276
|
+
|
277
|
+
def has_none_verb(docstr: str) -> List:
    """Return a single 'None' candidate when the docstring starts with one of the NONE_VERBS.

    The comparison ignores case and leading whitespace.
    Returns an empty list when the docstring does not start with such a verb.
    """
    folded = docstr.strip().casefold()
    for verb in NONE_VERBS:
        if folded.startswith(verb.casefold()):
            candidate = BASE.copy()
            candidate["type"] = "None"
            candidate["confidence"] = C_NONE  # better than the default Any
            return [candidate]
    return []
|
286
|
+
|
287
|
+
|
288
|
+
def distill_return(return_text: str) -> List[Dict]:
    """Find return type and confidence.

    Returns a list of possible types and confidence weighting.
    {
        type :str               # the return type
        confidence: float       # the confidence between 0.0 and 1
        match: Optional[str]    # for debugging : the reason the match was made
    }

    The first entry is always a copy of BASE (type 'Incomplete'), followed by one
    entry for every type keyword recognised in ``return_text``.
    """
    # Default to the base , which is 'Incomplete'.
    # Use a copy, so that a caller changing the result can never alter the shared BASE.
    candidates = [BASE.copy()]

    # clean up match_string
    match_string = return_text.strip().rstrip(".")
    match_string = match_string.replace("`", "")

    candidates += compound_candidates("Generator", match_string, ["generator"], C_GENERATOR)
    candidates += compound_candidates("Iterator", match_string, ["iterator"], C_ITERATOR)
    candidates += compound_candidates(
        "List", match_string, ["a list of", "list of", "an array"], C_LIST
    )

    candidates += simple_candidates(
        "Dict", match_string, ["a dictionary", "dict", "Dictionary"], C_DICT
    )
    candidates += simple_candidates(
        "Tuple",
        match_string,
        [
            "tuple",
            "a pair",
            "1-tuple",
            "2-tuple",
            "3-tuple",
            "4-tuple",
            "5-tuple",
            "6-tuple",
            "7-tuple",
            "8-tuple",
            "9-tuple",
        ],
        C_TUPLE,
    )

    # Assume unsigned are int
    candidates += simple_candidates(
        "int", match_string, ["unsigned integer", "unsigned int", "unsigned"], C_UINT
    )

    candidates += simple_candidates(
        "int",
        match_string,
        [
            "number",
            "integer",
            "count",
            "int",
            "0 or 1",
        ],
        C_INT,
    )

    # good but not perfect indicators of integers
    # better match than bytes and bytearray or object
    candidates += simple_candidates(
        "int",
        match_string,
        [
            "length",
            "total size",
            "size of",
            "the index",
            "number of",
            "address of",
            "the duration",
        ],
        C_INT_SIZES,
    )

    # Assume numbers are signed int
    candidates += simple_candidates(
        "int",
        match_string,
        [
            "index",
            "**signed** value",
            "seconds",
            "nanoseconds",
            "millisecond",
            "offset",
        ],
        C_INT_LIKE,
    )

    # better match than bytes
    candidates += simple_candidates("bytearray", match_string, ["bytearray"], C_BYTEARRAY)

    # OK, better than just string
    candidates += simple_candidates("bytes", match_string, ["bytes", "byte string"], C_BYTES)

    candidates += simple_candidates(
        "bool", match_string, ["boolean", "bool", "True", "False"], C_BOOL
    )
    candidates += simple_candidates(
        "float",
        match_string,
        [
            "float",
            "logarithm",
            "sine",
            "cosine",
            "tangent",
            "exponential",
            "complex number",
            "phase",
            "ratio of",
        ],
        C_FLOAT,
    )

    candidates += simple_candidates(
        "str", match_string, ["string", "(sub)string", "sub-string", "substring"], C_STR
    )

    candidates += simple_candidates("str", match_string, ["name", "names"], C_STR_NAMES)
    ## "? contains 'None if there is no' --> Union[Null, xxx]"
    candidates += simple_candidates(
        "None",
        match_string,
        ["``None``", "None"],
        C_NONE_RETURN,
        exclude=["previous value", "if there is no"],
    )

    candidates += object_candidates(match_string, C_OBJECTS)

    return candidates
|
428
|
+
|
429
|
+
|
430
|
+
def return_type_from_context(
    *, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
):
    """Return the name of the most likely return type for a function or method.

    Thin, fail-safe wrapper around ``_type_from_context``: only the type of the best
    candidate is returned, as a string. Any exception raised while determining the
    type results in "Incomplete".
    """
    try:
        best = _type_from_context(
            docstring=docstring, signature=signature, module=module, literal=literal
        )
        return str(best["type"])
    except Exception:
        # best effort : an unknown type is better than a failure
        return "Incomplete"
|
441
|
+
|
442
|
+
|
443
|
+
def _type_from_context(
    *, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
):  # -> Dict[str , Union[str,float]]:
    """Determine the return type of a function or method based on:
     - the function signature
     - the terminology used in the docstring

    Logic:
     - if the signature contains a return type --> <something> then that is returned
     - if '<module>.<function>' is listed in LOOKUP_LIST then that type is returned
     - a docstring starting with a verb from NONE_VERBS adds a 'None' candidate
     - use re to find phrases such as:

        - 'Return value: ..... '
        - 'Returns ..... '
        - 'Gets ..... '

       (with literal=True the 'as a ...' and plain sentence regexes are used instead)

     - docstring is joined without newlines to simplify parsing
     - then parses the docstring to find references to known types and give then a rating though a hand coded model ()
     - builds a list return type candidates
     - selects the highest ranking candidate
     - the default Type is 'Incomplete'

    Returns a dict with the keys 'type', 'confidence' and 'match'.
    """

    if isinstance(docstring, list):
        # join with space to avoid ending at a newline
        docstring = " ".join(docstring)

    # each regex has a weight ; the confidence of whatever is found in its match is
    # multiplied by that weight, as the more explicit phrases are bound to be more relevant
    weighted_regex = (
        [
            (RE_LIT_AS_A, 1.0),
            (RE_LIT_SENTENCE, 2.0),
        ]
        if literal
        else [
            (RE_RETURN_VALUE, WEIGHT_RETURN_VAL),
            (RE_RETURN, WEIGHT_RETURNS),
            (RE_GETS, WEIGHT_GETS),
            # (reads_regex, 1.0),
        ]
    )
    # the (possibly dotted) name directly in front of the opening parenthesis
    function_re = re.compile(r"[\w|.]+(?=\()")

    candidates: List[Dict] = [{"match": "default", "type": "Incomplete", "confidence": 0}]

    # if the signature contains a return type , then use that and do nothing else.
    if "->" in signature:
        sig_type = signature.split("->")[-1].strip(": ")
        return {"type": sig_type, "confidence": WEIGHT_LOOPUPS, "match": signature}

    # ------------------------------------------------------
    # lookup returns that cannot be found based on the docstring from the lookup list
    try:
        function_name = function_re.findall(signature)[0]
    except IndexError:
        # no parenthesis in the signature : use the bare text
        function_name = signature.strip().strip(":()")

    function_name = ".".join((module, function_name))

    if function_name in LOOKUP_LIST:
        sig_type = LOOKUP_LIST[function_name][0]
        return {
            "type": sig_type,
            "confidence": C_LOOKUP * WEIGHT_LOOPUPS,
            "match": function_name,
        }
    # ------------------------------------------------------
    # parse the docstring for simple start verbs,
    # and add them as a candidate
    candidates += has_none_verb(docstring)

    # ------------------------------------------------------
    # parse the docstring for the regexes and weigh the results accordingly
    for pattern, weight in weighted_regex:
        for match in re.finditer(pattern, docstring, re.MULTILINE | re.IGNORECASE):
            for item in distill_return(match.group("return")):
                candidates.append(
                    {
                        "match": match,
                        "type": item["type"],
                        "confidence": item["confidence"] * weight,  # add search boost
                    }
                )

    # best candidate ; on equal confidence the candidate that was found first wins
    best = max(candidates, key=lambda x: x["confidence"])

    # ref: https://docs.python.org/3/library/typing.html#typing.Coroutine
    # Coroutine[YieldType, SendType, ReturnType]
    # the detected type is what awaiting the coroutine returns, so it goes in the last position
    # todo: sanity check against actual code .....
    if "This is a coroutine" in docstring and "Coroutine" not in str(best["type"]):  # type: ignore
        best["type"] = f"Coroutine[Any, Any, {best['type']}]"

    # return the best candidate, or Incomplete
    return best  # best candidate
|