micropython-stubber 1.20.5__py3-none-any.whl → 1.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. {micropython_stubber-1.20.5.dist-info → micropython_stubber-1.23.0.dist-info}/LICENSE +30 -30
  2. {micropython_stubber-1.20.5.dist-info → micropython_stubber-1.23.0.dist-info}/METADATA +1 -1
  3. micropython_stubber-1.23.0.dist-info/RECORD +159 -0
  4. mpflash/README.md +184 -184
  5. mpflash/libusb_flash.ipynb +203 -203
  6. mpflash/mpflash/add_firmware.py +98 -98
  7. mpflash/mpflash/ask_input.py +236 -236
  8. mpflash/mpflash/bootloader/__init__.py +37 -36
  9. mpflash/mpflash/bootloader/manual.py +102 -102
  10. mpflash/mpflash/bootloader/micropython.py +10 -10
  11. mpflash/mpflash/bootloader/touch1200.py +45 -45
  12. mpflash/mpflash/cli_download.py +129 -129
  13. mpflash/mpflash/cli_flash.py +219 -219
  14. mpflash/mpflash/cli_group.py +98 -98
  15. mpflash/mpflash/cli_list.py +81 -81
  16. mpflash/mpflash/cli_main.py +41 -41
  17. mpflash/mpflash/common.py +164 -164
  18. mpflash/mpflash/config.py +43 -47
  19. mpflash/mpflash/connected.py +74 -74
  20. mpflash/mpflash/download.py +360 -360
  21. mpflash/mpflash/downloaded.py +130 -129
  22. mpflash/mpflash/errors.py +9 -9
  23. mpflash/mpflash/flash.py +55 -52
  24. mpflash/mpflash/flash_esp.py +59 -59
  25. mpflash/mpflash/flash_stm32.py +18 -24
  26. mpflash/mpflash/flash_stm32_cube.py +111 -111
  27. mpflash/mpflash/flash_stm32_dfu.py +104 -101
  28. mpflash/mpflash/flash_uf2.py +89 -67
  29. mpflash/mpflash/flash_uf2_boardid.py +15 -15
  30. mpflash/mpflash/flash_uf2_linux.py +129 -123
  31. mpflash/mpflash/flash_uf2_macos.py +37 -34
  32. mpflash/mpflash/flash_uf2_windows.py +38 -34
  33. mpflash/mpflash/list.py +89 -89
  34. mpflash/mpflash/logger.py +41 -41
  35. mpflash/mpflash/mpboard_id/__init__.py +93 -93
  36. mpflash/mpflash/mpboard_id/add_boards.py +255 -255
  37. mpflash/mpflash/mpboard_id/board.py +37 -37
  38. mpflash/mpflash/mpboard_id/board_id.py +86 -86
  39. mpflash/mpflash/mpboard_id/store.py +43 -43
  40. mpflash/mpflash/mpremoteboard/__init__.py +226 -221
  41. mpflash/mpflash/mpremoteboard/mpy_fw_info.py +141 -141
  42. mpflash/mpflash/mpremoteboard/runner.py +140 -140
  43. mpflash/mpflash/uf2disk.py +12 -12
  44. mpflash/mpflash/vendor/basicgit.py +288 -288
  45. mpflash/mpflash/vendor/click_aliases.py +91 -91
  46. mpflash/mpflash/vendor/dfu.py +165 -165
  47. mpflash/mpflash/vendor/pydfu.py +605 -605
  48. mpflash/mpflash/vendor/readme.md +2 -2
  49. mpflash/mpflash/vendor/versions.py +119 -117
  50. mpflash/mpflash/worklist.py +171 -170
  51. mpflash/poetry.lock +1588 -1588
  52. mpflash/pyproject.toml +64 -60
  53. mpflash/stm32_udev_rules.md +62 -62
  54. stubber/__init__.py +3 -3
  55. stubber/basicgit.py +294 -288
  56. stubber/board/board_info.csv +193 -193
  57. stubber/board/boot.py +34 -34
  58. stubber/board/createstubs.py +986 -986
  59. stubber/board/createstubs_db.py +825 -825
  60. stubber/board/createstubs_db_min.py +331 -331
  61. stubber/board/createstubs_db_mpy.mpy +0 -0
  62. stubber/board/createstubs_lvgl.py +741 -741
  63. stubber/board/createstubs_lvgl_min.py +741 -741
  64. stubber/board/createstubs_mem.py +766 -766
  65. stubber/board/createstubs_mem_min.py +306 -306
  66. stubber/board/createstubs_mem_mpy.mpy +0 -0
  67. stubber/board/createstubs_min.py +294 -294
  68. stubber/board/createstubs_mpy.mpy +0 -0
  69. stubber/board/fw_info.py +141 -141
  70. stubber/board/info.py +183 -183
  71. stubber/board/main.py +19 -19
  72. stubber/board/modulelist.txt +247 -247
  73. stubber/board/pyrightconfig.json +34 -34
  74. stubber/bulk/mcu_stubber.py +454 -454
  75. stubber/codemod/_partials/__init__.py +48 -48
  76. stubber/codemod/_partials/db_main.py +147 -147
  77. stubber/codemod/_partials/lvgl_main.py +77 -77
  78. stubber/codemod/_partials/modules_reader.py +80 -80
  79. stubber/codemod/add_comment.py +53 -53
  80. stubber/codemod/add_method.py +65 -65
  81. stubber/codemod/board.py +317 -317
  82. stubber/codemod/enrich.py +145 -145
  83. stubber/codemod/merge_docstub.py +284 -284
  84. stubber/codemod/modify_list.py +54 -54
  85. stubber/codemod/utils.py +57 -57
  86. stubber/commands/build_cmd.py +94 -94
  87. stubber/commands/cli.py +55 -51
  88. stubber/commands/clone_cmd.py +77 -66
  89. stubber/commands/config_cmd.py +29 -29
  90. stubber/commands/enrich_folder_cmd.py +71 -70
  91. stubber/commands/get_core_cmd.py +71 -69
  92. stubber/commands/get_docstubs_cmd.py +89 -87
  93. stubber/commands/get_frozen_cmd.py +114 -112
  94. stubber/commands/get_mcu_cmd.py +61 -56
  95. stubber/commands/merge_cmd.py +67 -66
  96. stubber/commands/publish_cmd.py +119 -119
  97. stubber/commands/stub_cmd.py +31 -30
  98. stubber/commands/switch_cmd.py +62 -54
  99. stubber/commands/variants_cmd.py +49 -48
  100. stubber/cst_transformer.py +178 -178
  101. stubber/data/board_info.csv +193 -193
  102. stubber/data/board_info.json +1729 -1729
  103. stubber/data/micropython_tags.csv +15 -15
  104. stubber/data/requirements-core-micropython.txt +38 -38
  105. stubber/data/requirements-core-pycopy.txt +39 -39
  106. stubber/downloader.py +36 -36
  107. stubber/freeze/common.py +68 -68
  108. stubber/freeze/freeze_folder.py +69 -69
  109. stubber/freeze/freeze_manifest_2.py +113 -113
  110. stubber/freeze/get_frozen.py +127 -127
  111. stubber/get_cpython.py +101 -101
  112. stubber/get_lobo.py +59 -59
  113. stubber/minify.py +418 -418
  114. stubber/publish/bump.py +86 -86
  115. stubber/publish/candidates.py +262 -262
  116. stubber/publish/database.py +18 -18
  117. stubber/publish/defaults.py +45 -45
  118. stubber/publish/enums.py +24 -24
  119. stubber/publish/helpers.py +29 -29
  120. stubber/publish/merge_docstubs.py +130 -130
  121. stubber/publish/missing_class_methods.py +49 -49
  122. stubber/publish/package.py +146 -146
  123. stubber/publish/pathnames.py +51 -51
  124. stubber/publish/publish.py +120 -120
  125. stubber/publish/pypi.py +38 -38
  126. stubber/publish/stubpackage.py +1029 -1029
  127. stubber/rst/__init__.py +9 -9
  128. stubber/rst/classsort.py +77 -77
  129. stubber/rst/lookup.py +530 -530
  130. stubber/rst/output_dict.py +401 -401
  131. stubber/rst/reader.py +822 -822
  132. stubber/rst/report_return.py +69 -69
  133. stubber/rst/rst_utils.py +540 -540
  134. stubber/stubber.py +38 -38
  135. stubber/stubs_from_docs.py +90 -90
  136. stubber/tools/manifestfile.py +655 -610
  137. stubber/tools/readme.md +7 -6
  138. stubber/update_fallback.py +117 -117
  139. stubber/update_module_list.py +123 -123
  140. stubber/utils/__init__.py +5 -5
  141. stubber/utils/config.py +127 -127
  142. stubber/utils/makeversionhdr.py +54 -54
  143. stubber/utils/manifest.py +92 -92
  144. stubber/utils/post.py +79 -79
  145. stubber/utils/repos.py +157 -154
  146. stubber/utils/stubmaker.py +139 -139
  147. stubber/utils/typed_config_toml.py +77 -77
  148. stubber/utils/versions.py +128 -120
  149. stubber/variants.py +106 -106
  150. micropython_stubber-1.20.5.dist-info/RECORD +0 -159
  151. {micropython_stubber-1.20.5.dist-info → micropython_stubber-1.23.0.dist-info}/WHEEL +0 -0
  152. {micropython_stubber-1.20.5.dist-info → micropython_stubber-1.23.0.dist-info}/entry_points.txt +0 -0
stubber/rst/rst_utils.py CHANGED
@@ -1,540 +1,540 @@
1
- """
2
- Work in Progress
3
- ----------------
4
-
5
- Tries to determine the return type by parsing the docstring and the function signature
6
- - if the signature contains a return type --> <something> then that is returned
7
- - check a lookup dictionary of type overrides,
8
- if the functionnae is listed, then use the override
9
- - use re to find phrases such as:
10
- - 'Returns ..... '
11
- - 'Gets ..... '
12
- - docstring is joined without newlines to simplify parsing
13
- - then parses the docstring to find references to known types and give then a rating though a hand coded model ()
14
- - builds a list return type candidates
15
- - selects the highest ranking candidate
16
- - the default Type is 'Any'
17
-
18
-
19
- to do:
20
-
21
- - regex :
22
- - 'With no arguments the frequency in Hz is returned.'
23
- - 'Get or set' --> indicates overloaded/optional return Union[None|...]
24
- - add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
25
-
26
- - regex :
27
- - 'With no arguments the frequency in Hz is returned.'
28
- - 'Get or set' --> indicates overloaded/optional return Union[None|...]
29
- - add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
30
-
31
- - try if an Azure Machine Learning works as well
32
- https://docs.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources
33
- -
34
- """
35
- # ref: https://regex101.com/codegen?language=python
36
- # https://regex101.com/r/Ni8g2z/2
37
-
38
- import re
39
- from typing import Dict, List, Optional, Union
40
-
41
- from loguru import logger as log
42
-
43
- from .lookup import LOOKUP_LIST, NONE_VERBS, TYPING_IMPORT
44
-
45
- # These are shown to import
46
- __all__ = [
47
- "simple_candidates",
48
- "compound_candidates",
49
- "object_candidates",
50
- "distill_return",
51
- "return_type_from_context",
52
- "_type_from_context", # For testing only
53
- "TYPING_IMPORT",
54
- ]
55
-
56
-
57
- # logging
58
- # # log = logging.getLogger(__name__)
59
-
60
-
61
- # --------------------------------------
62
- # Confidence levels
63
- # these heuristics are based a significant amout of manual testing,
64
- # and not based on any statistical analysis
65
-
66
- C_DEFAULT = 0 # Any , the default for all
67
- C_NONE = 0.1 + C_DEFAULT # better than the default Any
68
- C_BASE = 0.1 + C_NONE # the Base if a return type has been found
69
-
70
- C_STR_NAMES = 0.3
71
-
72
- C_GENERIC = 0.6
73
- C_DICT = C_GENERIC
74
- C_TUPLE = C_GENERIC
75
- C_LIST = C_GENERIC
76
- C_BOOL = C_GENERIC
77
- C_FLOAT = C_GENERIC
78
- C_STR = C_GENERIC
79
-
80
- # tehere is a bit of logic - but mostly empirical
81
- C_NONE_RETURN = C_GENERIC
82
- C_OBJECTS = 0.01 + C_GENERIC
83
-
84
- C_BYTES = 0.01 + C_GENERIC
85
- C_BYTEARRAY = 0.03 + C_GENERIC
86
- C_INT = 0.03 + C_GENERIC
87
- C_UINT = 0.04 + C_GENERIC
88
- C_ITERATOR = 0.4 + C_GENERIC
89
- C_GENERATOR = 0.4 + C_GENERIC
90
-
91
- C_INT_SIZES = 0.5 + C_GENERIC # better match than bytes and bytearray or object
92
- C_INT_LIKE = 0.5 + C_GENERIC
93
-
94
- C_LOOKUP = C_GENERIC + 1
95
-
96
- # --------------------------------------
97
- # Weights of the different Lookups
98
- WEIGHT_LOOPUPS = 3.0 # Lookup list weight factor
99
- WEIGHT_RETURN_VAL = 3.0 # Lookup list weight factor
100
- WEIGHT_RETURNS = 1.8 # for Docstring returns
101
- WEIGHT_GETS = 1.5 # For docstring Gets
102
-
103
- # --------------------------------------
104
-
105
- # base has a confidence that is quite low, but better than rubbish
106
- BASE = {"type": "Incomplete", "confidence": C_BASE, "match": None}
107
-
108
- # --------------------------------------
109
- # Regexes
110
- # --------------------------------------
111
-
112
- # all regex matches stop at end of sentence:: . ! ? : ;
113
- # Look for "Return Value: xxxx"
114
- RE_RETURN_VALUE = r"Return value\s?:\s?(?P<return>[^.!?:;]*)"
115
- # Look for Returns , but no 'Information'
116
- RE_RETURN = r"Return(?:s?,?|(?:ing)?)\s(?!information)(?P<return>[^.!?:;]*)"
117
- # Look for gets
118
- RE_GETS = r"Gets?\s(?P<return>[^.!?:;]*)"
119
-
120
- # --------------------------------------
121
- # Regex for Literals
122
- # --------------------------------------
123
- RE_LIT_AS_A = r"as a\s?(?P<return>[^.!?:;]*)"
124
- RE_LIT_SENTENCE = r"\s?(?P<return>[^.!?:;]*)"
125
-
126
-
127
- def dist_rate(i: int) -> float:
128
- """"""
129
- max_len = 150 # must occur in the first 150 chars
130
- return max((max_len - i), 1) / max_len
131
-
132
-
133
- WORD_TERMINATORS = ".,!;:?"
134
-
135
-
136
- def simple_candidates(
137
- type: str,
138
- match_string: str,
139
- keywords: List[str],
140
- rate: float = 0.5,
141
- exclude: Optional[List[str]] = None,
142
- ):
143
- """
144
- find and rate possible types and confidence weighting for simple types.
145
- Case sensitive
146
- """
147
- if exclude is None:
148
- exclude = []
149
- candidates = []
150
- if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
151
- # quick bailout , there are no matches, or there is an exclude
152
- return []
153
-
154
- # word matching
155
- match_words = [w.strip(WORD_TERMINATORS) for w in match_string.split()]
156
- # kw = single word -
157
- for kw in keywords:
158
- i = match_string.find(kw)
159
- if " " not in kw and kw not in match_words or " " in kw and i < 0:
160
- continue
161
- # Assume unsigned are int
162
- result = BASE.copy()
163
- result["type"] = type
164
- result["confidence"] = rate * dist_rate(i) # OK
165
- log.trace(f" - found '{kw}' at position {i} with rating {dist_rate(i)}")
166
- candidates.append(result)
167
- return candidates
168
-
169
-
170
- def compound_candidates(
171
- type: str,
172
- match_string: str,
173
- keywords: List[str],
174
- rate: float = 0.85,
175
- exclude: Optional[List[str]] = None,
176
- ):
177
- """
178
- find and rate possible types and confidence weighting for compound types that can have a subscription.
179
- Case sensitive
180
- """
181
- if exclude is None:
182
- exclude = []
183
- candidates = []
184
- if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
185
- # quick bailout , there are no matches, or there is an exclude
186
- return []
187
-
188
- # word matching
189
- match_words = [w.strip(WORD_TERMINATORS) for w in match_string.split()]
190
- # kw = single word -
191
- for kw in keywords:
192
- i = match_string.find(kw)
193
- if " " not in kw and kw not in match_words or " " in kw and i < 0:
194
- continue
195
- # List / Dict / Generator of Any / Tuple /
196
- sub = None
197
- result = BASE.copy()
198
- confidence = rate
199
- for element in ("tuple", "string", "unsigned", "int"):
200
- if element in match_string.casefold():
201
- j = match_string.find(element)
202
- if i == j:
203
- # do not match on the same main and sub
204
- continue
205
- confidence += 0.10 # boost as we have a subtype
206
- if element == "string":
207
- sub = "str"
208
- break
209
- elif element == "tuple":
210
- sub = "Tuple"
211
- break
212
- elif element == "unsigned":
213
- sub = "int"
214
- break
215
- else:
216
- sub = element
217
- result["type"] = f"{type}[{sub}]" if sub else f"{type}"
218
- confidence = confidence * dist_rate(i) # distance weighting
219
- result["confidence"] = confidence
220
- log.trace(
221
- f" - found '{kw}' at position {i} with confidence {confidence} rating {dist_rate(i)}"
222
- )
223
-
224
- candidates.append(result)
225
- return candidates
226
-
227
-
228
- def object_candidates(match_string: str, rate: float = 0.81, exclude: Optional[List[str]] = None):
229
- """
230
- find and rate possible types and confidence weighting for Object types.
231
- Case sensitive
232
- Exclude defaults to ["IRQ"]
233
- """
234
- # defaults
235
- if exclude is None:
236
- exclude = ["IRQ"]
237
- candidates = []
238
- keywords = [
239
- "Object",
240
- "object",
241
- ] # Q&D
242
-
243
- if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
244
- # quick bailout , there are no matches, or there is an exclude
245
- return []
246
- for kw in keywords:
247
- i = match_string.find(kw)
248
- if i < 0:
249
- continue
250
- # List / Dict / Generator of Any / Tuple /
251
- confidence = rate
252
-
253
- # did the word actually occur, or is it just a partial
254
- words = match_string.split(" ") # Return <multiple words object>
255
- if kw in words:
256
- pos = words.index(kw)
257
- obj = "Incomplete" if pos == 0 else words[pos - 1]
258
- if obj in ("stream-like", "file"):
259
- obj = "IO" # needs from typing import IO
260
- elif obj == "callback":
261
- obj = "Callable[..., Incomplete]" # requires additional 'from typing import Callable'
262
- else:
263
- # clean
264
- obj = re.sub(r"[^a-z.A-Z0-9]", "", obj)
265
- result = BASE.copy()
266
- result["type"] = obj
267
- if obj in ["an", "any"]: # "Return an / any object"
268
- result["type"] = "Incomplete"
269
- confidence += 0.10 # abstract , but very good
270
- elif obj[0].islower():
271
- confidence -= 0.20 # not so good
272
- result["confidence"] = confidence * dist_rate(i)
273
- candidates.append(result)
274
- return candidates
275
-
276
-
277
- def has_none_verb(docstr: str) -> List:
278
- "returns a None result if the docstring starts with a verb that indicates None"
279
- docstr = docstr.strip().casefold()
280
- if not any(docstr.startswith(kw.casefold()) for kw in NONE_VERBS):
281
- return []
282
- result = BASE.copy()
283
- result["type"] = "None"
284
- result["confidence"] = C_NONE # better than the default Any
285
- return [result]
286
-
287
-
288
- def distill_return(return_text: str) -> List[Dict]:
289
- """Find return type and confidence.
290
- Returns a list of possible types and confidence weighting.
291
- {
292
-
293
- type :str # the return type
294
- confidence: float # the confidence between 0.0 and 1
295
- match: Optional[str] # for debugging : the reason the match was made
296
-
297
- }
298
-
299
- """
300
- candidates = [BASE] # Default to the base , which is 'Any'
301
-
302
- # clean up match_string
303
- match_string = return_text.strip().rstrip(".")
304
- match_string = match_string.replace("`", "")
305
-
306
- candidates += compound_candidates("Generator", match_string, ["generator"], C_GENERATOR)
307
- candidates += compound_candidates("Iterator", match_string, ["iterator"], C_ITERATOR)
308
- candidates += compound_candidates(
309
- "List", match_string, ["a list of", "list of", "an array"], C_LIST
310
- )
311
-
312
- candidates += simple_candidates(
313
- "Dict", match_string, ["a dictionary", "dict", "Dictionary"], C_DICT
314
- )
315
- candidates += simple_candidates(
316
- "Tuple",
317
- match_string,
318
- [
319
- "tuple",
320
- "a pair",
321
- "1-tuple",
322
- "2-tuple",
323
- "3-tuple",
324
- "4-tuple",
325
- "5-tuple",
326
- "6-tuple",
327
- "7-tuple",
328
- "8-tuple",
329
- "9-tuple",
330
- ],
331
- C_TUPLE,
332
- )
333
-
334
- candidates += simple_candidates(
335
- "int", match_string, ["unsigned integer", "unsigned int", "unsigned"], C_UINT
336
- )
337
-
338
- candidates += simple_candidates(
339
- "int",
340
- match_string,
341
- [
342
- "number",
343
- "integer",
344
- "count",
345
- "int",
346
- "0 or 1",
347
- ],
348
- C_INT,
349
- )
350
-
351
- # good but nor perfect indicators of integers
352
- # better match than bytes and bytearray or object
353
- candidates += simple_candidates(
354
- "int",
355
- match_string,
356
- [
357
- "length",
358
- "total size",
359
- "size of",
360
- "the index",
361
- "number of",
362
- "address of",
363
- "the duration",
364
- ],
365
- C_INT_SIZES,
366
- )
367
-
368
- candidates += simple_candidates("int", match_string, [], C_INT_SIZES)
369
-
370
- # Assume numbers are signed int
371
- candidates += simple_candidates(
372
- "int",
373
- match_string,
374
- [
375
- "index",
376
- "**signed** value",
377
- "seconds",
378
- "nanoseconds",
379
- "millisecond",
380
- "offset",
381
- ],
382
- C_INT_LIKE,
383
- )
384
-
385
- # better match than bytes
386
- candidates += simple_candidates("bytearray", match_string, ["bytearray"], C_BYTEARRAY)
387
-
388
- # OK, better than just string
389
- candidates += simple_candidates("bytes", match_string, ["bytes", "byte string"], C_BYTES)
390
-
391
- candidates += simple_candidates(
392
- "bool", match_string, ["boolean", "bool", "True", "False"], C_BOOL
393
- )
394
- candidates += simple_candidates(
395
- "float",
396
- match_string,
397
- [
398
- "float",
399
- "logarithm",
400
- "sine",
401
- "cosine",
402
- "tangent",
403
- "exponential",
404
- "complex number",
405
- "phase",
406
- "ratio of",
407
- ],
408
- C_FLOAT,
409
- )
410
-
411
- candidates += simple_candidates(
412
- "str", match_string, ["string", "(sub)string", "sub-string", "substring"], C_STR
413
- )
414
-
415
- candidates += simple_candidates("str", match_string, ["name", "names"], C_STR_NAMES)
416
- ## "? contains 'None if there is no' --> Union[Null, xxx]"
417
- candidates += simple_candidates(
418
- "None",
419
- match_string,
420
- ["``None``", "None"],
421
- C_NONE_RETURN,
422
- exclude=["previous value", "if there is no"],
423
- )
424
-
425
- candidates += object_candidates(match_string, C_OBJECTS)
426
-
427
- return candidates
428
-
429
-
430
- def return_type_from_context(
431
- *, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
432
- ):
433
- try:
434
- return str(
435
- _type_from_context(
436
- module=module, signature=signature, docstring=docstring, literal=literal
437
- )["type"]
438
- )
439
- except Exception:
440
- return "Incomplete"
441
-
442
-
443
- def _type_from_context(
444
- *, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
445
- ): # -> Dict[str , Union[str,float]]:
446
- """Determine the return type of a function or method based on:
447
- - the function signature
448
- - the terminology used in the docstring
449
-
450
- Logic:
451
- - if the signature contains a return type --> <something> then that is returned
452
- - use re to find phrases such as:
453
-
454
- - 'Returns ..... '
455
- - 'Gets ..... '
456
-
457
- - docstring is joined without newlines to simplify parsing
458
- - then parses the docstring to find references to known types and give then a rating though a hand coded model ()
459
- - builds a list return type candidates
460
- - selects the highest ranking candidate
461
- - the default Type is 'Any'
462
- """
463
-
464
- if isinstance(docstring, list):
465
- # join with space to avoid ending at a newline
466
- docstring = " ".join(docstring)
467
-
468
- # give the regex that searches for returns a 0.2 boost as that is bound to be more relevant
469
-
470
- weighted_regex = (
471
- [
472
- (RE_LIT_AS_A, 1.0),
473
- (RE_LIT_SENTENCE, 2.0),
474
- ]
475
- if literal
476
- else [
477
- (RE_RETURN_VALUE, WEIGHT_RETURN_VAL),
478
- (RE_RETURN, WEIGHT_RETURNS),
479
- (RE_GETS, WEIGHT_GETS),
480
- # (reads_regex, 1.0),
481
- ]
482
- )
483
- # only the function name without the leading module
484
- function_re = re.compile(r"[\w|.]+(?=\()")
485
-
486
- # matches: List[re.Match] = []
487
- candidates: List[Dict] = [{"match": "default", "type": "Incomplete", "confidence": 0}]
488
-
489
- # if the signature contains a return type , then use that and do nothing else.
490
- if "->" in signature:
491
- sig_type = signature.split("->")[-1].strip(": ")
492
- return {"type": sig_type, "confidence": WEIGHT_LOOPUPS, "match": signature}
493
-
494
- # ------------------------------------------------------
495
- # lookup returns that cannot be found based on the docstring from the lookup list
496
- try:
497
- function_name = function_re.findall(signature)[0]
498
- except IndexError:
499
- function_name = signature.strip().strip(":()")
500
-
501
- function_name = ".".join((module, function_name))
502
-
503
- if function_name in LOOKUP_LIST.keys():
504
- sig_type = LOOKUP_LIST[function_name][0]
505
- return {
506
- "type": sig_type,
507
- "confidence": C_LOOKUP * WEIGHT_LOOPUPS,
508
- "match": function_name,
509
- }
510
- # ------------------------------------------------------
511
- # parse the docstring for simple start verbs,
512
- # and add them as a candidate
513
- candidates += has_none_verb(docstring)
514
-
515
- # ------------------------------------------------------
516
- # parse the docstring for the regexes and weigh the results accordingly
517
- for weighted in weighted_regex:
518
- match_iter = re.finditer(weighted[0], docstring, re.MULTILINE | re.IGNORECASE)
519
- for match in match_iter:
520
- # matches.append(match)
521
- distilled = distill_return(match.group("return"))
522
- for item in distilled:
523
- candidate = {
524
- "match": match,
525
- "type": item["type"],
526
- "confidence": item["confidence"] * weighted[1], # add search boost
527
- }
528
- candidates.append(candidate)
529
- # Sort
530
- candidates = sorted(candidates, key=lambda x: x["confidence"], reverse=True)
531
- best = candidates[0] # best candidate
532
-
533
- # ref: https://docs.python.org/3/library/typing.html#typing.Coroutine
534
- # Coroutine[YieldType, SendType, ReturnType]
535
- # todo: sanity check against actual code .....
536
- if "This is a coroutine" in docstring and "Coroutine" not in str(best["type"]): # type: ignore
537
- best["type"] = f"Coroutine[{best['type']}, Any, Any]"
538
-
539
- # return the best candidate, or Any
540
- return best # best candidate
1
+ """
2
+ Work in Progress
3
+ ----------------
4
+
5
+ Tries to determine the return type by parsing the docstring and the function signature
6
+ - if the signature contains a return type --> <something> then that is returned
7
+ - check a lookup dictionary of type overrides,
8
+ if the functionnae is listed, then use the override
9
+ - use re to find phrases such as:
10
+ - 'Returns ..... '
11
+ - 'Gets ..... '
12
+ - docstring is joined without newlines to simplify parsing
13
+ - then parses the docstring to find references to known types and give then a rating though a hand coded model ()
14
+ - builds a list return type candidates
15
+ - selects the highest ranking candidate
16
+ - the default Type is 'Any'
17
+
18
+
19
+ to do:
20
+
21
+ - regex :
22
+ - 'With no arguments the frequency in Hz is returned.'
23
+ - 'Get or set' --> indicates overloaded/optional return Union[None|...]
24
+ - add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
25
+
26
+ - regex :
27
+ - 'With no arguments the frequency in Hz is returned.'
28
+ - 'Get or set' --> indicates overloaded/optional return Union[None|...]
29
+ - add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
30
+
31
+ - try if an Azure Machine Learning works as well
32
+ https://docs.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources
33
+ -
34
+ """
35
+ # ref: https://regex101.com/codegen?language=python
36
+ # https://regex101.com/r/Ni8g2z/2
37
+
38
+ import re
39
+ from typing import Dict, List, Optional, Union
40
+
41
+ from loguru import logger as log
42
+
43
+ from .lookup import LOOKUP_LIST, NONE_VERBS, TYPING_IMPORT
44
+
45
+ # These are shown to import
46
+ __all__ = [
47
+ "simple_candidates",
48
+ "compound_candidates",
49
+ "object_candidates",
50
+ "distill_return",
51
+ "return_type_from_context",
52
+ "_type_from_context", # For testing only
53
+ "TYPING_IMPORT",
54
+ ]
55
+
56
+
57
+ # logging
58
+ # # log = logging.getLogger(__name__)
59
+
60
+
61
+ # --------------------------------------
62
+ # Confidence levels
63
+ # these heuristics are based a significant amout of manual testing,
64
+ # and not based on any statistical analysis
65
+
66
+ C_DEFAULT = 0 # Any , the default for all
67
+ C_NONE = 0.1 + C_DEFAULT # better than the default Any
68
+ C_BASE = 0.1 + C_NONE # the Base if a return type has been found
69
+
70
+ C_STR_NAMES = 0.3
71
+
72
+ C_GENERIC = 0.6
73
+ C_DICT = C_GENERIC
74
+ C_TUPLE = C_GENERIC
75
+ C_LIST = C_GENERIC
76
+ C_BOOL = C_GENERIC
77
+ C_FLOAT = C_GENERIC
78
+ C_STR = C_GENERIC
79
+
80
+ # tehere is a bit of logic - but mostly empirical
81
+ C_NONE_RETURN = C_GENERIC
82
+ C_OBJECTS = 0.01 + C_GENERIC
83
+
84
+ C_BYTES = 0.01 + C_GENERIC
85
+ C_BYTEARRAY = 0.03 + C_GENERIC
86
+ C_INT = 0.03 + C_GENERIC
87
+ C_UINT = 0.04 + C_GENERIC
88
+ C_ITERATOR = 0.4 + C_GENERIC
89
+ C_GENERATOR = 0.4 + C_GENERIC
90
+
91
+ C_INT_SIZES = 0.5 + C_GENERIC # better match than bytes and bytearray or object
92
+ C_INT_LIKE = 0.5 + C_GENERIC
93
+
94
+ C_LOOKUP = C_GENERIC + 1
95
+
96
+ # --------------------------------------
97
+ # Weights of the different Lookups
98
+ WEIGHT_LOOPUPS = 3.0 # Lookup list weight factor
99
+ WEIGHT_RETURN_VAL = 3.0 # Lookup list weight factor
100
+ WEIGHT_RETURNS = 1.8 # for Docstring returns
101
+ WEIGHT_GETS = 1.5 # For docstring Gets
102
+
103
+ # --------------------------------------
104
+
105
+ # base has a confidence that is quite low, but better than rubbish
106
+ BASE = {"type": "Incomplete", "confidence": C_BASE, "match": None}
107
+
108
+ # --------------------------------------
109
+ # Regexes
110
+ # --------------------------------------
111
+
112
+ # all regex matches stop at end of sentence:: . ! ? : ;
113
+ # Look for "Return Value: xxxx"
114
+ RE_RETURN_VALUE = r"Return value\s?:\s?(?P<return>[^.!?:;]*)"
115
+ # Look for Returns , but no 'Information'
116
+ RE_RETURN = r"Return(?:s?,?|(?:ing)?)\s(?!information)(?P<return>[^.!?:;]*)"
117
+ # Look for gets
118
+ RE_GETS = r"Gets?\s(?P<return>[^.!?:;]*)"
119
+
120
+ # --------------------------------------
121
+ # Regex for Literals
122
+ # --------------------------------------
123
+ RE_LIT_AS_A = r"as a\s?(?P<return>[^.!?:;]*)"
124
+ RE_LIT_SENTENCE = r"\s?(?P<return>[^.!?:;]*)"
125
+
126
+
127
+ def dist_rate(i: int) -> float:
128
+ """"""
129
+ max_len = 150 # must occur in the first 150 chars
130
+ return max((max_len - i), 1) / max_len
131
+
132
+
133
+ WORD_TERMINATORS = ".,!;:?"
134
+
135
+
136
+ def simple_candidates(
137
+ type: str,
138
+ match_string: str,
139
+ keywords: List[str],
140
+ rate: float = 0.5,
141
+ exclude: Optional[List[str]] = None,
142
+ ):
143
+ """
144
+ find and rate possible types and confidence weighting for simple types.
145
+ Case sensitive
146
+ """
147
+ if exclude is None:
148
+ exclude = []
149
+ candidates = []
150
+ if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
151
+ # quick bailout , there are no matches, or there is an exclude
152
+ return []
153
+
154
+ # word matching
155
+ match_words = [w.strip(WORD_TERMINATORS) for w in match_string.split()]
156
+ # kw = single word -
157
+ for kw in keywords:
158
+ i = match_string.find(kw)
159
+ if " " not in kw and kw not in match_words or " " in kw and i < 0:
160
+ continue
161
+ # Assume unsigned are int
162
+ result = BASE.copy()
163
+ result["type"] = type
164
+ result["confidence"] = rate * dist_rate(i) # OK
165
+ log.trace(f" - found '{kw}' at position {i} with rating {dist_rate(i)}")
166
+ candidates.append(result)
167
+ return candidates
168
+
169
+
170
def compound_candidates(
    type: str,
    match_string: str,
    keywords: List[str],
    rate: float = 0.85,
    exclude: Optional[List[str]] = None,
):
    """
    Find and rate candidates for compound types that may carry a subscript.

    Matching is case sensitive for the keywords. A keyword without a space must
    occur as a whole word (after stripping WORD_TERMINATORS); a keyword that
    contains a space only needs to occur as a substring.

    Args:
        type: name of the compound type to report, e.g. "List" or "Generator".
        match_string: the text to inspect.
        keywords: words or phrases that indicate ``type``.
        rate: base confidence for a hit.
        exclude: substrings that veto every match; defaults to no exclusions.

    Returns:
        A list with one candidate (a copy of BASE with "type" and "confidence"
        filled in) per matching keyword; empty when nothing matches.
    """
    vetoes = [] if exclude is None else exclude

    # quick bailout: no keyword present at all, or an excluded term is present
    if not any(word in match_string for word in keywords):
        return []
    if any(term in match_string for term in vetoes):
        return []

    # (needle, reported subtype) in priority order; the first usable one wins
    subtype_table = (
        ("tuple", "Tuple"),
        ("string", "str"),
        ("unsigned", "int"),
        ("int", "int"),
    )
    folded = match_string.casefold()
    whole_words = [word.strip(WORD_TERMINATORS) for word in match_string.split()]

    found = []
    for keyword in keywords:
        position = match_string.find(keyword)
        if " " in keyword:
            # phrase: a substring hit is enough
            if position < 0:
                continue
        elif keyword not in whole_words:
            # single word: must occur as a complete word
            continue

        candidate = BASE.copy()
        subtype = None
        score = rate
        for needle, reported in subtype_table:
            if needle not in folded:
                continue
            # presence is tested case-insensitively, the position case-sensitively
            if match_string.find(needle) == position:
                # do not match on the same main and sub
                continue
            score += 0.10  # boost as we have a subtype
            subtype = reported
            break

        candidate["type"] = f"{type}[{subtype}]" if subtype else f"{type}"
        score = score * dist_rate(position)  # distance weighting
        candidate["confidence"] = score
        log.trace(
            f" - found '{keyword}' at position {position} with confidence {score} rating {dist_rate(position)}"
        )
        found.append(candidate)
    return found
226
+
227
+
228
def object_candidates(match_string: str, rate: float = 0.81, exclude: Optional[List[str]] = None):
    """
    Find and rate candidates for "<something> object" style descriptions.

    Matching is case sensitive. The word directly before "Object"/"object" is
    taken as the type name; if the keyword is the first word, or nothing usable
    remains of the preceding word after cleaning, the type is "Incomplete".

    Args:
        match_string: the text to inspect.
        rate: base confidence for a hit.
        exclude: substrings that veto every match; defaults to ["IRQ"].

    Returns:
        A list of candidates (copies of BASE with "type" and "confidence"
        filled in); empty when nothing matches.
    """
    # defaults
    if exclude is None:
        exclude = ["IRQ"]
    keywords = [
        "Object",
        "object",
    ]  # Q&D

    if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
        # quick bailout , there are no matches, or there is an exclude
        return []

    candidates = []
    # loop-invariant: Return <multiple words object>
    words = match_string.split(" ")
    for kw in keywords:
        i = match_string.find(kw)
        if i < 0:
            continue
        # did the word actually occur, or is it just a partial
        if kw not in words:
            continue

        confidence = rate
        pos = words.index(kw)
        obj = "Incomplete" if pos == 0 else words[pos - 1]
        if obj in ("stream-like", "file"):
            obj = "IO"  # needs from typing import IO
        elif obj == "callback":
            obj = "Callable[..., Incomplete]"  # requires additional 'from typing import Callable'
        else:
            # clean
            obj = re.sub(r"[^a-z.A-Z0-9]", "", obj)

        if not obj:
            # The preceding "word" was empty (consecutive spaces) or pure
            # punctuation; indexing obj[0] below would raise IndexError.
            # Treat it like the "no preceding word" case.
            obj = "Incomplete"
        elif obj in ("an", "any"):  # "Return an / any object"
            obj = "Incomplete"
            confidence += 0.10  # abstract , but very good
        elif obj[0].islower():
            confidence -= 0.20  # not so good

        result = BASE.copy()
        result["type"] = obj
        result["confidence"] = confidence * dist_rate(i)
        candidates.append(result)
    return candidates
275
+
276
+
277
def has_none_verb(docstr: str) -> List:
    """Return a single 'None' candidate if the docstring starts with one of NONE_VERBS.

    The comparison ignores leading/trailing whitespace and is done on
    case-folded text. Returns an empty list when no verb matches.
    """
    normalized = docstr.strip().casefold()
    for verb in NONE_VERBS:
        if normalized.startswith(verb.casefold()):
            candidate = BASE.copy()
            candidate["type"] = "None"
            candidate["confidence"] = C_NONE  # better than the default Any
            return [candidate]
    return []
286
+
287
+
288
def distill_return(return_text: str) -> List[Dict]:
    """Find possible return types and their confidence in a fragment of text.

    The text is cleaned (surrounding whitespace, trailing periods and backticks
    removed) and then run through the keyword matchers for each known type.

    Returns:
        A list of candidate dicts, starting with a copy of the BASE default::

            {
                type: str               # the return type
                confidence: float       # the confidence weighting
                match: Optional[str]    # for debugging: the reason the match was made
            }

        The order of the list is the order of the matchers below; callers that
        pick the best candidate rely on it for tie-breaking.
    """
    # Start from a *copy* of the default, so that a caller mutating the result
    # can never alter the shared module-level BASE.
    candidates = [BASE.copy()]

    # clean up match_string
    match_string = return_text.strip().rstrip(".").replace("`", "")

    # compound types that can carry a subscript: (type, keywords, rate)
    compound_specs = (
        ("Generator", ["generator"], C_GENERATOR),
        ("Iterator", ["iterator"], C_ITERATOR),
        ("List", ["a list of", "list of", "an array"], C_LIST),
    )
    for type_name, keywords, rate in compound_specs:
        candidates += compound_candidates(type_name, match_string, keywords, rate)

    # simple types: (type, keywords, rate) - order is significant
    simple_specs = (
        ("Dict", ["a dictionary", "dict", "Dictionary"], C_DICT),
        (
            "Tuple",
            [
                "tuple",
                "a pair",
                "1-tuple",
                "2-tuple",
                "3-tuple",
                "4-tuple",
                "5-tuple",
                "6-tuple",
                "7-tuple",
                "8-tuple",
                "9-tuple",
            ],
            C_TUPLE,
        ),
        # assume unsigned values are int
        ("int", ["unsigned integer", "unsigned int", "unsigned"], C_UINT),
        ("int", ["number", "integer", "count", "int", "0 or 1"], C_INT),
        # good but not perfect indicators of integers
        # better match than bytes and bytearray or object
        (
            "int",
            [
                "length",
                "total size",
                "size of",
                "the index",
                "number of",
                "address of",
                "the duration",
            ],
            C_INT_SIZES,
        ),
        # NOTE(review): empty keyword list kept from the original; it looks
        # like a no-op - confirm against simple_candidates before removing.
        ("int", [], C_INT_SIZES),
        # Assume numbers are signed int
        (
            "int",
            ["index", "**signed** value", "seconds", "nanoseconds", "millisecond", "offset"],
            C_INT_LIKE,
        ),
        # better match than bytes
        ("bytearray", ["bytearray"], C_BYTEARRAY),
        # OK, better than just string
        ("bytes", ["bytes", "byte string"], C_BYTES),
        ("bool", ["boolean", "bool", "True", "False"], C_BOOL),
        (
            "float",
            [
                "float",
                "logarithm",
                "sine",
                "cosine",
                "tangent",
                "exponential",
                "complex number",
                "phase",
                "ratio of",
            ],
            C_FLOAT,
        ),
        ("str", ["string", "(sub)string", "sub-string", "substring"], C_STR),
        ("str", ["name", "names"], C_STR_NAMES),
    )
    for type_name, keywords, rate in simple_specs:
        candidates += simple_candidates(type_name, match_string, keywords, rate)

    # "? contains 'None if there is no' --> Union[None, xxx]" is not a plain
    # None return, so those phrasings are excluded.
    candidates += simple_candidates(
        "None",
        match_string,
        ["``None``", "None"],
        C_NONE_RETURN,
        exclude=["previous value", "if there is no"],
    )

    candidates += object_candidates(match_string, C_OBJECTS)

    return candidates
428
+
429
+
430
def return_type_from_context(
    *, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
):
    """Return the best-guess return type for a function as a string.

    Thin wrapper around _type_from_context that keeps only the "type" of the
    winning candidate. Any exception raised while determining the type is
    swallowed and "Incomplete" is returned instead.
    """
    try:
        best = _type_from_context(
            docstring=docstring, signature=signature, module=module, literal=literal
        )
        return str(best["type"])
    except Exception:
        # best effort: an unparsable docstring must not stop processing
        return "Incomplete"
441
+
442
+
443
def _type_from_context(
    *, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
):  # -> Dict[str , Union[str,float]]:
    """Determine the return type of a function or method based on:
    - the function signature
    - the terminology used in the docstring

    Logic:
    - a docstring given as a list is joined with spaces to simplify parsing
    - if the signature contains a return annotation ( -> <something> ),
      that annotation is returned and nothing else is done
    - otherwise, if "<module>.<function>" is listed in LOOKUP_LIST, the type
      from that list is returned
    - otherwise the docstring is searched with regexes for phrases such as:

        - 'Returns ..... '
        - 'Gets ..... '

      and each hit is rated for references to known types through a
      hand-coded model
    - the highest ranking candidate is selected; on a tie the earliest wins
    - the default type is 'Incomplete'

    Returns a dict with the keys "type", "confidence" and "match".
    """
    if isinstance(docstring, list):
        # join with space to avoid ending at a newline
        docstring = " ".join(docstring)

    # if the signature contains a return type , then use that and do nothing else.
    if "->" in signature:
        annotated = signature.split("->")[-1].strip(": ")
        return {"type": annotated, "confidence": WEIGHT_LOOPUPS, "match": signature}

    # ------------------------------------------------------
    # lookup returns that cannot be found based on the docstring from the lookup list
    # only the function name without the leading module
    # NOTE(review): inside [...] the '|' is a literal pipe, not an alternation
    names = re.findall(r"[\w|.]+(?=\()", signature)
    short_name = names[0] if names else signature.strip().strip(":()")
    qualified_name = ".".join((module, short_name))

    if qualified_name in LOOKUP_LIST:
        return {
            "type": LOOKUP_LIST[qualified_name][0],
            "confidence": C_LOOKUP * WEIGHT_LOOPUPS,
            "match": qualified_name,
        }

    # ------------------------------------------------------
    # start with the default, then add a candidate for simple start verbs
    candidates: List[Dict] = [{"match": "default", "type": "Incomplete", "confidence": 0}]
    candidates += has_none_verb(docstring)

    # ------------------------------------------------------
    # (regex, weight) pairs; the weight boosts the more relevant phrasings
    if literal:
        weighted_regex = [
            (RE_LIT_AS_A, 1.0),
            (RE_LIT_SENTENCE, 2.0),
        ]
    else:
        weighted_regex = [
            (RE_RETURN_VALUE, WEIGHT_RETURN_VAL),
            (RE_RETURN, WEIGHT_RETURNS),
            (RE_GETS, WEIGHT_GETS),
        ]

    # parse the docstring for the regexes and weigh the results accordingly
    for pattern, weight in weighted_regex:
        for hit in re.finditer(pattern, docstring, re.MULTILINE | re.IGNORECASE):
            for item in distill_return(hit.group("return")):
                candidates.append(
                    {
                        "match": hit,
                        "type": item["type"],
                        "confidence": item["confidence"] * weight,  # add search boost
                    }
                )

    # highest confidence wins; max() returns the first of equally rated candidates
    best = max(candidates, key=lambda entry: entry["confidence"])

    # ref: https://docs.python.org/3/library/typing.html#typing.Coroutine
    # Coroutine[YieldType, SendType, ReturnType]
    # todo: sanity check against actual code .....
    # NOTE(review): the detected type is placed in the first (YieldType) slot,
    # not the ReturnType slot - confirm whether that is intended.
    if "This is a coroutine" in docstring and "Coroutine" not in str(best["type"]):  # type: ignore
        best["type"] = f"Coroutine[{best['type']}, Any, Any]"

    # return the best candidate, or the 'Incomplete' default
    return best