micropython-stubber 1.23.1__py3-none-any.whl → 1.23.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. {micropython_stubber-1.23.1.dist-info → micropython_stubber-1.23.2.dist-info}/LICENSE +30 -30
  2. {micropython_stubber-1.23.1.dist-info → micropython_stubber-1.23.2.dist-info}/METADATA +32 -15
  3. micropython_stubber-1.23.2.dist-info/RECORD +158 -0
  4. micropython_stubber-1.23.2.dist-info/entry_points.txt +5 -0
  5. mpflash/README.md +220 -194
  6. mpflash/libusb_flash.ipynb +203 -203
  7. mpflash/mpflash/add_firmware.py +98 -98
  8. mpflash/mpflash/ask_input.py +236 -236
  9. mpflash/mpflash/basicgit.py +284 -284
  10. mpflash/mpflash/bootloader/__init__.py +2 -2
  11. mpflash/mpflash/bootloader/activate.py +60 -60
  12. mpflash/mpflash/bootloader/detect.py +82 -82
  13. mpflash/mpflash/bootloader/manual.py +101 -101
  14. mpflash/mpflash/bootloader/micropython.py +12 -12
  15. mpflash/mpflash/bootloader/touch1200.py +36 -36
  16. mpflash/mpflash/cli_download.py +129 -129
  17. mpflash/mpflash/cli_flash.py +224 -219
  18. mpflash/mpflash/cli_group.py +111 -111
  19. mpflash/mpflash/cli_list.py +87 -81
  20. mpflash/mpflash/cli_main.py +39 -39
  21. mpflash/mpflash/common.py +210 -165
  22. mpflash/mpflash/config.py +44 -44
  23. mpflash/mpflash/connected.py +96 -78
  24. mpflash/mpflash/download.py +364 -364
  25. mpflash/mpflash/downloaded.py +130 -130
  26. mpflash/mpflash/errors.py +9 -9
  27. mpflash/mpflash/flash/__init__.py +55 -55
  28. mpflash/mpflash/flash/esp.py +59 -59
  29. mpflash/mpflash/flash/stm32.py +19 -19
  30. mpflash/mpflash/flash/stm32_dfu.py +104 -104
  31. mpflash/mpflash/flash/uf2/__init__.py +88 -88
  32. mpflash/mpflash/flash/uf2/boardid.py +15 -15
  33. mpflash/mpflash/flash/uf2/linux.py +136 -130
  34. mpflash/mpflash/flash/uf2/macos.py +42 -42
  35. mpflash/mpflash/flash/uf2/uf2disk.py +12 -12
  36. mpflash/mpflash/flash/uf2/windows.py +43 -43
  37. mpflash/mpflash/flash/worklist.py +170 -170
  38. mpflash/mpflash/list.py +106 -99
  39. mpflash/mpflash/logger.py +41 -41
  40. mpflash/mpflash/mpboard_id/__init__.py +93 -93
  41. mpflash/mpflash/mpboard_id/add_boards.py +251 -251
  42. mpflash/mpflash/mpboard_id/board.py +37 -37
  43. mpflash/mpflash/mpboard_id/board_id.py +86 -86
  44. mpflash/mpflash/mpboard_id/store.py +43 -43
  45. mpflash/mpflash/mpremoteboard/__init__.py +266 -222
  46. mpflash/mpflash/mpremoteboard/mpy_fw_info.py +141 -141
  47. mpflash/mpflash/mpremoteboard/runner.py +140 -140
  48. mpflash/mpflash/vendor/click_aliases.py +91 -91
  49. mpflash/mpflash/vendor/dfu.py +165 -165
  50. mpflash/mpflash/vendor/pydfu.py +605 -605
  51. mpflash/mpflash/vendor/readme.md +2 -2
  52. mpflash/mpflash/versions.py +135 -135
  53. mpflash/poetry.lock +1599 -1599
  54. mpflash/pyproject.toml +65 -65
  55. mpflash/stm32_udev_rules.md +62 -62
  56. stubber/__init__.py +3 -3
  57. stubber/board/board_info.csv +193 -193
  58. stubber/board/boot.py +34 -34
  59. stubber/board/createstubs.py +1004 -986
  60. stubber/board/createstubs_db.py +826 -825
  61. stubber/board/createstubs_db_min.py +332 -331
  62. stubber/board/createstubs_db_mpy.mpy +0 -0
  63. stubber/board/createstubs_lvgl.py +741 -741
  64. stubber/board/createstubs_lvgl_min.py +741 -741
  65. stubber/board/createstubs_mem.py +767 -766
  66. stubber/board/createstubs_mem_min.py +307 -306
  67. stubber/board/createstubs_mem_mpy.mpy +0 -0
  68. stubber/board/createstubs_min.py +295 -294
  69. stubber/board/createstubs_mpy.mpy +0 -0
  70. stubber/board/fw_info.py +141 -141
  71. stubber/board/info.py +183 -183
  72. stubber/board/main.py +19 -19
  73. stubber/board/modulelist.txt +247 -247
  74. stubber/board/pyrightconfig.json +34 -34
  75. stubber/bulk/mcu_stubber.py +437 -454
  76. stubber/codemod/_partials/__init__.py +48 -48
  77. stubber/codemod/_partials/db_main.py +147 -147
  78. stubber/codemod/_partials/lvgl_main.py +77 -77
  79. stubber/codemod/_partials/modules_reader.py +80 -80
  80. stubber/codemod/add_comment.py +53 -53
  81. stubber/codemod/add_method.py +65 -65
  82. stubber/codemod/board.py +317 -317
  83. stubber/codemod/enrich.py +151 -145
  84. stubber/codemod/merge_docstub.py +284 -284
  85. stubber/codemod/modify_list.py +54 -54
  86. stubber/codemod/utils.py +56 -56
  87. stubber/commands/build_cmd.py +94 -94
  88. stubber/commands/cli.py +49 -55
  89. stubber/commands/clone_cmd.py +78 -78
  90. stubber/commands/config_cmd.py +29 -29
  91. stubber/commands/enrich_folder_cmd.py +71 -71
  92. stubber/commands/get_core_cmd.py +71 -71
  93. stubber/commands/get_docstubs_cmd.py +92 -89
  94. stubber/commands/get_frozen_cmd.py +117 -114
  95. stubber/commands/get_mcu_cmd.py +102 -61
  96. stubber/commands/merge_cmd.py +66 -66
  97. stubber/commands/publish_cmd.py +118 -118
  98. stubber/commands/stub_cmd.py +31 -31
  99. stubber/commands/switch_cmd.py +62 -62
  100. stubber/commands/variants_cmd.py +48 -48
  101. stubber/cst_transformer.py +178 -178
  102. stubber/data/board_info.csv +193 -193
  103. stubber/data/board_info.json +1729 -1729
  104. stubber/data/micropython_tags.csv +15 -15
  105. stubber/data/requirements-core-micropython.txt +38 -38
  106. stubber/data/requirements-core-pycopy.txt +39 -39
  107. stubber/downloader.py +37 -36
  108. stubber/freeze/common.py +72 -68
  109. stubber/freeze/freeze_folder.py +69 -69
  110. stubber/freeze/freeze_manifest_2.py +126 -113
  111. stubber/freeze/get_frozen.py +131 -127
  112. stubber/get_cpython.py +112 -101
  113. stubber/get_lobo.py +59 -59
  114. stubber/minify.py +423 -419
  115. stubber/publish/bump.py +86 -86
  116. stubber/publish/candidates.py +275 -256
  117. stubber/publish/database.py +18 -18
  118. stubber/publish/defaults.py +40 -40
  119. stubber/publish/enums.py +24 -24
  120. stubber/publish/helpers.py +29 -29
  121. stubber/publish/merge_docstubs.py +136 -130
  122. stubber/publish/missing_class_methods.py +51 -49
  123. stubber/publish/package.py +150 -146
  124. stubber/publish/pathnames.py +51 -51
  125. stubber/publish/publish.py +120 -120
  126. stubber/publish/pypi.py +42 -38
  127. stubber/publish/stubpackage.py +1055 -1027
  128. stubber/rst/__init__.py +9 -9
  129. stubber/rst/classsort.py +78 -77
  130. stubber/rst/lookup.py +533 -530
  131. stubber/rst/output_dict.py +401 -401
  132. stubber/rst/reader.py +814 -814
  133. stubber/rst/report_return.py +77 -69
  134. stubber/rst/rst_utils.py +541 -540
  135. stubber/stubber.py +38 -38
  136. stubber/stubs_from_docs.py +90 -90
  137. stubber/tools/manifestfile.py +654 -654
  138. stubber/tools/readme.md +6 -6
  139. stubber/update_fallback.py +117 -117
  140. stubber/update_module_list.py +123 -123
  141. stubber/utils/__init__.py +6 -6
  142. stubber/utils/config.py +137 -125
  143. stubber/utils/makeversionhdr.py +54 -54
  144. stubber/utils/manifest.py +90 -90
  145. stubber/utils/post.py +80 -79
  146. stubber/utils/repos.py +156 -150
  147. stubber/utils/stubmaker.py +139 -139
  148. stubber/utils/typed_config_toml.py +80 -77
  149. stubber/variants.py +106 -106
  150. micropython_stubber-1.23.1.dist-info/RECORD +0 -159
  151. micropython_stubber-1.23.1.dist-info/entry_points.txt +0 -3
  152. mpflash/basicgit.py +0 -288
  153. {micropython_stubber-1.23.1.dist-info → micropython_stubber-1.23.2.dist-info}/WHEEL +0 -0
stubber/rst/rst_utils.py CHANGED
@@ -1,540 +1,541 @@
1
- """
2
- Work in Progress
3
- ----------------
4
-
5
- Tries to determine the return type by parsing the docstring and the function signature
6
- - if the signature contains a return type --> <something> then that is returned
7
- - check a lookup dictionary of type overrides,
8
- if the functionnae is listed, then use the override
9
- - use re to find phrases such as:
10
- - 'Returns ..... '
11
- - 'Gets ..... '
12
- - docstring is joined without newlines to simplify parsing
13
- - then parses the docstring to find references to known types and give then a rating though a hand coded model ()
14
- - builds a list return type candidates
15
- - selects the highest ranking candidate
16
- - the default Type is 'Any'
17
-
18
-
19
- to do:
20
-
21
- - regex :
22
- - 'With no arguments the frequency in Hz is returned.'
23
- - 'Get or set' --> indicates overloaded/optional return Union[None|...]
24
- - add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
25
-
26
- - regex :
27
- - 'With no arguments the frequency in Hz is returned.'
28
- - 'Get or set' --> indicates overloaded/optional return Union[None|...]
29
- - add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
30
-
31
- - try if an Azure Machine Learning works as well
32
- https://docs.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources
33
- -
34
- """
35
- # ref: https://regex101.com/codegen?language=python
36
- # https://regex101.com/r/Ni8g2z/2
37
-
38
- import re
39
- from typing import Dict, List, Optional, Union
40
-
41
- from loguru import logger as log
42
-
43
- from .lookup import LOOKUP_LIST, NONE_VERBS, TYPING_IMPORT
44
-
45
- # These are shown to import
46
- __all__ = [
47
- "simple_candidates",
48
- "compound_candidates",
49
- "object_candidates",
50
- "distill_return",
51
- "return_type_from_context",
52
- "_type_from_context", # For testing only
53
- "TYPING_IMPORT",
54
- ]
55
-
56
-
57
- # logging
58
- # # log = logging.getLogger(__name__)
59
-
60
-
61
- # --------------------------------------
62
- # Confidence levels
63
- # these heuristics are based a significant amout of manual testing,
64
- # and not based on any statistical analysis
65
-
66
- C_DEFAULT = 0 # Any , the default for all
67
- C_NONE = 0.1 + C_DEFAULT # better than the default Any
68
- C_BASE = 0.1 + C_NONE # the Base if a return type has been found
69
-
70
- C_STR_NAMES = 0.3
71
-
72
- C_GENERIC = 0.6
73
- C_DICT = C_GENERIC
74
- C_TUPLE = C_GENERIC
75
- C_LIST = C_GENERIC
76
- C_BOOL = C_GENERIC
77
- C_FLOAT = C_GENERIC
78
- C_STR = C_GENERIC
79
-
80
- # tehere is a bit of logic - but mostly empirical
81
- C_NONE_RETURN = C_GENERIC
82
- C_OBJECTS = 0.01 + C_GENERIC
83
-
84
- C_BYTES = 0.01 + C_GENERIC
85
- C_BYTEARRAY = 0.03 + C_GENERIC
86
- C_INT = 0.03 + C_GENERIC
87
- C_UINT = 0.04 + C_GENERIC
88
- C_ITERATOR = 0.4 + C_GENERIC
89
- C_GENERATOR = 0.4 + C_GENERIC
90
-
91
- C_INT_SIZES = 0.5 + C_GENERIC # better match than bytes and bytearray or object
92
- C_INT_LIKE = 0.5 + C_GENERIC
93
-
94
- C_LOOKUP = C_GENERIC + 1
95
-
96
- # --------------------------------------
97
- # Weights of the different Lookups
98
- WEIGHT_LOOPUPS = 3.0 # Lookup list weight factor
99
- WEIGHT_RETURN_VAL = 3.0 # Lookup list weight factor
100
- WEIGHT_RETURNS = 1.8 # for Docstring returns
101
- WEIGHT_GETS = 1.5 # For docstring Gets
102
-
103
- # --------------------------------------
104
-
105
- # base has a confidence that is quite low, but better than rubbish
106
- BASE = {"type": "Incomplete", "confidence": C_BASE, "match": None}
107
-
108
- # --------------------------------------
109
- # Regexes
110
- # --------------------------------------
111
-
112
- # all regex matches stop at end of sentence:: . ! ? : ;
113
- # Look for "Return Value: xxxx"
114
- RE_RETURN_VALUE = r"Return value\s?:\s?(?P<return>[^.!?:;]*)"
115
- # Look for Returns , but no 'Information'
116
- RE_RETURN = r"Return(?:s?,?|(?:ing)?)\s(?!information)(?P<return>[^.!?:;]*)"
117
- # Look for gets
118
- RE_GETS = r"Gets?\s(?P<return>[^.!?:;]*)"
119
-
120
- # --------------------------------------
121
- # Regex for Literals
122
- # --------------------------------------
123
- RE_LIT_AS_A = r"as a\s?(?P<return>[^.!?:;]*)"
124
- RE_LIT_SENTENCE = r"\s?(?P<return>[^.!?:;]*)"
125
-
126
-
127
- def dist_rate(i: int) -> float:
128
- """"""
129
- max_len = 150 # must occur in the first 150 chars
130
- return max((max_len - i), 1) / max_len
131
-
132
-
133
- WORD_TERMINATORS = ".,!;:?"
134
-
135
-
136
- def simple_candidates(
137
- type: str,
138
- match_string: str,
139
- keywords: List[str],
140
- rate: float = 0.5,
141
- exclude: Optional[List[str]] = None,
142
- ):
143
- """
144
- find and rate possible types and confidence weighting for simple types.
145
- Case sensitive
146
- """
147
- if exclude is None:
148
- exclude = []
149
- candidates = []
150
- if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
151
- # quick bailout , there are no matches, or there is an exclude
152
- return []
153
-
154
- # word matching
155
- match_words = [w.strip(WORD_TERMINATORS) for w in match_string.split()]
156
- # kw = single word -
157
- for kw in keywords:
158
- i = match_string.find(kw)
159
- if " " not in kw and kw not in match_words or " " in kw and i < 0:
160
- continue
161
- # Assume unsigned are int
162
- result = BASE.copy()
163
- result["type"] = type
164
- result["confidence"] = rate * dist_rate(i) # OK
165
- log.trace(f" - found '{kw}' at position {i} with rating {dist_rate(i)}")
166
- candidates.append(result)
167
- return candidates
168
-
169
-
170
- def compound_candidates(
171
- type: str,
172
- match_string: str,
173
- keywords: List[str],
174
- rate: float = 0.85,
175
- exclude: Optional[List[str]] = None,
176
- ):
177
- """
178
- find and rate possible types and confidence weighting for compound types that can have a subscription.
179
- Case sensitive
180
- """
181
- if exclude is None:
182
- exclude = []
183
- candidates = []
184
- if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
185
- # quick bailout , there are no matches, or there is an exclude
186
- return []
187
-
188
- # word matching
189
- match_words = [w.strip(WORD_TERMINATORS) for w in match_string.split()]
190
- # kw = single word -
191
- for kw in keywords:
192
- i = match_string.find(kw)
193
- if " " not in kw and kw not in match_words or " " in kw and i < 0:
194
- continue
195
- # List / Dict / Generator of Any / Tuple /
196
- sub = None
197
- result = BASE.copy()
198
- confidence = rate
199
- for element in ("tuple", "string", "unsigned", "int"):
200
- if element in match_string.casefold():
201
- j = match_string.find(element)
202
- if i == j:
203
- # do not match on the same main and sub
204
- continue
205
- confidence += 0.10 # boost as we have a subtype
206
- if element == "string":
207
- sub = "str"
208
- break
209
- elif element == "tuple":
210
- sub = "Tuple"
211
- break
212
- elif element == "unsigned":
213
- sub = "int"
214
- break
215
- else:
216
- sub = element
217
- result["type"] = f"{type}[{sub}]" if sub else f"{type}"
218
- confidence = confidence * dist_rate(i) # distance weighting
219
- result["confidence"] = confidence
220
- log.trace(
221
- f" - found '{kw}' at position {i} with confidence {confidence} rating {dist_rate(i)}"
222
- )
223
-
224
- candidates.append(result)
225
- return candidates
226
-
227
-
228
- def object_candidates(match_string: str, rate: float = 0.81, exclude: Optional[List[str]] = None):
229
- """
230
- find and rate possible types and confidence weighting for Object types.
231
- Case sensitive
232
- Exclude defaults to ["IRQ"]
233
- """
234
- # defaults
235
- if exclude is None:
236
- exclude = ["IRQ"]
237
- candidates = []
238
- keywords = [
239
- "Object",
240
- "object",
241
- ] # Q&D
242
-
243
- if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
244
- # quick bailout , there are no matches, or there is an exclude
245
- return []
246
- for kw in keywords:
247
- i = match_string.find(kw)
248
- if i < 0:
249
- continue
250
- # List / Dict / Generator of Any / Tuple /
251
- confidence = rate
252
-
253
- # did the word actually occur, or is it just a partial
254
- words = match_string.split(" ") # Return <multiple words object>
255
- if kw in words:
256
- pos = words.index(kw)
257
- obj = "Incomplete" if pos == 0 else words[pos - 1]
258
- if obj in ("stream-like", "file"):
259
- obj = "IO" # needs from typing import IO
260
- elif obj == "callback":
261
- obj = "Callable[..., Incomplete]" # requires additional 'from typing import Callable'
262
- else:
263
- # clean
264
- obj = re.sub(r"[^a-z.A-Z0-9]", "", obj)
265
- result = BASE.copy()
266
- result["type"] = obj
267
- if obj in ["an", "any"]: # "Return an / any object"
268
- result["type"] = "Incomplete"
269
- confidence += 0.10 # abstract , but very good
270
- elif obj[0].islower():
271
- confidence -= 0.20 # not so good
272
- result["confidence"] = confidence * dist_rate(i)
273
- candidates.append(result)
274
- return candidates
275
-
276
-
277
- def has_none_verb(docstr: str) -> List:
278
- "returns a None result if the docstring starts with a verb that indicates None"
279
- docstr = docstr.strip().casefold()
280
- if not any(docstr.startswith(kw.casefold()) for kw in NONE_VERBS):
281
- return []
282
- result = BASE.copy()
283
- result["type"] = "None"
284
- result["confidence"] = C_NONE # better than the default Any
285
- return [result]
286
-
287
-
288
- def distill_return(return_text: str) -> List[Dict]:
289
- """Find return type and confidence.
290
- Returns a list of possible types and confidence weighting.
291
- {
292
-
293
- type :str # the return type
294
- confidence: float # the confidence between 0.0 and 1
295
- match: Optional[str] # for debugging : the reason the match was made
296
-
297
- }
298
-
299
- """
300
- candidates = [BASE] # Default to the base , which is 'Any'
301
-
302
- # clean up match_string
303
- match_string = return_text.strip().rstrip(".")
304
- match_string = match_string.replace("`", "")
305
-
306
- candidates += compound_candidates("Generator", match_string, ["generator"], C_GENERATOR)
307
- candidates += compound_candidates("Iterator", match_string, ["iterator"], C_ITERATOR)
308
- candidates += compound_candidates(
309
- "List", match_string, ["a list of", "list of", "an array"], C_LIST
310
- )
311
-
312
- candidates += simple_candidates(
313
- "Dict", match_string, ["a dictionary", "dict", "Dictionary"], C_DICT
314
- )
315
- candidates += simple_candidates(
316
- "Tuple",
317
- match_string,
318
- [
319
- "tuple",
320
- "a pair",
321
- "1-tuple",
322
- "2-tuple",
323
- "3-tuple",
324
- "4-tuple",
325
- "5-tuple",
326
- "6-tuple",
327
- "7-tuple",
328
- "8-tuple",
329
- "9-tuple",
330
- ],
331
- C_TUPLE,
332
- )
333
-
334
- candidates += simple_candidates(
335
- "int", match_string, ["unsigned integer", "unsigned int", "unsigned"], C_UINT
336
- )
337
-
338
- candidates += simple_candidates(
339
- "int",
340
- match_string,
341
- [
342
- "number",
343
- "integer",
344
- "count",
345
- "int",
346
- "0 or 1",
347
- ],
348
- C_INT,
349
- )
350
-
351
- # good but nor perfect indicators of integers
352
- # better match than bytes and bytearray or object
353
- candidates += simple_candidates(
354
- "int",
355
- match_string,
356
- [
357
- "length",
358
- "total size",
359
- "size of",
360
- "the index",
361
- "number of",
362
- "address of",
363
- "the duration",
364
- ],
365
- C_INT_SIZES,
366
- )
367
-
368
- candidates += simple_candidates("int", match_string, [], C_INT_SIZES)
369
-
370
- # Assume numbers are signed int
371
- candidates += simple_candidates(
372
- "int",
373
- match_string,
374
- [
375
- "index",
376
- "**signed** value",
377
- "seconds",
378
- "nanoseconds",
379
- "millisecond",
380
- "offset",
381
- ],
382
- C_INT_LIKE,
383
- )
384
-
385
- # better match than bytes
386
- candidates += simple_candidates("bytearray", match_string, ["bytearray"], C_BYTEARRAY)
387
-
388
- # OK, better than just string
389
- candidates += simple_candidates("bytes", match_string, ["bytes", "byte string"], C_BYTES)
390
-
391
- candidates += simple_candidates(
392
- "bool", match_string, ["boolean", "bool", "True", "False"], C_BOOL
393
- )
394
- candidates += simple_candidates(
395
- "float",
396
- match_string,
397
- [
398
- "float",
399
- "logarithm",
400
- "sine",
401
- "cosine",
402
- "tangent",
403
- "exponential",
404
- "complex number",
405
- "phase",
406
- "ratio of",
407
- ],
408
- C_FLOAT,
409
- )
410
-
411
- candidates += simple_candidates(
412
- "str", match_string, ["string", "(sub)string", "sub-string", "substring"], C_STR
413
- )
414
-
415
- candidates += simple_candidates("str", match_string, ["name", "names"], C_STR_NAMES)
416
- ## "? contains 'None if there is no' --> Union[Null, xxx]"
417
- candidates += simple_candidates(
418
- "None",
419
- match_string,
420
- ["``None``", "None"],
421
- C_NONE_RETURN,
422
- exclude=["previous value", "if there is no"],
423
- )
424
-
425
- candidates += object_candidates(match_string, C_OBJECTS)
426
-
427
- return candidates
428
-
429
-
430
- def return_type_from_context(
431
- *, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
432
- ):
433
- try:
434
- return str(
435
- _type_from_context(
436
- module=module, signature=signature, docstring=docstring, literal=literal
437
- )["type"]
438
- )
439
- except Exception:
440
- return "Incomplete"
441
-
442
-
443
- def _type_from_context(
444
- *, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
445
- ): # -> Dict[str , Union[str,float]]:
446
- """Determine the return type of a function or method based on:
447
- - the function signature
448
- - the terminology used in the docstring
449
-
450
- Logic:
451
- - if the signature contains a return type --> <something> then that is returned
452
- - use re to find phrases such as:
453
-
454
- - 'Returns ..... '
455
- - 'Gets ..... '
456
-
457
- - docstring is joined without newlines to simplify parsing
458
- - then parses the docstring to find references to known types and give then a rating though a hand coded model ()
459
- - builds a list return type candidates
460
- - selects the highest ranking candidate
461
- - the default Type is 'Any'
462
- """
463
-
464
- if isinstance(docstring, list):
465
- # join with space to avoid ending at a newline
466
- docstring = " ".join(docstring)
467
-
468
- # give the regex that searches for returns a 0.2 boost as that is bound to be more relevant
469
-
470
- weighted_regex = (
471
- [
472
- (RE_LIT_AS_A, 1.0),
473
- (RE_LIT_SENTENCE, 2.0),
474
- ]
475
- if literal
476
- else [
477
- (RE_RETURN_VALUE, WEIGHT_RETURN_VAL),
478
- (RE_RETURN, WEIGHT_RETURNS),
479
- (RE_GETS, WEIGHT_GETS),
480
- # (reads_regex, 1.0),
481
- ]
482
- )
483
- # only the function name without the leading module
484
- function_re = re.compile(r"[\w|.]+(?=\()")
485
-
486
- # matches: List[re.Match] = []
487
- candidates: List[Dict] = [{"match": "default", "type": "Incomplete", "confidence": 0}]
488
-
489
- # if the signature contains a return type , then use that and do nothing else.
490
- if "->" in signature:
491
- sig_type = signature.split("->")[-1].strip(": ")
492
- return {"type": sig_type, "confidence": WEIGHT_LOOPUPS, "match": signature}
493
-
494
- # ------------------------------------------------------
495
- # lookup returns that cannot be found based on the docstring from the lookup list
496
- try:
497
- function_name = function_re.findall(signature)[0]
498
- except IndexError:
499
- function_name = signature.strip().strip(":()")
500
-
501
- function_name = ".".join((module, function_name))
502
-
503
- if function_name in LOOKUP_LIST.keys():
504
- sig_type = LOOKUP_LIST[function_name][0]
505
- return {
506
- "type": sig_type,
507
- "confidence": C_LOOKUP * WEIGHT_LOOPUPS,
508
- "match": function_name,
509
- }
510
- # ------------------------------------------------------
511
- # parse the docstring for simple start verbs,
512
- # and add them as a candidate
513
- candidates += has_none_verb(docstring)
514
-
515
- # ------------------------------------------------------
516
- # parse the docstring for the regexes and weigh the results accordingly
517
- for weighted in weighted_regex:
518
- match_iter = re.finditer(weighted[0], docstring, re.MULTILINE | re.IGNORECASE)
519
- for match in match_iter:
520
- # matches.append(match)
521
- distilled = distill_return(match.group("return"))
522
- for item in distilled:
523
- candidate = {
524
- "match": match,
525
- "type": item["type"],
526
- "confidence": item["confidence"] * weighted[1], # add search boost
527
- }
528
- candidates.append(candidate)
529
- # Sort
530
- candidates = sorted(candidates, key=lambda x: x["confidence"], reverse=True)
531
- best = candidates[0] # best candidate
532
-
533
- # ref: https://docs.python.org/3/library/typing.html#typing.Coroutine
534
- # Coroutine[YieldType, SendType, ReturnType]
535
- # todo: sanity check against actual code .....
536
- if "This is a coroutine" in docstring and "Coroutine" not in str(best["type"]): # type: ignore
537
- best["type"] = f"Coroutine[{best['type']}, Any, Any]"
538
-
539
- # return the best candidate, or Any
540
- return best # best candidate
1
+ """
2
+ Work in Progress
3
+ ----------------
4
+
5
+ Tries to determine the return type by parsing the docstring and the function signature
6
+ - if the signature contains a return type --> <something> then that is returned
7
+ - check a lookup dictionary of type overrides,
8
+ if the functionnae is listed, then use the override
9
+ - use re to find phrases such as:
10
+ - 'Returns ..... '
11
+ - 'Gets ..... '
12
+ - docstring is joined without newlines to simplify parsing
13
+ - then parses the docstring to find references to known types and give then a rating though a hand coded model ()
14
+ - builds a list return type candidates
15
+ - selects the highest ranking candidate
16
+ - the default Type is 'Any'
17
+
18
+
19
+ to do:
20
+
21
+ - regex :
22
+ - 'With no arguments the frequency in Hz is returned.'
23
+ - 'Get or set' --> indicates overloaded/optional return Union[None|...]
24
+ - add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
25
+
26
+ - regex :
27
+ - 'With no arguments the frequency in Hz is returned.'
28
+ - 'Get or set' --> indicates overloaded/optional return Union[None|...]
29
+ - add regex for 'Query' ` Otherwise, query current state if no argument is provided. `
30
+
31
+ - try if an Azure Machine Learning works as well
32
+ https://docs.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources
33
+ -
34
+ """
35
+
36
+ # ref: https://regex101.com/codegen?language=python
37
+ # https://regex101.com/r/Ni8g2z/2
38
+
39
+ import re
40
+ from typing import Dict, List, Optional, Union
41
+
42
+ from mpflash.logger import log
43
+
44
+ from .lookup import LOOKUP_LIST, NONE_VERBS, TYPING_IMPORT
45
+
46
+ # These are shown to import
47
+ __all__ = [
48
+ "simple_candidates",
49
+ "compound_candidates",
50
+ "object_candidates",
51
+ "distill_return",
52
+ "return_type_from_context",
53
+ "_type_from_context", # For testing only
54
+ "TYPING_IMPORT",
55
+ ]
56
+
57
+
58
+ # logging
59
+ # # log = logging.getLogger(__name__)
60
+
61
+
62
+ # --------------------------------------
63
+ # Confidence levels
64
+ # these heuristics are based a significant amout of manual testing,
65
+ # and not based on any statistical analysis
66
+
67
+ C_DEFAULT = 0 # Any , the default for all
68
+ C_NONE = 0.1 + C_DEFAULT # better than the default Any
69
+ C_BASE = 0.1 + C_NONE # the Base if a return type has been found
70
+
71
+ C_STR_NAMES = 0.3
72
+
73
+ C_GENERIC = 0.6
74
+ C_DICT = C_GENERIC
75
+ C_TUPLE = C_GENERIC
76
+ C_LIST = C_GENERIC
77
+ C_BOOL = C_GENERIC
78
+ C_FLOAT = C_GENERIC
79
+ C_STR = C_GENERIC
80
+
81
+ # tehere is a bit of logic - but mostly empirical
82
+ C_NONE_RETURN = C_GENERIC
83
+ C_OBJECTS = 0.01 + C_GENERIC
84
+
85
+ C_BYTES = 0.01 + C_GENERIC
86
+ C_BYTEARRAY = 0.03 + C_GENERIC
87
+ C_INT = 0.03 + C_GENERIC
88
+ C_UINT = 0.04 + C_GENERIC
89
+ C_ITERATOR = 0.4 + C_GENERIC
90
+ C_GENERATOR = 0.4 + C_GENERIC
91
+
92
+ C_INT_SIZES = 0.5 + C_GENERIC # better match than bytes and bytearray or object
93
+ C_INT_LIKE = 0.5 + C_GENERIC
94
+
95
+ C_LOOKUP = C_GENERIC + 1
96
+
97
+ # --------------------------------------
98
+ # Weights of the different Lookups
99
+ WEIGHT_LOOPUPS = 3.0 # Lookup list weight factor
100
+ WEIGHT_RETURN_VAL = 3.0 # Lookup list weight factor
101
+ WEIGHT_RETURNS = 1.8 # for Docstring returns
102
+ WEIGHT_GETS = 1.5 # For docstring Gets
103
+
104
+ # --------------------------------------
105
+
106
+ # base has a confidence that is quite low, but better than rubbish
107
+ BASE = {"type": "Incomplete", "confidence": C_BASE, "match": None}
108
+
109
+ # --------------------------------------
110
+ # Regexes
111
+ # --------------------------------------
112
+
113
+ # all regex matches stop at end of sentence:: . ! ? : ;
114
+ # Look for "Return Value: xxxx"
115
+ RE_RETURN_VALUE = r"Return value\s?:\s?(?P<return>[^.!?:;]*)"
116
+ # Look for Returns , but no 'Information'
117
+ RE_RETURN = r"Return(?:s?,?|(?:ing)?)\s(?!information)(?P<return>[^.!?:;]*)"
118
+ # Look for gets
119
+ RE_GETS = r"Gets?\s(?P<return>[^.!?:;]*)"
120
+
121
+ # --------------------------------------
122
+ # Regex for Literals
123
+ # --------------------------------------
124
+ RE_LIT_AS_A = r"as a\s?(?P<return>[^.!?:;]*)"
125
+ RE_LIT_SENTENCE = r"\s?(?P<return>[^.!?:;]*)"
126
+
127
+
128
+ def dist_rate(i: int) -> float:
129
+ """"""
130
+ max_len = 150 # must occur in the first 150 chars
131
+ return max((max_len - i), 1) / max_len
132
+
133
+
134
+ WORD_TERMINATORS = ".,!;:?"
135
+
136
+
137
+ def simple_candidates(
138
+ type: str,
139
+ match_string: str,
140
+ keywords: List[str],
141
+ rate: float = 0.5,
142
+ exclude: Optional[List[str]] = None,
143
+ ):
144
+ """
145
+ find and rate possible types and confidence weighting for simple types.
146
+ Case sensitive
147
+ """
148
+ if exclude is None:
149
+ exclude = []
150
+ candidates = []
151
+ if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
152
+ # quick bailout , there are no matches, or there is an exclude
153
+ return []
154
+
155
+ # word matching
156
+ match_words = [w.strip(WORD_TERMINATORS) for w in match_string.split()]
157
+ # kw = single word -
158
+ for kw in keywords:
159
+ i = match_string.find(kw)
160
+ if " " not in kw and kw not in match_words or " " in kw and i < 0:
161
+ continue
162
+ # Assume unsigned are int
163
+ result = BASE.copy()
164
+ result["type"] = type
165
+ result["confidence"] = rate * dist_rate(i) # OK
166
+ log.trace(f" - found '{kw}' at position {i} with rating {dist_rate(i)}")
167
+ candidates.append(result)
168
+ return candidates
169
+
170
+
171
def compound_candidates(
    type: str,
    match_string: str,
    keywords: List[str],
    rate: float = 0.85,
    exclude: Optional[List[str]] = None,
):
    """
    Rate candidate return types for compound types that may carry a subscript.

    Keyword matching is case sensitive. A keyword without a space must occur as
    a whole word of `match_string`; a keyword containing a space only has to
    occur as a substring.

    Args:
        type: name of the compound type to report, e.g. "List".
        match_string: the text to search.
        keywords: words or phrases that indicate `type`.
        rate: base confidence given to a match.
        exclude: phrases that veto every match; defaults to no exclusions.

    Returns:
        A list of candidate dicts (copies of BASE) with "type" and "confidence" set.
    """
    excluded = [] if exclude is None else exclude
    # quick bailout: no keyword present at all, or a vetoing phrase is present
    if not any(word in match_string for word in keywords):
        return []
    if any(word in match_string for word in excluded):
        return []

    # subtype indicators, in priority order, with the subscript they map to
    subtype_names = (
        ("tuple", "Tuple"),
        ("string", "str"),
        ("unsigned", "int"),
        ("int", "int"),
    )
    folded = match_string.casefold()
    match_words = [w.strip(WORD_TERMINATORS) for w in match_string.split()]

    candidates = []
    for kw in keywords:
        i = match_string.find(kw)
        is_phrase = " " in kw
        if (is_phrase and i < 0) or (not is_phrase and kw not in match_words):
            continue

        confidence = rate
        sub = None
        for element, sub_name in subtype_names:
            if element not in folded:
                continue
            if match_string.find(element) == i:
                # main type and subtype found at the same position: not a subtype
                continue
            confidence += 0.10  # boost as we have a subtype
            sub = sub_name
            break

        result = BASE.copy()
        result["type"] = f"{type}[{sub}]" if sub else f"{type}"
        confidence = confidence * dist_rate(i)  # distance weighting
        result["confidence"] = confidence
        log.trace(
            f" - found '{kw}' at position {i} with confidence {confidence} rating {dist_rate(i)}"
        )
        candidates.append(result)
    return candidates
227
+
228
+
229
def object_candidates(match_string: str, rate: float = 0.81, exclude: Optional[List[str]] = None):
    """
    Find and rate possible Object-like return types in `match_string`.

    Looks for the stand-alone word "Object" / "object" (case sensitive) and uses
    the word directly in front of it as the type name, e.g. "a Pin object" -> "Pin".

    Args:
        match_string: the text to search.
        rate: base confidence given to a match.
        exclude: phrases that veto every match; defaults to ["IRQ"].

    Returns:
        A list of candidate dicts (copies of BASE) with "type" and "confidence" set.
    """
    # defaults
    if exclude is None:
        exclude = ["IRQ"]
    keywords = [
        "Object",
        "object",
    ]  # Q&D

    if not any(t in match_string for t in keywords) or any(t in match_string for t in exclude):
        # quick bailout , there are no matches, or there is an exclude
        return []

    candidates = []
    # the word list does not depend on the keyword, so split only once
    words = match_string.split(" ")  # Return <multiple words object>
    for kw in keywords:
        i = match_string.find(kw)
        # skip when the keyword is absent or only occurs as part of a longer word
        if i < 0 or kw not in words:
            continue

        pos = words.index(kw)
        obj = "Incomplete" if pos == 0 else words[pos - 1]
        if obj in ("stream-like", "file"):
            obj = "IO"  # needs from typing import IO
        elif obj == "callback":
            obj = "Callable[..., Incomplete]"  # requires additional 'from typing import Callable'
        else:
            # Keep only characters allowed in a (dotted) name. The preceding word
            # can be empty (double space) or pure punctuation, leaving nothing
            # usable; fall back to "Incomplete" instead of failing on obj[0] below.
            obj = re.sub(r"[^a-z.A-Z0-9]", "", obj) or "Incomplete"

        confidence = rate
        result = BASE.copy()
        result["type"] = obj
        if obj in ("an", "any"):  # "Return an / any object"
            result["type"] = "Incomplete"
            confidence += 0.10  # abstract , but very good
        elif obj[0].islower():
            confidence -= 0.20  # not so good
        result["confidence"] = confidence * dist_rate(i)
        candidates.append(result)
    return candidates
276
+
277
+
278
def has_none_verb(docstr: str) -> List:
    """Return a single 'None' candidate when the docstring opens with one of NONE_VERBS.

    The comparison ignores case and leading/trailing whitespace.
    Returns an empty list when no verb matches.
    """
    normalized = docstr.strip().casefold()
    for verb in NONE_VERBS:
        if normalized.startswith(verb.casefold()):
            none_candidate = BASE.copy()
            none_candidate["type"] = "None"
            none_candidate["confidence"] = C_NONE  # better than the default Any
            return [none_candidate]
    return []
287
+
288
+
289
def distill_return(return_text: str) -> List[Dict]:
    """Find possible return types and their confidence in a piece of docstring text.

    Args:
        return_text: the part of a docstring that describes the returned value.

    Returns:
        A list of candidate dicts, each shaped as::

            {
                type: str               # the return type
                confidence: float       # the confidence between 0.0 and 1
                match: Optional[str]    # for debugging : the reason the match was made
            }

        The first entry is always a copy of the BASE default candidate.
    """
    # Default to the base candidate. Use a copy so that callers mutating the
    # returned candidates can never alter the shared module-level BASE dict.
    candidates = [BASE.copy()]

    # clean up match_string
    match_string = return_text.strip().rstrip(".")
    match_string = match_string.replace("`", "")

    candidates += compound_candidates("Generator", match_string, ["generator"], C_GENERATOR)
    candidates += compound_candidates("Iterator", match_string, ["iterator"], C_ITERATOR)
    candidates += compound_candidates(
        "List", match_string, ["a list of", "list of", "an array"], C_LIST
    )

    candidates += simple_candidates(
        "Dict", match_string, ["a dictionary", "dict", "Dictionary"], C_DICT
    )
    candidates += simple_candidates(
        "Tuple",
        match_string,
        # "1-tuple" .. "9-tuple"
        ["tuple", "a pair", *(f"{n}-tuple" for n in range(1, 10))],
        C_TUPLE,
    )

    candidates += simple_candidates(
        "int", match_string, ["unsigned integer", "unsigned int", "unsigned"], C_UINT
    )

    candidates += simple_candidates(
        "int",
        match_string,
        [
            "number",
            "integer",
            "count",
            "int",
            "0 or 1",
        ],
        C_INT,
    )

    # good but not perfect indicators of integers
    # better match than bytes and bytearray or object
    candidates += simple_candidates(
        "int",
        match_string,
        [
            "length",
            "total size",
            "size of",
            "the index",
            "number of",
            "address of",
            "the duration",
        ],
        C_INT_SIZES,
    )

    # NOTE(review): called with an empty keyword list, which presumably can never
    # produce a candidate - confirm against simple_candidates and remove if so.
    candidates += simple_candidates("int", match_string, [], C_INT_SIZES)

    # Assume numbers are signed int
    candidates += simple_candidates(
        "int",
        match_string,
        [
            "index",
            "**signed** value",
            "seconds",
            "nanoseconds",
            "millisecond",
            "offset",
        ],
        C_INT_LIKE,
    )

    # better match than bytes
    candidates += simple_candidates("bytearray", match_string, ["bytearray"], C_BYTEARRAY)

    # OK, better than just string
    candidates += simple_candidates("bytes", match_string, ["bytes", "byte string"], C_BYTES)

    candidates += simple_candidates(
        "bool", match_string, ["boolean", "bool", "True", "False"], C_BOOL
    )
    candidates += simple_candidates(
        "float",
        match_string,
        [
            "float",
            "logarithm",
            "sine",
            "cosine",
            "tangent",
            "exponential",
            "complex number",
            "phase",
            "ratio of",
        ],
        C_FLOAT,
    )

    candidates += simple_candidates(
        "str", match_string, ["string", "(sub)string", "sub-string", "substring"], C_STR
    )

    candidates += simple_candidates("str", match_string, ["name", "names"], C_STR_NAMES)
    ## "? contains 'None if there is no' --> Union[Null, xxx]"
    # NOTE(review): backticks were stripped from match_string above, so the
    # "``None``" keyword looks like it can no longer match; plain "None" still does.
    candidates += simple_candidates(
        "None",
        match_string,
        ["``None``", "None"],
        C_NONE_RETURN,
        exclude=["previous value", "if there is no"],
    )

    candidates += object_candidates(match_string, C_OBJECTS)

    return candidates
429
+
430
+
431
def return_type_from_context(
    *, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
):
    """Return the best guess for the return type as a string.

    Thin best-effort wrapper around `_type_from_context`: any exception raised
    while determining the type results in "Incomplete".
    """
    try:
        best = _type_from_context(
            docstring=docstring, signature=signature, module=module, literal=literal
        )
        return str(best["type"])
    except Exception:
        # deliberately broad: a failed guess must never break the caller
        return "Incomplete"
442
+
443
+
444
def _type_from_context(
    *, docstring: Union[str, List[str]], signature: str, module: str, literal: bool = False
):  # -> Dict[str , Union[str,float]]:
    """Determine the return type of a function or method.

    Sources, in order of precedence:
      1. a return annotation ('-> <something>') in the signature, returned as-is
      2. an entry for '<module>.<function>' in LOOKUP_LIST
      3. the terminology used in the docstring:
         - a list of docstrings is joined with spaces to simplify parsing
         - a leading verb that indicates None adds a candidate
         - phrases such as 'Returns ...' / 'Gets ...' (or, when `literal` is set,
           the literal patterns) are searched with weighted regexes, and the
           matched text is rated by `distill_return`
         - the candidate with the highest confidence wins

    The fallback candidate is 'Incomplete' with confidence 0.

    Returns:
        A dict with the keys "type", "confidence" and "match".
    """
    if isinstance(docstring, list):
        # join with space to avoid ending at a newline
        docstring = " ".join(docstring)

    # if the signature contains a return type , then use that and do nothing else.
    if "->" in signature:
        return {
            "type": signature.split("->")[-1].strip(": "),
            "confidence": WEIGHT_LOOPUPS,
            "match": signature,
        }

    # ------------------------------------------------------
    # lookup returns that cannot be found based on the docstring from the lookup list
    # the (possibly dotted) name directly in front of the opening parenthesis
    name_matches = re.findall(r"[\w|.]+(?=\()", signature)
    if name_matches:
        local_name = name_matches[0]
    else:
        local_name = signature.strip().strip(":()")
    qualified_name = ".".join((module, local_name))

    if qualified_name in LOOKUP_LIST:
        return {
            "type": LOOKUP_LIST[qualified_name][0],
            "confidence": C_LOOKUP * WEIGHT_LOOPUPS,
            "match": qualified_name,
        }

    # ------------------------------------------------------
    # each regex carries its own weight, applied to every candidate it yields
    if literal:
        weighted_regex = [
            (RE_LIT_AS_A, 1.0),
            (RE_LIT_SENTENCE, 2.0),
        ]
    else:
        weighted_regex = [
            (RE_RETURN_VALUE, WEIGHT_RETURN_VAL),
            (RE_RETURN, WEIGHT_RETURNS),
            (RE_GETS, WEIGHT_GETS),
        ]

    candidates: List[Dict] = [{"match": "default", "type": "Incomplete", "confidence": 0}]

    # parse the docstring for simple start verbs, and add them as a candidate
    candidates += has_none_verb(docstring)

    # parse the docstring for the regexes and weigh the results accordingly
    for pattern, weight in weighted_regex:
        for match in re.finditer(pattern, docstring, re.MULTILINE | re.IGNORECASE):
            candidates.extend(
                {
                    "match": match,
                    "type": item["type"],
                    "confidence": item["confidence"] * weight,  # add search boost
                }
                for item in distill_return(match.group("return"))
            )

    # highest confidence wins; on a tie the earliest candidate is kept
    best = max(candidates, key=lambda c: c["confidence"])

    # ref: https://docs.python.org/3/library/typing.html#typing.Coroutine
    # Coroutine[YieldType, SendType, ReturnType]
    # NOTE(review): the detected type is placed in the first (YieldType) slot,
    # not the ReturnType slot - confirm this is intended.
    # todo: sanity check against actual code .....
    if "This is a coroutine" in docstring and "Coroutine" not in str(best["type"]):  # type: ignore
        best["type"] = f"Coroutine[{best['type']}, Any, Any]"

    return best