crosshair-tool 0.0.99__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. _crosshair_tracers.cpython-312-darwin.so +0 -0
  2. crosshair/__init__.py +42 -0
  3. crosshair/__main__.py +8 -0
  4. crosshair/_mark_stacks.h +790 -0
  5. crosshair/_preliminaries_test.py +18 -0
  6. crosshair/_tracers.h +94 -0
  7. crosshair/_tracers_pycompat.h +522 -0
  8. crosshair/_tracers_test.py +138 -0
  9. crosshair/abcstring.py +245 -0
  10. crosshair/auditwall.py +190 -0
  11. crosshair/auditwall_test.py +77 -0
  12. crosshair/codeconfig.py +113 -0
  13. crosshair/codeconfig_test.py +117 -0
  14. crosshair/condition_parser.py +1237 -0
  15. crosshair/condition_parser_test.py +497 -0
  16. crosshair/conftest.py +30 -0
  17. crosshair/copyext.py +155 -0
  18. crosshair/copyext_test.py +84 -0
  19. crosshair/core.py +1763 -0
  20. crosshair/core_and_libs.py +149 -0
  21. crosshair/core_regestered_types_test.py +82 -0
  22. crosshair/core_test.py +1316 -0
  23. crosshair/diff_behavior.py +314 -0
  24. crosshair/diff_behavior_test.py +261 -0
  25. crosshair/dynamic_typing.py +346 -0
  26. crosshair/dynamic_typing_test.py +210 -0
  27. crosshair/enforce.py +282 -0
  28. crosshair/enforce_test.py +182 -0
  29. crosshair/examples/PEP316/__init__.py +1 -0
  30. crosshair/examples/PEP316/bugs_detected/__init__.py +0 -0
  31. crosshair/examples/PEP316/bugs_detected/getattr_magic.py +16 -0
  32. crosshair/examples/PEP316/bugs_detected/hash_consistent_with_equals.py +31 -0
  33. crosshair/examples/PEP316/bugs_detected/shopping_cart.py +24 -0
  34. crosshair/examples/PEP316/bugs_detected/showcase.py +39 -0
  35. crosshair/examples/PEP316/correct_code/__init__.py +0 -0
  36. crosshair/examples/PEP316/correct_code/arith.py +60 -0
  37. crosshair/examples/PEP316/correct_code/chess.py +77 -0
  38. crosshair/examples/PEP316/correct_code/nesting_inference.py +17 -0
  39. crosshair/examples/PEP316/correct_code/numpy_examples.py +132 -0
  40. crosshair/examples/PEP316/correct_code/rolling_average.py +35 -0
  41. crosshair/examples/PEP316/correct_code/showcase.py +104 -0
  42. crosshair/examples/__init__.py +0 -0
  43. crosshair/examples/check_examples_test.py +146 -0
  44. crosshair/examples/deal/__init__.py +1 -0
  45. crosshair/examples/icontract/__init__.py +1 -0
  46. crosshair/examples/icontract/bugs_detected/__init__.py +0 -0
  47. crosshair/examples/icontract/bugs_detected/showcase.py +41 -0
  48. crosshair/examples/icontract/bugs_detected/wrong_sign.py +8 -0
  49. crosshair/examples/icontract/correct_code/__init__.py +0 -0
  50. crosshair/examples/icontract/correct_code/arith.py +51 -0
  51. crosshair/examples/icontract/correct_code/showcase.py +94 -0
  52. crosshair/fnutil.py +391 -0
  53. crosshair/fnutil_test.py +75 -0
  54. crosshair/fuzz_core_test.py +516 -0
  55. crosshair/libimpl/__init__.py +0 -0
  56. crosshair/libimpl/arraylib.py +161 -0
  57. crosshair/libimpl/binascii_ch_test.py +30 -0
  58. crosshair/libimpl/binascii_test.py +67 -0
  59. crosshair/libimpl/binasciilib.py +150 -0
  60. crosshair/libimpl/bisectlib_test.py +23 -0
  61. crosshair/libimpl/builtinslib.py +5228 -0
  62. crosshair/libimpl/builtinslib_ch_test.py +1191 -0
  63. crosshair/libimpl/builtinslib_test.py +3735 -0
  64. crosshair/libimpl/codecslib.py +86 -0
  65. crosshair/libimpl/codecslib_test.py +86 -0
  66. crosshair/libimpl/collectionslib.py +264 -0
  67. crosshair/libimpl/collectionslib_ch_test.py +252 -0
  68. crosshair/libimpl/collectionslib_test.py +332 -0
  69. crosshair/libimpl/copylib.py +23 -0
  70. crosshair/libimpl/copylib_test.py +18 -0
  71. crosshair/libimpl/datetimelib.py +2559 -0
  72. crosshair/libimpl/datetimelib_ch_test.py +354 -0
  73. crosshair/libimpl/datetimelib_test.py +112 -0
  74. crosshair/libimpl/decimallib.py +5257 -0
  75. crosshair/libimpl/decimallib_ch_test.py +78 -0
  76. crosshair/libimpl/decimallib_test.py +76 -0
  77. crosshair/libimpl/encodings/__init__.py +23 -0
  78. crosshair/libimpl/encodings/_encutil.py +187 -0
  79. crosshair/libimpl/encodings/ascii.py +44 -0
  80. crosshair/libimpl/encodings/latin_1.py +40 -0
  81. crosshair/libimpl/encodings/utf_8.py +93 -0
  82. crosshair/libimpl/encodings_ch_test.py +83 -0
  83. crosshair/libimpl/fractionlib.py +16 -0
  84. crosshair/libimpl/fractionlib_test.py +80 -0
  85. crosshair/libimpl/functoolslib.py +34 -0
  86. crosshair/libimpl/functoolslib_test.py +56 -0
  87. crosshair/libimpl/hashliblib.py +30 -0
  88. crosshair/libimpl/hashliblib_test.py +18 -0
  89. crosshair/libimpl/heapqlib.py +47 -0
  90. crosshair/libimpl/heapqlib_test.py +21 -0
  91. crosshair/libimpl/importliblib.py +18 -0
  92. crosshair/libimpl/importliblib_test.py +38 -0
  93. crosshair/libimpl/iolib.py +216 -0
  94. crosshair/libimpl/iolib_ch_test.py +128 -0
  95. crosshair/libimpl/iolib_test.py +19 -0
  96. crosshair/libimpl/ipaddresslib.py +8 -0
  97. crosshair/libimpl/itertoolslib.py +44 -0
  98. crosshair/libimpl/itertoolslib_test.py +44 -0
  99. crosshair/libimpl/jsonlib.py +984 -0
  100. crosshair/libimpl/jsonlib_ch_test.py +42 -0
  101. crosshair/libimpl/jsonlib_test.py +51 -0
  102. crosshair/libimpl/mathlib.py +179 -0
  103. crosshair/libimpl/mathlib_ch_test.py +44 -0
  104. crosshair/libimpl/mathlib_test.py +67 -0
  105. crosshair/libimpl/oslib.py +7 -0
  106. crosshair/libimpl/pathliblib_test.py +10 -0
  107. crosshair/libimpl/randomlib.py +178 -0
  108. crosshair/libimpl/randomlib_test.py +120 -0
  109. crosshair/libimpl/relib.py +846 -0
  110. crosshair/libimpl/relib_ch_test.py +169 -0
  111. crosshair/libimpl/relib_test.py +493 -0
  112. crosshair/libimpl/timelib.py +72 -0
  113. crosshair/libimpl/timelib_test.py +82 -0
  114. crosshair/libimpl/typeslib.py +15 -0
  115. crosshair/libimpl/typeslib_test.py +36 -0
  116. crosshair/libimpl/unicodedatalib.py +75 -0
  117. crosshair/libimpl/unicodedatalib_test.py +42 -0
  118. crosshair/libimpl/urlliblib.py +23 -0
  119. crosshair/libimpl/urlliblib_test.py +19 -0
  120. crosshair/libimpl/weakreflib.py +13 -0
  121. crosshair/libimpl/weakreflib_test.py +69 -0
  122. crosshair/libimpl/zliblib.py +15 -0
  123. crosshair/libimpl/zliblib_test.py +13 -0
  124. crosshair/lsp_server.py +261 -0
  125. crosshair/lsp_server_test.py +30 -0
  126. crosshair/main.py +973 -0
  127. crosshair/main_test.py +543 -0
  128. crosshair/objectproxy.py +376 -0
  129. crosshair/objectproxy_test.py +41 -0
  130. crosshair/opcode_intercept.py +601 -0
  131. crosshair/opcode_intercept_test.py +304 -0
  132. crosshair/options.py +218 -0
  133. crosshair/options_test.py +10 -0
  134. crosshair/patch_equivalence_test.py +75 -0
  135. crosshair/path_cover.py +209 -0
  136. crosshair/path_cover_test.py +138 -0
  137. crosshair/path_search.py +161 -0
  138. crosshair/path_search_test.py +52 -0
  139. crosshair/pathing_oracle.py +271 -0
  140. crosshair/pathing_oracle_test.py +21 -0
  141. crosshair/pure_importer.py +27 -0
  142. crosshair/pure_importer_test.py +16 -0
  143. crosshair/py.typed +0 -0
  144. crosshair/register_contract.py +273 -0
  145. crosshair/register_contract_test.py +190 -0
  146. crosshair/simplestructs.py +1165 -0
  147. crosshair/simplestructs_test.py +283 -0
  148. crosshair/smtlib.py +24 -0
  149. crosshair/smtlib_test.py +14 -0
  150. crosshair/statespace.py +1199 -0
  151. crosshair/statespace_test.py +108 -0
  152. crosshair/stubs_parser.py +352 -0
  153. crosshair/stubs_parser_test.py +43 -0
  154. crosshair/test_util.py +329 -0
  155. crosshair/test_util_test.py +26 -0
  156. crosshair/tools/__init__.py +0 -0
  157. crosshair/tools/check_help_in_doc.py +264 -0
  158. crosshair/tools/check_init_and_setup_coincide.py +119 -0
  159. crosshair/tools/generate_demo_table.py +127 -0
  160. crosshair/tracers.py +544 -0
  161. crosshair/tracers_test.py +154 -0
  162. crosshair/type_repo.py +151 -0
  163. crosshair/unicode_categories.py +589 -0
  164. crosshair/unicode_categories_test.py +27 -0
  165. crosshair/util.py +741 -0
  166. crosshair/util_test.py +173 -0
  167. crosshair/watcher.py +307 -0
  168. crosshair/watcher_test.py +107 -0
  169. crosshair/z3util.py +76 -0
  170. crosshair/z3util_test.py +11 -0
  171. crosshair_tool-0.0.99.dist-info/METADATA +144 -0
  172. crosshair_tool-0.0.99.dist-info/RECORD +176 -0
  173. crosshair_tool-0.0.99.dist-info/WHEEL +6 -0
  174. crosshair_tool-0.0.99.dist-info/entry_points.txt +3 -0
  175. crosshair_tool-0.0.99.dist-info/licenses/LICENSE +93 -0
  176. crosshair_tool-0.0.99.dist-info/top_level.txt +2 -0
@@ -0,0 +1,846 @@
1
+ import operator
2
+ import re
3
+ import sys
4
+ from array import array
5
+ from unicodedata import category
6
+
7
+ if sys.version_info < (3, 11):
8
+ import sre_parse as re_parser
9
+ else:
10
+ import re._parser as re_parser # type: ignore
11
+
12
+ from sys import maxunicode
13
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
14
+
15
+ import z3 # type: ignore
16
+
17
+ from crosshair.core import deep_realize, realize, register_patch, with_realized_args
18
+ from crosshair.libimpl.builtinslib import AnySymbolicStr, BytesLike, SymbolicInt
19
+ from crosshair.statespace import context_statespace
20
+ from crosshair.tracers import NoTracing, ResumedTracing, is_tracing
21
+ from crosshair.unicode_categories import CharMask, get_unicode_categories
22
+ from crosshair.util import CrossHairInternal, CrossHairValue, debug, is_iterable
23
+
24
# Module-level aliases for the opcode constants (and the `parse` entry point)
# provided by the regex parser module imported above as `re_parser`
# (`sre_parse` before Python 3.11, `re._parser` from 3.11 on).
# The matcher below compares parsed-pattern opcodes against these names.
ANY = re_parser.ANY
ASSERT = re_parser.ASSERT
ASSERT_NOT = re_parser.ASSERT_NOT
AT = re_parser.AT
AT_BEGINNING = re_parser.AT_BEGINNING
AT_BEGINNING_STRING = re_parser.AT_BEGINNING_STRING
AT_BOUNDARY = re_parser.AT_BOUNDARY
AT_END = re_parser.AT_END
AT_END_STRING = re_parser.AT_END_STRING
AT_NON_BOUNDARY = re_parser.AT_NON_BOUNDARY
BRANCH = re_parser.BRANCH
CATEGORY = re_parser.CATEGORY
CATEGORY_DIGIT = re_parser.CATEGORY_DIGIT
CATEGORY_NOT_DIGIT = re_parser.CATEGORY_NOT_DIGIT
CATEGORY_NOT_SPACE = re_parser.CATEGORY_NOT_SPACE
CATEGORY_NOT_WORD = re_parser.CATEGORY_NOT_WORD
CATEGORY_SPACE = re_parser.CATEGORY_SPACE
CATEGORY_WORD = re_parser.CATEGORY_WORD
IN = re_parser.IN
LITERAL = re_parser.LITERAL
MAX_REPEAT = re_parser.MAX_REPEAT
# NOTE: MAXREPEAT (no underscore) is the "unbounded" repeat count, whereas
# MAX_REPEAT / MIN_REPEAT are the greedy / non-greedy repetition opcodes.
MAXREPEAT = re_parser.MAXREPEAT
MIN_REPEAT = re_parser.MIN_REPEAT
NEGATE = re_parser.NEGATE
NOT_LITERAL = re_parser.NOT_LITERAL
RANGE = re_parser.RANGE
SUBPATTERN = re_parser.SUBPATTERN
parse = re_parser.parse
52
+
53
+
54
class ReUnhandled(Exception):
    """Raised when a parsed regex uses a construct this module cannot match."""
56
+
57
+
58
# Concrete types accepted as bytes-like regex subjects, and the same set plus
# `str`; both are used for isinstance() validation in _check_str_or_bytes().
_ALL_BYTES_TYPES = (bytes, bytearray, memoryview, array)
_STR_AND_BYTES_TYPES = (str, *_ALL_BYTES_TYPES)

# Pre-built character masks. CharMask takes a list whose items are either a
# single codepoint or a (low, high) pair; the `maxunicode + 1` upper bound
# below suggests pairs are half-open -- TODO confirm against CharMask.
_NO_CHAR = CharMask([])
_ANY_CHAR = CharMask([(0, maxunicode + 1)])
# What "." matches when re.DOTALL is not set:
_ANY_NON_NEWLINE_CHAR = _ANY_CHAR.subtract(CharMask([ord("\n")]))
_ASCII_CHAR = CharMask([(0, 128)])
# Codepoints 9..13 (tab, LF, VT, FF, CR -- assuming a half-open range) and 32 (space):
_ASCII_WHITESPACE_CHAR = CharMask([(9, 14), 32])
_UNICODE_WHITESPACE_CHAR = _ASCII_WHITESPACE_CHAR.union(
    CharMask(
        [
            # NOTE: Although 28-31 are in the ASCII range, they only count as whitespace
            # when matching in unicode mode:
            (28, 32),
            133,
            160,
            5760,
            (8192, 8203),
            (8232, 8234),
            8239,
            8287,
            12288,
        ]
    )
)
82
+
83
_CASEABLE_CHARS = None


def caseable_chars():
    """
    Return one string containing every codepoint outside the "Lo" category.

    The "Other Letter" (Lo) category is large and its members are expected to
    be unaffected by case folding, so they are left out to keep the string
    (and the regex scans performed over it) smaller.

    The string is computed on first use and cached in ``_CASEABLE_CHARS``;
    later calls return the very same object.
    """
    global _CASEABLE_CHARS
    if _CASEABLE_CHARS is None:
        codepoints = []
        for i in range(sys.maxunicode + 1):
            ch = chr(i)
            # Exclude the (large) "Other Letter" group that doesn't caseswap.
            # NOTE: compare with `==`; `in ("Lo")` would be a substring test
            # against the string "Lo", not a tuple membership test.
            if category(ch) == "Lo":
                # Sanity check of the assumption that justifies the exclusion:
                assert ch.casefold() == ch
            else:
                codepoints.append(ch)

        _CASEABLE_CHARS = "".join(codepoints)
    return _CASEABLE_CHARS
100
+
101
+
102
_UNICODE_IGNORECASE_MASKS: Dict[int, CharMask] = {}  # codepoint -> CharMask


def unicode_ignorecase_mask(cp: int) -> CharMask:
    """
    Return the mask of codepoints that match ``chr(cp)`` case-insensitively.

    The answer is found by running a real ``re.IGNORECASE`` regex for the
    single character over ``caseable_chars()``; results are memoized per
    codepoint in ``_UNICODE_IGNORECASE_MASKS``.

    :param cp: The codepoint of the literal character in the pattern.
    :return: A CharMask covering ``cp`` and every case variant of it.
    """
    mask = _UNICODE_IGNORECASE_MASKS.get(cp)
    if mask is None:
        chars = caseable_chars()
        # The character must be escaped: it is a literal, but may coincide with
        # a regex metacharacter (".", "(", "*", ...), which would otherwise
        # match far too much or fail to compile.
        matches = re.compile(re.escape(chr(cp)), re.IGNORECASE).findall(chars)
        # Always include cp itself: caseable_chars() omits "Lo" codepoints, so
        # a literal from that category would otherwise get an empty mask.
        mask = CharMask(sorted({cp, *(ord(c) for c in matches)}))
        _UNICODE_IGNORECASE_MASKS[cp] = mask
    return mask
113
+
114
+
115
def single_char_mask(
    parsed: Tuple[object, Any], flags: int, ord=ord, chr=chr
) -> Optional[CharMask]:
    """
    Translate one parsed regex node into the set of characters it accepts.

    `parsed` is an (opcode, argument) pair such as the items produced by the
    regex parser's parse() function, and `flags` is the regex flag bitmask.
    `ord` and `chr` may be overridden by the caller (they are only used here
    for case-insensitive ranges).

    Returns None when the node does not describe a single character.
    Otherwise returns a CharMask of the accepted codepoints.
    Raises ReUnhandled when the node is a single-character construct that
    cannot be translated (an unknown category, or a character class holding
    something other than single-character terms).
    """
    opcode, operand = parsed
    ascii_only = re.ASCII & flags
    fold_case = re.IGNORECASE & flags

    if opcode in (LITERAL, NOT_LITERAL):
        mask = unicode_ignorecase_mask(operand) if fold_case else CharMask([operand])
        if opcode is NOT_LITERAL:
            mask = mask.invert()
    elif opcode is RANGE:
        low, high = operand
        if fold_case:
            # TODO: among other issues, this doesn't handle multi-codepoint caseswaps:
            lowered = (ord(chr(low).lower()), ord(chr(high).lower()) + 1)
            uppered = (ord(chr(low).upper()), ord(chr(high).upper()) + 1)
            mask = CharMask([lowered, uppered])
        else:
            mask = CharMask([(low, high + 1)])
    elif opcode is IN:
        # A leading NEGATE term marks a complemented class, e.g. [^abc].
        negated = operand and operand[0][0] is NEGATE
        terms = operand[1:] if negated else operand
        mask = CharMask([])
        for term in terms:
            term_mask = single_char_mask(term, flags, ord=ord, chr=chr)
            if term_mask is None:
                raise ReUnhandled("IN contains non-single-char expression")
            mask = mask.union(term_mask)
        if negated:
            mask = mask.invert()
    elif opcode is CATEGORY:
        categories = get_unicode_categories()
        if operand in (CATEGORY_SPACE, CATEGORY_NOT_SPACE):
            # Whitespace picks its ASCII/unicode variant up front and returns
            # directly, skipping the ASCII intersection at the bottom.
            whitespace = (
                _ASCII_WHITESPACE_CHAR if ascii_only else _UNICODE_WHITESPACE_CHAR
            )
            if operand == CATEGORY_SPACE:
                return whitespace
            return whitespace.invert()
        if operand == CATEGORY_DIGIT:
            mask = categories["Nd"]
        elif operand == CATEGORY_NOT_DIGIT:
            mask = categories["Nd"].invert()
        elif operand == CATEGORY_WORD:
            mask = categories["word"]
        elif operand == CATEGORY_NOT_WORD:
            mask = categories["word"].invert()
        else:
            raise ReUnhandled("Unsupported category: ", operand)
    elif opcode is ANY and operand is None:
        # TODO: test dot under ascii mode (seems like we should fall through to the re.ASCII check below)
        if re.DOTALL & flags:
            return _ANY_CHAR
        return _ANY_NON_NEWLINE_CHAR
    else:
        # Not a single-character construct.
        return None

    if ascii_only:
        # TODO: this is probably expensive!
        mask = mask.intersect(_ASCII_CHAR)
    return mask
185
+
186
+
187
# A (start, end) pair of string offsets for one regex group; the end offset
# may be a symbolic integer.
Span = Tuple[int, Union[int, SymbolicInt]]
188
+
189
+
190
def _traced_binop(a, op, b):
    """
    Compute ``op(a, b)``, inside ResumedTracing() if an operand is a CrossHairValue.

    When neither operand is a CrossHairValue the operator is applied
    directly, without entering a ResumedTracing() context.
    """
    involves_symbolic = isinstance(a, CrossHairValue) or isinstance(b, CrossHairValue)
    if not involves_symbolic:
        return op(a, b)
    with ResumedTracing():
        return op(a, b)
195
+
196
+
197
+ class _MatchPart:
198
+ def __init__(self, groups: List[Optional[Span]]):
199
+ self._groups = groups
200
+
201
+ def _fullspan(self) -> Span:
202
+ span = self._groups[0]
203
+ assert span is not None
204
+ return span
205
+
206
+ def _clamp_all_spans(self, start, end):
207
+ groups = self._groups
208
+ for idx, span in enumerate(groups):
209
+ if span is not None:
210
+ (span_start, span_end) = span
211
+ with ResumedTracing():
212
+ if span_start == span_end:
213
+ if span_start < start:
214
+ groups[idx] = (start, start)
215
+ if span_start > end:
216
+ groups[idx] = (end, end)
217
+
218
+ def isempty(self):
219
+ (start, end) = self._groups[0]
220
+ return _traced_binop(end, operator.le, start)
221
+
222
+ def __bool__(self):
223
+ return True
224
+
225
+ def __repr__(self):
226
+ return f"<re.Match object; span={self.span()!r}, match={self.group()!r}>"
227
+
228
+ def _add_match(self, suffix_match: "_MatchPart") -> "_MatchPart":
229
+ groups: List[Optional[Span]] = [None] * max(
230
+ len(self._groups), len(suffix_match._groups)
231
+ )
232
+ for idx, g in enumerate(self._groups):
233
+ groups[idx] = g
234
+ for idx, g in enumerate(suffix_match._groups):
235
+ if g is not None:
236
+ groups[idx] = g
237
+ my_start = self._fullspan()[0]
238
+ suffix_end = suffix_match._fullspan()[1]
239
+ groups[0] = (my_start, suffix_end)
240
+ return _MatchPart(groups)
241
+
242
+ def start(self, group=0):
243
+ return self._groups[group][0]
244
+
245
+ def end(self, group=0):
246
+ return self._groups[group][1]
247
+
248
+ def span(self, group=0):
249
+ return self._groups[group]
250
+
251
+
252
+ _BACKREF_RE_SOURCE = rb"""
253
+ (?P<prefix> .*?)
254
+ \\
255
+ (?:
256
+ # Note that earlier matches are preferred in regex unions like this:
257
+ (?P<num> [1-9][0-9]? ) |
258
+ g\< (?P<namednum> \s*\+?\d+\s* ) \> |
259
+ g\< (?P<named> \w+ ) \> |
260
+ g\< (?P<namedother> .* ) \>
261
+ )
262
+ (?P<suffix> .*)
263
+ """
264
+ _BACKREF_BYTES_RE = re.compile(_BACKREF_RE_SOURCE, re.VERBOSE | re.MULTILINE)
265
+ _BACKREF_STR_RE = re.compile(
266
+ str(_BACKREF_RE_SOURCE, "ascii"), re.VERBOSE | re.MULTILINE
267
+ )
268
+
269
+
270
class _Match(_MatchPart):
    """
    A stand-in for ``re.Match`` whose subject string and spans may be symbolic.

    Mirrors the documented ``re.Match`` attributes (pos, endpos, re, string,
    lastindex, lastgroup) and accessor methods.
    """

    def __init__(self, groups, pos, endpos, regex, orig_str):
        # fill None in unmatched groups:
        while len(groups) < regex.groups + 1:
            groups.append(None)
        super().__init__(groups)
        self.pos = pos
        if endpos is None:
            # The subject may be symbolic, so take its length with tracing on:
            with ResumedTracing():
                self.endpos = len(orig_str)
        else:
            self.endpos = endpos
        self.re = regex
        self.string = orig_str

        # Compute lastindex & lastgroup:
        # Group 0 (the whole match) is skipped: lastindex must stay None when
        # no capture group participated. lastgroup is the name of that same
        # group, or None when it is unnamed.
        self.lastindex, self.lastgroup = None, None
        _idx_to_name = {num: name for (name, num) in regex.groupindex.items()}
        for idx, grp in enumerate(groups):
            if idx == 0 or grp is None:
                continue
            self.lastindex = idx
        if self.lastindex is not None:
            self.lastgroup = _idx_to_name.get(self.lastindex)

    def __ch_deep_realize__(self, memo):
        # We cannot manually create realistic Match instances.
        # Realize our contents - it's better than nothing
        return _Match(
            deep_realize(self._groups, memo),
            realize(self.pos),
            realize(self.endpos),
            deep_realize(self.re, memo),
            realize(self.string),
        )

    def __getitem__(self, idx):
        return self.group(idx)

    def expand(self, template):
        """
        Substitute group references (``\\1``, ``\\g<1>``, ``\\g<name>``) in `template`.

        Groups that did not participate expand to an empty string.
        Raises re.error for a malformed ``\\g<...>`` reference.
        """
        backref_re = _BACKREF_STR_RE if isinstance(template, str) else _BACKREF_BYTES_RE
        with NoTracing():
            template = realize(template)  # Usually this is a literal string
            match = backref_re.fullmatch(template)
            if match is None:
                # No (more) group references.
                return template
            prefix, num, namednum, named, _, suffix = match.groups()
        if num or namednum:
            replacement = self.group(int(num or namednum))
        elif named:
            replacement = self.group(named)
        else:
            # re.error requires a message; raising the bare class would
            # surface as a TypeError instead.
            raise re.error("bad group reference in template")
        if replacement is None:
            # Unmatched group: an empty str/bytes of the template's type.
            replacement = template[:0]
        # Only the first reference was consumed; recurse for the rest.
        return prefix + replacement + self.expand(suffix)

    def group(self, *nums):
        """
        Return the text of one or more groups (by index or name).

        With no arguments, returns the whole match. With one argument, returns
        a single value; with several, a tuple. Groups that did not participate
        yield None. Raises IndexError for an unknown group name or index.
        """
        if not nums:
            nums = (0,)
        ret: List[Optional[str]] = []
        for num in nums:
            if isinstance(num, str):
                try:
                    num = self.re.groupindex[num]
                except KeyError:
                    raise IndexError("no such group") from None
            span = self._groups[num]
            if span is None:
                ret.append(None)
            else:
                start, end = span
                ret.append(self.string[start:end])
        if len(nums) == 1:
            return ret[0]
        else:
            return tuple(ret)

    def groups(self, default=None):
        """Return the text of every capture group; `default` fills in unmatched ones."""
        return tuple(
            default if self._groups[i] is None else self.group(i)
            for i in range(1, len(self._groups))
        )

    def groupdict(self, default=None):
        """Return a dict from group name to matched text; `default` fills in unmatched ones."""
        groups = self._groups
        ret = {}
        for name, idx in self.re.groupindex.items():
            group_range = groups[idx]
            if group_range is None:
                ret[name] = default
            else:
                start, end = group_range
                ret[name] = self.string[start:end]
        return ret
357
+
358
+
359
+ _REMOVE = object()
360
+
361
+
362
+ def _patt_replace(list_tree: List, from_obj: object, to_obj: object = _REMOVE) -> List:
363
+ """
364
+ >>> _patt_replace([[], [2, None]], None, 3)
365
+ [[], [2, 3]]
366
+ >>> _patt_replace([[], [None, 7]], None, _REMOVE)
367
+ [[], [7]]
368
+ """
369
+ for idx, child in enumerate(list_tree):
370
+ if child is from_obj:
371
+ if to_obj is _REMOVE:
372
+ return list_tree[:idx] + list_tree[idx + 1 :]
373
+ else:
374
+ return [(to_obj if x is from_obj else x) for x in list_tree]
375
+ if not is_iterable(child):
376
+ continue
377
+ newchild = _patt_replace(child, from_obj, to_obj)
378
+ if newchild is not child:
379
+ # Found it; make a copy of this list with the new item:
380
+ newlist = list(list_tree)
381
+ newlist[idx] = newchild
382
+ return newlist
383
+ # nothing changed; re-use the original list
384
+ return list_tree
385
+
386
+
387
# Sentinel opcode: a synthetic (marker, (group_number, start_offset)) node that
# is spliced into the pattern list right after a capture group's contents, so
# that the group's span can be recorded once matching reaches that point.
_END_GROUP_MARKER = object()


def _internal_match_patterns(
    top_patterns: Any,
    flags: int,
    string: AnySymbolicStr,
    offset: int,
    allow_empty: bool = True,
    ord=ord,
    chr=chr,
) -> Optional[_MatchPart]:
    """
    Match a sequence of parsed regex nodes against `string`, starting at `offset`.

    The first node of `top_patterns` is handled here; the remaining nodes are
    matched by recursive calls, which is how backtracking is implemented.
    Returns a _MatchPart on success and None on failure; `allow_empty=False`
    rejects a zero-length overall match. Raises ReUnhandled for node types
    that are not supported.

    >>> import sre_parse
    >>> from crosshair.core_and_libs import standalone_statespace, NoTracing
    >>> from crosshair.libimpl.builtinslib import LazyIntSymbolicStr
    >>> with standalone_statespace, NoTracing():
    ...     string = LazyIntSymbolicStr(list(map(ord, 'aabb')))
    ...     _internal_match_patterns(sre_parse.parse('a+'), 0, string, 0).span()
    ...     _internal_match_patterns(sre_parse.parse('ab'), 0, string, 1).span()
    (0, 2)
    (1, 3)
    """
    space = context_statespace()
    with ResumedTracing():
        # The not-yet-consumed tail of the subject (used for end-of-string tests):
        matchablestr = string[offset:] if offset > 0 else string

    if len(top_patterns) == 0:
        # Nothing left to match: succeed with an empty match (if permitted).
        return _MatchPart([(offset, offset)]) if allow_empty else None
    pattern = top_patterns[0]

    def continue_matching(prefix):
        # Match the rest of the pattern list after `prefix`, and join the two.
        # Once the prefix consumed something, an empty remainder is acceptable.
        sub_allow_empty = allow_empty if prefix.isempty() else True
        suffix = _internal_match_patterns(
            top_patterns[1:],
            flags,
            string,
            prefix.end(),
            sub_allow_empty,
            ord=ord,
            chr=chr,
        )
        if suffix is None:
            return None
        return prefix._add_match(suffix)

    # TODO: using a typed internal function triggers __hash__es inside the typing module.
    # Seems like this casues nondeterminism due to a global LRU cache used by the typing module.
    def fork_on(expr, sz):
        # Ask the state space to decide the SMT expression `expr`; when it is
        # taken as true, treat the current node as matching `sz` characters.
        if space.smt_fork(expr):
            return continue_matching(
                _MatchPart([(offset, _traced_binop(offset, operator.add, sz))])
            )
        else:
            return None

    # Case 1: the node matches exactly one character.
    mask = single_char_mask(pattern, flags, ord=ord, chr=chr)
    if mask is not None:
        with ResumedTracing():
            if any([offset < 0, offset >= len(string)]):
                return None
            char = ord(string[offset])
        if isinstance(char, int):  # Concrete int? Just check it!
            if mask.covers(char):
                return continue_matching(
                    _MatchPart([(offset, _traced_binop(offset, operator.add, 1))])
                )
            else:
                return None
        smt_ch = SymbolicInt._coerce_to_smt_sort(char)
        return fork_on(mask.smt_matches(smt_ch), 1)

    # Case 2: structural nodes.
    (op, arg) = pattern
    if op in (MIN_REPEAT, MAX_REPEAT):
        (min_repeat, max_repeat, subpattern) = arg
        if max_repeat < min_repeat:
            return None
        # First, consume the mandatory repetitions:
        reps = 0
        overall_match = _MatchPart([(offset, offset)])
        while reps < min_repeat:
            submatch = _internal_match_patterns(
                subpattern, flags, string, overall_match.end(), True, ord=ord, chr=chr
            )
            if submatch is None:
                return None
            overall_match = overall_match._add_match(submatch)
            reps += 1
        if max_repeat != MAXREPEAT and reps >= max_repeat:
            # No optional repetitions remain.
            return continue_matching(overall_match)

        if max_repeat == MAXREPEAT:
            remaining_reps = max_repeat
        else:
            remaining_reps = max_repeat - min_repeat

        if op is MIN_REPEAT:
            # Non-greedy match: try the shortest possible match first.
            short_match = continue_matching(overall_match)
            if short_match is not None:
                return short_match

        # Try one-or-more additional repetitions by rewriting this node's
        # argument to (1, remaining_reps, subpattern) and matching again
        # from where the mandatory repetitions ended:
        remaining_matcher = _patt_replace(
            top_patterns, arg, (1, remaining_reps, subpattern)
        )
        remainder_allow_empty = allow_empty or not overall_match.isempty()
        remainder_match = _internal_match_patterns(
            remaining_matcher,
            flags,
            string,
            overall_match.end(),
            remainder_allow_empty,
            ord=ord,
            chr=chr,
        )
        if remainder_match is not None:
            return overall_match._add_match(remainder_match)

        if op is MAX_REPEAT:
            # Greedy match: didn't match more repetitions - try from here.
            return continue_matching(overall_match)

        return None
    elif op is BRANCH and arg[0] is None:
        # NOTE: order matters - earlier branches are more greedily matched than later branches.
        branches = arg[1]
        # Inline the first alternative in front of the remaining nodes:
        first_path = list(branches[0]) + list(top_patterns)[1:]
        submatch = _internal_match_patterns(
            first_path, flags, string, offset, allow_empty, ord=ord, chr=chr
        )
        if submatch is not None:
            return submatch
        if len(branches) <= 1:
            return None
        else:
            # Retry with the first alternative removed from the branch list:
            return _internal_match_patterns(
                _patt_replace(top_patterns, branches, branches[1:]),
                flags,
                string,
                offset,
                allow_empty,
                ord=ord,
                chr=chr,
            )
    elif op is AT:
        # Zero-width position assertions.
        if arg in (AT_BEGINNING, AT_BEGINNING_STRING):
            begins_string = fork_on(SymbolicInt._coerce_to_smt_sort(offset) == 0, 0)
            if begins_string:
                return begins_string
            if arg is AT_BEGINNING and re.MULTILINE & flags:
                # In multiline mode, "^" also matches right after a newline.
                with ResumedTracing():
                    prev_char = ord(string[offset - 1])
                # NOTE(review): `ord` here is the (possibly overridden) parameter;
                # for bytes subjects it looks like an identity function, which
                # would leave "\n" as a str -- confirm against callers.
                return fork_on(
                    SymbolicInt._coerce_to_smt_sort(prev_char) == ord("\n"), 0
                )
            return None
        with ResumedTracing():
            matchable_len = len(matchablestr)
        ends_string = space.smt_fork(
            SymbolicInt._coerce_to_smt_sort(matchable_len) == 0
        )
        if arg in (AT_END, AT_END_STRING):
            if ends_string:
                return continue_matching(_MatchPart([(offset, offset)]))
            if arg is AT_END and re.MULTILINE & flags:
                # In multiline mode, "$" also matches right before a newline.
                with ResumedTracing():
                    next_char = ord(string[offset])
                return fork_on(
                    SymbolicInt._coerce_to_smt_sort(next_char) == ord("\n"), 0
                )
            return None
        elif arg in (AT_BOUNDARY, AT_NON_BOUNDARY):
            if ends_string or offset == 0:
                # At either edge of the subject this code treats the position
                # as a word boundary.
                if arg == AT_BOUNDARY:
                    return continue_matching(_MatchPart([(offset, offset)]))
                else:
                    assert arg == AT_NON_BOUNDARY
                    return None
            with ResumedTracing():
                left = ord(string[offset - 1])
                right = ord(string[offset])
            # A boundary exists when exactly one neighbor is a word character:
            wordmask = get_unicode_categories()["word"]
            left_expr = wordmask.smt_matches(SymbolicInt._coerce_to_smt_sort(left))
            right_expr = wordmask.smt_matches(SymbolicInt._coerce_to_smt_sort(right))
            at_boundary_expr = z3.Xor(left_expr, right_expr)
            if arg == AT_NON_BOUNDARY:
                at_boundary_expr = z3.Not(at_boundary_expr)
            return fork_on(at_boundary_expr, 0)
    elif op in (ASSERT, ASSERT_NOT):
        # Lookahead (direction 1) and lookbehind (direction -1) assertions.
        (direction_int, subpattern) = arg
        positive_look = op == ASSERT
        if direction_int == 1:
            matched = _internal_match_patterns(
                subpattern, flags, string, offset, True, ord=ord, chr=chr
            )
        else:
            assert direction_int == -1
            # Lookbehind is only supported for fixed-width subpatterns:
            minwidth, maxwidth = subpattern.getwidth()
            if minwidth != maxwidth:
                raise re.error("")
            rewound = offset - minwidth
            if rewound < 0:
                return None
            matched = _internal_match_patterns(
                subpattern, flags, string, rewound, True, ord=ord, chr=chr
            )
        if bool(matched) != bool(positive_look):
            return None
        # Assertions consume nothing; carry on from the same offset.
        return _internal_match_patterns(
            top_patterns[1:], flags, string, offset, allow_empty, ord=ord, chr=chr
        )
    elif op is SUBPATTERN:
        (groupnum, _a, _b, subpatterns) = arg
        if (_a, _b) != (0, 0):
            raise ReUnhandled("unsupported subpattern args")
        # Flatten the group: its contents, then an end marker that remembers
        # the group number and where the group began, then the remaining nodes.
        new_top = (
            list(subpatterns)
            + [(_END_GROUP_MARKER, (groupnum, offset))]
            + list(top_patterns)[1:]
        )
        return _internal_match_patterns(
            new_top, flags, string, offset, allow_empty, ord=ord, chr=chr
        )
    elif op is _END_GROUP_MARKER:
        (group_num, begin) = arg
        match = continue_matching(_MatchPart([(offset, offset)]))
        if match is None:
            return None
        # The rest matched; record this group's span as (begin, current offset).
        while len(match._groups) <= group_num:
            match._groups.append(None)
        match._groups[group_num] = (begin, offset)
        return match
    raise ReUnhandled(op)
619
+
620
+
621
def _match_pattern(
    compiled_regex: re.Pattern,
    orig_str: Union[AnySymbolicStr, BytesLike],
    pos: int,
    endpos: Optional[int] = None,
    subpattern: Optional[List] = None,
    allow_empty=True,
    ord=ord,
    chr=chr,
) -> Optional[_Match]:
    """
    Attempt a match of `compiled_regex` against `orig_str`, anchored at `pos`.

    Must be called with tracing disabled. `subpattern`, when given, is used
    in place of the parse of the regex's own pattern text. Characters at or
    beyond `endpos` are not visible to the matcher. Returns a _Match, or None
    when the pattern does not match at `pos`.
    """
    assert not is_tracing()
    nodes = subpattern
    if nodes is None:
        nodes = cast(List, parse(compiled_regex.pattern, compiled_regex.flags))
    # Slicing a (possibly symbolic) subject needs tracing switched back on:
    with ResumedTracing():
        visible_str = orig_str[:endpos]
    part = _internal_match_patterns(
        nodes, compiled_regex.flags, visible_str, pos, allow_empty, ord=ord, chr=chr
    )
    if part is None:
        return None
    span_start, span_end = part._fullspan()
    whole_match_is_empty = _traced_binop(span_start, operator.eq, span_end)
    if whole_match_is_empty:
        part._clamp_all_spans(0, len(orig_str))
    return _Match(part._groups, pos, endpos, compiled_regex, orig_str)
651
+
652
+
653
def _compile(*a):
    """Call ``re._compile`` on deeply realized arguments, with tracing off."""
    # Symbolic regexes aren't supported, and it's expensive to perform compilation
    # with tracing enabled.
    with NoTracing():
        concrete_args = deep_realize(a)
        return re._compile(*concrete_args)
658
+
659
+
660
def _check_str_or_bytes(patt: re.Pattern, obj: Any):
    """
    Validate that `obj` is a suitable subject for `patt`.

    :return: A ``(chr, ord)`` pair of converters for the subject kind: the
        builtins for text, or bytes-oriented equivalents for bytes-like
        subjects (where indexing already yields integers).
    :raises TypeError: If `patt` is not a compiled pattern, if `obj` is neither
        a string nor bytes-like, or if a string pattern is paired with a
        bytes-like subject (or vice versa).
    """
    if not isinstance(patt, re.Pattern):
        raise TypeError  # TODO: e.g. "descriptor 'search' for 're.Pattern' objects doesn't apply to a 'str' object"
    if not isinstance(obj, _STR_AND_BYTES_TYPES):
        raise TypeError(
            f"expected string or bytes-like object, got '{type(obj).__name__}'"
        )
    if isinstance(patt.pattern, str):
        if isinstance(obj, str):
            return (chr, ord)
        # String pattern, bytes-like subject:
        raise TypeError("cannot use a string pattern on a bytes-like object")
    else:
        if isinstance(obj, _ALL_BYTES_TYPES):
            return (lambda i: bytes([i]), lambda i: i)
        # Bytes pattern, string subject:
        raise TypeError("cannot use a bytes pattern on a string-like object")
673
+
674
+
675
def _finditer_symbolic(
    patt: re.Pattern, string: AnySymbolicStr, pos: int, endpos: int, chr=chr, ord=ord
) -> Iterable[_Match]:
    """
    Yield successive matches of `patt` in `string` between `pos` and `endpos`.

    Bookkeeping runs inside NoTracing() blocks; each match is yielded outside
    of them. May raise ReUnhandled (from the matcher) for unsupported patterns.
    """
    last_match_was_empty = False
    while True:
        with NoTracing():
            if pos > endpos:
                break
            # After an empty match, the next attempt at the same position must
            # consume something; otherwise the same empty match would repeat.
            allow_empty = not last_match_was_empty
            match = _match_pattern(
                patt, string, pos, endpos, allow_empty=allow_empty, chr=chr, ord=ord
            )
            last_match_was_empty = False
            if not match:
                # No match starts here; try the next position.
                pos += 1
                continue
        yield match
        with NoTracing():
            if match.start() == match.end():
                if not allow_empty:
                    raise CrossHairInternal("Unexpected empty match")
                # Stay at this position, but forbid another empty match.
                last_match_was_empty = True
            else:
                # Resume scanning right after the consumed text.
                pos = match.end()
699
+
700
+
701
def _finditer(
    self: re.Pattern,
    string: Union[str, AnySymbolicStr, bytes],
    pos: int = 0,
    endpos: Optional[int] = None,
) -> Iterable[Union[re.Match, _Match]]:
    """
    Symbolic-aware counterpart of ``re.Pattern.finditer``.

    For symbolic strings, matches are produced by _finditer_symbolic(); if the
    pattern turns out to be unsupported (ReUnhandled), or the string is not
    symbolic, the string is realized and the real ``re.Pattern.finditer`` is
    used instead.

    Raises TypeError for invalid argument types.
    """
    chr, ord = _check_str_or_bytes(self, string)
    if not isinstance(pos, int):
        raise TypeError
    if not (endpos is None or isinstance(endpos, int)):
        raise TypeError
    pos, endpos = realize(pos), realize(endpos)
    strlen = len(string)
    with NoTracing():
        if isinstance(string, AnySymbolicStr):
            # Normalize pos/endpos (negative, None, out-of-range) against the
            # realized length, using slice semantics:
            pos, endpos, _ = slice(pos, endpos, 1).indices(realize(strlen))
            with ResumedTracing():
                try:
                    yield from _finditer_symbolic(
                        self, string, pos, endpos, chr=chr, ord=ord
                    )
                    return
                except ReUnhandled as e:
                    debug("Unsupported symbolic regex", self.pattern, e)
        # Fallback: run the real implementation on a concrete string.
        if endpos is None:
            yield from re.Pattern.finditer(self, realize(string), pos)
        else:
            yield from re.Pattern.finditer(self, realize(string), pos, endpos)
729
+
730
+
731
def _fullmatch(
    self: re.Pattern, string: Union[str, AnySymbolicStr, bytes], pos=0, endpos=None
):
    """
    Replacement for ``re.Pattern.fullmatch``.

    For symbolic strings and ``BytesLike`` values, the parsed pattern is
    extended with an end-of-string assertion and handed to ``_match_pattern``.
    If that raises ``ReUnhandled`` (or the input is of another kind), the
    string is realized and the builtin ``fullmatch`` is used instead.
    """
    with NoTracing():
        if isinstance(string, (AnySymbolicStr, BytesLike)):
            with ResumedTracing():
                to_chr, to_ord = _check_str_or_bytes(self, string)
                try:
                    anchored = cast(List, parse(self.pattern, self.flags))
                    # Require the match to extend to the end of the string.
                    anchored.append((AT, AT_END_STRING))
                    return _match_pattern(
                        self, string, pos, endpos, anchored, chr=to_chr, ord=to_ord
                    )
                except ReUnhandled as e:
                    debug("Unsupported symbolic regex", self.pattern, e)
        # Concrete fallback; only pass endpos along when one is set.
        window = (pos,) if endpos is None else (pos, endpos)
        return re.Pattern.fullmatch(self, realize(string), *window)
750
+
751
+
752
def _match(
    self, string: Union[str, AnySymbolicStr], pos=0, endpos=None
) -> Union[None, re.Match, _Match]:
    """
    Replacement for ``re.Pattern.match``.

    Symbolic strings and ``BytesLike`` values are handed to ``_match_pattern``.
    If that raises ``ReUnhandled`` (or the input is of another kind), the
    string is realized and the builtin ``match`` is used instead.
    """
    with NoTracing():
        if isinstance(string, (AnySymbolicStr, BytesLike)):
            with ResumedTracing():
                to_chr, to_ord = _check_str_or_bytes(self, string)
                try:
                    return _match_pattern(
                        self, string, pos, endpos, chr=to_chr, ord=to_ord
                    )
                except ReUnhandled as e:
                    debug("Unsupported symbolic regex", self.pattern, e)
        # Concrete fallback; only pass endpos along when one is set.
        window = (pos,) if endpos is None else (pos, endpos)
        return re.Pattern.match(self, realize(string), *window)
767
+
768
+
769
def _search(
    self: re.Pattern,
    string: Union[str, AnySymbolicStr, bytes],
    pos: int = 0,
    endpos: Optional[int] = None,
) -> Union[None, re.Match, _Match]:
    """
    Replacement for ``re.Pattern.search``.

    For symbolic strings and ``BytesLike`` values, ``_match_pattern`` is tried
    at each start position in the requested window, and the first match found
    is returned. If that raises ``ReUnhandled`` (or the input is of another
    kind), the string is realized and the builtin ``search`` is used instead.

    :param pos: Index at which to start searching.
    :param endpos: Index at which to stop searching (``None`` means the end of
        the string).
    :return: The first match found, or ``None``.
    :raises TypeError: If the arguments have unsuitable types.
    """
    chr, ord = _check_str_or_bytes(self, string)
    if not isinstance(pos, int):
        raise TypeError
    if not (endpos is None or isinstance(endpos, int)):
        raise TypeError
    pos, endpos = realize(pos), realize(endpos)
    mylen = string.__len__()
    with NoTracing():
        if isinstance(string, (AnySymbolicStr, BytesLike)):
            # Normalize the window against the (realized) length of the string.
            pos, endpos, _ = slice(pos, endpos, 1).indices(realize(mylen))
            try:
                # The bound is inclusive: a pattern that can match the empty
                # string (e.g. "$" or "x*") may match at the very end of the
                # window, so that position must be attempted too.
                while pos <= endpos:
                    match = _match_pattern(self, string, pos, endpos, chr=chr, ord=ord)
                    if match:
                        return match
                    pos += 1
                return None
            except ReUnhandled as e:
                debug("Unsupported symbolic regex", self.pattern, e)
        if endpos is None:
            return re.Pattern.search(self, realize(string), pos)
        else:
            return re.Pattern.search(self, realize(string), pos, endpos)
798
+
799
+
800
def _sub(self, repl, string, count=0):
    """Replacement for ``re.Pattern.sub``: the string half of ``_subn``'s result."""
    return _subn(self, repl, string, count)[0]
803
+
804
+
805
def _subn(
    self: re.Pattern, repl: Union[str, Callable], string: str, count: int = 0
) -> Tuple[str, int]:
    """
    Replacement for ``re.Pattern.subn``.

    Walks the matches produced by ``self.finditer(string)`` and splices the
    replacement for each one between the unmatched stretches of ``string``.
    Matching always happens against the whole of ``string`` (never against a
    sliced remainder), so anchors and look-behind assertions see their real
    context, and empty matches neither drop characters nor loop forever.

    :param repl: A replacement template (expanded with ``match.expand``) or a
        callable that takes the match and returns the replacement.
    :param string: The text (or bytes) to perform replacements in.
    :param count: Maximum number of replacements; zero means no limit.
    :return: A ``(new_string, number_of_replacements)`` pair.
    :raises TypeError: If the arguments have unsuitable types.
    """
    if not isinstance(self, re.Pattern):
        raise TypeError
    if isinstance(repl, _STR_AND_BYTES_TYPES):
        _check_str_or_bytes(self, repl)

        def replfn(m):
            return m.expand(repl)

    elif callable(repl):
        replfn = repl
    else:
        raise TypeError
    _check_str_or_bytes(self, string)
    if not isinstance(count, int):
        raise TypeError
    if count < 0:
        # The builtin implementation performs no replacements at all for a
        # negative count (only zero means "unlimited").
        return (string, 0)
    result = string[:0]  # An empty value of the same kind as `string`.
    copied_upto = 0  # Everything in string[:copied_upto] is already handled.
    replacements = 0
    for match in self.finditer(string):
        result = result + string[copied_upto : match.start()] + replfn(match)
        copied_upto = match.end()
        replacements += 1
        # With count == 0 this is never true, so every match gets replaced.
        if replacements == count:
            break
    if replacements == 0:
        return (string, 0)
    return (result + string[copied_upto:], replacements)
835
+
836
+
837
def make_registrations():
    """Register this module's replacements for the ``re`` entry points."""
    pattern_cls = re.Pattern
    register_patch(re._compile, _compile)
    for original, replacement in (
        (pattern_cls.search, _search),
        (pattern_cls.match, _match),
        (pattern_cls.fullmatch, _fullmatch),
    ):
        register_patch(original, replacement)
    # No symbolic implementation for these; they run the builtin method on
    # realized arguments.
    for passthrough in (pattern_cls.split, pattern_cls.findall):
        register_patch(passthrough, with_realized_args(passthrough))
    for original, replacement in (
        (pattern_cls.finditer, _finditer),
        (pattern_cls.sub, _sub),
        (pattern_cls.subn, _subn),
    ):
        register_patch(original, replacement)