crosshair-tool 0.0.99__cp312-cp312-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _crosshair_tracers.cpython-312-darwin.so +0 -0
- crosshair/__init__.py +42 -0
- crosshair/__main__.py +8 -0
- crosshair/_mark_stacks.h +790 -0
- crosshair/_preliminaries_test.py +18 -0
- crosshair/_tracers.h +94 -0
- crosshair/_tracers_pycompat.h +522 -0
- crosshair/_tracers_test.py +138 -0
- crosshair/abcstring.py +245 -0
- crosshair/auditwall.py +190 -0
- crosshair/auditwall_test.py +77 -0
- crosshair/codeconfig.py +113 -0
- crosshair/codeconfig_test.py +117 -0
- crosshair/condition_parser.py +1237 -0
- crosshair/condition_parser_test.py +497 -0
- crosshair/conftest.py +30 -0
- crosshair/copyext.py +155 -0
- crosshair/copyext_test.py +84 -0
- crosshair/core.py +1763 -0
- crosshair/core_and_libs.py +149 -0
- crosshair/core_regestered_types_test.py +82 -0
- crosshair/core_test.py +1316 -0
- crosshair/diff_behavior.py +314 -0
- crosshair/diff_behavior_test.py +261 -0
- crosshair/dynamic_typing.py +346 -0
- crosshair/dynamic_typing_test.py +210 -0
- crosshair/enforce.py +282 -0
- crosshair/enforce_test.py +182 -0
- crosshair/examples/PEP316/__init__.py +1 -0
- crosshair/examples/PEP316/bugs_detected/__init__.py +0 -0
- crosshair/examples/PEP316/bugs_detected/getattr_magic.py +16 -0
- crosshair/examples/PEP316/bugs_detected/hash_consistent_with_equals.py +31 -0
- crosshair/examples/PEP316/bugs_detected/shopping_cart.py +24 -0
- crosshair/examples/PEP316/bugs_detected/showcase.py +39 -0
- crosshair/examples/PEP316/correct_code/__init__.py +0 -0
- crosshair/examples/PEP316/correct_code/arith.py +60 -0
- crosshair/examples/PEP316/correct_code/chess.py +77 -0
- crosshair/examples/PEP316/correct_code/nesting_inference.py +17 -0
- crosshair/examples/PEP316/correct_code/numpy_examples.py +132 -0
- crosshair/examples/PEP316/correct_code/rolling_average.py +35 -0
- crosshair/examples/PEP316/correct_code/showcase.py +104 -0
- crosshair/examples/__init__.py +0 -0
- crosshair/examples/check_examples_test.py +146 -0
- crosshair/examples/deal/__init__.py +1 -0
- crosshair/examples/icontract/__init__.py +1 -0
- crosshair/examples/icontract/bugs_detected/__init__.py +0 -0
- crosshair/examples/icontract/bugs_detected/showcase.py +41 -0
- crosshair/examples/icontract/bugs_detected/wrong_sign.py +8 -0
- crosshair/examples/icontract/correct_code/__init__.py +0 -0
- crosshair/examples/icontract/correct_code/arith.py +51 -0
- crosshair/examples/icontract/correct_code/showcase.py +94 -0
- crosshair/fnutil.py +391 -0
- crosshair/fnutil_test.py +75 -0
- crosshair/fuzz_core_test.py +516 -0
- crosshair/libimpl/__init__.py +0 -0
- crosshair/libimpl/arraylib.py +161 -0
- crosshair/libimpl/binascii_ch_test.py +30 -0
- crosshair/libimpl/binascii_test.py +67 -0
- crosshair/libimpl/binasciilib.py +150 -0
- crosshair/libimpl/bisectlib_test.py +23 -0
- crosshair/libimpl/builtinslib.py +5228 -0
- crosshair/libimpl/builtinslib_ch_test.py +1191 -0
- crosshair/libimpl/builtinslib_test.py +3735 -0
- crosshair/libimpl/codecslib.py +86 -0
- crosshair/libimpl/codecslib_test.py +86 -0
- crosshair/libimpl/collectionslib.py +264 -0
- crosshair/libimpl/collectionslib_ch_test.py +252 -0
- crosshair/libimpl/collectionslib_test.py +332 -0
- crosshair/libimpl/copylib.py +23 -0
- crosshair/libimpl/copylib_test.py +18 -0
- crosshair/libimpl/datetimelib.py +2559 -0
- crosshair/libimpl/datetimelib_ch_test.py +354 -0
- crosshair/libimpl/datetimelib_test.py +112 -0
- crosshair/libimpl/decimallib.py +5257 -0
- crosshair/libimpl/decimallib_ch_test.py +78 -0
- crosshair/libimpl/decimallib_test.py +76 -0
- crosshair/libimpl/encodings/__init__.py +23 -0
- crosshair/libimpl/encodings/_encutil.py +187 -0
- crosshair/libimpl/encodings/ascii.py +44 -0
- crosshair/libimpl/encodings/latin_1.py +40 -0
- crosshair/libimpl/encodings/utf_8.py +93 -0
- crosshair/libimpl/encodings_ch_test.py +83 -0
- crosshair/libimpl/fractionlib.py +16 -0
- crosshair/libimpl/fractionlib_test.py +80 -0
- crosshair/libimpl/functoolslib.py +34 -0
- crosshair/libimpl/functoolslib_test.py +56 -0
- crosshair/libimpl/hashliblib.py +30 -0
- crosshair/libimpl/hashliblib_test.py +18 -0
- crosshair/libimpl/heapqlib.py +47 -0
- crosshair/libimpl/heapqlib_test.py +21 -0
- crosshair/libimpl/importliblib.py +18 -0
- crosshair/libimpl/importliblib_test.py +38 -0
- crosshair/libimpl/iolib.py +216 -0
- crosshair/libimpl/iolib_ch_test.py +128 -0
- crosshair/libimpl/iolib_test.py +19 -0
- crosshair/libimpl/ipaddresslib.py +8 -0
- crosshair/libimpl/itertoolslib.py +44 -0
- crosshair/libimpl/itertoolslib_test.py +44 -0
- crosshair/libimpl/jsonlib.py +984 -0
- crosshair/libimpl/jsonlib_ch_test.py +42 -0
- crosshair/libimpl/jsonlib_test.py +51 -0
- crosshair/libimpl/mathlib.py +179 -0
- crosshair/libimpl/mathlib_ch_test.py +44 -0
- crosshair/libimpl/mathlib_test.py +67 -0
- crosshair/libimpl/oslib.py +7 -0
- crosshair/libimpl/pathliblib_test.py +10 -0
- crosshair/libimpl/randomlib.py +178 -0
- crosshair/libimpl/randomlib_test.py +120 -0
- crosshair/libimpl/relib.py +846 -0
- crosshair/libimpl/relib_ch_test.py +169 -0
- crosshair/libimpl/relib_test.py +493 -0
- crosshair/libimpl/timelib.py +72 -0
- crosshair/libimpl/timelib_test.py +82 -0
- crosshair/libimpl/typeslib.py +15 -0
- crosshair/libimpl/typeslib_test.py +36 -0
- crosshair/libimpl/unicodedatalib.py +75 -0
- crosshair/libimpl/unicodedatalib_test.py +42 -0
- crosshair/libimpl/urlliblib.py +23 -0
- crosshair/libimpl/urlliblib_test.py +19 -0
- crosshair/libimpl/weakreflib.py +13 -0
- crosshair/libimpl/weakreflib_test.py +69 -0
- crosshair/libimpl/zliblib.py +15 -0
- crosshair/libimpl/zliblib_test.py +13 -0
- crosshair/lsp_server.py +261 -0
- crosshair/lsp_server_test.py +30 -0
- crosshair/main.py +973 -0
- crosshair/main_test.py +543 -0
- crosshair/objectproxy.py +376 -0
- crosshair/objectproxy_test.py +41 -0
- crosshair/opcode_intercept.py +601 -0
- crosshair/opcode_intercept_test.py +304 -0
- crosshair/options.py +218 -0
- crosshair/options_test.py +10 -0
- crosshair/patch_equivalence_test.py +75 -0
- crosshair/path_cover.py +209 -0
- crosshair/path_cover_test.py +138 -0
- crosshair/path_search.py +161 -0
- crosshair/path_search_test.py +52 -0
- crosshair/pathing_oracle.py +271 -0
- crosshair/pathing_oracle_test.py +21 -0
- crosshair/pure_importer.py +27 -0
- crosshair/pure_importer_test.py +16 -0
- crosshair/py.typed +0 -0
- crosshair/register_contract.py +273 -0
- crosshair/register_contract_test.py +190 -0
- crosshair/simplestructs.py +1165 -0
- crosshair/simplestructs_test.py +283 -0
- crosshair/smtlib.py +24 -0
- crosshair/smtlib_test.py +14 -0
- crosshair/statespace.py +1199 -0
- crosshair/statespace_test.py +108 -0
- crosshair/stubs_parser.py +352 -0
- crosshair/stubs_parser_test.py +43 -0
- crosshair/test_util.py +329 -0
- crosshair/test_util_test.py +26 -0
- crosshair/tools/__init__.py +0 -0
- crosshair/tools/check_help_in_doc.py +264 -0
- crosshair/tools/check_init_and_setup_coincide.py +119 -0
- crosshair/tools/generate_demo_table.py +127 -0
- crosshair/tracers.py +544 -0
- crosshair/tracers_test.py +154 -0
- crosshair/type_repo.py +151 -0
- crosshair/unicode_categories.py +589 -0
- crosshair/unicode_categories_test.py +27 -0
- crosshair/util.py +741 -0
- crosshair/util_test.py +173 -0
- crosshair/watcher.py +307 -0
- crosshair/watcher_test.py +107 -0
- crosshair/z3util.py +76 -0
- crosshair/z3util_test.py +11 -0
- crosshair_tool-0.0.99.dist-info/METADATA +144 -0
- crosshair_tool-0.0.99.dist-info/RECORD +176 -0
- crosshair_tool-0.0.99.dist-info/WHEEL +6 -0
- crosshair_tool-0.0.99.dist-info/entry_points.txt +3 -0
- crosshair_tool-0.0.99.dist-info/licenses/LICENSE +93 -0
- crosshair_tool-0.0.99.dist-info/top_level.txt +2 -0
crosshair/libimpl/relib.py
@@ -0,0 +1,846 @@
import operator
import re
import sys
from array import array
from unicodedata import category

if sys.version_info < (3, 11):
    import sre_parse as re_parser
else:
    import re._parser as re_parser  # type: ignore

from sys import maxunicode
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast

import z3  # type: ignore

from crosshair.core import deep_realize, realize, register_patch, with_realized_args
from crosshair.libimpl.builtinslib import AnySymbolicStr, BytesLike, SymbolicInt
from crosshair.statespace import context_statespace
from crosshair.tracers import NoTracing, ResumedTracing, is_tracing
from crosshair.unicode_categories import CharMask, get_unicode_categories
from crosshair.util import CrossHairInternal, CrossHairValue, debug, is_iterable

ANY = re_parser.ANY
ASSERT = re_parser.ASSERT
ASSERT_NOT = re_parser.ASSERT_NOT
AT = re_parser.AT
AT_BEGINNING = re_parser.AT_BEGINNING
AT_BEGINNING_STRING = re_parser.AT_BEGINNING_STRING
AT_BOUNDARY = re_parser.AT_BOUNDARY
AT_END = re_parser.AT_END
AT_END_STRING = re_parser.AT_END_STRING
AT_NON_BOUNDARY = re_parser.AT_NON_BOUNDARY
BRANCH = re_parser.BRANCH
CATEGORY = re_parser.CATEGORY
CATEGORY_DIGIT = re_parser.CATEGORY_DIGIT
CATEGORY_NOT_DIGIT = re_parser.CATEGORY_NOT_DIGIT
CATEGORY_NOT_SPACE = re_parser.CATEGORY_NOT_SPACE
CATEGORY_NOT_WORD = re_parser.CATEGORY_NOT_WORD
CATEGORY_SPACE = re_parser.CATEGORY_SPACE
CATEGORY_WORD = re_parser.CATEGORY_WORD
IN = re_parser.IN
LITERAL = re_parser.LITERAL
MAX_REPEAT = re_parser.MAX_REPEAT
MAXREPEAT = re_parser.MAXREPEAT
MIN_REPEAT = re_parser.MIN_REPEAT
NEGATE = re_parser.NEGATE
NOT_LITERAL = re_parser.NOT_LITERAL
RANGE = re_parser.RANGE
SUBPATTERN = re_parser.SUBPATTERN
parse = re_parser.parse


class ReUnhandled(Exception):
    pass


_ALL_BYTES_TYPES = (bytes, bytearray, memoryview, array)
_STR_AND_BYTES_TYPES = (str, *_ALL_BYTES_TYPES)
_NO_CHAR = CharMask([])
_ANY_CHAR = CharMask([(0, maxunicode + 1)])
_ANY_NON_NEWLINE_CHAR = _ANY_CHAR.subtract(CharMask([ord("\n")]))
_ASCII_CHAR = CharMask([(0, 128)])
_ASCII_WHITESPACE_CHAR = CharMask([(9, 14), 32])
_UNICODE_WHITESPACE_CHAR = _ASCII_WHITESPACE_CHAR.union(
    CharMask(
        [
            # NOTE: Although 28-31 are in the ASCII range, they only count as whitespace
            # when matching in unicode mode:
            (28, 32),
            133,
            160,
            5760,
            (8192, 8203),
            (8232, 8234),
            8239,
            8287,
            12288,
        ]
    )
)

_CASEABLE_CHARS = None


def caseable_chars():
    global _CASEABLE_CHARS
    if _CASEABLE_CHARS is None:
        codepoints = []
        for i in range(sys.maxunicode + 1):
            ch = chr(i)
            # Exclude the (large) "Other Letter" group that doesn't caseswap:
            if category(ch) in ("Lo"):
                assert ch.casefold() == ch
            else:
                codepoints.append(ch)

        _CASEABLE_CHARS = "".join(codepoints)
    return _CASEABLE_CHARS


_UNICODE_IGNORECASE_MASKS: Dict[int, CharMask] = {}  # codepoint -> CharMask


def unicode_ignorecase_mask(cp: int) -> CharMask:
    mask = _UNICODE_IGNORECASE_MASKS.get(cp)
    if mask is None:
        chars = caseable_chars()
        matches = re.compile(chr(cp), re.IGNORECASE).findall(chars)
        mask = CharMask([ord(c) for c in matches])
        _UNICODE_IGNORECASE_MASKS[cp] = mask
    return mask
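
The ignorecase mask is computed by brute force: every caseable codepoint is tried against the one-character pattern under re.IGNORECASE, so the result reflects whatever case folding the host Python performs. A doctest-style sketch of the intended behavior (it assumes CharMask.covers accepts a codepoint, the way the matcher below uses it):

    >>> m = unicode_ignorecase_mask(ord("k"))
    >>> m.covers(ord("k")), m.covers(ord("K"))
    (True, True)
    >>> m.covers(0x212A)  # KELVIN SIGN also case-folds to "k" in unicode mode
    True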


def single_char_mask(
    parsed: Tuple[object, Any], flags: int, ord=ord, chr=chr
) -> Optional[CharMask]:
    """
    Compute a CharMask from a parsed regex.

    Takes a pattern object, like those returned by sre_parse.parse().
    Returns None if `parsed` is not a single-character regular expression.
    Returns a list of valid codepoint or codepoint ranges if it can find them, or raises
    ReUnhandled if such an expression cannot be determined.
    """
    (op, arg) = parsed
    isascii = re.ASCII & flags
    if op in (LITERAL, NOT_LITERAL):
        if re.IGNORECASE & flags:
            ret = unicode_ignorecase_mask(arg)
        else:
            ret = CharMask([arg])
        if op is NOT_LITERAL:
            ret = ret.invert()
    elif op is RANGE:
        lo, hi = arg
        if re.IGNORECASE & flags:
            ret = CharMask(
                [
                    # TODO: among other issues, this doesn't handle multi-codepoint caseswaps:
                    (ord(chr(lo).lower()), ord(chr(hi).lower()) + 1),
                    (ord(chr(lo).upper()), ord(chr(hi).upper()) + 1),
                ]
            )
        else:
            ret = CharMask([(lo, hi + 1)])
    elif op is IN:
        ret = CharMask([])
        negate = arg and arg[0][0] is NEGATE
        if negate:
            arg = arg[1:]
        for term in arg:
            submask = single_char_mask(term, flags, ord=ord, chr=chr)
            if submask is None:
                raise ReUnhandled("IN contains non-single-char expression")
            ret = ret.union(submask)
        if negate:
            ret = ret.invert()
    elif op is CATEGORY:
        cats = get_unicode_categories()
        if arg == CATEGORY_DIGIT:
            ret = cats["Nd"]
        elif arg == CATEGORY_NOT_DIGIT:
            ret = cats["Nd"].invert()
        elif arg == CATEGORY_SPACE:
            return _ASCII_WHITESPACE_CHAR if isascii else _UNICODE_WHITESPACE_CHAR
        elif arg == CATEGORY_NOT_SPACE:
            ret = _ASCII_WHITESPACE_CHAR if isascii else _UNICODE_WHITESPACE_CHAR
            return ret.invert()
        elif arg == CATEGORY_WORD:
            ret = cats["word"]
        elif arg == CATEGORY_NOT_WORD:
            ret = cats["word"].invert()
        else:
            raise ReUnhandled("Unsupported category: ", arg)
    elif op is ANY and arg is None:
        # TODO: test dot under ascii mode (seems like we should fall through to the re.ASCII check below)
        return _ANY_CHAR if re.DOTALL & flags else _ANY_NON_NEWLINE_CHAR
    else:
        return None
    if re.ASCII & flags:
        # TODO: this is probably expensive!
        ret = ret.intersect(_ASCII_CHAR)
    return ret
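
single_char_mask operates on the (opcode, argument) tuples produced by the standard library's regex parser: a character class, for example, parses to an IN node whose terms are RANGE and LITERAL nodes. A doctest-style sketch of the expected shape (assuming CharMask.covers takes a codepoint, as used later in this file):

    >>> [(op, arg)] = parse("[a-cz]")
    >>> op is IN
    True
    >>> mask = single_char_mask((op, arg), 0)
    >>> mask.covers(ord("b")), mask.covers(ord("z")), mask.covers(ord("d"))
    (True, True, False)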


Span = Tuple[int, Union[int, SymbolicInt]]


def _traced_binop(a, op, b):
    if isinstance(a, CrossHairValue) or isinstance(b, CrossHairValue):
        with ResumedTracing():
            return op(a, b)
    return op(a, b)


class _MatchPart:
    def __init__(self, groups: List[Optional[Span]]):
        self._groups = groups

    def _fullspan(self) -> Span:
        span = self._groups[0]
        assert span is not None
        return span

    def _clamp_all_spans(self, start, end):
        groups = self._groups
        for idx, span in enumerate(groups):
            if span is not None:
                (span_start, span_end) = span
                with ResumedTracing():
                    if span_start == span_end:
                        if span_start < start:
                            groups[idx] = (start, start)
                        if span_start > end:
                            groups[idx] = (end, end)

    def isempty(self):
        (start, end) = self._groups[0]
        return _traced_binop(end, operator.le, start)

    def __bool__(self):
        return True

    def __repr__(self):
        return f"<re.Match object; span={self.span()!r}, match={self.group()!r}>"

    def _add_match(self, suffix_match: "_MatchPart") -> "_MatchPart":
        groups: List[Optional[Span]] = [None] * max(
            len(self._groups), len(suffix_match._groups)
        )
        for idx, g in enumerate(self._groups):
            groups[idx] = g
        for idx, g in enumerate(suffix_match._groups):
            if g is not None:
                groups[idx] = g
        my_start = self._fullspan()[0]
        suffix_end = suffix_match._fullspan()[1]
        groups[0] = (my_start, suffix_end)
        return _MatchPart(groups)

    def start(self, group=0):
        return self._groups[group][0]

    def end(self, group=0):
        return self._groups[group][1]

    def span(self, group=0):
        return self._groups[group]


_BACKREF_RE_SOURCE = rb"""
(?P<prefix> .*?)
\\
(?:
    # Note that earlier matches are preferred in regex unions like this:
    (?P<num> [1-9][0-9]? ) |
    g\< (?P<namednum> \s*\+?\d+\s* ) \> |
    g\< (?P<named> \w+ ) \> |
    g\< (?P<namedother> .* ) \>
)
(?P<suffix> .*)
"""
_BACKREF_BYTES_RE = re.compile(_BACKREF_RE_SOURCE, re.VERBOSE | re.MULTILINE)
_BACKREF_STR_RE = re.compile(
    str(_BACKREF_RE_SOURCE, "ascii"), re.VERBOSE | re.MULTILINE
)
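
These helper patterns recognize a single backreference in a replacement template, splitting it into the text before the reference, the reference itself (numeric "\1" or "\g<2>", or named "\g<name>"), and the remaining text; _Match.expand below applies them recursively. A doctest-style illustration:

    >>> m = _BACKREF_STR_RE.fullmatch(r"pre \g<name> post")
    >>> m.group("prefix"), m.group("named"), m.group("suffix")
    ('pre ', 'name', ' post')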


class _Match(_MatchPart):
    def __init__(self, groups, pos, endpos, regex, orig_str):
        # fill None in unmatched groups:
        while len(groups) < regex.groups + 1:
            groups.append(None)
        super().__init__(groups)
        self.pos = pos
        if endpos is None:
            with ResumedTracing():
                self.endpos = len(orig_str)
        else:
            self.endpos = endpos
        self.re = regex
        self.string = orig_str

        # Compute lastindex & lastgroup:
        self.lastindex, self.lastgroup = None, None
        _idx_to_name = {num: name for (name, num) in regex.groupindex.items()}
        for idx, grp in enumerate(groups):
            if grp is None:
                continue
            self.lastindex = idx
            if idx in _idx_to_name:
                self.lastgroup = _idx_to_name[idx]

    def __ch_deep_realize__(self, memo):
        # We cannot manually create realistic Match instances.
        # Realize our contents - it's better than nothing
        return _Match(
            deep_realize(self._groups, memo),
            realize(self.pos),
            realize(self.endpos),
            deep_realize(self.re, memo),
            realize(self.string),
        )

    def __getitem__(self, idx):
        return self.group(idx)

    def expand(self, template):
        backref_re = _BACKREF_STR_RE if isinstance(template, str) else _BACKREF_BYTES_RE
        with NoTracing():
            template = realize(template)  # Usually this is a literal string
        match = backref_re.fullmatch(template)
        if match is None:
            return template
        prefix, num, namednum, named, _, suffix = match.groups()
        if num or namednum:
            replacement = self.group(int(num or namednum))
        elif named:
            replacement = self.group(named)
        else:
            raise re.error
        return prefix + replacement + self.expand(suffix)

    def group(self, *nums):
        if not nums:
            nums = (0,)
        ret: List[str] = []
        for num in nums:
            if isinstance(num, str):
                num = self.re.groupindex[num]
            if self._groups[num] is None:
                ret.append(None)
            else:
                start, end = self._groups[num]
                ret.append(self.string[start:end])
        if len(nums) == 1:
            return ret[0]
        else:
            return tuple(ret)

    def groups(self):
        indicies = range(1, len(self._groups))
        if indicies:
            return tuple(self.group(i) for i in indicies)
        else:
            return ()

    def groupdict(self, default=None):
        groups = self._groups
        ret = {}
        for name, idx in self.re.groupindex.items():
            group_range = groups[idx]
            if group_range is not None:
                ret[name] = group_range
        return ret


_REMOVE = object()


def _patt_replace(list_tree: List, from_obj: object, to_obj: object = _REMOVE) -> List:
    """
    >>> _patt_replace([[], [2, None]], None, 3)
    [[], [2, 3]]
    >>> _patt_replace([[], [None, 7]], None, _REMOVE)
    [[], [7]]
    """
    for idx, child in enumerate(list_tree):
        if child is from_obj:
            if to_obj is _REMOVE:
                return list_tree[:idx] + list_tree[idx + 1 :]
            else:
                return [(to_obj if x is from_obj else x) for x in list_tree]
        if not is_iterable(child):
            continue
        newchild = _patt_replace(child, from_obj, to_obj)
        if newchild is not child:
            # Found it; make a copy of this list with the new item:
            newlist = list(list_tree)
            newlist[idx] = newchild
            return newlist
    # nothing changed; re-use the original list
    return list_tree


_END_GROUP_MARKER = object()


def _internal_match_patterns(
    top_patterns: Any,
    flags: int,
    string: AnySymbolicStr,
    offset: int,
    allow_empty: bool = True,
    ord=ord,
    chr=chr,
) -> Optional[_MatchPart]:
    """
    >>> import sre_parse
    >>> from crosshair.core_and_libs import standalone_statespace, NoTracing
    >>> from crosshair.libimpl.builtinslib import LazyIntSymbolicStr
    >>> with standalone_statespace, NoTracing():
    ...     string = LazyIntSymbolicStr(list(map(ord, 'aabb')))
    ...     _internal_match_patterns(sre_parse.parse('a+'), 0, string, 0).span()
    ...     _internal_match_patterns(sre_parse.parse('ab'), 0, string, 1).span()
    (0, 2)
    (1, 3)
    """
    space = context_statespace()
    with ResumedTracing():
        matchablestr = string[offset:] if offset > 0 else string

    if len(top_patterns) == 0:
        return _MatchPart([(offset, offset)]) if allow_empty else None
    pattern = top_patterns[0]

    def continue_matching(prefix):
        sub_allow_empty = allow_empty if prefix.isempty() else True
        suffix = _internal_match_patterns(
            top_patterns[1:],
            flags,
            string,
            prefix.end(),
            sub_allow_empty,
            ord=ord,
            chr=chr,
        )
        if suffix is None:
            return None
        return prefix._add_match(suffix)

    # TODO: using a typed internal function triggers __hash__es inside the typing module.
    # Seems like this casues nondeterminism due to a global LRU cache used by the typing module.
    def fork_on(expr, sz):
        if space.smt_fork(expr):
            return continue_matching(
                _MatchPart([(offset, _traced_binop(offset, operator.add, sz))])
            )
        else:
            return None

    mask = single_char_mask(pattern, flags, ord=ord, chr=chr)
    if mask is not None:
        with ResumedTracing():
            if any([offset < 0, offset >= len(string)]):
                return None
            char = ord(string[offset])
        if isinstance(char, int):  # Concrete int? Just check it!
            if mask.covers(char):
                return continue_matching(
                    _MatchPart([(offset, _traced_binop(offset, operator.add, 1))])
                )
            else:
                return None
        smt_ch = SymbolicInt._coerce_to_smt_sort(char)
        return fork_on(mask.smt_matches(smt_ch), 1)

    (op, arg) = pattern
    if op in (MIN_REPEAT, MAX_REPEAT):
        (min_repeat, max_repeat, subpattern) = arg
        if max_repeat < min_repeat:
            return None
        reps = 0
        overall_match = _MatchPart([(offset, offset)])
        while reps < min_repeat:
            submatch = _internal_match_patterns(
                subpattern, flags, string, overall_match.end(), True, ord=ord, chr=chr
            )
            if submatch is None:
                return None
            overall_match = overall_match._add_match(submatch)
            reps += 1
        if max_repeat != MAXREPEAT and reps >= max_repeat:
            return continue_matching(overall_match)

        if max_repeat == MAXREPEAT:
            remaining_reps = max_repeat
        else:
            remaining_reps = max_repeat - min_repeat

        if op is MIN_REPEAT:
            # Non-greedy match: try the shortest possible match first.
            short_match = continue_matching(overall_match)
            if short_match is not None:
                return short_match

        remaining_matcher = _patt_replace(
            top_patterns, arg, (1, remaining_reps, subpattern)
        )
        remainder_allow_empty = allow_empty or not overall_match.isempty()
        remainder_match = _internal_match_patterns(
            remaining_matcher,
            flags,
            string,
            overall_match.end(),
            remainder_allow_empty,
            ord=ord,
            chr=chr,
        )
        if remainder_match is not None:
            return overall_match._add_match(remainder_match)

        if op is MAX_REPEAT:
            # Greedy match: didn't match more repetitions - try from here.
            return continue_matching(overall_match)

        return None
    elif op is BRANCH and arg[0] is None:
        # NOTE: order matters - earlier branches are more greedily matched than later branches.
        branches = arg[1]
        first_path = list(branches[0]) + list(top_patterns)[1:]
        submatch = _internal_match_patterns(
            first_path, flags, string, offset, allow_empty, ord=ord, chr=chr
        )
        if submatch is not None:
            return submatch
        if len(branches) <= 1:
            return None
        else:
            return _internal_match_patterns(
                _patt_replace(top_patterns, branches, branches[1:]),
                flags,
                string,
                offset,
                allow_empty,
                ord=ord,
                chr=chr,
            )
    elif op is AT:
        if arg in (AT_BEGINNING, AT_BEGINNING_STRING):
            begins_string = fork_on(SymbolicInt._coerce_to_smt_sort(offset) == 0, 0)
            if begins_string:
                return begins_string
            if arg is AT_BEGINNING and re.MULTILINE & flags:
                with ResumedTracing():
                    prev_char = ord(string[offset - 1])
                return fork_on(
                    SymbolicInt._coerce_to_smt_sort(prev_char) == ord("\n"), 0
                )
            return None
        with ResumedTracing():
            matchable_len = len(matchablestr)
        ends_string = space.smt_fork(
            SymbolicInt._coerce_to_smt_sort(matchable_len) == 0
        )
        if arg in (AT_END, AT_END_STRING):
            if ends_string:
                return continue_matching(_MatchPart([(offset, offset)]))
            if arg is AT_END and re.MULTILINE & flags:
                with ResumedTracing():
                    next_char = ord(string[offset])
                return fork_on(
                    SymbolicInt._coerce_to_smt_sort(next_char) == ord("\n"), 0
                )
            return None
        elif arg in (AT_BOUNDARY, AT_NON_BOUNDARY):
            if ends_string or offset == 0:
                if arg == AT_BOUNDARY:
                    return continue_matching(_MatchPart([(offset, offset)]))
                else:
                    assert arg == AT_NON_BOUNDARY
                    return None
            with ResumedTracing():
                left = ord(string[offset - 1])
                right = ord(string[offset])
            wordmask = get_unicode_categories()["word"]
            left_expr = wordmask.smt_matches(SymbolicInt._coerce_to_smt_sort(left))
            right_expr = wordmask.smt_matches(SymbolicInt._coerce_to_smt_sort(right))
            at_boundary_expr = z3.Xor(left_expr, right_expr)
            if arg == AT_NON_BOUNDARY:
                at_boundary_expr = z3.Not(at_boundary_expr)
            return fork_on(at_boundary_expr, 0)
    elif op in (ASSERT, ASSERT_NOT):
        (direction_int, subpattern) = arg
        positive_look = op == ASSERT
        if direction_int == 1:
            matched = _internal_match_patterns(
                subpattern, flags, string, offset, True, ord=ord, chr=chr
            )
        else:
            assert direction_int == -1
            minwidth, maxwidth = subpattern.getwidth()
            if minwidth != maxwidth:
                raise re.error("")
            rewound = offset - minwidth
            if rewound < 0:
                return None
            matched = _internal_match_patterns(
                subpattern, flags, string, rewound, True, ord=ord, chr=chr
            )
        if bool(matched) != bool(positive_look):
            return None
        return _internal_match_patterns(
            top_patterns[1:], flags, string, offset, allow_empty, ord=ord, chr=chr
        )
    elif op is SUBPATTERN:
        (groupnum, _a, _b, subpatterns) = arg
        if (_a, _b) != (0, 0):
            raise ReUnhandled("unsupported subpattern args")
        new_top = (
            list(subpatterns)
            + [(_END_GROUP_MARKER, (groupnum, offset))]
            + list(top_patterns)[1:]
        )
        return _internal_match_patterns(
            new_top, flags, string, offset, allow_empty, ord=ord, chr=chr
        )
    elif op is _END_GROUP_MARKER:
        (group_num, begin) = arg
        match = continue_matching(_MatchPart([(offset, offset)]))
        if match is None:
            return None
        while len(match._groups) <= group_num:
            match._groups.append(None)
        match._groups[group_num] = (begin, offset)
        return match
    raise ReUnhandled(op)


def _match_pattern(
    compiled_regex: re.Pattern,
    orig_str: Union[AnySymbolicStr, BytesLike],
    pos: int,
    endpos: Optional[int] = None,
    subpattern: Optional[List] = None,
    allow_empty=True,
    ord=ord,
    chr=chr,
) -> Optional[_Match]:
    assert not is_tracing()
    if subpattern is None:
        subpattern = cast(List, parse(compiled_regex.pattern, compiled_regex.flags))
    with ResumedTracing():
        trimmed_str = orig_str[:endpos]
    matchpart = _internal_match_patterns(
        subpattern,
        compiled_regex.flags,
        trimmed_str,
        pos,
        allow_empty,
        ord=ord,
        chr=chr,
    )
    if matchpart is None:
        return None
    match_start, match_end = matchpart._fullspan()
    if _traced_binop(match_start, operator.eq, match_end):
        matchpart._clamp_all_spans(0, len(orig_str))
    return _Match(matchpart._groups, pos, endpos, compiled_regex, orig_str)


def _compile(*a):
    # Symbolic regexes aren't supported, and it's expensive to perform compilation
    # with tracing enabled.
    with NoTracing():
        return re._compile(*deep_realize(a))


def _check_str_or_bytes(patt: re.Pattern, obj: Any):
    if not isinstance(patt, re.Pattern):
        raise TypeError  # TODO: e.g. "descriptor 'search' for 're.Pattern' objects doesn't apply to a 'str' object"
    if not isinstance(obj, _STR_AND_BYTES_TYPES):
        raise TypeError(f"expected string or bytes-like object, got '{type(obj)}'")
    if isinstance(patt.pattern, str):
        if isinstance(obj, str):
            return (chr, ord)
        raise TypeError("cannot use a bytes pattern on a string-like object")
    else:
        if isinstance(obj, _ALL_BYTES_TYPES):
            return (lambda i: bytes([i]), lambda i: i)
        raise TypeError("cannot use a string pattern on a bytes-like object")
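
The returned pair lets the matcher convert between codepoints and single characters uniformly for str and bytes subjects: for text patterns it hands back the chr/ord builtins, and for bytes patterns it returns converters that map an int to a one-byte bytes object and pass ints through unchanged. A doctest-style sketch:

    >>> _check_str_or_bytes(re.compile("a"), "abc") == (chr, ord)
    True
    >>> to_char, to_int = _check_str_or_bytes(re.compile(b"a"), b"abc")
    >>> to_char(97), to_int(98)
    (b'a', 98)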


def _finditer_symbolic(
    patt: re.Pattern, string: AnySymbolicStr, pos: int, endpos: int, chr=chr, ord=ord
) -> Iterable[_Match]:
    last_match_was_empty = False
    while True:
        with NoTracing():
            if pos > endpos:
                break
            allow_empty = not last_match_was_empty
            match = _match_pattern(
                patt, string, pos, endpos, allow_empty=allow_empty, chr=chr, ord=ord
            )
            last_match_was_empty = False
            if not match:
                pos += 1
                continue
        yield match
        with NoTracing():
            if match.start() == match.end():
                if not allow_empty:
                    raise CrossHairInternal("Unexpected empty match")
                last_match_was_empty = True
            else:
                pos = match.end()


def _finditer(
    self: re.Pattern,
    string: Union[str, AnySymbolicStr, bytes],
    pos: int = 0,
    endpos: Optional[int] = None,
) -> Iterable[Union[re.Match, _Match]]:
    chr, ord = _check_str_or_bytes(self, string)
    if not isinstance(pos, int):
        raise TypeError
    if not (endpos is None or isinstance(endpos, int)):
        raise TypeError
    pos, endpos = realize(pos), realize(endpos)
    strlen = len(string)
    with NoTracing():
        if isinstance(string, AnySymbolicStr):
            pos, endpos, _ = slice(pos, endpos, 1).indices(realize(strlen))
            with ResumedTracing():
                try:
                    yield from _finditer_symbolic(
                        self, string, pos, endpos, chr=chr, ord=ord
                    )
                    return
                except ReUnhandled as e:
                    debug("Unsupported symbolic regex", self.pattern, e)
        if endpos is None:
            yield from re.Pattern.finditer(self, realize(string), pos)
        else:
            yield from re.Pattern.finditer(self, realize(string), pos, endpos)


def _fullmatch(
    self: re.Pattern, string: Union[str, AnySymbolicStr, bytes], pos=0, endpos=None
):
    with NoTracing():
        if isinstance(string, (AnySymbolicStr, BytesLike)):
            with ResumedTracing():
                chr, ord = _check_str_or_bytes(self, string)
            try:
                compiled = cast(List, parse(self.pattern, self.flags))
                compiled.append((AT, AT_END_STRING))
                return _match_pattern(
                    self, string, pos, endpos, compiled, chr=chr, ord=ord
                )
            except ReUnhandled as e:
                debug("Unsupported symbolic regex", self.pattern, e)
        if endpos is None:
            return re.Pattern.fullmatch(self, realize(string), pos)
        else:
            return re.Pattern.fullmatch(self, realize(string), pos, endpos)


def _match(
    self, string: Union[str, AnySymbolicStr], pos=0, endpos=None
) -> Union[None, re.Match, _Match]:
    with NoTracing():
        if isinstance(string, (AnySymbolicStr, BytesLike)):
            with ResumedTracing():
                chr, ord = _check_str_or_bytes(self, string)
            try:
                return _match_pattern(self, string, pos, endpos, chr=chr, ord=ord)
            except ReUnhandled as e:
                debug("Unsupported symbolic regex", self.pattern, e)
        if endpos is None:
            return re.Pattern.match(self, realize(string), pos)
        else:
            return re.Pattern.match(self, realize(string), pos, endpos)


def _search(
    self: re.Pattern,
    string: Union[str, AnySymbolicStr, bytes],
    pos: int = 0,
    endpos: Optional[int] = None,
) -> Union[None, re.Match, _Match]:
    chr, ord = _check_str_or_bytes(self, string)
    if not isinstance(pos, int):
        raise TypeError
    if not (endpos is None or isinstance(endpos, int)):
        raise TypeError
    pos, endpos = realize(pos), realize(endpos)
    mylen = string.__len__()
    with NoTracing():
        if isinstance(string, (AnySymbolicStr, BytesLike)):
            pos, endpos, _ = slice(pos, endpos, 1).indices(realize(mylen))
            try:
                while pos < endpos:
                    match = _match_pattern(self, string, pos, endpos, chr=chr, ord=ord)
                    if match:
                        return match
                    pos += 1
                return None
            except ReUnhandled as e:
                debug("Unsupported symbolic regex", self.pattern, e)
        if endpos is None:
            return re.Pattern.search(self, realize(string), pos)
        else:
            return re.Pattern.search(self, realize(string), pos, endpos)


def _sub(self, repl, string, count=0):
    (result, _) = _subn(self, repl, string, count)
    return result


def _subn(
    self: re.Pattern, repl: Union[str, Callable], string: str, count: int = 0
) -> Tuple[str, int]:
    if not isinstance(self, re.Pattern):
        raise TypeError
    if isinstance(repl, _STR_AND_BYTES_TYPES):
        _check_str_or_bytes(self, repl)

        def replfn(m):
            return m.expand(repl)

    elif callable(repl):
        replfn = repl
    else:
        raise TypeError
    _check_str_or_bytes(self, string)
    if not isinstance(count, int):
        raise TypeError
    match = self.search(string)
    if match is None:
        return (string, 0)
    result_prefix = string[: match.start()] + replfn(match)
    if count == 1:
        return (result_prefix + string[match.end() :], 1)
    if match.end() == match.start():
        remaining = string[match.end() + 1 :]
    else:
        remaining = string[match.end() :]
    (result_suffix, suffix_replacements) = _subn(self, repl, remaining, count - 1)
    return (result_prefix + result_suffix, suffix_replacements + 1)
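
_subn re-implements substitution recursively on top of search: each level replaces the first match and then recurses on the remainder with the count decremented (count=0 meaning unlimited), so for concrete inputs it aims to mirror CPython's own Pattern.subn. For reference, the stock behavior it reproduces:

    >>> re.compile("a").subn("-", "banana")
    ('b-n-n-', 3)
    >>> re.compile("a").subn("-", "banana", 2)
    ('b-n-na', 2)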


def make_registrations():
    register_patch(re._compile, _compile)
    register_patch(re.Pattern.search, _search)
    register_patch(re.Pattern.match, _match)
    register_patch(re.Pattern.fullmatch, _fullmatch)
    register_patch(re.Pattern.split, with_realized_args(re.Pattern.split))
    register_patch(re.Pattern.findall, with_realized_args(re.Pattern.findall))
    register_patch(re.Pattern.finditer, _finditer)
    register_patch(re.Pattern.sub, _sub)
    register_patch(re.Pattern.subn, _subn)
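
make_registrations wires these replacements into CrossHair: search, match, fullmatch, finditer, sub, and subn are swapped for the symbolic-aware versions above (each of which falls back to the real implementation on an unsupported construct), while split and findall simply realize their arguments. A minimal sketch of the machinery they expose, modeled on the doctest in _internal_match_patterns and driving _match_pattern directly rather than relying on the patched methods:

    >>> from crosshair.core_and_libs import standalone_statespace, NoTracing
    >>> from crosshair.libimpl.builtinslib import LazyIntSymbolicStr
    >>> with standalone_statespace, NoTracing():
    ...     s = LazyIntSymbolicStr(list(map(ord, "aabb")))
    ...     _match_pattern(re.compile("a+b"), s, 0).span()
    (0, 3)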