crosshair-tool 0.0.99__cp312-cp312-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _crosshair_tracers.cpython-312-darwin.so +0 -0
- crosshair/__init__.py +42 -0
- crosshair/__main__.py +8 -0
- crosshair/_mark_stacks.h +790 -0
- crosshair/_preliminaries_test.py +18 -0
- crosshair/_tracers.h +94 -0
- crosshair/_tracers_pycompat.h +522 -0
- crosshair/_tracers_test.py +138 -0
- crosshair/abcstring.py +245 -0
- crosshair/auditwall.py +190 -0
- crosshair/auditwall_test.py +77 -0
- crosshair/codeconfig.py +113 -0
- crosshair/codeconfig_test.py +117 -0
- crosshair/condition_parser.py +1237 -0
- crosshair/condition_parser_test.py +497 -0
- crosshair/conftest.py +30 -0
- crosshair/copyext.py +155 -0
- crosshair/copyext_test.py +84 -0
- crosshair/core.py +1763 -0
- crosshair/core_and_libs.py +149 -0
- crosshair/core_regestered_types_test.py +82 -0
- crosshair/core_test.py +1316 -0
- crosshair/diff_behavior.py +314 -0
- crosshair/diff_behavior_test.py +261 -0
- crosshair/dynamic_typing.py +346 -0
- crosshair/dynamic_typing_test.py +210 -0
- crosshair/enforce.py +282 -0
- crosshair/enforce_test.py +182 -0
- crosshair/examples/PEP316/__init__.py +1 -0
- crosshair/examples/PEP316/bugs_detected/__init__.py +0 -0
- crosshair/examples/PEP316/bugs_detected/getattr_magic.py +16 -0
- crosshair/examples/PEP316/bugs_detected/hash_consistent_with_equals.py +31 -0
- crosshair/examples/PEP316/bugs_detected/shopping_cart.py +24 -0
- crosshair/examples/PEP316/bugs_detected/showcase.py +39 -0
- crosshair/examples/PEP316/correct_code/__init__.py +0 -0
- crosshair/examples/PEP316/correct_code/arith.py +60 -0
- crosshair/examples/PEP316/correct_code/chess.py +77 -0
- crosshair/examples/PEP316/correct_code/nesting_inference.py +17 -0
- crosshair/examples/PEP316/correct_code/numpy_examples.py +132 -0
- crosshair/examples/PEP316/correct_code/rolling_average.py +35 -0
- crosshair/examples/PEP316/correct_code/showcase.py +104 -0
- crosshair/examples/__init__.py +0 -0
- crosshair/examples/check_examples_test.py +146 -0
- crosshair/examples/deal/__init__.py +1 -0
- crosshair/examples/icontract/__init__.py +1 -0
- crosshair/examples/icontract/bugs_detected/__init__.py +0 -0
- crosshair/examples/icontract/bugs_detected/showcase.py +41 -0
- crosshair/examples/icontract/bugs_detected/wrong_sign.py +8 -0
- crosshair/examples/icontract/correct_code/__init__.py +0 -0
- crosshair/examples/icontract/correct_code/arith.py +51 -0
- crosshair/examples/icontract/correct_code/showcase.py +94 -0
- crosshair/fnutil.py +391 -0
- crosshair/fnutil_test.py +75 -0
- crosshair/fuzz_core_test.py +516 -0
- crosshair/libimpl/__init__.py +0 -0
- crosshair/libimpl/arraylib.py +161 -0
- crosshair/libimpl/binascii_ch_test.py +30 -0
- crosshair/libimpl/binascii_test.py +67 -0
- crosshair/libimpl/binasciilib.py +150 -0
- crosshair/libimpl/bisectlib_test.py +23 -0
- crosshair/libimpl/builtinslib.py +5228 -0
- crosshair/libimpl/builtinslib_ch_test.py +1191 -0
- crosshair/libimpl/builtinslib_test.py +3735 -0
- crosshair/libimpl/codecslib.py +86 -0
- crosshair/libimpl/codecslib_test.py +86 -0
- crosshair/libimpl/collectionslib.py +264 -0
- crosshair/libimpl/collectionslib_ch_test.py +252 -0
- crosshair/libimpl/collectionslib_test.py +332 -0
- crosshair/libimpl/copylib.py +23 -0
- crosshair/libimpl/copylib_test.py +18 -0
- crosshair/libimpl/datetimelib.py +2559 -0
- crosshair/libimpl/datetimelib_ch_test.py +354 -0
- crosshair/libimpl/datetimelib_test.py +112 -0
- crosshair/libimpl/decimallib.py +5257 -0
- crosshair/libimpl/decimallib_ch_test.py +78 -0
- crosshair/libimpl/decimallib_test.py +76 -0
- crosshair/libimpl/encodings/__init__.py +23 -0
- crosshair/libimpl/encodings/_encutil.py +187 -0
- crosshair/libimpl/encodings/ascii.py +44 -0
- crosshair/libimpl/encodings/latin_1.py +40 -0
- crosshair/libimpl/encodings/utf_8.py +93 -0
- crosshair/libimpl/encodings_ch_test.py +83 -0
- crosshair/libimpl/fractionlib.py +16 -0
- crosshair/libimpl/fractionlib_test.py +80 -0
- crosshair/libimpl/functoolslib.py +34 -0
- crosshair/libimpl/functoolslib_test.py +56 -0
- crosshair/libimpl/hashliblib.py +30 -0
- crosshair/libimpl/hashliblib_test.py +18 -0
- crosshair/libimpl/heapqlib.py +47 -0
- crosshair/libimpl/heapqlib_test.py +21 -0
- crosshair/libimpl/importliblib.py +18 -0
- crosshair/libimpl/importliblib_test.py +38 -0
- crosshair/libimpl/iolib.py +216 -0
- crosshair/libimpl/iolib_ch_test.py +128 -0
- crosshair/libimpl/iolib_test.py +19 -0
- crosshair/libimpl/ipaddresslib.py +8 -0
- crosshair/libimpl/itertoolslib.py +44 -0
- crosshair/libimpl/itertoolslib_test.py +44 -0
- crosshair/libimpl/jsonlib.py +984 -0
- crosshair/libimpl/jsonlib_ch_test.py +42 -0
- crosshair/libimpl/jsonlib_test.py +51 -0
- crosshair/libimpl/mathlib.py +179 -0
- crosshair/libimpl/mathlib_ch_test.py +44 -0
- crosshair/libimpl/mathlib_test.py +67 -0
- crosshair/libimpl/oslib.py +7 -0
- crosshair/libimpl/pathliblib_test.py +10 -0
- crosshair/libimpl/randomlib.py +178 -0
- crosshair/libimpl/randomlib_test.py +120 -0
- crosshair/libimpl/relib.py +846 -0
- crosshair/libimpl/relib_ch_test.py +169 -0
- crosshair/libimpl/relib_test.py +493 -0
- crosshair/libimpl/timelib.py +72 -0
- crosshair/libimpl/timelib_test.py +82 -0
- crosshair/libimpl/typeslib.py +15 -0
- crosshair/libimpl/typeslib_test.py +36 -0
- crosshair/libimpl/unicodedatalib.py +75 -0
- crosshair/libimpl/unicodedatalib_test.py +42 -0
- crosshair/libimpl/urlliblib.py +23 -0
- crosshair/libimpl/urlliblib_test.py +19 -0
- crosshair/libimpl/weakreflib.py +13 -0
- crosshair/libimpl/weakreflib_test.py +69 -0
- crosshair/libimpl/zliblib.py +15 -0
- crosshair/libimpl/zliblib_test.py +13 -0
- crosshair/lsp_server.py +261 -0
- crosshair/lsp_server_test.py +30 -0
- crosshair/main.py +973 -0
- crosshair/main_test.py +543 -0
- crosshair/objectproxy.py +376 -0
- crosshair/objectproxy_test.py +41 -0
- crosshair/opcode_intercept.py +601 -0
- crosshair/opcode_intercept_test.py +304 -0
- crosshair/options.py +218 -0
- crosshair/options_test.py +10 -0
- crosshair/patch_equivalence_test.py +75 -0
- crosshair/path_cover.py +209 -0
- crosshair/path_cover_test.py +138 -0
- crosshair/path_search.py +161 -0
- crosshair/path_search_test.py +52 -0
- crosshair/pathing_oracle.py +271 -0
- crosshair/pathing_oracle_test.py +21 -0
- crosshair/pure_importer.py +27 -0
- crosshair/pure_importer_test.py +16 -0
- crosshair/py.typed +0 -0
- crosshair/register_contract.py +273 -0
- crosshair/register_contract_test.py +190 -0
- crosshair/simplestructs.py +1165 -0
- crosshair/simplestructs_test.py +283 -0
- crosshair/smtlib.py +24 -0
- crosshair/smtlib_test.py +14 -0
- crosshair/statespace.py +1199 -0
- crosshair/statespace_test.py +108 -0
- crosshair/stubs_parser.py +352 -0
- crosshair/stubs_parser_test.py +43 -0
- crosshair/test_util.py +329 -0
- crosshair/test_util_test.py +26 -0
- crosshair/tools/__init__.py +0 -0
- crosshair/tools/check_help_in_doc.py +264 -0
- crosshair/tools/check_init_and_setup_coincide.py +119 -0
- crosshair/tools/generate_demo_table.py +127 -0
- crosshair/tracers.py +544 -0
- crosshair/tracers_test.py +154 -0
- crosshair/type_repo.py +151 -0
- crosshair/unicode_categories.py +589 -0
- crosshair/unicode_categories_test.py +27 -0
- crosshair/util.py +741 -0
- crosshair/util_test.py +173 -0
- crosshair/watcher.py +307 -0
- crosshair/watcher_test.py +107 -0
- crosshair/z3util.py +76 -0
- crosshair/z3util_test.py +11 -0
- crosshair_tool-0.0.99.dist-info/METADATA +144 -0
- crosshair_tool-0.0.99.dist-info/RECORD +176 -0
- crosshair_tool-0.0.99.dist-info/WHEEL +6 -0
- crosshair_tool-0.0.99.dist-info/entry_points.txt +3 -0
- crosshair_tool-0.0.99.dist-info/licenses/LICENSE +93 -0
- crosshair_tool-0.0.99.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import sys
|
|
3
|
+
from typing import Optional, Sequence
|
|
4
|
+
|
|
5
|
+
import pytest # type: ignore
|
|
6
|
+
|
|
7
|
+
from crosshair.core_and_libs import MessageType, analyze_function, run_checkables
|
|
8
|
+
from crosshair.options import AnalysisOptionSet
|
|
9
|
+
from crosshair.test_util import ResultComparison, compare_results
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def groups(match: Optional[re.Match]) -> Optional[Sequence]:
    """Summarize a match as ``(start, end, text)`` triples.

    Entry 0 describes the whole match; the remaining entries describe each
    capturing group in order. Returns ``None`` when *match* is ``None``.
    """
    if match is not None:
        summary = []
        for index in range(1 + len(match.groups())):
            summary.append(
                (match.start(index), match.end(index), match.group(index))
            )
        return summary
    return None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def check_inverted_categories(text: str, flags: int) -> ResultComparison:
    """
    pre: len(text) == 3
    post: _
    """

    # Full-match the text against the three negated classes \W, \S and \D.
    def run(t, f):
        return groups(re.fullmatch(r"\W\S\D", t, f))

    return compare_results(run, text, flags)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def check_nongreedy(text: str, flags: int) -> ResultComparison:
    """post: _"""

    # The pattern combines a lazy "+?" with a lazy capturing "*?" group.
    def run(t, f):
        found = re.match("a+?(a*?)a", t, f)
        return groups(found)

    return compare_results(run, text, flags)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def check_match_repr(text: str) -> ResultComparison:
    """
    pre: len(text) < 2
    post: _
    """

    # Compares the repr() of the match result rather than its group spans.
    def run(t):
        return repr(re.match(r"[^ab]", t))

    return compare_results(run, text)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def check_match_with_sliced_string(text: str) -> ResultComparison:
    """post: _"""
    # The subject is the input with its first character sliced off.
    tail = text[1:]

    def run(t):
        return groups(re.match(r"^[ab]{2}\Z", t))

    return compare_results(run, tail)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def check_match_with_offsets(text: str, start: int, end: int) -> ResultComparison:
    """post: _"""

    # start/end are captured from the enclosing scope; only the text is handed
    # to compare_results. (An earlier variant used the single-character
    # pattern r"a" here.)
    def run(t):
        compiled = re.compile(r"(a*)(a*)")
        return groups(compiled.match(t, start, end))

    return compare_results(run, text)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def check_findall(text: str, flags: int) -> ResultComparison:
    """post: _"""

    # findall with a plain two-character literal and no groups.
    def run(t, f):
        return re.findall("aa", t, f)

    return compare_results(run, text, flags)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def check_findall_with_groups(text: str, flags: int) -> ResultComparison:
    """post: _"""

    # findall with one capturing group in the pattern.
    def run(t, f):
        return re.findall("a(a)", t, f)

    return compare_results(run, text, flags)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def check_findall_with_empty_matches(text: str, flags: int) -> ResultComparison:
    """post: _"""

    # "a?" can match the empty string, so empty matches are part of the result.
    def run(t, f):
        return re.findall("a?", t, f)

    return compare_results(run, text, flags)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def check_finditer(text: str, flags: int) -> ResultComparison:
    """post: _"""

    # Every match produced by finditer is summarized with groups().
    def run(t, f):
        return [groups(m) for m in re.finditer("(^|a?)", t, f)]

    return compare_results(run, text, flags)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def check_finditer_with_bounds(text: str, pos: int) -> ResultComparison:
    """post: _"""
    regex = re.compile("a?")

    # The positional arguments are (string, pos, endpos); endpos is pos * 2.
    def run(*a):
        return [groups(m) for m in regex.finditer(*a)]

    return compare_results(run, text, pos, pos * 2)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def check_finditer_with_sliced_string(text: str) -> ResultComparison:
    """post: _"""
    # The subject is the input with its first character sliced off.
    tail = text[1:]

    def run(t):
        return [groups(m) for m in re.finditer("(a|bb)", t)]

    return compare_results(run, tail)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def check_search(text: str, flags: int) -> ResultComparison:
    """post: _"""

    # Unanchored search for a two-character literal.
    def run(t, f):
        return groups(re.search("aa", t, f))

    return compare_results(run, text, flags)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def check_search_with_offset(text: str, pos: int) -> ResultComparison:
    """post: _"""

    # The positional arguments are forwarded as (string, pos).
    def run(*a):
        compiled = re.compile("a")
        return groups(compiled.search(*a))

    return compare_results(run, text, pos)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def check_search_with_bounds(text: str, pos: int, endpos: int) -> ResultComparison:
    """post: _"""

    # The positional arguments are forwarded as (string, pos, endpos).
    def run(*a):
        compiled = re.compile("a")
        return groups(compiled.search(*a))

    return compare_results(run, text, pos, endpos)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def check_search_anchored_begin(text: str, flags: int) -> ResultComparison:
    """post: _"""

    # Search with a leading "^" anchor.
    def run(t, f):
        return groups(re.search("^a", t, f))

    return compare_results(run, text, flags)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def check_search_anchored_end(text: str, flags: int) -> ResultComparison:
    """post: _"""

    # Search with a trailing "$" anchor.
    def run(t, f):
        return groups(re.search("a$", t, f))

    return compare_results(run, text, flags)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def check_subn(text: str, flags: int) -> ResultComparison:
    """post: _"""

    # flags must go by keyword: the fourth positional slot of subn is count.
    def run(t, f):
        return re.subn("aa", "ba", t, flags=f)

    return compare_results(run, text, flags)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def check_lookahead(text: str) -> ResultComparison:
    """post: _"""

    # Positive lookahead: "a" only when followed by "b".
    def run(t):
        return groups(re.match("a(?=b)", t))

    return compare_results(run, text)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def check_lookbehind(text: str) -> ResultComparison:
    """post: _"""

    # Positive lookbehind: "b" only when preceded by "a".
    def run(t):
        return groups(re.search("(?<=a)b", t))

    return compare_results(run, text)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def check_negative_lookahead(text: str) -> ResultComparison:
    """post: _"""

    # Negative lookahead: "a" only when NOT followed by "b".
    def run(t):
        return groups(re.match("a(?!b)", t))

    return compare_results(run, text)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def check_negative_lookbehind(text: str) -> ResultComparison:
    """post: _"""

    # Negative lookbehind placed after ".": the consumed character must not be "b".
    def run(t):
        return groups(re.search(".(?<!b)", t))

    return compare_results(run, text)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# Bytes-based regexes
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def check_subn_bytes(text: bytes, flags: int) -> ResultComparison:
    """post: _"""

    # Bytes pattern and bytes replacement against a bytes subject; flags must
    # go by keyword because the fourth positional slot of subn is count.
    def run(t, f):
        return re.subn(b"a", b"b", t, flags=f)

    return compare_results(run, text, flags)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def check_findall_bytes(text: bytes, flags: int) -> ResultComparison:
    """post: _"""

    # The pattern has to be bytes to pair with the bytes subject: a str pattern
    # applied to bytes only ever raises TypeError in CPython, so findall itself
    # would never be exercised.
    def run(t, f):
        return re.findall(b"aa", t, f)

    return compare_results(run, text, flags)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# This is the only real test definition.
# It runs crosshair on each of the "check" functions defined above.
@pytest.mark.parametrize("fn_name", [fn for fn in dir() if fn.startswith("check_")])
def test_builtin(fn_name: str) -> None:
    """Analyze the named check function and require that no errors are reported."""
    checkable_fn = getattr(sys.modules[__name__], fn_name)
    errors = []
    for message in run_checkables(analyze_function(checkable_fn)):
        # Any message state ranked above PRE_UNSAT counts as an error.
        if message.state > MessageType.PRE_UNSAT:
            errors.append(message)
    assert errors == []
|
|
@@ -0,0 +1,493 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import re
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from crosshair import ResumedTracing
|
|
8
|
+
from crosshair.core import deep_realize, proxy_for_type
|
|
9
|
+
from crosshair.core_and_libs import NoTracing, standalone_statespace
|
|
10
|
+
from crosshair.libimpl.builtinslib import LazyIntSymbolicStr, SymbolicBytes
|
|
11
|
+
from crosshair.libimpl.relib import _BACKREF_STR_RE, _match_pattern
|
|
12
|
+
from crosshair.options import AnalysisOptionSet
|
|
13
|
+
from crosshair.statespace import CANNOT_CONFIRM, CONFIRMED, POST_FAIL, MessageType
|
|
14
|
+
from crosshair.test_util import check_states
|
|
15
|
+
from crosshair.util import CrossHairInternal
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def eval_regex(re_string, flags, test_string, offset, endpos=None):
    """Run ``_match_pattern`` on a symbolic copy of *test_string*.

    The pattern is compiled with the standard ``re`` module, the subject is
    wrapped as a ``LazyIntSymbolicStr`` of its code points, and the resulting
    match (or ``None``) is passed through ``deep_realize`` before returning.
    """
    compiled = re.compile(re_string, flags)
    # NOTE(review): the nesting of the two ``with`` blocks is reconstructed;
    # confirm which statements belong under NoTracing.
    with standalone_statespace as space:
        with NoTracing():
            symbolic_text = LazyIntSymbolicStr(list(map(ord, test_string)))
        match = _match_pattern(compiled, symbolic_text, offset, endpos)
        return deep_realize(match)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_handle_simple():
    """A literal pattern matches its exact text and rejects a shorter one."""
    hit = eval_regex("abc", 0, "abc", 0)
    miss = eval_regex("abc", 0, "ab", 0)
    assert hit is not None
    assert miss is None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_handle_or():
    """Alternation picks a matching branch and reports that branch's span."""
    for text, span in (("bc", (0, 2)), ("ab", (0, 1))):
        found = eval_regex("a|bc", 0, text, 0)
        assert found is not None
        assert found.span() == span

    for text in ("c", "bd"):
        assert eval_regex("a|bc", 0, text, 0) is None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_handle_start_markers():
    """Check start-of-string anchors, including with a nonzero offset."""
    for pattern, text in ((r"^ab", "abc"), (r"\Aab", "abc"), (r"^", "")):
        assert eval_regex(pattern, 0, text, 0) is not None
    # Surprisingly!: re.compile('^bc').match('abc', 1) is None
    # Even more surprisingly, the end markers are happy to match off of endpos.
    assert eval_regex(r"^bc", 0, "abc", 1) is None
    # With MULTILINE, "^" may match right after the newline at offset 2.
    assert eval_regex(r"^bc", re.MULTILINE, "a\nbc", 2) is not None
    assert eval_regex(r"^bc", 0, "a\nbc", 2) is None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_handle_end_markers():
    """Check the "$" and "\\Z" end anchors with and without MULTILINE."""
    matching = (
        (r"abc$", 0, "abc", 0),
        (r"abc$", 0, "abcd", 0, 3),  # endpos=3 hides the trailing "d"
        (r"abc\Z", 0, "abc", 0),
        (r"abc\Z", re.MULTILINE, "abc", 0),
        ("abc$", re.MULTILINE, "abc\n", 0),
        ("a$.b", re.MULTILINE | re.DOTALL, "a\nb", 0),
    )
    for args in matching:
        assert eval_regex(*args) is not None

    # NOTE(review): CPython's "$" also matches just before a trailing newline
    # (re.match("abc$", "abc\n") succeeds there); confirm that expecting None
    # for the first case below is intentional.
    non_matching = (
        ("abc$", 0, "abc\n", 0),
        ("abc$", re.MULTILINE, "abcd", 0),
    )
    for args in non_matching:
        assert eval_regex(*args) is None
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_handle_range():
    """A character range accepts members and rejects characters outside it."""
    for text in ("b7", "z7"):
        assert eval_regex("[a-z]7", 0, text, 0) is not None
    assert eval_regex("[a-z]7", 0, "A7", 0) is None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_handle_sets():
    """Character sets and negated sets, including against the empty string."""
    cases = (
        ("[a7]", "7", True),
        ("[^a7]", "7", False),
        ("[^3-9]", "7", False),
        ("[^a]", "7", True),
        ("[^a]", "a", False),
        ("[^a]", "", False),
    )
    for pattern, text, should_match in cases:
        outcome = eval_regex(pattern, 0, text, 0)
        if should_match:
            assert outcome is not None
        else:
            assert outcome is None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_handle_ascii_wildcard():
    """Under re.A, "." accepts ordinary and NUL characters between "1" and "2"."""
    for text in ("1x2", "1\x002"):
        assert eval_regex("1.2", re.A, text, 0) is not None
    assert eval_regex("1.2", re.A, "111", 0) is None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_handle_repeats():
    """Greedy "+", "*" and "{m,n}" repeats: matches, spans and non-matches."""
    matching = (
        ("a+a", "aa"),
        ("s{1,2}x", "sx"),
        ("s{1,2}x", "ssx"),
        ("s{2,3}", "ssss"),
        ("y*", "y"),
        ("y*e+", "ye"),
        ("y*e", "yye"),
        ("y*e+s{2,3}x", "yessx"),
        ("y*e+s{2,3}x", "essx"),
    )
    for pattern, text in matching:
        assert eval_regex(pattern, 0, text, 0) is not None

    expected_spans = (
        ("s", "ssss", (0, 1)),
        ("ss", "ssss", (0, 2)),
        ("s{2,3}", "ssss", (0, 3)),
        ("y*", "y", (0, 1)),
        ("y*e", "yye", (0, 3)),
    )
    for pattern, text, span in expected_spans:
        assert eval_regex(pattern, 0, text, 0).span() == span

    non_matching = (
        ("s{1,2}x", "sssx"),
        ("s{1,2}x", "x"),
        ("y*e+s{2,3}x", "yyessssx"),
        ("y*e+s{2,3}x", "yssx"),
        ("y*e+s{2,3}x", "ex"),
    )
    for pattern, text in non_matching:
        assert eval_regex(pattern, 0, text, 0) is None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_handle_nongreedy_repeats():
    """Lazy repeats match, and consume as little as the pattern allows."""
    for pattern, text in (("a+?", "a"), ("a+?b", "aab")):
        assert eval_regex(pattern, 0, text, 0) is not None

    for pattern, text, span in (("a*?", "aa", (0, 0)), ("a*?b", "aabaa", (0, 3))):
        assert eval_regex(pattern, 0, text, 0).span() == span
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def test_handle_ascii_numeric():
    r"""Under re.A, \d accepts ASCII digits and rejects "-"."""
    for text in ("a3", "a0"):
        assert eval_regex(r"a\d", re.A, text, 0) is not None
    assert eval_regex(r"a\d", re.A, "a-", 0) is None
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def test_handle_ascii_whitespace():
    r"""\s under re.A versus the default mode, including the \x1c separator."""
    for text in ("a ", "a\r"):
        assert eval_regex(r"a\s", re.A, text, 0) is not None
    for text in ("a.", "a\x1c"):
        assert eval_regex(r"a\s", re.A, text, 0) is None
    # Without re.A the \x1c character is expected to count as whitespace.
    assert eval_regex(r"a\s", 0, "a\x1c", 0) is not None
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_word_boundaries():
    r"""\b matches at word edges and fails inside a word or on empty input."""
    for pattern, text in ((r".\b", "a"), (r".\b", "a "), (r"\b.", "a")):
        assert eval_regex(pattern, 0, text, 0) is not None
    for pattern, text in ((r".\b", "ab"), (r"\b.", "")):
        assert eval_regex(pattern, 0, text, 0) is None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def test_word_non_boundaries():
    r"""\B matches only strictly inside a word."""
    # Offset 1 sits between "a" and "b"; offsets 0 and 2 are word edges.
    assert eval_regex(r"\B", 0, "ab", 1) is not None
    for edge_offset in (0, 2):
        assert eval_regex(r"\B", 0, "ab", edge_offset) is None

    inside_word = eval_regex(r"a\B.", 0, "ab", 0)
    at_word_end = eval_regex(r"a\B.", 0, "a ", 0)
    assert inside_word is not None
    assert at_word_end is None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def test_handle_noncapturing_group():
    """A non-capturing alternation group followed by a literal."""
    for text in ("ac", "bc"):
        assert eval_regex("(?:a|b)c", 0, text, 0) is not None
    assert eval_regex("(?:a|b)c", 0, "a", 0) is None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def test_handle_capturing_group():
    """A capturing group yields its text as a plain ``str`` in ``groups()``."""
    assert eval_regex("(a|b)c", 0, "ac", 0) is not None
    assert eval_regex("(a|b)c", 0, "a", 0) is None
    captured = eval_regex("(a|b)c", 0, "bc", 0).groups()
    # Identity is the idiomatic exact-type check: the realized group must be a
    # real str, not merely something that compares equal to one.
    assert type(captured[0]) is str
    assert captured == ("b",)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def test_handle_named_groups():
    """A named group is retrievable by name from the match."""
    found = eval_regex("(?P<foo>a|b)c", 0, "bc", 0)
    assert found is not None
    assert found["foo"] == "b"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def test_handle_optional_named_groups():
    """An optional named group that did not participate reads back as None."""
    for pattern, text in (("a(?P<foo>b)?", "a"), ("a(?P<foo>b)?c", "ac")):
        assert eval_regex(pattern, 0, text, 0)["foo"] is None
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def test_handle_nested_groups():
    """Nested and repeated groups report the expected ``groups()`` tuples."""
    nested = "(a|b(xx))+(c)?"
    assert eval_regex(nested, 0, "bxxc", 0) is not None
    assert eval_regex("(bxx)(c)?", 0, "bxxc", 0).groups() == ("bxx", "c")
    assert eval_regex(nested, 0, "bxxc", 0).groups() == ("bxx", "xx", "c")
    # Groups that never participated come back as None.
    assert eval_regex(nested, 0, "a", 0).groups() == ("a", None, None)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def test_with_fuzzed_inputs() -> None:
    """Compare ``eval_regex`` with CPython's ``re`` on seeded random inputs.

    One hundred (pattern, subject, offset) triples are drawn from a
    fixed-seed generator, so the exact same cases run every time.
    """
    rand = random.Random(253209)

    def check(pattern, literal_string, offset):
        # Match the same input both ways and compare every observable field.
        flags = re.ASCII | re.DOTALL
        sym_match = eval_regex(pattern, flags, literal_string, offset)
        py_match = re.compile(pattern, flags).match(literal_string, offset)
        assert (sym_match is None) == (py_match is None), (py_match, sym_match)
        if py_match is None:
            return
        assert py_match.span() == sym_match.span()
        assert py_match.group(0) == sym_match.group(0)
        assert py_match.groups() == sym_match.groups()
        assert py_match.pos == sym_match.pos
        assert py_match.endpos == sym_match.endpos
        assert py_match.lastgroup == sym_match.lastgroup

    # The order of rand.choice calls below determines the generated cases;
    # keep it unchanged so the seeded sequence stays the same.
    for _trial in range(100):
        literal_string = "".join(
            rand.choice(["a", "5", "_"])
            for _ in range(rand.choice([0, 1, 1, 2, 2, 3, 4]))
        )
        pattern = "".join(
            rand.choice(["a", "5", "."]) + rand.choice(["", "", "+", "*"])
            for _ in range(rand.choice([0, 1, 1, 2, 2]))
        )
        offset = rand.choice([0, 0, 0, 0, 1])
        check(pattern, literal_string, offset)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def test_fullmatch_basic_fail() -> None:
    # check_states is expected to report POST_FAIL for the contract on f.
    def f(s: str) -> Optional[re.Match]:
        """post: _"""
        return re.compile("a").fullmatch(s)

    expected_state = POST_FAIL
    check_states(f, expected_state)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def test_star_fail() -> None:
    # check_states is expected to report POST_FAIL for the contract on f.
    def f(s: str) -> bool:
        """
        pre: s[1] == 'a'
        post: _
        """
        return not re.fullmatch("a*", s)

    expected_state = POST_FAIL
    check_states(f, expected_state)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def test_plus_unknown() -> None:
    # check_states is expected to report CANNOT_CONFIRM for the contract on f.
    def f(s: str) -> bool:
        """
        pre: len(s) > 0
        post: _
        """
        return bool(re.fullmatch(".+", s, re.DOTALL))

    expected_state = CANNOT_CONFIRM
    check_states(f, expected_state)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def test_greedy_backtracking() -> None:
    # check_states is expected to report CONFIRMED: on a three-character
    # string the match of ".+." must end at index 3.
    def f(s: str) -> int:
        """
        pre: len(s) == 3
        post: _ == 3
        """
        return re.match(".+.", s, re.A | re.DOTALL).end()  # type: ignore

    expected_state = CONFIRMED
    check_states(f, expected_state)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def test_fullmatch_basic_ok() -> None:
    # check_states is expected to report CONFIRMED for the contract on f.
    def f(s: str) -> Optional[re.Match]:
        """
        pre: s == 'a'
        post: _
        """
        return re.compile("a").fullmatch(s)

    expected_state = CONFIRMED
    check_states(f, expected_state)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def test_fullmatch_matches_whole_string() -> None:
    # check_states is expected to report CONFIRMED: any full match of "a+b+"
    # has to end in "b".
    def f(s: str) -> Optional[re.Match]:
        """
        pre: len(s) == 3
        post: implies(_, s[-1] == 'b')
        """
        return re.compile("a+b+").fullmatch(s)

    expected_state = CONFIRMED
    check_states(f, expected_state)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def test_match_properties() -> None:
    """Check Match attributes first on CPython, then through check_states."""
    concrete = re.compile("(a)b").match("01ab9", 2, 4)

    # Before we begin, quickly double-check that our expectations match what Python
    # actually does:
    assert concrete is not None
    assert concrete.span() == (2, 4)
    assert concrete.groups() == ("a",)
    assert concrete.group(0) == "ab"
    assert concrete.group(1) == "a"
    assert concrete[0] == "ab"
    assert concrete.pos == 2
    assert concrete.endpos == 4
    assert concrete.lastgroup is None
    assert concrete.string == "01ab9"
    assert concrete.re.pattern == "(a)b"
    assert concrete.expand(r"z\1z") == "zaz"
    assert concrete.groupdict() == {}
    assert concrete.start(1) == 2
    assert concrete.lastindex == 1

    # The postconditions below restate the same expectations as a contract.
    def f(s: str) -> Optional[re.Match]:
        r"""
        pre: s == '01ab9'
        post: _.span() == (2, 4)
        post: _.groups() == ('a',)
        post: _.group(0) == 'ab'
        post: _.group(1) == 'a'
        post: _[0] == 'ab'
        post: _.pos == 2
        post: _.endpos == 4
        post: _.lastgroup == None
        post: _.string == '01ab9'
        post: _.re.pattern == '(a)b'
        post: _.expand(r'z\1z') == 'zaz'
        post: _.groupdict() == {}
        post: _.start(1) == 2
        post: _.lastindex == 1
        """
        return re.compile("(a)b").match(s, 2, 4)

    expected_state = CONFIRMED
    check_states(f, expected_state)
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def test_fullmatch_complex_fail() -> None:
    # check_states is expected to report POST_FAIL for the contract on f.
    def f(s: str) -> str:
        """
        pre: re.fullmatch('a+Xb', s)
        post: _ != 'X'
        """
        return s[2]

    expected_state = MessageType.POST_FAIL
    check_states(f, expected_state)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
@pytest.mark.demo
def test_match() -> None:
    # check_states is expected to report POST_FAIL for the contract on f.
    def f(s: str) -> Optional[re.Match]:
        """
        Can the captured character in this regex be "x"?

        NOTE: Although this use case is solved quickly, many regex problems will
        require a few minutes of processing time or more.

        post: _ is None or _.group(1) != "x"
        """
        return re.compile("a([a-z])").match(s)

    expected_state = POST_FAIL
    check_states(f, expected_state)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def test_match_basic_fail2() -> None:
    # check_states is expected to report POST_FAIL: match() only anchors at
    # the start, so a matching string may be longer than three characters.
    def f(s: str) -> bool:
        """post: implies(_, len(s) <= 3)"""
        return bool(re.compile("ab?c").match(s))

    expected_state = POST_FAIL
    check_states(f, expected_state)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def test_number_parse() -> None:
    # The pattern is compiled once here and captured by f as a closure.
    number_re = re.compile(r"(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?")

    def f(s: str):
        """
        pre: len(s) == 4
        post: not _
        """
        return bool(number_re.fullmatch(s))

    # POST_FAIL is expected, with the analysis capped at 20 iterations.
    options = AnalysisOptionSet(max_iterations=20)
    check_states(f, POST_FAIL, options)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def test_lookbehind() -> None:
    # The pattern is compiled once here and captured by f as a closure.
    regex = re.compile(r"(?<=a)bc")

    def f(s: str):
        """
        post: not _
        """
        return bool(regex.search(s))

    # check_states is expected to report POST_FAIL for the contract on f.
    expected_state = POST_FAIL
    check_states(f, expected_state)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def test_backref_re():
    """Check which named group of ``_BACKREF_STR_RE`` captures each template."""
    expectations = (
        (r"\1", "num", "1"),
        (r"ab\1cd", "num", "1"),
        (r"$%^ \g<_cat> &*", "named", "_cat"),
        (r"\g< cat>", "namedother", " cat"),
        (r"\g<0>", "namednum", "0"),
        (r"\g<+100>", "namednum", "+100"),
        (r"\1 foo \2", "num", "1"),
    )
    for template, group_name, captured in expectations:
        assert _BACKREF_STR_RE.fullmatch(template).group(group_name) == captured

    # "\g<0>" is OK; "\0" is not:
    assert _BACKREF_STR_RE.fullmatch(r"\g<0>")
    assert not _BACKREF_STR_RE.fullmatch(r"\0")
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def test_template_expansion():
    """Match.expand handles numeric, named and whole-match backreferences."""
    regex = re.compile("(a)(?P<foo>b)")
    expansions = (
        (r"z\1z", "zaz"),
        (r"z\g<foo>z", "zbz"),
        (r"z\g<0>z", "zabz"),
        (r"\1z\1\1", "azaa"),
    )
    # NOTE(review): the nesting of the two ``with`` blocks is reconstructed;
    # confirm which statements belong under NoTracing.
    with standalone_statespace as space:
        with NoTracing():
            s = LazyIntSymbolicStr([ord(ch) for ch in "abc"])
        match = regex.match(s)
        for template, expanded in expansions:
            assert match.expand(template) == expanded
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def test_finditer():
    """finditer over a symbolic string yields three matches and then stops."""
    regex = re.compile("a")
    # NOTE(review): the nesting of the two ``with`` blocks is reconstructed;
    # confirm which statements belong under NoTracing.
    with standalone_statespace as space:
        with NoTracing():
            s = LazyIntSymbolicStr([ord(ch) for ch in "abaa"])
        itr = regex.finditer(s)
        # NOTE(review): CPython documents Match.pos as the search start passed
        # in, which is 0 for every finditer match; confirm that 0/2/3 is the
        # intended expectation here (rather than .start()).
        for expected_pos in (0, 2, 3):
            assert next(itr).pos == expected_pos
        try:
            unexpected_match = next(itr)
        except StopIteration:
            pass
        else:
            assert False, unexpected_match
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def test_charmatch_literal_does_not_fork():
    """Matching a character class on known characters must not call smt_fork."""
    letters = re.compile("[a-z]")
    # NOTE(review): the nesting of the two ``with`` blocks is reconstructed;
    # confirm which statements belong under NoTracing.
    with standalone_statespace as space:
        with NoTracing():
            s = LazyIntSymbolicStr([ord(ch) for ch in "abaa"])

            def explode(*a, **kw):
                raise CrossHairInternal

            # Any fork attempt from here on raises instead of branching.
            space.smt_fork = explode
        match = letters.match(s)
        assert match
        assert match.group(0) == "a"
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def test_symbolic_offset():
    """Pattern.match accepts symbolic ``pos`` / ``endpos`` arguments."""
    _all_zeros = re.compile("0*$")
    # NOTE(review): the nesting of the two ``with`` blocks is reconstructed;
    # confirm which statements belong under NoTracing.
    with standalone_statespace as space:
        with NoTracing():
            string = LazyIntSymbolicStr([ord(ch) for ch in "21000"])
            offset = proxy_for_type(int, "offset")
            endpos = proxy_for_type(int, "endpos")
            # Pin the symbolic integers to concrete values.
            space.add(offset == 2)
            space.add(endpos == 5)
        longer = string + "1"
        assert _all_zeros.match(string, offset)
        assert not _all_zeros.match(string, offset - 1)
        assert not _all_zeros.match(longer, offset)
        assert _all_zeros.match(longer, offset, endpos)
        assert not _all_zeros.match(longer, offset, endpos + 1)
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
@pytest.mark.parametrize(
    "patt_char,match_char",
    [
        ("ß", "ẞ"),
        ("ẞ", "ß"),
        ("İ", "i"),
        ("i", "İ"),
        ("Ⓐ", "ⓐ"),
        ("ⓐ", "Ⓐ"),
    ],
)
def test_ignorecase_matches(space, patt_char, match_char):
    """Case-insensitive pairs must match both concretely and symbolically."""
    pattern = re.compile(patt_char, re.IGNORECASE)
    # sanity check that regular python does what we expect:
    assert pattern.fullmatch(match_char)
    code_points = [ord(ch) for ch in match_char]
    symbolic_match_char = LazyIntSymbolicStr(code_points)
    with ResumedTracing():
        assert pattern.fullmatch(symbolic_match_char)
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
@pytest.mark.parametrize(
    "patt_char,match_char",
    [
        ("a", "ⓐ"),
        ("ß".upper(), "ß"),
        ("ß", "ß".upper()),
        ("İ".lower(), "İ"),
        ("İ", "İ".lower()),
    ],
)
def test_ignorecase_nonmatches(space, patt_char, match_char):
    """Pairs that must not match under IGNORECASE, concretely or symbolically."""
    pattern = re.compile(patt_char, re.IGNORECASE)
    # sanity check that regular python does what we expect:
    assert not pattern.fullmatch(match_char)
    code_points = [ord(ch) for ch in match_char]
    symbolic_match_char = LazyIntSymbolicStr(code_points)
    with ResumedTracing():
        assert not pattern.fullmatch(symbolic_match_char)
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def test_bytes_based_pattern(space):
    """Bytes patterns work against a SymbolicBytes subject."""
    string = SymbolicBytes(b"abbc")
    with ResumedTracing():
        assert re.fullmatch(b"ab+c", string)
        spans = [m.span() for m in re.finditer(b"b", string)]
        assert spans == [(1, 2), (2, 3)]
|