PyPI - crosshair-tool - Versions diffs - 0.0.56__cp39-cp39-macosx_11_0_arm64.whl → 0.0.100__cp39-cp39-macosx_11_0_arm64.whl - Mend

crosshair-tool 0.0.56__cp39-cp39-macosx_11_0_arm64.whl → 0.0.100__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (123) hide show

_crosshair_tracers.cpython-39-darwin.so +0 -0
crosshair/__init__.py +1 -1
crosshair/_mark_stacks.h +51 -24
crosshair/_tracers.h +9 -5
crosshair/_tracers_test.py +19 -9
crosshair/auditwall.py +9 -8
crosshair/auditwall_test.py +31 -19
crosshair/codeconfig.py +3 -2
crosshair/condition_parser.py +17 -133
crosshair/condition_parser_test.py +54 -96
crosshair/conftest.py +1 -1
crosshair/copyext.py +91 -22
crosshair/copyext_test.py +33 -0
crosshair/core.py +259 -203
crosshair/core_and_libs.py +20 -0
crosshair/core_regestered_types_test.py +82 -0
crosshair/core_test.py +693 -664
crosshair/diff_behavior.py +76 -21
crosshair/diff_behavior_test.py +132 -23
crosshair/dynamic_typing.py +128 -18
crosshair/dynamic_typing_test.py +91 -4
crosshair/enforce.py +1 -6
crosshair/enforce_test.py +15 -23
crosshair/examples/check_examples_test.py +2 -1
crosshair/fnutil.py +2 -3
crosshair/fnutil_test.py +0 -7
crosshair/fuzz_core_test.py +70 -83
crosshair/libimpl/arraylib.py +10 -7
crosshair/libimpl/binascii_ch_test.py +30 -0
crosshair/libimpl/binascii_test.py +67 -0
crosshair/libimpl/binasciilib.py +150 -0
crosshair/libimpl/bisectlib_test.py +5 -5
crosshair/libimpl/builtinslib.py +1002 -682
crosshair/libimpl/builtinslib_ch_test.py +108 -30
crosshair/libimpl/builtinslib_test.py +431 -143
crosshair/libimpl/codecslib.py +22 -2
crosshair/libimpl/codecslib_test.py +41 -9
crosshair/libimpl/collectionslib.py +44 -8
crosshair/libimpl/collectionslib_test.py +108 -20
crosshair/libimpl/copylib.py +1 -1
crosshair/libimpl/copylib_test.py +18 -0
crosshair/libimpl/datetimelib.py +84 -67
crosshair/libimpl/datetimelib_ch_test.py +12 -7
crosshair/libimpl/datetimelib_test.py +5 -6
crosshair/libimpl/decimallib.py +5257 -0
crosshair/libimpl/decimallib_ch_test.py +78 -0
crosshair/libimpl/decimallib_test.py +76 -0
crosshair/libimpl/encodings/_encutil.py +21 -11
crosshair/libimpl/fractionlib.py +16 -0
crosshair/libimpl/fractionlib_test.py +80 -0
crosshair/libimpl/functoolslib.py +19 -7
crosshair/libimpl/functoolslib_test.py +22 -6
crosshair/libimpl/hashliblib.py +30 -0
crosshair/libimpl/hashliblib_test.py +18 -0
crosshair/libimpl/heapqlib.py +32 -5
crosshair/libimpl/heapqlib_test.py +15 -12
crosshair/libimpl/iolib.py +7 -4
crosshair/libimpl/ipaddresslib.py +8 -0
crosshair/libimpl/itertoolslib_test.py +1 -1
crosshair/libimpl/mathlib.py +165 -2
crosshair/libimpl/mathlib_ch_test.py +44 -0
crosshair/libimpl/mathlib_test.py +59 -16
crosshair/libimpl/oslib.py +7 -0
crosshair/libimpl/pathliblib_test.py +10 -0
crosshair/libimpl/randomlib.py +1 -0
crosshair/libimpl/randomlib_test.py +6 -4
crosshair/libimpl/relib.py +180 -59
crosshair/libimpl/relib_ch_test.py +26 -2
crosshair/libimpl/relib_test.py +77 -14
crosshair/libimpl/timelib.py +35 -13
crosshair/libimpl/timelib_test.py +13 -3
crosshair/libimpl/typeslib.py +15 -0
crosshair/libimpl/typeslib_test.py +36 -0
crosshair/libimpl/unicodedatalib_test.py +3 -3
crosshair/libimpl/weakreflib.py +13 -0
crosshair/libimpl/weakreflib_test.py +69 -0
crosshair/libimpl/zliblib.py +15 -0
crosshair/libimpl/zliblib_test.py +13 -0
crosshair/lsp_server.py +21 -10
crosshair/main.py +48 -28
crosshair/main_test.py +59 -14
crosshair/objectproxy.py +39 -14
crosshair/objectproxy_test.py +27 -13
crosshair/opcode_intercept.py +212 -24
crosshair/opcode_intercept_test.py +172 -18
crosshair/options.py +0 -1
crosshair/patch_equivalence_test.py +5 -21
crosshair/path_cover.py +7 -5
crosshair/path_search.py +6 -4
crosshair/path_search_test.py +1 -2
crosshair/pathing_oracle.py +53 -10
crosshair/pathing_oracle_test.py +21 -0
crosshair/pure_importer_test.py +5 -21
crosshair/register_contract.py +16 -6
crosshair/register_contract_test.py +2 -14
crosshair/simplestructs.py +154 -85
crosshair/simplestructs_test.py +16 -2
crosshair/smtlib.py +24 -0
crosshair/smtlib_test.py +14 -0
crosshair/statespace.py +319 -196
crosshair/statespace_test.py +45 -0
crosshair/stubs_parser.py +0 -2
crosshair/test_util.py +87 -25
crosshair/test_util_test.py +26 -0
crosshair/tools/check_init_and_setup_coincide.py +0 -3
crosshair/tools/generate_demo_table.py +2 -2
crosshair/tracers.py +141 -49
crosshair/type_repo.py +11 -4
crosshair/unicode_categories.py +1 -0
crosshair/util.py +158 -76
crosshair/util_test.py +13 -20
crosshair/watcher.py +4 -4
crosshair/z3util.py +1 -1
{crosshair_tool-0.0.56.dist-info → crosshair_tool-0.0.100.dist-info}/METADATA +45 -36
crosshair_tool-0.0.100.dist-info/RECORD +176 -0
{crosshair_tool-0.0.56.dist-info → crosshair_tool-0.0.100.dist-info}/WHEEL +2 -1
crosshair/examples/hypothesis/__init__.py +0 -2
crosshair/examples/hypothesis/bugs_detected/simple_strategies.py +0 -74
crosshair_tool-0.0.56.dist-info/RECORD +0 -152
/crosshair/{examples/hypothesis/bugs_detected/__init__.py → py.typed} +0 -0
{crosshair_tool-0.0.56.dist-info → crosshair_tool-0.0.100.dist-info}/entry_points.txt +0 -0
{crosshair_tool-0.0.56.dist-info → crosshair_tool-0.0.100.dist-info/licenses}/LICENSE +0 -0
{crosshair_tool-0.0.56.dist-info → crosshair_tool-0.0.100.dist-info}/top_level.txt +0 -0

crosshair/libimpl/relib.py CHANGED Viewed

@@ -1,21 +1,25 @@
+import operator
 import re
 import sys
+from array import array
+from unicodedata import category
 if sys.version_info < (3, 11):
     import sre_parse as re_parser
 else:
-    import re._parser as re_parser
+    import re._parser as re_parser  # type: ignore
 from sys import maxunicode
-from typing import Any, Callable, Iterable, List, Optional, Tuple, Union, cast
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
 import z3  # type: ignore
 from crosshair.core import deep_realize, realize, register_patch, with_realized_args
-from crosshair.libimpl.builtinslib import AnySymbolicStr, SymbolicInt
+from crosshair.libimpl.builtinslib import AnySymbolicStr, BytesLike, SymbolicInt
 from crosshair.statespace import context_statespace
 from crosshair.tracers import NoTracing, ResumedTracing, is_tracing
 from crosshair.unicode_categories import CharMask, get_unicode_categories
-from crosshair.util import CrosshairInternal, debug, is_iterable
+from crosshair.util import CrossHairInternal, CrossHairValue, debug, is_iterable
 ANY = re_parser.ANY
 ASSERT = re_parser.ASSERT
@@ -51,6 +55,8 @@ class ReUnhandled(Exception):
     pass
+_ALL_BYTES_TYPES = (bytes, bytearray, memoryview, array)
+_STR_AND_BYTES_TYPES = (str, *_ALL_BYTES_TYPES)
 _NO_CHAR = CharMask([])
 _ANY_CHAR = CharMask([(0, maxunicode + 1)])
 _ANY_NON_NEWLINE_CHAR = _ANY_CHAR.subtract(CharMask([ord("\n")]))
@@ -74,8 +80,41 @@ _UNICODE_WHITESPACE_CHAR = _ASCII_WHITESPACE_CHAR.union(
     )
 )
+_CASEABLE_CHARS = None
+def caseable_chars():
+    global _CASEABLE_CHARS
+    if _CASEABLE_CHARS is None:
+        codepoints = []
+        for i in range(sys.maxunicode + 1):
+            ch = chr(i)
+            # Exclude the (large) "Other Letter" group that doesn't caseswap:
+            if category(ch) in ("Lo"):
+                assert ch.casefold() == ch
+            else:
+                codepoints.append(ch)
+        _CASEABLE_CHARS = "".join(codepoints)
+    return _CASEABLE_CHARS
+_UNICODE_IGNORECASE_MASKS: Dict[int, CharMask] = {}  # codepoint -> CharMask
+def unicode_ignorecase_mask(cp: int) -> CharMask:
+    mask = _UNICODE_IGNORECASE_MASKS.get(cp)
+    if mask is None:
+        chars = caseable_chars()
+        matches = re.compile(chr(cp), re.IGNORECASE).findall(chars)
+        mask = CharMask([ord(c) for c in matches])
+        _UNICODE_IGNORECASE_MASKS[cp] = mask
+    return mask
-def single_char_mask(parsed: Tuple[object, Any], flags: int) -> Optional[CharMask]:
+def single_char_mask(
+    parsed: Tuple[object, Any], flags: int, ord=ord, chr=chr
+) -> Optional[CharMask]:
     """
     Compute a CharMask from a parsed regex.
@@ -88,10 +127,7 @@ def single_char_mask(parsed: Tuple[object, Any], flags: int) -> Optional[CharMas
     isascii = re.ASCII & flags
     if op in (LITERAL, NOT_LITERAL):
         if re.IGNORECASE & flags:
-            # NOTE: I *think* IGNORECASE does not do "fancy" case matching like the
-            # casefold() builtin.
-            # TODO: This fails on 1-to-many case transformations
-            ret = CharMask([ord(chr(arg).lower()), ord(chr(arg).upper())])
+            ret = unicode_ignorecase_mask(arg)
         else:
             ret = CharMask([arg])
         if op is NOT_LITERAL:
@@ -101,6 +137,7 @@ def single_char_mask(parsed: Tuple[object, Any], flags: int) -> Optional[CharMas
         if re.IGNORECASE & flags:
             ret = CharMask(
                 [
+                    # TODO: among other issues, this doesn't handle multi-codepoint caseswaps:
                     (ord(chr(lo).lower()), ord(chr(hi).lower()) + 1),
                     (ord(chr(lo).upper()), ord(chr(hi).upper()) + 1),
                 ]
@@ -113,7 +150,7 @@ def single_char_mask(parsed: Tuple[object, Any], flags: int) -> Optional[CharMas
         if negate:
             arg = arg[1:]
         for term in arg:
-            submask = single_char_mask(term, flags)
+            submask = single_char_mask(term, flags, ord=ord, chr=chr)
             if submask is None:
                 raise ReUnhandled("IN contains non-single-char expression")
             ret = ret.union(submask)
@@ -137,6 +174,7 @@ def single_char_mask(parsed: Tuple[object, Any], flags: int) -> Optional[CharMas
         else:
             raise ReUnhandled("Unsupported category: ", arg)
     elif op is ANY and arg is None:
+        # TODO: test dot under ascii mode (seems like we should fall through to the re.ASCII check below)
         return _ANY_CHAR if re.DOTALL & flags else _ANY_NON_NEWLINE_CHAR
     else:
         return None
@@ -149,6 +187,13 @@ def single_char_mask(parsed: Tuple[object, Any], flags: int) -> Optional[CharMas
 Span = Tuple[int, Union[int, SymbolicInt]]
+def _traced_binop(a, op, b):
+    if isinstance(a, CrossHairValue) or isinstance(b, CrossHairValue):
+        with ResumedTracing():
+            return op(a, b)
+    return op(a, b)
 class _MatchPart:
     def __init__(self, groups: List[Optional[Span]]):
         self._groups = groups
@@ -158,11 +203,21 @@ class _MatchPart:
         assert span is not None
         return span
+    def _clamp_all_spans(self, start, end):
+        groups = self._groups
+        for idx, span in enumerate(groups):
+            if span is not None:
+                (span_start, span_end) = span
+                with ResumedTracing():
+                    if span_start == span_end:
+                        if span_start < start:
+                            groups[idx] = (start, start)
+                        if span_start > end:
+                            groups[idx] = (end, end)
     def isempty(self):
-        for (start, end) in self._groups:
-            if end > start:
-                return False
-        return True
+        (start, end) = self._groups[0]
+        return _traced_binop(end, operator.le, start)
     def __bool__(self):
         return True
@@ -194,8 +249,7 @@ class _MatchPart:
         return self._groups[group]
-_BACKREF_RE = re.compile(
-    r"""
+_BACKREF_RE_SOURCE = rb"""
     (?P<prefix> .*?)
     \\
     (?:
@@ -206,8 +260,10 @@ _BACKREF_RE = re.compile(
         g\< (?P<namedother>          .* ) \>
     )
     (?P<suffix> .*)
-""",
-    re.VERBOSE | re.MULTILINE,
+"""
+_BACKREF_BYTES_RE = re.compile(_BACKREF_RE_SOURCE, re.VERBOSE | re.MULTILINE)
+_BACKREF_STR_RE = re.compile(
+    str(_BACKREF_RE_SOURCE, "ascii"), re.VERBOSE | re.MULTILINE
 )
@@ -236,14 +292,14 @@ class _Match(_MatchPart):
             if idx in _idx_to_name:
                 self.lastgroup = _idx_to_name[idx]
-    def __ch_deep_realize__(self):
+    def __ch_deep_realize__(self, memo):
         # We cannot manually create realistic Match instances.
         # Realize our contents - it's better than nothing
         return _Match(
-            deep_realize(self._groups),
+            deep_realize(self._groups, memo),
             realize(self.pos),
             realize(self.endpos),
-            deep_realize(self.re),
+            deep_realize(self.re, memo),
             realize(self.string),
         )
@@ -251,11 +307,10 @@ class _Match(_MatchPart):
         return self.group(idx)
     def expand(self, template):
-        if not isinstance(template, str):
-            raise TypeError
+        backref_re = _BACKREF_STR_RE if isinstance(template, str) else _BACKREF_BYTES_RE
         with NoTracing():
             template = realize(template)  # Usually this is a literal string
-            match = _BACKREF_RE.fullmatch(template)
+            match = backref_re.fullmatch(template)
             if match is None:
                 return template
             prefix, num, namednum, named, _, suffix = match.groups()
@@ -338,6 +393,8 @@ def _internal_match_patterns(
     string: AnySymbolicStr,
     offset: int,
     allow_empty: bool = True,
+    ord=ord,
+    chr=chr,
 ) -> Optional[_MatchPart]:
     """
     >>> import sre_parse
@@ -361,7 +418,13 @@ def _internal_match_patterns(
     def continue_matching(prefix):
         sub_allow_empty = allow_empty if prefix.isempty() else True
         suffix = _internal_match_patterns(
-            top_patterns[1:], flags, string, prefix.end(), sub_allow_empty
+            top_patterns[1:],
+            flags,
+            string,
+            prefix.end(),
+            sub_allow_empty,
+            ord=ord,
+            chr=chr,
         )
         if suffix is None:
             return None
@@ -371,19 +434,23 @@ def _internal_match_patterns(
     # Seems like this causes nondeterminism due to a global LRU cache used by the typing module.
     def fork_on(expr, sz):
         if space.smt_fork(expr):
-            return continue_matching(_MatchPart([(offset, offset + sz)]))
+            return continue_matching(
+                _MatchPart([(offset, _traced_binop(offset, operator.add, sz))])
+            )
         else:
             return None
-    mask = single_char_mask(pattern, flags)
+    mask = single_char_mask(pattern, flags, ord=ord, chr=chr)
     if mask is not None:
         with ResumedTracing():
-            if len(string) <= offset:
+            if any([offset < 0, offset >= len(string)]):
                 return None
             char = ord(string[offset])
         if isinstance(char, int):  # Concrete int? Just check it!
             if mask.covers(char):
-                return continue_matching(_MatchPart([(offset, offset + 1)]))
+                return continue_matching(
+                    _MatchPart([(offset, _traced_binop(offset, operator.add, 1))])
+                )
             else:
                 return None
         smt_ch = SymbolicInt._coerce_to_smt_sort(char)
@@ -398,7 +465,7 @@ def _internal_match_patterns(
         overall_match = _MatchPart([(offset, offset)])
         while reps < min_repeat:
             submatch = _internal_match_patterns(
-                subpattern, flags, string, overall_match.end(), True
+                subpattern, flags, string, overall_match.end(), True, ord=ord, chr=chr
             )
             if submatch is None:
                 return None
@@ -423,7 +490,13 @@ def _internal_match_patterns(
         )
         remainder_allow_empty = allow_empty or not overall_match.isempty()
         remainder_match = _internal_match_patterns(
-            remaining_matcher, flags, string, overall_match.end(), remainder_allow_empty
+            remaining_matcher,
+            flags,
+            string,
+            overall_match.end(),
+            remainder_allow_empty,
+            ord=ord,
+            chr=chr,
         )
         if remainder_match is not None:
             return overall_match._add_match(remainder_match)
@@ -438,7 +511,7 @@ def _internal_match_patterns(
         branches = arg[1]
         first_path = list(branches[0]) + list(top_patterns)[1:]
         submatch = _internal_match_patterns(
-            first_path, flags, string, offset, allow_empty
+            first_path, flags, string, offset, allow_empty, ord=ord, chr=chr
         )
         if submatch is not None:
             return submatch
@@ -451,6 +524,8 @@ def _internal_match_patterns(
                 string,
                 offset,
                 allow_empty,
+                ord=ord,
+                chr=chr,
             )
     elif op is AT:
         if arg in (AT_BEGINNING, AT_BEGINNING_STRING):
@@ -500,7 +575,9 @@ def _internal_match_patterns(
         (direction_int, subpattern) = arg
         positive_look = op == ASSERT
         if direction_int == 1:
-            matched = _internal_match_patterns(subpattern, flags, string, offset, True)
+            matched = _internal_match_patterns(
+                subpattern, flags, string, offset, True, ord=ord, chr=chr
+            )
         else:
             assert direction_int == -1
             minwidth, maxwidth = subpattern.getwidth()
@@ -509,11 +586,13 @@ def _internal_match_patterns(
             rewound = offset - minwidth
             if rewound < 0:
                 return None
-            matched = _internal_match_patterns(subpattern, flags, string, rewound, True)
+            matched = _internal_match_patterns(
+                subpattern, flags, string, rewound, True, ord=ord, chr=chr
+            )
         if bool(matched) != bool(positive_look):
             return None
         return _internal_match_patterns(
-            top_patterns[1:], flags, string, offset, allow_empty
+            top_patterns[1:], flags, string, offset, allow_empty, ord=ord, chr=chr
         )
     elif op is SUBPATTERN:
         (groupnum, _a, _b, subpatterns) = arg
@@ -524,7 +603,9 @@ def _internal_match_patterns(
             + [(_END_GROUP_MARKER, (groupnum, offset))]
             + list(top_patterns)[1:]
         )
-        return _internal_match_patterns(new_top, flags, string, offset, allow_empty)
+        return _internal_match_patterns(
+            new_top, flags, string, offset, allow_empty, ord=ord, chr=chr
+        )
     elif op is _END_GROUP_MARKER:
         (group_num, begin) = arg
         match = continue_matching(_MatchPart([(offset, offset)]))
@@ -539,21 +620,33 @@ def _internal_match_patterns(
 def _match_pattern(
     compiled_regex: re.Pattern,
-    orig_str: AnySymbolicStr,
+    orig_str: Union[AnySymbolicStr, BytesLike],
     pos: int,
     endpos: Optional[int] = None,
     subpattern: Optional[List] = None,
     allow_empty=True,
+    ord=ord,
+    chr=chr,
 ) -> Optional[_Match]:
     assert not is_tracing()
     if subpattern is None:
         subpattern = cast(List, parse(compiled_regex.pattern, compiled_regex.flags))
-    trimmed_str = orig_str[:endpos]
+    with ResumedTracing():
+        trimmed_str = orig_str[:endpos]
     matchpart = _internal_match_patterns(
-        subpattern, compiled_regex.flags, trimmed_str, pos, allow_empty
+        subpattern,
+        compiled_regex.flags,
+        trimmed_str,
+        pos,
+        allow_empty,
+        ord=ord,
+        chr=chr,
     )
     if matchpart is None:
         return None
+    match_start, match_end = matchpart._fullspan()
+    if _traced_binop(match_start, operator.eq, match_end):
+        matchpart._clamp_all_spans(0, len(orig_str))
     return _Match(matchpart._groups, pos, endpos, compiled_regex, orig_str)
@@ -564,8 +657,23 @@ def _compile(*a):
         return re._compile(*deep_realize(a))
+def _check_str_or_bytes(patt: re.Pattern, obj: Any):
+    if not isinstance(patt, re.Pattern):
+        raise TypeError  # TODO: e.g. "descriptor 'search' for 're.Pattern' objects doesn't apply to a 'str' object"
+    if not isinstance(obj, _STR_AND_BYTES_TYPES):
+        raise TypeError(f"expected string or bytes-like object, got '{type(obj)}'")
+    if isinstance(patt.pattern, str):
+        if isinstance(obj, str):
+            return (chr, ord)
+        raise TypeError("cannot use a bytes pattern on a string-like object")
+    else:
+        if isinstance(obj, _ALL_BYTES_TYPES):
+            return (lambda i: bytes([i]), lambda i: i)
+        raise TypeError("cannot use a string pattern on a bytes-like object")
 def _finditer_symbolic(
-    patt: re.Pattern, string: AnySymbolicStr, pos: int, endpos: int
+    patt: re.Pattern, string: AnySymbolicStr, pos: int, endpos: int, chr=chr, ord=ord
 ) -> Iterable[_Match]:
     last_match_was_empty = False
     while True:
@@ -573,7 +681,9 @@ def _finditer_symbolic(
             if pos > endpos:
                 break
             allow_empty = not last_match_was_empty
-            match = _match_pattern(patt, string, pos, endpos, allow_empty=allow_empty)
+            match = _match_pattern(
+                patt, string, pos, endpos, allow_empty=allow_empty, chr=chr, ord=ord
+            )
             last_match_was_empty = False
             if not match:
                 pos += 1
@@ -582,7 +692,7 @@ def _finditer_symbolic(
         with NoTracing():
             if match.start() == match.end():
                 if not allow_empty:
-                    raise CrosshairInternal("Unexpected empty match")
+                    raise CrossHairInternal("Unexpected empty match")
                 last_match_was_empty = True
             else:
                 pos = match.end()
@@ -590,12 +700,11 @@ def _finditer_symbolic(
 def _finditer(
     self: re.Pattern,
-    string: Union[str, AnySymbolicStr],
+    string: Union[str, AnySymbolicStr, bytes],
     pos: int = 0,
     endpos: Optional[int] = None,
 ) -> Iterable[Union[re.Match, _Match]]:
-    if not isinstance(string, str):
-        raise TypeError
+    chr, ord = _check_str_or_bytes(self, string)
     if not isinstance(pos, int):
         raise TypeError
     if not (endpos is None or isinstance(endpos, int)):
@@ -607,7 +716,9 @@ def _finditer(
             pos, endpos, _ = slice(pos, endpos, 1).indices(realize(strlen))
             with ResumedTracing():
                 try:
-                    yield from _finditer_symbolic(self, string, pos, endpos)
+                    yield from _finditer_symbolic(
+                        self, string, pos, endpos, chr=chr, ord=ord
+                    )
                     return
                 except ReUnhandled as e:
                     debug("Unsupported symbolic regex", self.pattern, e)
@@ -617,13 +728,19 @@ def _finditer(
         yield from re.Pattern.finditer(self, realize(string), pos, endpos)
-def _fullmatch(self, string: Union[str, AnySymbolicStr], pos=0, endpos=None):
+def _fullmatch(
+    self: re.Pattern, string: Union[str, AnySymbolicStr, bytes], pos=0, endpos=None
+):
     with NoTracing():
-        if isinstance(string, AnySymbolicStr):
+        if isinstance(string, (AnySymbolicStr, BytesLike)):
+            with ResumedTracing():
+                chr, ord = _check_str_or_bytes(self, string)
             try:
                 compiled = cast(List, parse(self.pattern, self.flags))
                 compiled.append((AT, AT_END_STRING))
-                return _match_pattern(self, string, pos, endpos, compiled)
+                return _match_pattern(
+                    self, string, pos, endpos, compiled, chr=chr, ord=ord
+                )
             except ReUnhandled as e:
                 debug("Unsupported symbolic regex", self.pattern, e)
         if endpos is None:
@@ -636,9 +753,11 @@ def _match(
     self, string: Union[str, AnySymbolicStr], pos=0, endpos=None
 ) -> Union[None, re.Match, _Match]:
     with NoTracing():
-        if isinstance(string, AnySymbolicStr):
+        if isinstance(string, (AnySymbolicStr, BytesLike)):
+            with ResumedTracing():
+                chr, ord = _check_str_or_bytes(self, string)
             try:
-                return _match_pattern(self, string, pos, endpos)
+                return _match_pattern(self, string, pos, endpos, chr=chr, ord=ord)
             except ReUnhandled as e:
                 debug("Unsupported symbolic regex", self.pattern, e)
         if endpos is None:
@@ -648,10 +767,12 @@ def _match(
 def _search(
-    self, string: Union[str, AnySymbolicStr], pos: int = 0, endpos: Optional[int] = None
+    self: re.Pattern,
+    string: Union[str, AnySymbolicStr, bytes],
+    pos: int = 0,
+    endpos: Optional[int] = None,
 ) -> Union[None, re.Match, _Match]:
-    if not isinstance(string, str):
-        raise TypeError
+    chr, ord = _check_str_or_bytes(self, string)
     if not isinstance(pos, int):
         raise TypeError
     if not (endpos is None or isinstance(endpos, int)):
@@ -659,11 +780,11 @@ def _search(
     pos, endpos = realize(pos), realize(endpos)
     mylen = string.__len__()
     with NoTracing():
-        if isinstance(string, AnySymbolicStr):
+        if isinstance(string, (AnySymbolicStr, BytesLike)):
             pos, endpos, _ = slice(pos, endpos, 1).indices(realize(mylen))
             try:
                 while pos < endpos:
-                    match = _match_pattern(self, string, pos, endpos)
+                    match = _match_pattern(self, string, pos, endpos, chr=chr, ord=ord)
                     if match:
                         return match
                     pos += 1
@@ -686,7 +807,8 @@ def _subn(
 ) -> Tuple[str, int]:
     if not isinstance(self, re.Pattern):
         raise TypeError
-    if isinstance(repl, str):
+    if isinstance(repl, _STR_AND_BYTES_TYPES):
+        _check_str_or_bytes(self, repl)
         def replfn(m):
             return m.expand(repl)
@@ -695,8 +817,7 @@ def _subn(
         replfn = repl
     else:
         raise TypeError
-    if not isinstance(string, str):
-        raise TypeError
+    _check_str_or_bytes(self, string)
     if not isinstance(count, int):
         raise TypeError
     match = self.search(string)

crosshair/libimpl/relib_ch_test.py CHANGED Viewed

@@ -12,7 +12,10 @@ from crosshair.test_util import ResultComparison, compare_results
 def groups(match: Optional[re.Match]) -> Optional[Sequence]:
     if match is None:
         return None
-    return match.groups(), match.start(), match.end()
+    return [
+        (match.start(i), match.end(i), match.group(i))
+        for i in range(len(match.groups()) + 1)
+    ]
 def check_inverted_categories(text: str, flags: int) -> ResultComparison:
@@ -45,6 +48,14 @@ def check_match_with_sliced_string(text: str) -> ResultComparison:
     return compare_results(lambda t: groups(re.match(r"^[ab]{2}\Z", t)), text[1:])
+def check_match_with_offsets(text: str, start: int, end: int) -> ResultComparison:
+    """post: _"""
+    # return compare_results(lambda t: groups(re.compile(r"a").match(t, start, end)), text)
+    return compare_results(
+        lambda t: groups(re.compile(r"(a*)(a*)").match(t, start, end)), text
+    )
 def check_findall(text: str, flags: int) -> ResultComparison:
     """post: _"""
     return compare_results(lambda t, f: re.findall("aa", t, f), text, flags)
@@ -111,7 +122,7 @@ def check_search_anchored_end(text: str, flags: int) -> ResultComparison:
 def check_subn(text: str, flags: int) -> ResultComparison:
     """post: _"""
-    return compare_results(lambda t, f: re.subn("aa", "ba", t, f), text, flags)
+    return compare_results(lambda t, f: re.subn("aa", "ba", t, flags=f), text, flags)
 def check_lookahead(text: str) -> ResultComparison:
@@ -134,6 +145,19 @@ def check_negative_lookbehind(text: str) -> ResultComparison:
     return compare_results(lambda t: groups(re.search(".(?<!b)", t)), text)
+# Bytes-based regexes
+def check_subn_bytes(text: bytes, flags: int) -> ResultComparison:
+    """post: _"""
+    return compare_results(lambda t, f: re.subn(b"a", b"b", t, flags=f), text, flags)
+def check_findall_bytes(text: bytes, flags: int) -> ResultComparison:
+    """post: _"""
+    return compare_results(lambda t, f: re.findall("aa", t, f), text, flags)
 # This is the only real test definition.
 # It runs crosshair on each of the "check" functions defined above.
 @pytest.mark.parametrize("fn_name", [fn for fn in dir() if fn.startswith("check_")])