readable-regex 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- readable_regex/__init__.py +5 -0
- readable_regex/builder.py +209 -0
- readable_regex/compiler.py +9 -0
- readable_regex/components.py +153 -0
- readable_regex/exclude_proxy.py +73 -0
- readable_regex/flags.py +20 -0
- readable_regex/py.typed +0 -0
- readable_regex-0.2.0.dist-info/METADATA +194 -0
- readable_regex-0.2.0.dist-info/RECORD +11 -0
- readable_regex-0.2.0.dist-info/WHEEL +4 -0
- readable_regex-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Sequence
|
|
5
|
+
|
|
6
|
+
from readable_regex.compiler import compile_components
|
|
7
|
+
from readable_regex.components import (
|
|
8
|
+
Anchor,
|
|
9
|
+
AnchorType,
|
|
10
|
+
AnyOf,
|
|
11
|
+
CharClass,
|
|
12
|
+
CharClassType,
|
|
13
|
+
Component,
|
|
14
|
+
ExcludeFilter,
|
|
15
|
+
Group,
|
|
16
|
+
Literal,
|
|
17
|
+
Quantifier,
|
|
18
|
+
QuantifierKind,
|
|
19
|
+
)
|
|
20
|
+
from readable_regex.exclude_proxy import ExcludeProxy
|
|
21
|
+
from readable_regex.flags import Flag, flags_to_re
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class RegexBuilder:
|
|
25
|
+
def __init__(
|
|
26
|
+
self,
|
|
27
|
+
components: tuple[Component, ...] = (),
|
|
28
|
+
flags: frozenset[Flag] = frozenset(),
|
|
29
|
+
) -> None:
|
|
30
|
+
self._components = components
|
|
31
|
+
self._flags = flags
|
|
32
|
+
self._compiled: re.Pattern[str] | None = None
|
|
33
|
+
|
|
34
|
+
def _extend(self, *new_components: Component) -> RegexBuilder:
|
|
35
|
+
return RegexBuilder(self._components + new_components, self._flags)
|
|
36
|
+
|
|
37
|
+
def _with_flag(self, flag: Flag) -> RegexBuilder:
|
|
38
|
+
return RegexBuilder(self._components, self._flags | {flag})
|
|
39
|
+
|
|
40
|
+
def _quantify_last(self, kind: QuantifierKind, **kwargs: int | None) -> RegexBuilder:
|
|
41
|
+
if not self._components:
|
|
42
|
+
raise ValueError("No component to quantify")
|
|
43
|
+
last = self._components[-1]
|
|
44
|
+
quantified = Quantifier(last, kind, **kwargs)
|
|
45
|
+
return RegexBuilder(self._components[:-1] + (quantified,), self._flags)
|
|
46
|
+
|
|
47
|
+
# ── Anchors & Literals ──────────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
def starts_with(self, text: str | None = None) -> RegexBuilder:
|
|
50
|
+
parts: list[Component] = [Anchor(AnchorType.START)]
|
|
51
|
+
if text is not None:
|
|
52
|
+
parts.append(Literal(text))
|
|
53
|
+
return self._extend(*parts)
|
|
54
|
+
|
|
55
|
+
def ends_with(self, text: str | None = None) -> RegexBuilder:
|
|
56
|
+
parts: list[Component] = []
|
|
57
|
+
if text is not None:
|
|
58
|
+
parts.append(Literal(text))
|
|
59
|
+
parts.append(Anchor(AnchorType.END))
|
|
60
|
+
return self._extend(*parts)
|
|
61
|
+
|
|
62
|
+
def then(self, text: str) -> RegexBuilder:
|
|
63
|
+
return self._extend(Literal(text))
|
|
64
|
+
|
|
65
|
+
# ── Character Classes (singular = one) ──────────────────────────
|
|
66
|
+
|
|
67
|
+
@property
|
|
68
|
+
def digit(self) -> RegexBuilder:
|
|
69
|
+
return self._extend(CharClass(CharClassType.DIGIT))
|
|
70
|
+
|
|
71
|
+
@property
|
|
72
|
+
def word(self) -> RegexBuilder:
|
|
73
|
+
return self._extend(CharClass(CharClassType.WORD))
|
|
74
|
+
|
|
75
|
+
@property
|
|
76
|
+
def whitespace(self) -> RegexBuilder:
|
|
77
|
+
return self._extend(CharClass(CharClassType.WHITESPACE))
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def any_char(self) -> RegexBuilder:
|
|
81
|
+
return self._extend(CharClass(CharClassType.ANY))
|
|
82
|
+
|
|
83
|
+
@property
|
|
84
|
+
def letter(self) -> RegexBuilder:
|
|
85
|
+
return self._extend(CharClass(CharClassType.LETTER))
|
|
86
|
+
|
|
87
|
+
# ── Character Classes (plural = one or more) ────────────────────
|
|
88
|
+
|
|
89
|
+
@property
|
|
90
|
+
def digits(self) -> RegexBuilder:
|
|
91
|
+
cc = CharClass(CharClassType.DIGIT)
|
|
92
|
+
return self._extend(Quantifier(cc, QuantifierKind.ONE_OR_MORE))
|
|
93
|
+
|
|
94
|
+
@property
|
|
95
|
+
def words(self) -> RegexBuilder:
|
|
96
|
+
cc = CharClass(CharClassType.WORD)
|
|
97
|
+
return self._extend(Quantifier(cc, QuantifierKind.ONE_OR_MORE))
|
|
98
|
+
|
|
99
|
+
@property
|
|
100
|
+
def whitespaces(self) -> RegexBuilder:
|
|
101
|
+
cc = CharClass(CharClassType.WHITESPACE)
|
|
102
|
+
return self._extend(Quantifier(cc, QuantifierKind.ONE_OR_MORE))
|
|
103
|
+
|
|
104
|
+
@property
|
|
105
|
+
def any_chars(self) -> RegexBuilder:
|
|
106
|
+
cc = CharClass(CharClassType.ANY)
|
|
107
|
+
return self._extend(Quantifier(cc, QuantifierKind.ONE_OR_MORE))
|
|
108
|
+
|
|
109
|
+
@property
|
|
110
|
+
def letters(self) -> RegexBuilder:
|
|
111
|
+
cc = CharClass(CharClassType.LETTER)
|
|
112
|
+
return self._extend(Quantifier(cc, QuantifierKind.ONE_OR_MORE))
|
|
113
|
+
|
|
114
|
+
# ── Combinators ─────────────────────────────────────────────────
|
|
115
|
+
|
|
116
|
+
def any_of(self, *options: str) -> RegexBuilder:
|
|
117
|
+
return self._extend(AnyOf(options))
|
|
118
|
+
|
|
119
|
+
def capture(self, content: RegexBuilder) -> RegexBuilder:
|
|
120
|
+
return self._extend(Group(content._components))
|
|
121
|
+
|
|
122
|
+
# ── Quantifiers (modify last component) ─────────────────────────
|
|
123
|
+
|
|
124
|
+
def exactly(self, n: int) -> RegexBuilder:
|
|
125
|
+
return self._quantify_last(QuantifierKind.EXACT, count=n)
|
|
126
|
+
|
|
127
|
+
def between(self, min_n: int, max_n: int) -> RegexBuilder:
|
|
128
|
+
return self._quantify_last(QuantifierKind.RANGE, min_count=min_n, max_count=max_n)
|
|
129
|
+
|
|
130
|
+
@property
|
|
131
|
+
def optional(self) -> RegexBuilder:
|
|
132
|
+
return self._quantify_last(QuantifierKind.OPTIONAL)
|
|
133
|
+
|
|
134
|
+
@property
|
|
135
|
+
def zero_or_more(self) -> RegexBuilder:
|
|
136
|
+
return self._quantify_last(QuantifierKind.ZERO_OR_MORE)
|
|
137
|
+
|
|
138
|
+
@property
|
|
139
|
+
def one_or_more(self) -> RegexBuilder:
|
|
140
|
+
return self._quantify_last(QuantifierKind.ONE_OR_MORE)
|
|
141
|
+
|
|
142
|
+
# ── Exclude ─────────────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
@property
|
|
145
|
+
def exclude(self) -> ExcludeProxy:
|
|
146
|
+
"""As property: returns proxy for negated classes.
|
|
147
|
+
e.g., regex.exclude.digits → \\D+
|
|
148
|
+
"""
|
|
149
|
+
return ExcludeProxy(self)
|
|
150
|
+
|
|
151
|
+
def excluding(self, chars: str) -> RegexBuilder:
|
|
152
|
+
"""Filters characters from the last char class component.
|
|
153
|
+
e.g., regex.words.excluding('_') → [^\\W_]+
|
|
154
|
+
"""
|
|
155
|
+
if not self._components:
|
|
156
|
+
raise ValueError("No component to filter")
|
|
157
|
+
|
|
158
|
+
last = self._components[-1]
|
|
159
|
+
|
|
160
|
+
# If last is a quantified char class, unwrap, filter, re-wrap
|
|
161
|
+
if isinstance(last, Quantifier) and isinstance(last.target, CharClass):
|
|
162
|
+
filtered = ExcludeFilter(last.target.class_type, chars)
|
|
163
|
+
new_last = Quantifier(filtered, last.kind, last.count, last.min_count, last.max_count)
|
|
164
|
+
return RegexBuilder(self._components[:-1] + (new_last,), self._flags)
|
|
165
|
+
|
|
166
|
+
if isinstance(last, CharClass):
|
|
167
|
+
filtered = ExcludeFilter(last.class_type, chars)
|
|
168
|
+
return RegexBuilder(self._components[:-1] + (filtered,), self._flags)
|
|
169
|
+
|
|
170
|
+
raise ValueError("excluding() can only be applied to a character class")
|
|
171
|
+
|
|
172
|
+
# ── Flags ───────────────────────────────────────────────────────
|
|
173
|
+
|
|
174
|
+
@property
|
|
175
|
+
def ignore_case(self) -> RegexBuilder:
|
|
176
|
+
return self._with_flag(Flag.IGNORE_CASE)
|
|
177
|
+
|
|
178
|
+
@property
|
|
179
|
+
def multiline(self) -> RegexBuilder:
|
|
180
|
+
return self._with_flag(Flag.MULTILINE)
|
|
181
|
+
|
|
182
|
+
# ── Compilation & Execution ─────────────────────────────────────
|
|
183
|
+
|
|
184
|
+
@property
|
|
185
|
+
def pattern(self) -> str:
|
|
186
|
+
return compile_components(self._components)
|
|
187
|
+
|
|
188
|
+
def compile(self) -> re.Pattern[str]:
|
|
189
|
+
if self._compiled is None:
|
|
190
|
+
self._compiled = re.compile(self.pattern, flags_to_re(self._flags))
|
|
191
|
+
return self._compiled
|
|
192
|
+
|
|
193
|
+
def search(self, text: str) -> re.Match[str] | None:
|
|
194
|
+
return self.compile().search(text)
|
|
195
|
+
|
|
196
|
+
def match(self, text: str) -> re.Match[str] | None:
|
|
197
|
+
return self.compile().match(text)
|
|
198
|
+
|
|
199
|
+
def find_all(self, text: str) -> list[str]:
|
|
200
|
+
return self.compile().findall(text)
|
|
201
|
+
|
|
202
|
+
def replace(self, text: str, repl: str) -> str:
|
|
203
|
+
return self.compile().sub(repl, text)
|
|
204
|
+
|
|
205
|
+
def split(self, text: str) -> list[str]:
|
|
206
|
+
return self.compile().split(text)
|
|
207
|
+
|
|
208
|
+
def test(self, text: str) -> bool:
|
|
209
|
+
return self.compile().search(text) is not None
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Protocol, Sequence
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Component(Protocol):
    """Structural type shared by every piece of a pattern.

    Any object exposing a ``compile()`` method that returns a string
    satisfies this protocol.
    """

    def compile(self) -> str:
        """Return the regex source fragment for this component."""
        ...
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Literal:
    """Text that must be matched verbatim."""

    def __init__(self, text: str) -> None:
        # Kept unescaped; escaping happens in compile().
        self.text = text

    def compile(self) -> str:
        """Return ``text`` with regex metacharacters escaped via ``re.escape``."""
        raw_text = self.text
        return re.escape(raw_text)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AnchorType(Enum):
    """Position anchors; each member's value is the regex token it emits."""

    START = "^"  # emitted by Anchor for a start-of-input position
    END = "$"  # emitted by Anchor for an end-of-input position
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Anchor:
    """Component that emits a position anchor token."""

    def __init__(self, anchor_type: AnchorType) -> None:
        self.anchor_type = anchor_type

    def compile(self) -> str:
        """Return the token stored as the anchor type's enum value."""
        kind = self.anchor_type
        return kind.value
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class CharClassType(Enum):
    """Supported character classes; each value is the regex fragment emitted."""

    DIGIT = r"\d"
    WORD = r"\w"
    WHITESPACE = r"\s"
    ANY = "."
    LETTER = "[a-zA-Z]"


# Regex fragment used as the complement of each character class.
NEGATED_MAP: dict[CharClassType, str] = dict(
    (
        (CharClassType.DIGIT, r"\D"),
        (CharClassType.WORD, r"\W"),
        (CharClassType.WHITESPACE, r"\S"),
        # "Not any character": a class that excludes both \s and \S, so it
        # can never match.
        (CharClassType.ANY, r"[^\s\S]"),
        (CharClassType.LETTER, r"[^a-zA-Z]"),
    )
)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class CharClass:
    """Component matching one character of a given class."""

    def __init__(self, class_type: CharClassType) -> None:
        self.class_type = class_type

    def compile(self) -> str:
        """Return the regex fragment stored as the class type's enum value."""
        chosen = self.class_type
        return chosen.value
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class NegatedCharClass:
    """Component matching one character outside a given class."""

    def __init__(self, class_type: CharClassType) -> None:
        self.class_type = class_type

    def compile(self) -> str:
        """Return the complement fragment looked up in ``NEGATED_MAP``."""
        complement = NEGATED_MAP[self.class_type]
        return complement
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class AnyOf:
    """Component matching any one of several literal options.

    When every option is a single character the options collapse into a
    bracket class such as ``[ab]``; otherwise they become a non-capturing
    alternation such as ``(?:foo|bar)``. Every option is escaped, so it is
    always matched literally.
    """

    def __init__(self, items: Sequence[str]) -> None:
        """Store a copy of ``items``.

        Raises:
            ValueError: if ``items`` is empty. An empty option list would
                compile to ``[]``, which ``re`` rejects as an unterminated
                character set.
        """
        self.items = list(items)
        if not self.items:
            raise ValueError("any_of() requires at least one option")

    def compile(self) -> str:
        """Return the bracket class or alternation for the stored options."""
        escaped_items = [re.escape(item) for item in self.items]
        if all(len(item) == 1 for item in self.items):
            members = "".join(escaped_items)
            return f"[{members}]"
        alternatives = "|".join(escaped_items)
        return f"(?:{alternatives})"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class Group:
    """Capturing group wrapped around a sequence of components."""

    def __init__(self, content: Sequence[Component]) -> None:
        # Copy into a list so later changes to the caller's sequence are
        # not reflected here.
        self.content = list(content)

    def compile(self) -> str:
        """Return the compiled content enclosed in parentheses."""
        # Imported at call time rather than at module top.
        # NOTE(review): presumably to avoid a circular import — confirm.
        from readable_regex.compiler import compile_components

        body = compile_components(self.content)
        return "(" + body + ")"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class QuantifierKind(Enum):
    """Repetition kinds understood by ``Quantifier``.

    For the first three members the value is the regex suffix itself; the
    ``EXACT`` and ``RANGE`` values are only tags, and ``Quantifier`` builds
    their brace suffix from its count fields.
    """

    ONE_OR_MORE = "+"
    ZERO_OR_MORE = "*"
    OPTIONAL = "?"
    EXACT = "exact"
    RANGE = "range"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class Quantifier:
    """Component applying a repetition to another component."""

    def __init__(
        self,
        target: Component,
        kind: QuantifierKind,
        count: int | None = None,
        min_count: int | None = None,
        max_count: int | None = None,
    ) -> None:
        """Store the target and the repetition settings.

        Args:
            target: component being repeated.
            kind: which repetition to apply.
            count: repetition count, read for ``QuantifierKind.EXACT``.
            min_count: lower bound, read for ``QuantifierKind.RANGE``.
            max_count: upper bound, read for ``QuantifierKind.RANGE``.

        Raises:
            ValueError: if any supplied count is negative, or if
                ``min_count`` is greater than ``max_count``.
        """
        # A negative number inside braces (e.g. ``{-1}``) is not a regex
        # quantifier; ``re`` would silently treat it as literal text, so
        # reject it here instead.
        supplied = (
            ("count", count),
            ("min_count", min_count),
            ("max_count", max_count),
        )
        for label, value in supplied:
            if value is not None and value < 0:
                raise ValueError(f"{label} must be non-negative, got {value}")
        # ``{3,1}`` is rejected by ``re`` only at compile time; fail early.
        if min_count is not None and max_count is not None and min_count > max_count:
            raise ValueError(
                f"min_count ({min_count}) must not exceed max_count ({max_count})"
            )

        self.target = target
        self.kind = kind
        self.count = count
        self.min_count = min_count
        self.max_count = max_count

    def compile(self) -> str:
        """Return the target's pattern followed by the repetition suffix.

        A multi-character target that is not already a self-contained unit
        is wrapped in a non-capturing group first, so the suffix applies to
        the whole target rather than to its final character.
        """
        inner = self.target.compile()
        needs_wrap = (
            len(inner) > 1
            and not isinstance(self.target, (CharClass, NegatedCharClass, Group))
            and not (inner.startswith("(") and inner.endswith(")"))
            and not (inner.startswith("[") and inner.endswith("]"))
        )
        if needs_wrap:
            inner = f"(?:{inner})"

        if self.kind == QuantifierKind.EXACT:
            return f"{inner}{{{self.count}}}"
        elif self.kind == QuantifierKind.RANGE:
            return f"{inner}{{{self.min_count},{self.max_count}}}"
        else:
            # The remaining kinds carry their suffix as the enum value.
            return f"{inner}{self.kind.value}"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class ExcludeFilter:
    """Represents a character class with certain characters excluded."""

    def __init__(self, base: CharClassType, excluded_chars: str) -> None:
        self.base = base
        self.excluded_chars = excluded_chars

    def compile(self) -> str:
        """Return a pattern matching one base-class character not excluded.

        Backslash shorthands use a double negation inside a bracket class,
        e.g. ``\\w`` excluding ``_`` becomes ``[^\\W_]``. Every other base
        (``[a-zA-Z]``, ``.``) is guarded by a negative lookahead, e.g.
        ``(?:(?![_])[a-zA-Z])``.
        """
        base_pattern = self.base.value
        escaped_excluded = re.escape(self.excluded_chars)

        if base_pattern.startswith("\\"):
            # "Not (outside the class) and not an excluded char" is the same
            # as "inside the class, minus the excluded chars".
            negated_base = NEGATED_MAP[self.base]
            return f"[^{negated_base}{escaped_excluded}]"

        if not self.excluded_chars:
            # Nothing to exclude; an empty ``[]`` lookahead would be an
            # invalid pattern.
            return base_pattern

        # Merging the excluded chars into a negated bracket built from the
        # base's complement would match characters *outside* the base class
        # (or nothing at all for "."). A lookahead rejects the excluded
        # chars first and then the base class consumes the character. The
        # non-capturing group keeps the result a single unit so a
        # quantifier can be appended directly.
        return f"(?:(?![{escaped_excluded}]){base_pattern})"
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from readable_regex.components import (
|
|
6
|
+
CharClassType,
|
|
7
|
+
NegatedCharClass,
|
|
8
|
+
Quantifier,
|
|
9
|
+
QuantifierKind,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from readable_regex.builder import RegexBuilder
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ExcludeProxy:
    """Accessor object handed out by the builder's ``exclude`` property.

    Offers the same character-class vocabulary as the builder (``digit`` /
    ``digits``, ``word`` / ``words``, ...) but each attribute appends the
    negated class, e.g. ``\\D`` or ``\\W``.
    """

    def __init__(self, builder: RegexBuilder) -> None:
        # Builder that every attribute extends; it is never modified here.
        self._builder = builder

    def _add(self, class_type: CharClassType) -> RegexBuilder:
        """Extend the builder with one negated ``class_type`` component."""
        negated = NegatedCharClass(class_type)
        return self._builder._extend(negated)

    def _add_plus(self, class_type: CharClassType) -> RegexBuilder:
        """Extend the builder with a one-or-more negated ``class_type``."""
        negated = NegatedCharClass(class_type)
        repeated = Quantifier(negated, QuantifierKind.ONE_OR_MORE)
        return self._builder._extend(repeated)

    # Singular (one)
    @property
    def digit(self) -> RegexBuilder:
        """Append one negated ``DIGIT`` class."""
        return self._add(CharClassType.DIGIT)

    @property
    def word(self) -> RegexBuilder:
        """Append one negated ``WORD`` class."""
        return self._add(CharClassType.WORD)

    @property
    def whitespace(self) -> RegexBuilder:
        """Append one negated ``WHITESPACE`` class."""
        return self._add(CharClassType.WHITESPACE)

    @property
    def letter(self) -> RegexBuilder:
        """Append one negated ``LETTER`` class."""
        return self._add(CharClassType.LETTER)

    @property
    def any_char(self) -> RegexBuilder:
        """Append one negated ``ANY`` class."""
        return self._add(CharClassType.ANY)

    # Plural (one or more)
    @property
    def digits(self) -> RegexBuilder:
        """Append one or more negated ``DIGIT`` characters."""
        return self._add_plus(CharClassType.DIGIT)

    @property
    def words(self) -> RegexBuilder:
        """Append one or more negated ``WORD`` characters."""
        return self._add_plus(CharClassType.WORD)

    @property
    def whitespaces(self) -> RegexBuilder:
        """Append one or more negated ``WHITESPACE`` characters."""
        return self._add_plus(CharClassType.WHITESPACE)

    @property
    def letters(self) -> RegexBuilder:
        """Append one or more negated ``LETTER`` characters."""
        return self._add_plus(CharClassType.LETTER)

    @property
    def any_chars(self) -> RegexBuilder:
        """Append one or more negated ``ANY`` characters."""
        return self._add_plus(CharClassType.ANY)
|
readable_regex/flags.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Flag(Enum):
    """Matching options a builder can carry; translated by ``flags_to_re``."""

    IGNORE_CASE = "IGNORECASE"
    MULTILINE = "MULTILINE"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def flags_to_re(flags: frozenset[Flag]) -> re.RegexFlag:
    """Combine ``flags`` into a single ``re.RegexFlag`` value.

    An empty set yields ``re.RegexFlag(0)``. A member with no entry in the
    translation table raises ``KeyError``.
    """
    translation = {
        Flag.IGNORE_CASE: re.IGNORECASE,
        Flag.MULTILINE: re.MULTILINE,
    }
    combined = re.RegexFlag(0)
    for member in flags:
        combined = combined | translation[member]
    return combined
|
readable_regex/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: readable-regex
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: A fluent, chainable API for building regular expressions that read like English
|
|
5
|
+
Project-URL: Homepage, https://github.com/molestreettechllc-dev/readable-regex
|
|
6
|
+
Project-URL: Documentation, https://molestreettechllc-dev.github.io/readable-regex/
|
|
7
|
+
Project-URL: Repository, https://github.com/molestreettechllc-dev/readable-regex
|
|
8
|
+
Project-URL: Issues, https://github.com/molestreettechllc-dev/readable-regex/issues
|
|
9
|
+
Author: Derwin Emmanuel
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: builder,fluent,readable,regex,regular-expressions
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Topic :: Text Processing
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# readable-regex
|
|
29
|
+
|
|
30
|
+
A fluent, chainable Python API for building regular expressions that read like English.
|
|
31
|
+
|
|
32
|
+
**[Documentation](https://molestreettechllc-dev.github.io/readable-regex/)**
|
|
33
|
+
|
|
34
|
+
## Install
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install readable-regex
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Quick Start
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from readable_regex import regex
|
|
44
|
+
|
|
45
|
+
# Email pattern
|
|
46
|
+
regex.words.then('@').words.then('.').words.test("user@example.com") # True
|
|
47
|
+
|
|
48
|
+
# Phone number
|
|
49
|
+
regex.digit.exactly(3).then('-').digit.exactly(3).then('-').digit.exactly(4).test("123-456-7890") # True
|
|
50
|
+
|
|
51
|
+
# Extract all numbers
|
|
52
|
+
regex.digits.find_all("Order #42 has 3 items totaling $129") # ['42', '3', '129']
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Vocabulary
|
|
56
|
+
|
|
57
|
+
The API uses a **plural convention**: singular = exactly one, plural = one or more.
|
|
58
|
+
|
|
59
|
+
### Items — what you match
|
|
60
|
+
|
|
61
|
+
| Singular | Plural (1+) | Regex |
|
|
62
|
+
|---|---|---|
|
|
63
|
+
| `digit` | `digits` | `\d` / `\d+` |
|
|
64
|
+
| `word` | `words` | `\w` / `\w+` |
|
|
65
|
+
| `letter` | `letters` | `[a-zA-Z]` / `[a-zA-Z]+` |
|
|
66
|
+
| `whitespace` | `whitespaces` | `\s` / `\s+` |
|
|
67
|
+
| `any_char` | `any_chars` | `.` / `.+` |
|
|
68
|
+
| `then('text')` | — | escaped literal |
|
|
69
|
+
| `any_of('a', 'b')` | — | `[ab]` or `(?:foo\|bar)` |
|
|
70
|
+
|
|
71
|
+
### Modifiers — how you constrain
|
|
72
|
+
|
|
73
|
+
| Modifier | Example | Effect |
|
|
74
|
+
|---|---|---|
|
|
75
|
+
| `exactly(n)` | `digit.exactly(3)` | `\d{3}` |
|
|
76
|
+
| `between(n, m)` | `digit.between(1, 3)` | `\d{1,3}` |
|
|
77
|
+
| `optional` | `digit.optional` | `\d?` |
|
|
78
|
+
| `zero_or_more` | `digit.zero_or_more` | `\d*` |
| `one_or_more` | `digit.one_or_more` | `\d+` |
|
|
79
|
+
| `starts_with(text?)` | `starts_with('Hello')` | `^Hello` |
|
|
80
|
+
| `ends_with(text?)` | `ends_with('!')` | `!$` |
|
|
81
|
+
| `ignore_case` | — | case-insensitive flag |
|
|
82
|
+
| `multiline` | — | multiline flag |
|
|
83
|
+
| `exclude.digits` | — | `\D+` (negated class) |
|
|
84
|
+
| `excluding('_')` | `words.excluding('_')` | `[^\W_]+` |
|
|
85
|
+
| `capture(builder)` | `capture(regex.words)` | `(\w+)` |
|
|
86
|
+
|
|
87
|
+
### Execution — terminal methods
|
|
88
|
+
|
|
89
|
+
| Method | Returns |
|
|
90
|
+
|---|---|
|
|
91
|
+
| `test(text)` | `bool` |
|
|
92
|
+
| `search(text)` | `re.Match \| None` |
|
|
93
|
+
| `match(text)` | `re.Match \| None` |
|
|
94
|
+
| `find_all(text)` | `list[str]` (a list of tuples when the pattern has more than one capture group) |
|
|
95
|
+
| `replace(text, repl)` | `str` |
|
|
96
|
+
| `split(text)` | `list[str]` |
|
|
97
|
+
| `compile()` | `re.Pattern` (cached) |
|
|
98
|
+
| `.pattern` | raw regex string |
|
|
99
|
+
|
|
100
|
+
## Examples
|
|
101
|
+
|
|
102
|
+
### Email validation
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
email = regex.words.then('@').words.then('.').words
|
|
106
|
+
email.test("user@example.com") # True
|
|
107
|
+
email.test("bad@@address") # False
|
|
108
|
+
email.pattern # '\w+@\w+\.\w+'
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Phone number
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
phone = (
|
|
115
|
+
regex
|
|
116
|
+
.digit.exactly(3).then('-')
|
|
117
|
+
.digit.exactly(3).then('-')
|
|
118
|
+
.digit.exactly(4)
|
|
119
|
+
)
|
|
120
|
+
phone.test("123-456-7890") # True
|
|
121
|
+
phone.pattern # '\d{3}\-\d{3}\-\d{4}'
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### IP address
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
ip = (
|
|
128
|
+
regex
|
|
129
|
+
.digit.between(1, 3).then('.')
|
|
130
|
+
.digit.between(1, 3).then('.')
|
|
131
|
+
.digit.between(1, 3).then('.')
|
|
132
|
+
.digit.between(1, 3)
|
|
133
|
+
)
|
|
134
|
+
ip.test("192.168.1.1") # True
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Capturing groups
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
kv = regex.capture(regex.words).then('=').capture(regex.any_chars)
|
|
141
|
+
m = kv.search("color=blue")
|
|
142
|
+
m.group(1) # 'color'
|
|
143
|
+
m.group(2) # 'blue'
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Case-insensitive matching
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
greeting = regex.starts_with('hello').ignore_case
|
|
150
|
+
greeting.test("HELLO world") # True
|
|
151
|
+
greeting.test("hey there") # False
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Search and replace
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
regex.digits.replace("My SSN is 123-45-6789", "***")
|
|
158
|
+
# 'My SSN is ***-***-***'
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Splitting text
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
regex.then(',').whitespace.zero_or_more.split("a, b,c, d")
|
|
165
|
+
# ['a', 'b', 'c', 'd']
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Negated classes
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
regex.exclude.digits.find_all("a1b2c3") # ['a', 'b', 'c']
|
|
172
|
+
regex.words.excluding('_').pattern # '[^\W_]+'
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Immutable builder — safe reuse
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
base = regex.starts_with('LOG-')
|
|
179
|
+
errors = base.then('ERROR').any_chars
|
|
180
|
+
warns = base.then('WARN').any_chars
|
|
181
|
+
|
|
182
|
+
errors.test("LOG-ERROR disk full") # True
|
|
183
|
+
warns.test("LOG-WARN low memory") # True
|
|
184
|
+
base.pattern # '^LOG\-' (unchanged)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## Requirements
|
|
188
|
+
|
|
189
|
+
- Python 3.10+
|
|
190
|
+
- Zero runtime dependencies
|
|
191
|
+
|
|
192
|
+
## License
|
|
193
|
+
|
|
194
|
+
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
readable_regex/__init__.py,sha256=wC8r47Jz61tas25SFNI7XxKzvP7RZFuFYUnbtZSEta8,109
|
|
2
|
+
readable_regex/builder.py,sha256=ZrxedMHVYP5bsDNPy41JWSjY3LyvbbmvGKR0297q67c,7667
|
|
3
|
+
readable_regex/compiler.py,sha256=1kOEgBAgXyZFY4htLeoZXes0XF2wjuxGMXHdIeNfZ_I,231
|
|
4
|
+
readable_regex/components.py,sha256=Wp1p8uj_spLoBtjG_DuZmflqMaPVo5XmKGEqYI1AZ_M,4220
|
|
5
|
+
readable_regex/exclude_proxy.py,sha256=qikdXCJGs2NuWHqbmLaxOqQuaQ24E2dHEdOXwN7jnkA,1964
|
|
6
|
+
readable_regex/flags.py,sha256=9ITQ6yOFnKy-q01cJryPTtRuwTs4Si9vtdN_eVSnCI8,409
|
|
7
|
+
readable_regex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
readable_regex-0.2.0.dist-info/METADATA,sha256=uANzXvfDw04GpvUIMVm9gW3txphi-op84ktdKQNc1RA,5145
|
|
9
|
+
readable_regex-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
10
|
+
readable_regex-0.2.0.dist-info/licenses/LICENSE,sha256=GU7q1ON02j-w0tC_np1IBBcm56lJcRhf03l_bwTqZzc,1072
|
|
11
|
+
readable_regex-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Derwin Emmanuel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|