Verbex 1.0.0__zip

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ Metadata-Version: 2.1
2
+ Name: Verbex
3
+ Version: 1.0.0
4
+ Summary: Make difficult regular expressions easy! Python fork based on the awesome VerbalExpressions repo - https://github.com/jehna/VerbalExpressions
5
+ Home-page: https://github.com/VerbalExpressions/PythonVerbalExpressions
6
+ Author: Victor Titor, Yan Wenjun, diogobeda, Mihai Ionut Vilcu, Peder Soholt, Sameer Raghuram, Kharms, Richard Broderick
7
+ License: GPLv3
8
+ Platform: UNKNOWN
9
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
+ Classifier: Programming Language :: Python
11
+ Classifier: Programming Language :: Python :: 3.6
12
+ Classifier: Programming Language :: Python :: 3.7
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Topic :: Software Development :: Libraries
15
+ Classifier: Topic :: Text Processing
16
+ License-File: LICENSE.txt
17
+
18
+ Please see https://github.com/rbroderi/Verbex/blob/master/README.md for more information!
19
+
@@ -0,0 +1,12 @@
1
+ LICENSE.TXT
2
+ LICENSE.txt
3
+ MANIFEST.in
4
+ README.md
5
+ setup.py
6
+ Verbex.egg-info/PKG-INFO
7
+ Verbex.egg-info/SOURCES.txt
8
+ Verbex.egg-info/dependency_links.txt
9
+ Verbex.egg-info/top_level.txt
10
+ verbex/__init__.py
11
+ verbex/py.typed
12
+ verbex/verbex.py
@@ -0,0 +1,3 @@
1
+ from .verbex import CharClass as CharClass
2
+ from .verbex import SpecialChar as SpecialChar
3
+ from .verbex import Verbex as Verbex
File without changes
@@ -0,0 +1,622 @@
1
+ """Generate regular expressions from an easier fluent verbal form."""
2
+ from __future__ import annotations
3
+
4
+ import re
5
+ from enum import Enum
6
+ from functools import wraps
7
+
8
+ try:
9
+ from typing import ( # <--------------- if Python ≥ 3.9.0
10
+ Annotated,
11
+ ParamSpec,
12
+ TypeAlias,
13
+ )
14
+ except (ModuleNotFoundError, ImportError):
15
+ from typing_extensions import TypeAlias, Annotated, ParamSpec # type: ignore # <--- if Python < 3.9.0
16
+
17
+ from typing import Pattern, Protocol, TypeVar
18
+
19
+ from beartype import beartype # type: ignore
20
+ from beartype.typing import ( # type: ignore
21
+ Any,
22
+ Callable,
23
+ Dict,
24
+ Iterator,
25
+ List,
26
+ Optional,
27
+ Tuple,
28
+ Union,
29
+ cast,
30
+ runtime_checkable,
31
+ )
32
+ from beartype.vale import Is # type: ignore
33
+
34
+
35
+ def _string_len_is_1(text: object) -> bool:
36
+ return isinstance(text, str) and len(text) == 1
37
+
38
+
39
# A ``str`` annotated with the ``Is[_string_len_is_1]`` validator, i.e. a
# string that must contain exactly one character.
Char = Annotated[str, Is[_string_len_is_1]]


# Parameter specification and return type variable used in the signature of
# the ``re_escape`` decorator.
P = ParamSpec("P") # noqa VNE001
R = TypeVar("R") # noqa VNE001
44
+
45
+
46
+ # work around for bug https://github.com/python/mypy/issues/12660
47
+ # fixed in next version of mypy
48
@runtime_checkable
class HasIter(Protocol):
    """Structural type for any object that provides ``__iter__``.

    ``re_escape`` casts its ``*args`` to this protocol before looping over
    them; it exists as a workaround for mypy's handling of ``P.args``.
    """

    def __iter__(self) -> Iterator[Any]:
        """Object can be iterated.

        Yields:
            Next object.
        """
        ...
59
+
60
+
61
+ # work around for bug https://github.com/python/mypy/issues/12660
62
+ # fixed in next version of mypy
63
@runtime_checkable
class HasItems(Protocol):
    """Structural type for any object that provides an ``items`` method.

    ``re_escape`` casts its ``**kwargs`` to this protocol before looping over
    them; it exists as a workaround for mypy's handling of ``P.kwargs``.
    """

    # NOTE(review): the return annotation describes a single (str, Any) pair,
    # whereas the caller in ``re_escape`` iterates over the result and unpacks
    # a key/value pair from each element -- confirm whether an iterable of
    # such tuples was intended.
    def items(self) -> Tuple[str, Any]:
        """Object has items method.

        Returns:
            The items of the object, as consumed by the key/value loop in
            ``re_escape``.
        """
        ...
74
+
75
+
76
class EscapedText(str):
    """A ``str`` subclass whose content has been run through ``re.escape``.

    Being a distinct type, it lets code tell text that is already escaped
    apart from raw strings by means of ``isinstance``.

    Arguments:
        str -- Extend the string class.
    """

    def __new__(cls, value: str) -> EscapedText:
        """Build the instance from the regex-escaped form of ``value``.

        Arguments:
            value -- the raw string to escape

        Returns:
            A new ``EscapedText`` holding ``re.escape(value)``.
        """
        escaped = re.escape(value)
        return super().__new__(cls, escaped)
93
+
94
+
95
def re_escape(func: Callable[P, R]) -> Callable[P, R]:
    """Wrap ``func`` so that plain string arguments reach it as EscapedText.

    Positional and keyword arguments are both inspected: a ``str`` that is
    not already an ``EscapedText`` is converted (and thereby regex-escaped),
    every other value is forwarded unchanged.

    Arguments:
        func -- The function to decorate.

    Returns:
        The decorated function.
    """

    def _is_unescaped_str(value: Any) -> bool:
        # EscapedText is itself a str, so it has to be excluded explicitly to
        # avoid escaping the same text twice.
        return isinstance(value, str) and not isinstance(value, EscapedText)

    @wraps(func)
    def inner(*args: P.args, **kwargs: P.kwargs) -> R: # type: ignore
        escaped_args: List[Any] = [
            EscapedText(item) if _is_unescaped_str(item) else item
            for item in cast(HasIter, args)
        ]
        escaped_kwargs: Dict[str, Any] = {
            key: EscapedText(str(val)) if _is_unescaped_str(val) else val
            for key, val in cast(HasItems, kwargs).items()
        }
        return func(*escaped_args, **escaped_kwargs) # type: ignore

    return inner
124
+
125
+
126
class CharClass(Enum):
    """Regex character-class shorthands, stored as their pattern text.

    ``str(member)`` gives the member's value, so a member can be dropped
    straight into a pattern string.

    Arguments:
        Enum -- Extends the Enum class.
    """

    DIGIT = "\\d"
    LETTER = "\\w"
    # NOTE(review): Python's ``re`` syntax does not appear to define ``\u``
    # (without hex digits) or ``\l`` as character classes, so patterns built
    # from the next two members presumably fail to compile -- confirm and
    # decide on replacements.
    UPPERCASE_LETTER = "\\u"
    LOWERCASE_LETTER = "\\l"
    WHITESPACE = "\\s"
    TAB = "\\t"

    def __str__(self) -> str:
        """Render the member as its pattern text.

        Returns:
            value of Enum
        """
        pattern_text: str = self.value
        return pattern_text
147
+
148
+
149
class SpecialChar(Enum):
    """Shorthand for special characters, stored as their pattern text.

    ``str(member)`` gives the member's value, so a member can be dropped
    straight into a pattern string.

    Arguments:
        Enum -- Extends the Enum class.
    """

    # does not work / should not be used in [ ]
    LINEBREAK = "(\\n|(\\r\\n))"
    START_OF_LINE = "^"
    END_OF_LINE = "$"
    TAB = "\t"

    def __str__(self) -> str:
        """Render the member as its pattern text.

        Returns:
            Return value of enum as string.
        """
        pattern_text: str = self.value
        return pattern_text
169
+
170
+
171
# Accepted by the character-set builders (any_of / not_any_of): raw
# characters or a CharClass member.
CharClassOrChars: TypeAlias = Union[str, CharClass]
# The same, additionally allowing a SpecialChar member.
EscapedCharClassOrSpecial: TypeAlias = Union[str, CharClass, SpecialChar]
# The same, additionally allowing a Verbex object (forward reference, the
# class is defined below).
VerbexEscapedCharClassOrSpecial: TypeAlias = Union["Verbex", EscapedCharClassOrSpecial]
174
+
175
+
176
class Verbex:
    """
    VerbalExpressions class.

    Builds a regular expression from chained method calls. Each builder
    method appends a pattern fragment to an internal list and returns the
    same object, ``str()`` joins the fragments, and ``regex()`` compiles the
    result with the accumulated flags.

    Methods decorated with ``re_escape`` receive plain ``str`` arguments in
    regex-escaped form; Verbex objects, CharClass and SpecialChar members are
    inserted as they are.

    the following methods do not try to match the original js lib!
    """

    EMPTY_REGEX_FLAG = re.RegexFlag(0)

    @re_escape
    @beartype
    def __init__(self, modifiers: re.RegexFlag = EMPTY_REGEX_FLAG):
        """Create a Verbex object; setting any needed flags.

        Keyword Arguments:
            modifiers -- Regex modifying flags (default: {re.RegexFlag(0)})
        """
        # Pattern fragments, in the order in which they were added.
        self._parts: List[str] = []
        self._modifiers = modifiers

    @property
    def modifiers(self) -> re.RegexFlag:
        """Return the modifiers for this Verbex object.

        Returns:
            The modifiers applied to this object.
        """
        return self._modifiers

    def __str__(self) -> str:
        """Return regex string representation."""
        return "".join(self._parts)

    @beartype
    def _add(self, value: Union[str, List[str]]) -> Verbex:
        """
        Append a transformed value to internal expression to be compiled.

        As possible, this method should be "private".

        Arguments:
            value -- A single fragment, or a list of fragments to append in order.

        Returns:
            This Verbex object, to allow chaining.
        """
        if isinstance(value, list):
            self._parts.extend(value)
        else:
            self._parts.append(value)
        return self

    def regex(self) -> Pattern[str]:
        """Get a regular expression object.

        Returns:
            The pattern compiled from the current fragments and modifiers.
        """
        return re.compile(str(self), self._modifiers)

    # allow VerbexEscapedCharClassOrSpecial

    @re_escape
    @beartype
    def _capture_group_with_name(
        self,
        name: str,
        text: VerbexEscapedCharClassOrSpecial,
    ) -> Verbex:
        """Append a named capture group.

        Arguments:
            name -- The name of the group.
            text -- The text / Verbex object to capture.

        Returns:
            Modified Verbex object.
        """
        # Python's re module spells named groups "(?P<name>...)"; the
        # "(?<name>...)" form used by other regex flavours does not compile.
        return self._add(f"(?P<{name}>{text})")

    @re_escape
    @beartype
    def _capture_group_without_name(
        self,
        text: VerbexEscapedCharClassOrSpecial,
    ) -> Verbex:
        """Append an unnamed capture group.

        Arguments:
            text -- The text / Verbex object to capture.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"({text})")

    @re_escape
    @beartype
    def capture_group(
        self,
        /,
        name_or_text: Union[Optional[str], VerbexEscapedCharClassOrSpecial] = None,
        text: Optional[VerbexEscapedCharClassOrSpecial] = None,
    ) -> Verbex:
        """Create a capture group.

        Name is optional if not specified then the first argument is the text.
        Passing only ``text`` (by keyword) also creates an unnamed group.

        Keyword Arguments:
            name_or_text -- The name of the group / text to search for (default: {None})
            text -- The text to search for (default: {None})

        Raises:
            ValueError: If neither argument is given, or if ``text`` is given
                together with a first argument that is not a string and so
                cannot serve as a group name.

        Returns:
            Verbex with added capture group.
        """
        if name_or_text is None:
            if text is None:
                raise ValueError("text must be specified with optional name")
            # Only the text was supplied: unnamed group.
            return self._capture_group_without_name(text)
        if text is None:
            # A single argument is always the text to capture.
            return self._capture_group_without_name(name_or_text)
        if isinstance(name_or_text, str):
            return self._capture_group_with_name(name_or_text, text)
        raise ValueError("text must be specified with optional name")

    @re_escape
    @beartype
    def OR(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex: # noqa N802
        """`or` is a python keyword so we use `OR` instead.

        Arguments:
            text -- Text to find or a Verbex object.

        Returns:
            Modified Verbex object.
        """
        return self._add("|").find(text)

    @re_escape
    @beartype
    def zero_or_more(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Find the text or Verbex object zero or more times.

        Arguments:
            text -- The text / Verbex object to look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{text})*")

    @re_escape
    @beartype
    def one_or_more(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Find the text or Verbex object one or more times.

        Arguments:
            text -- The text / Verbex object to look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{text})+")

    @re_escape
    @beartype
    def n_times(
        self,
        text: VerbexEscapedCharClassOrSpecial,
        n: int, # noqa: VNE001
    ) -> Verbex:
        """Find the text or Verbex object exactly n times.

        Arguments:
            text -- The text / Verbex object to look for.
            n -- The number of repetitions.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{text}){{{n}}}")

    @re_escape
    @beartype
    def n_times_or_more(
        self,
        text: VerbexEscapedCharClassOrSpecial,
        n: int, # noqa: VNE001
    ) -> Verbex:
        """Find the text or Verbex object at least n times.

        Arguments:
            text -- The text / Verbex object to look for.
            n -- The minimum number of repetitions.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{text}){{{n},}}")

    @re_escape
    @beartype
    def n_to_m_times(
        self,
        text: VerbexEscapedCharClassOrSpecial,
        n: int, # noqa: VNE001
        m: int, # noqa: VNE001
    ) -> Verbex:
        """Find the text or Verbex object between n and m times.

        Arguments:
            text -- The text / Verbex object to look for.
            n -- The minimum number of repetitions.
            m -- The maximum number of repetitions.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{text}){{{n},{m}}}")

    @re_escape
    @beartype
    def maybe(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Possibly find the text / Verbex object.

        Arguments:
            text -- The text / Verbex object to possibly find.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:{text})?")

    @re_escape
    @beartype
    def find(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Find the text or Verbex object.

        Arguments:
            text -- The text / Verbex object to look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(str(text))

    @re_escape
    @beartype
    def then(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Synonym for find.

        Arguments:
            text -- The text / Verbex object to look for.

        Returns:
            Modified Verbex object.
        """
        return self.find(text)

    @re_escape
    @beartype
    def followed_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Match if string is followed by text.

        Positive lookahead

        Arguments:
            text -- The text / Verbex object that must follow.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?={text})")

    @re_escape
    @beartype
    def not_followed_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Match if string is not followed by text.

        Negative lookahead

        Arguments:
            text -- The text / Verbex object that must not follow.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?!{text})")

    @re_escape
    @beartype
    def preceded_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Match if string is preceded by text.

        Positive lookbehind

        Arguments:
            text -- The text / Verbex object that must come before.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?<={text})")

    @re_escape
    @beartype
    def not_preceded_by(self, text: VerbexEscapedCharClassOrSpecial) -> Verbex:
        """Match if string is not preceded by text.

        Negative Lookbehind

        Arguments:
            text -- The text / Verbex object that must not come before.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?<!{text})")

    # only allow CharclassOrChars

    @re_escape
    @beartype
    def any_of(self, chargroup: CharClassOrChars) -> Verbex:
        """Find anything in this group of chars or char class.

        Arguments:
            chargroup -- The characters to look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:[{chargroup}])")

    @re_escape
    @beartype
    def not_any_of(self, text: CharClassOrChars) -> Verbex:
        """Find anything but this group of chars or char class.

        Arguments:
            text -- The characters to not look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"(?:[^{text}])")

    @re_escape
    def anything_but(self, chargroup: EscapedCharClassOrSpecial) -> Verbex:
        """Find anything one or more times but this group of chars or char class.

        Arguments:
            chargroup -- The characters to not look for.

        Returns:
            Modified Verbex object.
        """
        return self._add(f"[^{chargroup}]+")

    # no text input

    def start_of_line(self) -> Verbex:
        """Find the start of the line.

        Returns:
            Modified Verbex object.
        """
        return self.find(SpecialChar.START_OF_LINE)

    def end_of_line(self) -> Verbex:
        """Find the end of the line.

        Returns:
            Modified Verbex object.
        """
        return self.find(SpecialChar.END_OF_LINE)

    def line_break(self) -> Verbex:
        """Find a line break.

        Returns:
            Modified Verbex object.
        """
        return self.find(SpecialChar.LINEBREAK)

    def tab(self) -> Verbex:
        """Find a tab.

        Returns:
            Modified Verbex object.
        """
        return self.find(SpecialChar.TAB)

    def anything(self) -> Verbex:
        """Find anything one or more time.

        Returns:
            Modified Verbex object.
        """
        return self._add(".+")

    def as_few(self) -> Verbex:
        """Modify previous search to not be greedy.

        Returns:
            Modified Verbex object.
        """
        return self._add("?")

    @beartype
    def number_range(self, start: int, end: int) -> Verbex:
        """Generate a range of numbers.

        The result is an alternation of every integer from ``start`` to
        ``end``, both included.

        Arguments:
            start -- Start of the range
            end -- End of the range

        Returns:
            Modified Verbex object.
        """
        alternatives = "|".join(str(i) for i in range(start, end + 1))
        return self._add(f"(?:{alternatives})")

    @beartype
    def letter_range(self, start: Char, end: Char) -> Verbex:
        """Generate a range of letters.

        Arguments:
            start -- Start of the range
            end -- End of the range

        Returns:
            Modified Verbex object.
        """
        return self._add(f"[{start}-{end}]")

    def word(self) -> Verbex:
        """Find a word on word boundary.

        Returns:
            Modified Verbex object.
        """
        return self._add("(\\b\\w+\\b)")

    # # --------------- modifiers ------------------------

    def with_any_case(self) -> Verbex:
        """Modify Verbex object to be case insensitive.

        Returns:
            Modified Verbex object.
        """
        self._modifiers |= re.IGNORECASE
        return self

    def search_by_line(self) -> Verbex:
        """Search each line, ^ and $ match beginning and end of line respectively.

        Returns:
            Modified Verbex object.
        """
        self._modifiers |= re.MULTILINE
        return self

    def with_ascii(self) -> Verbex:
        """Match ascii instead of unicode.

        Returns:
            Modified Verbex object.
        """
        self._modifiers |= re.ASCII
        return self
609
+
610
+
611
+ # left over notes from original version
612
+ # def __getattr__(self, attr):
613
+ # """ any other function will be sent to the regex object """
614
+ # regex = self.regex()
615
+ # return getattr(regex, attr)
616
+
617
+ # def replace(self, string, repl):
618
+ # return self.sub(repl, string)
619
+
620
+
621
if __name__ == "__main__":
    # Nothing happens when the module is executed directly.
    pass