py-text-toolkit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_text_toolkit-0.1.0.dist-info/METADATA +262 -0
- py_text_toolkit-0.1.0.dist-info/RECORD +11 -0
- py_text_toolkit-0.1.0.dist-info/WHEEL +5 -0
- py_text_toolkit-0.1.0.dist-info/licenses/LICENSE +21 -0
- py_text_toolkit-0.1.0.dist-info/top_level.txt +1 -0
- strutils/__init__.py +35 -0
- strutils/analysis.py +175 -0
- strutils/cleaning.py +170 -0
- strutils/format_cases.py +220 -0
- strutils/generation.py +119 -0
- strutils/validation.py +129 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: py-text-toolkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A comprehensive string utility library for Python.
|
|
5
|
+
Author-email: Dawood Afzal <dawoodafzal.62138@gmail.com>
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: emoji>=2.0.0
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
15
|
+
# py-text-toolkit
|
|
16
|
+
|
|
17
|
+
A lightweight, dependency-minimal Python library for everyday string operations — cleaning, validation, analysis, case conversion, and generation.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install py-text-toolkit
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
> **Requires:** Python 3.8+
|
|
28
|
+
> **Dependency:** `emoji` >= 2.0.0 — declared as a regular requirement and installed automatically; used only by `cleaning.remove_emojis`
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Modules at a Glance
|
|
33
|
+
|
|
34
|
+
| Module | What it does |
|
|
35
|
+
|---|---|
|
|
36
|
+
| `strutils.cleaning` | Strip, replace, and normalize raw text |
|
|
37
|
+
| `strutils.validation` | Validate emails, URLs, passwords, and character sets |
|
|
38
|
+
| `strutils.analysis` | Count, compare, and measure strings |
|
|
39
|
+
| `strutils.format_cases` | Convert between naming conventions and formatting styles |
|
|
40
|
+
| `strutils.generation` | Generate slugs, masks, ciphers, and reversed strings |
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from strutils.cleaning import remove_html_tags, remove_urls
|
|
48
|
+
from strutils.validation import is_email, is_strong_password
|
|
49
|
+
from strutils.analysis import word_count, is_palindrome
|
|
50
|
+
from strutils.format_cases import to_snake_case, to_camel_case
|
|
51
|
+
from strutils.generation import generate_slug, mask_range
|
|
52
|
+
|
|
53
|
+
# Clean
|
|
54
|
+
remove_html_tags("<p>Hello <b>world</b></p>") # "Hello world"
|
|
55
|
+
remove_urls("Visit https://example.com today") # "Visit today"
|
|
56
|
+
|
|
57
|
+
# Validate
|
|
58
|
+
is_email("user@example.com") # True
|
|
59
|
+
is_strong_password("Passw0rd!") # True
|
|
60
|
+
|
|
61
|
+
# Analyse
|
|
62
|
+
word_count("Hello, world!") # 2
|
|
63
|
+
is_palindrome("A man a plan a canal Panama") # True
|
|
64
|
+
|
|
65
|
+
# Convert case
|
|
66
|
+
to_snake_case("camelCaseText") # "camel_case_text"
|
|
67
|
+
to_camel_case("hello_world") # "helloWorld"
|
|
68
|
+
|
|
69
|
+
# Generate
|
|
70
|
+
generate_slug("Hello World!") # "hello-world"
|
|
71
|
+
mask_range("1234-5678-9012", 5, 9, "*") # "1234-****-9012"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Module Reference
|
|
77
|
+
|
|
78
|
+
### `strutils.cleaning`
|
|
79
|
+
|
|
80
|
+
Functions for sanitising and normalising raw text.
|
|
81
|
+
|
|
82
|
+
| Function | Signature | Description |
|
|
83
|
+
|---|---|---|
|
|
84
|
+
| `normalize_whitespace` | `(text) → str` | Collapse all whitespace runs to a single space and strip ends |
|
|
85
|
+
| `remove_punctuation` | `(text, replace="") → str` | Remove or replace all punctuation characters |
|
|
86
|
+
| `remove_digits` | `(text, replace="") → str` | Remove or replace all digit characters |
|
|
87
|
+
| `remove_html_tags` | `(text, replace="") → str` | Strip or replace HTML tags |
|
|
88
|
+
| `remove_urls` | `(text, replace="") → str` | Remove or replace HTTP/HTTPS and `www.` URLs |
|
|
89
|
+
| `remove_emojis` | `(text, replace="") → str` | Remove or replace emoji characters (requires `emoji`) |
|
|
90
|
+
| `collapse_spaces` | `(text) → str` | Remove **all** whitespace (not just collapse) |
|
|
91
|
+
|
|
92
|
+
All `remove_*` functions accept an optional `replace` argument — the string substituted in place of each removed element (defaults to `""`). After replacement, whitespace is always normalized. `normalize_whitespace` and `collapse_spaces` take only `text`.
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from strutils.cleaning import remove_punctuation, remove_html_tags, remove_emojis
|
|
96
|
+
|
|
97
|
+
remove_punctuation("Hello, world!") # "Hello world"
|
|
98
|
+
remove_punctuation("Hello, world!", replace=" ") # "Hello world"
|
|
99
|
+
|
|
100
|
+
remove_html_tags("<p>Hello <b>world</b></p>") # "Hello world"
|
|
101
|
+
remove_html_tags("<br/>line1<br/>line2", replace=" ") # "line1 line2"
|
|
102
|
+
|
|
103
|
+
remove_emojis("Great job! 🎉") # "Great job!"
|
|
104
|
+
remove_emojis("Hello 😊", replace="[emoji]") # "Hello [emoji]"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
### `strutils.validation`
|
|
110
|
+
|
|
111
|
+
Boolean predicates for common string formats.
|
|
112
|
+
|
|
113
|
+
| Function | Signature | Description |
|
|
114
|
+
|---|---|---|
|
|
115
|
+
| `is_email` | `(text) → bool` | Check for a valid email address |
|
|
116
|
+
| `is_url` | `(text) → bool` | Check for a valid HTTP or HTTPS URL |
|
|
117
|
+
| `contains_only` | `(text, allowed_chars) → bool` | Check that every character is in the allowed set |
|
|
118
|
+
| `is_strong_password` | `(text) → bool` | Check that a password meets strength requirements |
|
|
119
|
+
|
|
120
|
+
**Password requirements** (`is_strong_password`):
|
|
121
|
+
- Minimum 8 characters
|
|
122
|
+
- At least one lowercase letter
|
|
123
|
+
- At least one uppercase letter
|
|
124
|
+
- At least one digit
|
|
125
|
+
- At least one special character from `@$!%*?&`
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
from strutils.validation import is_email, is_url, contains_only, is_strong_password
|
|
129
|
+
|
|
130
|
+
is_email("user@example.com") # True
|
|
131
|
+
is_email("not-an-email") # False
|
|
132
|
+
|
|
133
|
+
is_url("https://api.service.io/v1") # True
|
|
134
|
+
is_url("ftp://files.example.com") # False
|
|
135
|
+
|
|
136
|
+
contains_only("12345", "0123456789") # True
|
|
137
|
+
contains_only("hello!", "a-z") # False (literal chars only, not a range)
|
|
138
|
+
|
|
139
|
+
is_strong_password("Passw0rd!") # True
|
|
140
|
+
is_strong_password("weakpass") # False
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
> **Note on `contains_only`:** `allowed_chars` is treated as a set of literal characters. Special regex characters are escaped automatically, so `"a-z"` matches only the three characters `a`, `-`, and `z`, **not** a range.
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
### `strutils.analysis`
|
|
148
|
+
|
|
149
|
+
Functions that measure and compare strings.
|
|
150
|
+
|
|
151
|
+
| Function | Signature | Description |
|
|
152
|
+
|---|---|---|
|
|
153
|
+
| `word_count` | `(text) → int` | Count words using regex word-boundary matching |
|
|
154
|
+
| `char_frequency` | `(text, char) → int` | Count non-overlapping occurrences of a character or substring |
|
|
155
|
+
| `count_vowels` | `(text) → int` | Count English vowels (a e i o u), case-insensitive |
|
|
156
|
+
| `longest_word` | `(text) → int` | Return the length of the longest whitespace-delimited word |
|
|
157
|
+
| `is_palindrome` | `(text, case_sensitive=False, ignore_formatting=True) → bool` | Check if a string is a palindrome |
|
|
158
|
+
| `is_anagram` | `(word1, word2) → bool` | Check if two strings are anagrams (case-insensitive, ignores spaces) |
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
from strutils.analysis import word_count, is_palindrome, is_anagram, char_frequency
|
|
162
|
+
|
|
163
|
+
word_count("Hello, world!") # 2
|
|
164
|
+
word_count(" spaces everywhere ") # 2
|
|
165
|
+
|
|
166
|
+
char_frequency("banana", "an") # 2
|
|
167
|
+
|
|
168
|
+
is_palindrome("racecar") # True
|
|
169
|
+
is_palindrome("A man a plan a canal Panama") # True
|
|
170
|
+
is_palindrome("Racecar", case_sensitive=True) # False
|
|
171
|
+
|
|
172
|
+
is_anagram("listen", "silent") # True
|
|
173
|
+
is_anagram("Astronomer", "Moon starer") # True
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
### `strutils.format_cases`
|
|
179
|
+
|
|
180
|
+
Convert strings between naming conventions and apply text formatting.
|
|
181
|
+
|
|
182
|
+
| Function | Signature | Description |
|
|
183
|
+
|---|---|---|
|
|
184
|
+
| `to_snake_case` | `(text) → str` | Convert to `snake_case` |
|
|
185
|
+
| `to_camel_case` | `(text) → str` | Convert to `camelCase` |
|
|
186
|
+
| `to_pascal_case` | `(text) → str` | Convert to `PascalCase` |
|
|
187
|
+
| `to_kebab_case` | `(text) → str` | Convert to `kebab-case` |
|
|
188
|
+
| `to_title_case` | `(text) → str` | Convert to `Title Case` |
|
|
189
|
+
| `truncate` | `(text, max_length, suffix="...") → str` | Truncate to a maximum length with a suffix |
|
|
190
|
+
| `pad_center` | `(text, width, fillchar=" ") → str` | Center-pad to a given width |
|
|
191
|
+
|
|
192
|
+
All case converters handle mixed input (camelCase, PascalCase, snake_case, kebab-case, spaces).
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
from strutils.format_cases import to_snake_case, to_camel_case, truncate, pad_center
|
|
196
|
+
|
|
197
|
+
to_snake_case("camelCaseText") # "camel_case_text"
|
|
198
|
+
to_snake_case("Hello World!") # "hello_world"
|
|
199
|
+
|
|
200
|
+
to_camel_case("hello_world") # "helloWorld"
|
|
201
|
+
to_camel_case("PascalCaseText") # "pascalCaseText"
|
|
202
|
+
|
|
203
|
+
to_pascal_case("kebab-case-text") # "KebabCaseText"
|
|
204
|
+
to_kebab_case("camelCaseText") # "camel-case-text"
|
|
205
|
+
to_title_case("hello_world") # "Hello World"
|
|
206
|
+
|
|
207
|
+
truncate("Hello, World!", 8) # "Hello..."
|
|
208
|
+
truncate("Hi", 10) # "Hi"
|
|
209
|
+
|
|
210
|
+
pad_center("hello", 11) # " hello "
|
|
211
|
+
pad_center("hi", 10, "-") # "----hi----"
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
### `strutils.generation`
|
|
217
|
+
|
|
218
|
+
Functions that produce new strings from existing ones.
|
|
219
|
+
|
|
220
|
+
| Function | Signature | Description |
|
|
221
|
+
|---|---|---|
|
|
222
|
+
| `generate_slug` | `(text) → str` | Convert to a URL-friendly slug |
|
|
223
|
+
| `reverse_word` | `(text) → str` | Reverse all characters |
|
|
224
|
+
| `mask_range` | `(text, start_index, end_index, placeholder="X") → str` | Mask a character range with a placeholder |
|
|
225
|
+
| `ceasar_cipher` | `(text, shift) → str` | Encrypt/decrypt with the Caesar cipher |
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
from strutils.generation import generate_slug, mask_range, ceasar_cipher, reverse_word
|
|
229
|
+
|
|
230
|
+
generate_slug("Hello World!") # "hello-world"
|
|
231
|
+
generate_slug("Python 3.11 -- Release Notes") # "python-3-11-release-notes"
|
|
232
|
+
|
|
233
|
+
reverse_word("hello") # "olleh"
|
|
234
|
+
|
|
235
|
+
mask_range("1234-5678-9012", 5, 9, "*") # "1234-****-9012"
|
|
236
|
+
mask_range("secret", -3, -1) # "secXXt"
|
|
237
|
+
|
|
238
|
+
ceasar_cipher("Hello, World!", 3) # "Khoor, Zruog!"
|
|
239
|
+
ceasar_cipher("Khoor, Zruog!", -3) # "Hello, World!" (decrypt)
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## Dependencies
|
|
245
|
+
|
|
246
|
+
| Package | Required | Used by |
|
|
247
|
+
|---|---|---|
|
|
248
|
+
| `re` (stdlib) | Always | All modules |
|
|
249
|
+
| `string` (stdlib) | Always | `cleaning` |
|
|
250
|
+
| `emoji` | Always (declared in `Requires-Dist`) | `cleaning.remove_emojis` only |
|
|
251
|
+
|
|
252
|
+
`emoji` is a regular requirement, so the standard install command already pulls it in:
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
pip install py-text-toolkit
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
---
|
|
259
|
+
|
|
260
|
+
## License
|
|
261
|
+
|
|
262
|
+
MIT License — see [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
py_text_toolkit-0.1.0.dist-info/licenses/LICENSE,sha256=TPjmzJvh2E7wUniQsHMR1XVxyX0SzXh1JKNTkNABqUU,1088
|
|
2
|
+
strutils/__init__.py,sha256=yl8l7eXWNYt9NF3KvE5TESPGsLrgmYRE5bEq6rEtxfo,1262
|
|
3
|
+
strutils/analysis.py,sha256=DZfb9D4OWOS4IK2hkQ10gCi-bI2cPhMTb_rlyLQQ9ZU,4956
|
|
4
|
+
strutils/cleaning.py,sha256=rvMYNHi78TfH6IuqD1s0A2Zt9JFxhtd6wU-_w8choRY,5595
|
|
5
|
+
strutils/format_cases.py,sha256=d0aP20Fz2dEpRoGbCKZHBxUsD058iRO2zVdnz8BHzn4,6680
|
|
6
|
+
strutils/generation.py,sha256=MKed4-tgXzQkImFEHpaWgM7tAR7YSKpzoxP7hHUdeg4,3905
|
|
7
|
+
strutils/validation.py,sha256=QVdrPLQfHm-b4kAIv1ZxWYZ49UkRQJwZGMixyihR0Po,4006
|
|
8
|
+
py_text_toolkit-0.1.0.dist-info/METADATA,sha256=BVRsylSqdDfTyIDTfwddI3GMYGjP75yJV0nnUK3pDIM,9604
|
|
9
|
+
py_text_toolkit-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
10
|
+
py_text_toolkit-0.1.0.dist-info/top_level.txt,sha256=jOv2-Ma6MHfqFp8wQed3G2W88ceTT39Abb58ixN5ES8,9
|
|
11
|
+
py_text_toolkit-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Dawood Afzal
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
strutils
|
strutils/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Strutils: A comprehensive string utility library for Python.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .analysis import (
|
|
6
|
+
word_count, char_frequency, is_palindrome,
|
|
7
|
+
is_anagram, count_vowels, longest_word
|
|
8
|
+
)
|
|
9
|
+
from .cleaning import (
|
|
10
|
+
normalize_whitespace, remove_punctuation, remove_digits,
|
|
11
|
+
remove_html_tags, remove_urls, remove_emojis, collapse_spaces
|
|
12
|
+
)
|
|
13
|
+
from .format_cases import (
|
|
14
|
+
to_snake_case, to_camel_case, to_pascal_case,
|
|
15
|
+
to_kebab_case, to_title_case, truncate, pad_center
|
|
16
|
+
)
|
|
17
|
+
from .generation import (
|
|
18
|
+
generate_slug, reverse_word, mask_range, ceasar_cipher
|
|
19
|
+
)
|
|
20
|
+
from .validation import (
|
|
21
|
+
is_email, is_url, contains_only, is_strong_password
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
# Analysis
|
|
26
|
+
"word_count", "char_frequency", "is_palindrome", "is_anagram", "count_vowels", "longest_word",
|
|
27
|
+
# Cleaning
|
|
28
|
+
"normalize_whitespace", "remove_punctuation", "remove_digits", "remove_html_tags", "remove_urls", "remove_emojis", "collapse_spaces",
|
|
29
|
+
# Format Cases
|
|
30
|
+
"to_snake_case", "to_camel_case", "to_pascal_case", "to_kebab_case", "to_title_case", "truncate", "pad_center",
|
|
31
|
+
# Generation
|
|
32
|
+
"generate_slug", "reverse_word", "mask_range", "ceasar_cipher",
|
|
33
|
+
# Validation
|
|
34
|
+
"is_email", "is_url", "contains_only", "is_strong_password"
|
|
35
|
+
]
|
strutils/analysis.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from .cleaning import remove_punctuation, collapse_spaces
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def word_count(text: str) -> int:
    """
    Count the number of words in a string.

    A word is a run of word characters (letters, digits, underscore).
    An apostrophe (straight ``'`` or typographic ``’``) that sits between
    two word characters is kept inside the word, so a contraction such as
    "don't" counts once instead of being split into "don" and "t".
    Punctuation around a word (e.g. "end.") is ignored.

    Args:
        text (str): The input string to count words in.

    Returns:
        int: The total number of words found.

    Examples:
        >>> word_count("Hello, world!")
        2
        >>> word_count("  spaces   everywhere  ")
        2
        >>> word_count("don't stop")
        2
        >>> word_count("")
        0
    """
    # The optional group glues apostrophe-joined parts onto the preceding
    # run; a bare word-boundary pattern would report "don't" as two words.
    return len(re.findall(r"\w+(?:['\u2019]\w+)*", text))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def char_frequency(text: str, char: str) -> int:
    """
    Count how many times a character (or substring) appears in a string.

    Delegates to ``str.count()``, which counts non-overlapping matches.
    An empty ``char`` is treated as "nothing to look for" and yields 0.

    Args:
        text (str): The input string to search within.
        char (str): The character or substring to count.

    Returns:
        int: The number of non-overlapping occurrences of char in text,
            or 0 when char is empty.

    Examples:
        >>> char_frequency("hello world", "l")
        3
        >>> char_frequency("banana", "an")
        2
        >>> char_frequency("hello", "z")
        0
        >>> char_frequency("hello", "")
        0
    """
    if not char:
        # str.count("") returns len(text) + 1 (one hit per gap between
        # characters), which is meaningless as an occurrence count.
        return 0
    return text.count(char)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def is_palindrome(text: str, case_sensitive: bool = False, ignore_formatting: bool = True) -> bool:
    """
    Tell whether a string is identical to its own reversal.

    Before comparing, the text can be cleaned with the shared helpers
    from the cleaning module: ``remove_punctuation`` drops punctuation
    and ``collapse_spaces`` then removes all whitespace. The comparison
    can also be made case-insensitive by lowercasing the text first.

    Args:
        text (str): The input string to check.
        case_sensitive (bool): If False (default), the text is lowercased
            before comparison. If True, casing must match exactly.
        ignore_formatting (bool): If True (default), punctuation and
            whitespace are stripped out before comparison. If False,
            the string is compared as given.

    Returns:
        bool: True if the (optionally cleaned) string equals its reverse,
            False otherwise.

    Examples:
        >>> is_palindrome("racecar")
        True
        >>> is_palindrome("A man a plan a canal Panama")
        True
        >>> is_palindrome("Hello")
        False
        >>> is_palindrome("Racecar", case_sensitive=True)
        False
    """
    candidate = text
    if ignore_formatting:
        candidate = collapse_spaces(remove_punctuation(candidate))
    if not case_sensitive:
        candidate = candidate.lower()
    return candidate[::-1] == candidate
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def is_anagram(word1: str, word2: str) -> bool:
    """
    Tell whether two strings are made of exactly the same characters.

    Each input is lowercased and has its whitespace removed (via
    ``collapse_spaces``); the remaining characters are then sorted and
    the two sorted sequences compared.

    Args:
        word1 (str): The first string to compare.
        word2 (str): The second string to compare.

    Returns:
        bool: True if the two strings are anagrams, False otherwise.

    Examples:
        >>> is_anagram("listen", "silent")
        True
        >>> is_anagram("hello", "world")
        False
        >>> is_anagram("Astronomer", "Moon starer")
        True
    """
    first, second = (
        sorted(collapse_spaces(word.lower())) for word in (word1, word2)
    )
    return first == second
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def count_vowels(text: str) -> int:
    """
    Count the vowels (a, e, i, o, u) contained in a string.

    The text is lowercased first, so uppercase and lowercase vowels are
    counted alike. Only the five unaccented English vowels are matched.

    Args:
        text (str): The input string to scan.

    Returns:
        int: The total number of vowel characters found.

    Examples:
        >>> count_vowels("Hello World")
        3
        >>> count_vowels("rhythm")
        0
        >>> count_vowels("AEIOU")
        5
    """
    vowels = "aeiou"
    return sum(1 for letter in text.lower() if letter in vowels)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def longest_word(text: str) -> int:
    """
    Return the length of the longest word in a string.

    The text is split on whitespace and the largest token length is
    reported. Punctuation attached to a token counts towards its length.

    Args:
        text (str): The input string to search.

    Returns:
        int: The character length of the longest word, or 0 if the
            input is empty or contains only whitespace.

    Examples:
        >>> longest_word("The quick brown fox")
        5
        >>> longest_word("hi")
        2
        >>> longest_word("")
        0
    """
    # default=0 covers the "no tokens at all" case.
    return max(map(len, text.split()), default=0)
|
|
174
|
+
|
|
175
|
+
|
strutils/cleaning.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import string
|
|
3
|
+
import emoji
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def normalize_whitespace(text: str) -> str:
    """Reduce every run of whitespace to one space and trim both ends.

    The text is broken into its whitespace-separated tokens (spaces,
    tabs, newlines, etc. all act as separators) and the tokens are glued
    back together with single spaces.

    Args:
        text: The input string to normalize.

    Returns:
        A new string in which tokens are separated by exactly one space,
        with no leading or trailing whitespace.

    Example:
        >>> normalize_whitespace("hello   world\\n\\tfoo")
        'hello world foo'
    """
    tokens = text.split()
    return " ".join(tokens)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def remove_punctuation(text: str, replace: str = "") -> str:
    """Remove or replace all punctuation characters from a string.

    Every character listed in ``string.punctuation`` is swapped for the
    replacement string, then the result is passed through
    ``normalize_whitespace``.

    Args:
        text: The input string to process.
        replace: The string to substitute in place of each punctuation
            character. Defaults to ``""`` (deletion). It is inserted
            literally; backslashes in it have no special meaning.

    Returns:
        A new string with all punctuation characters replaced and whitespace
        normalized.

    Example:
        >>> remove_punctuation("Hello, world!")
        'Hello world'
        >>> remove_punctuation("Hello, world!", replace=" ")
        'Hello world'
    """
    # A translation table maps each punctuation character straight to the
    # replacement text. Handing ``replace`` to re.sub as a template would
    # make the regex engine interpret backslash sequences inside it.
    table = str.maketrans(dict.fromkeys(string.punctuation, replace))
    return normalize_whitespace(text.translate(table))
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def remove_digits(text: str, replace: str = "") -> str:
    """Remove or replace all digit characters from a string.

    Each digit character is swapped for the replacement string, then the
    result is passed through ``normalize_whitespace``.

    Args:
        text: The input string to process.
        replace: The string to substitute in place of each digit character.
            Defaults to ``""`` (deletion). It is inserted literally;
            backslashes in it have no special meaning.

    Returns:
        A new string with all digit characters replaced and whitespace
        normalized.

    Example:
        >>> remove_digits("abc123def456")
        'abcdef'
        >>> remove_digits("abc123", replace="#")
        'abc###'
    """
    # A callable replacement is used verbatim; a plain string would be
    # treated as a template and its backslash sequences expanded.
    without_digits = re.sub(r"\d", lambda _match: replace, text)
    return normalize_whitespace(without_digits)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def remove_html_tags(text: str, replace: str = "") -> str:
    """Strip or replace HTML tags from a string.

    Anything of the form ``<...>`` (an opening angle bracket, at least one
    character that is not ``>``, then ``>``) is swapped for the replacement
    string, then the result is passed through ``normalize_whitespace``.

    Args:
        text: The input string potentially containing HTML markup.
        replace: The string to substitute in place of each HTML tag.
            Defaults to ``""`` (deletion). It is inserted literally;
            backslashes in it have no special meaning.

    Returns:
        A new string with all HTML tags replaced and whitespace normalized.

    Example:
        >>> remove_html_tags("<p>Hello <b>world</b></p>")
        'Hello world'
        >>> remove_html_tags("<br/>line1<br/>line2", replace=" ")
        'line1 line2'
    """
    # A callable replacement is used verbatim; a plain string would be
    # treated as a template and its backslash sequences expanded.
    without_tags = re.sub(r"<[^>]+>", lambda _match: replace, text)
    return normalize_whitespace(without_tags)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def remove_urls(text: str, replace: str = "") -> str:
    """Remove or replace URLs from a string.

    Matches ``http://`` / ``https://`` URLs and bare ``www.`` URLs, swaps
    each for the replacement string, then passes the result through
    ``normalize_whitespace``. A match extends over word characters and
    the characters ``. / ? = # & % -``; any other character (for example
    the ``:`` before a port number) ends it.

    Args:
        text: The input string potentially containing URLs.
        replace: The string to substitute in place of each matched URL.
            Defaults to ``""`` (deletion). It is inserted literally;
            backslashes in it have no special meaning.

    Returns:
        A new string with all URLs replaced and whitespace normalized.

    Example:
        >>> remove_urls("Visit https://example.com for details.")
        'Visit for details.'
        >>> remove_urls("Go to www.example.com now", replace="[link]")
        'Go to [link] now'
    """
    # A callable replacement is used verbatim; a plain string would be
    # treated as a template and its backslash sequences expanded.
    without_urls = re.sub(
        r"(?:https?:\/\/)?www\.[\w./?=#&%-]+|https?:\/\/[\w./?=#&%-]+",
        lambda _match: replace,
        text,
    )
    return normalize_whitespace(without_urls)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def remove_emojis(text: str, replace: str = "") -> str:
    """Strip emoji out of a string, or swap them for a placeholder.

    Detection and substitution are delegated to
    ``emoji.replace_emoji``; the outcome is then passed through
    ``normalize_whitespace`` to tidy up any leftover gaps.

    Args:
        text: The input string potentially containing emoji characters.
        replace: The string to substitute in place of each emoji.
            Defaults to ``""`` (deletion).

    Returns:
        A new string with all emoji characters replaced and whitespace
        normalized.

    Example:
        >>> remove_emojis("Hello 😊 world 🌍")
        'Hello world'
        >>> remove_emojis("Great job! 🎉", replace="[emoji]")
        'Great job! [emoji]'
    """
    return normalize_whitespace(emoji.replace_emoji(text, replace))
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def collapse_spaces(text: str) -> str:
    """Delete every whitespace character from a string.

    The text is broken into its whitespace-separated tokens (spaces,
    tabs, newlines, etc. all act as separators) and the tokens are
    concatenated with nothing in between.

    Note:
        Despite the name, nothing is "collapsed" to a single space here;
        use ``normalize_whitespace`` for that.

    Args:
        text: The input string to process.

    Returns:
        A new string with every whitespace character removed.

    Example:
        >>> collapse_spaces("hello   world\\nfoo")
        'helloworldfoo'
    """
    tokens = text.split()
    return "".join(tokens)
|
|
170
|
+
|
strutils/format_cases.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def to_snake_case(text: str) -> str:
    """
    Convert a string to snake_case.

    An underscore is inserted wherever a lowercase ASCII letter or digit
    is directly followed by an uppercase ASCII letter. The text is then
    cut at every run of characters other than ASCII letters and digits,
    and the non-empty pieces are joined with underscores and lowercased.

    Args:
        text (str): The input string to convert.

    Returns:
        str: The converted snake_case string.

    Examples:
        >>> to_snake_case("camelCaseText")
        'camel_case_text'
        >>> to_snake_case("Hello World!")
        'hello_world'
        >>> to_snake_case("kebab-case-text")
        'kebab_case_text'
    """
    marked = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', text)
    pieces = re.split(r'[^a-zA-Z0-9]+', marked)
    # Splitting leaves empty strings only at the ends; dropping them is
    # what keeps the result free of leading/trailing underscores.
    return "_".join(piece for piece in pieces if piece).lower()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def to_camel_case(text: str) -> str:
    """
    Convert a string to camelCase.

    The text is split into words at lowercase/digit-to-uppercase
    transitions and at every run of characters other than ASCII letters
    and digits. The first word is lowercased; each following word is
    capitalized (first letter upper, rest lower) and appended.

    Args:
        text (str): The input string to convert.

    Returns:
        str: The converted camelCase string, or an empty string if input
            contains no alphanumeric characters.

    Examples:
        >>> to_camel_case("hello_world")
        'helloWorld'
        >>> to_camel_case("PascalCaseText")
        'pascalCaseText'
        >>> to_camel_case("kebab-case-text")
        'kebabCaseText'
    """
    spaced = re.sub(r'([a-z0-9])([A-Z])', r'\1 \2', text)
    tokens = re.sub(r'[^a-zA-Z0-9]+', ' ', spaced).split()

    if tokens:
        head, *tail = tokens
        return head.lower() + "".join(map(str.capitalize, tail))
    return ""
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def to_pascal_case(text: str) -> str:
    """
    Convert a string to PascalCase (UpperCamelCase).

    A space is inserted wherever a lowercase letter or digit is directly
    followed by an uppercase letter, every run of characters outside
    ``a-z``, ``A-Z`` and ``0-9`` becomes a single space, and the resulting
    words are each capitalized (first character upper, the rest lower)
    and concatenated.

    Args:
        text (str): The input string to convert.

    Returns:
        str: The PascalCase form of ``text``, or ``""`` when ``text``
            contains no ASCII letters or digits.

    Examples:
        >>> to_pascal_case("hello_world")
        'HelloWorld'
        >>> to_pascal_case("camelCaseText")
        'CamelCaseText'
        >>> to_pascal_case("kebab-case-text")
        'KebabCaseText'
    """
    with_humps_split = re.sub(r'([a-z0-9])([A-Z])', r'\1 \2', text)
    tokens = re.sub(r'[^a-zA-Z0-9]+', ' ', with_humps_split).split()

    if len(tokens) == 0:
        return ""

    capitalized = [token.capitalize() for token in tokens]
    return "".join(capitalized)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def to_kebab_case(text: str) -> str:
    """
    Convert a string to kebab-case.

    A hyphen is inserted wherever a lowercase letter or digit is directly
    followed by an uppercase letter, every run of characters outside
    ``a-z``, ``A-Z`` and ``0-9`` is replaced by a single hyphen, the
    result is lowercased, and leading/trailing hyphens are removed.

    Args:
        text (str): The input string to convert.

    Returns:
        str: The kebab-case form of ``text``.

    Examples:
        >>> to_kebab_case("camelCaseText")
        'camel-case-text'
        >>> to_kebab_case("Hello World!")
        'hello-world'
        >>> to_kebab_case("snake_case_text")
        'snake-case-text'
    """
    hump_split = re.sub(r'([a-z0-9])([A-Z])', r'\1-\2', text)
    hyphenated = re.sub(r'[^a-zA-Z0-9]+', '-', hump_split)
    lowered = hyphenated.lower()
    # Separators at either end of the input would otherwise leave stray hyphens.
    return lowered.strip('-')
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def to_title_case(text: str) -> str:
    """
    Convert a string to Title Case.

    A space is inserted wherever a lowercase letter or digit is directly
    followed by an uppercase letter, every run of characters outside
    ``a-z``, ``A-Z`` and ``0-9`` becomes a single space, and the resulting
    words are each capitalized (first character upper, the rest lower)
    and joined with single spaces.

    Args:
        text (str): The input string to convert.

    Returns:
        str: The Title Case form of ``text``, or ``""`` when ``text``
            contains no ASCII letters or digits.

    Examples:
        >>> to_title_case("hello_world")
        'Hello World'
        >>> to_title_case("camelCaseText")
        'Camel Case Text'
        >>> to_title_case("kebab-case-text")
        'Kebab Case Text'
    """
    hump_split = re.sub(r'([a-z0-9])([A-Z])', r'\1 \2', text)
    tokens = re.sub(r'[^a-zA-Z0-9]+', ' ', hump_split).split()
    # Joining an empty token list already yields "", so no separate guard
    # is needed for input without any letters or digits.
    return " ".join(map(str.capitalize, tokens))
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def truncate(text: str, max_length: int, suffix: str = "...") -> str:
    """
    Truncate a string to a maximum length, appending a suffix if truncated.

    Text that already fits within ``max_length`` is always returned
    unchanged, even when ``max_length`` is shorter than the suffix. When
    the text is too long it is cut so that the kept prefix plus the suffix
    is exactly ``max_length`` characters. If there is no room for any of
    the text (``max_length`` is less than or equal to the suffix length),
    only the suffix, trimmed to ``max_length``, is returned.

    Args:
        text (str): The input string to truncate.
        max_length (int): The maximum allowed length of the returned string,
            including the suffix. Negative values are treated as ``0``.
        suffix (str): The string to append when truncation occurs.
            Defaults to "...".

    Returns:
        str: The original string if it fits within max_length, otherwise the
            truncated string with the suffix appended.

    Examples:
        >>> truncate("Hello, World!", 8)
        'Hello...'
        >>> truncate("Hi", 10)
        'Hi'
        >>> truncate("Hi", 2)
        'Hi'
        >>> truncate("Hello", 2, "...")
        '..'
    """
    # A negative limit can never be met; clamping keeps ``suffix[:limit]``
    # from being read as a slice counted from the end of the suffix.
    limit = max(max_length, 0)

    # Check the fit first so that short text is never replaced by the
    # suffix merely because the limit is not longer than the suffix.
    if len(text) <= limit:
        return text

    if limit <= len(suffix):
        return suffix[:limit]

    return text[:limit - len(suffix)] + suffix
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def pad_center(text: str, width: int, fillchar: str = " ") -> str:
    """
    Center a string within a field of the given width.

    This is a thin wrapper around the built-in ``str.center()``, which
    returns the text unchanged when it is already at least ``width``
    characters long.

    Args:
        text (str): The input string to center.
        width (int): The total width of the resulting string.
        fillchar (str): A single character used for padding on both sides.
            Defaults to a space " ".

    Returns:
        str: The centered string padded to the specified width.

    Examples:
        >>> pad_center("hello", 11)
        '   hello   '
        >>> pad_center("hi", 10, "-")
        '----hi----'
        >>> pad_center("toolong", 3)
        'toolong'
    """
    centered = text.center(width, fillchar)
    return centered
|
strutils/generation.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
def generate_slug(text: str) -> str:
    """
    Convert a string into a URL-friendly slug.

    Accented letters are first reduced to their base letter (``é`` becomes
    ``e``) so that they stay part of the word instead of splitting it.
    The text is then lowercased, every run of characters outside ``a-z``
    and ``0-9`` is replaced with a single hyphen, and leading/trailing
    hyphens are stripped. Pure-ASCII input is unaffected by the accent
    handling.

    Args:
        text: The string to slugify.

    Returns:
        A lowercase, hyphen-separated slug suitable for use in URLs.

    Examples:
        >>> generate_slug("Hello World!")
        'hello-world'
        >>> generate_slug("  Python 3.11 -- Release Notes  ")
        'python-3-11-release-notes'
        >>> generate_slug("Crème Brûlée")
        'creme-brulee'
    """
    # Imported locally so the function does not depend on a module-level
    # import being added elsewhere in the file.
    import unicodedata

    # NFKD splits an accented letter into base letter + combining mark(s);
    # dropping only the marks keeps the base letter. Characters with no
    # decomposition are left in place and still act as separators below.
    decomposed = unicodedata.normalize("NFKD", text)
    unaccented = "".join(
        char for char in decomposed if not unicodedata.combining(char)
    )

    slug = re.sub(r'[^a-z0-9]+', '-', unaccented.lower())
    return slug.strip("-")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def reverse_word(text: str) -> str:
    """
    Return the characters of a string in reverse order.

    Args:
        text: The string to reverse.

    Returns:
        A new string whose characters appear in the opposite order.

    Examples:
        >>> reverse_word("hello")
        'olleh'
        >>> reverse_word("racecar")
        'racecar'
    """
    backwards = reversed(text)
    return "".join(backwards)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def mask_range(text: str, start_index: int, end_index: int, placeholder: str = "X") -> str:
    """
    Mask a slice of a string with a repeated placeholder character.

    Replaces characters from ``start_index`` up to (but not including)
    ``end_index`` with repeated ``placeholder`` characters. Indices follow
    normal slice rules: negative values count from the end of the string,
    and values that fall outside the string are clamped to its bounds.
    Returns the original string unchanged if the resolved range is empty
    (``start_index >= end_index`` after resolution).

    Args:
        text: The source string to mask.
        start_index: Index of the first character to mask. Negative values
            count from the end of the string.
        end_index: Index one past the last character to mask. Negative values
            count from the end of the string.
        placeholder: Single character used for masking. Defaults to ``"X"``.
            It is repeated once per masked character.

    Returns:
        A new string with the specified range replaced by the placeholder.

    Examples:
        >>> mask_range("Hello, World!", 7, 12)
        'Hello, XXXXX!'
        >>> mask_range("1234-5678-9012", 5, 9, "*")
        '1234-****-9012'
        >>> mask_range("secret", -3, -1)
        'secXXt'
        >>> mask_range("secret", -10, 3)
        'XXXret'
    """
    # slice.indices() resolves negative indices AND clamps out-of-range
    # ones to [0, len(text)]. Adding len(text) by hand leaves an index such
    # as -10 negative on a 6-character string, which slicing would then
    # reinterpret as counting from the end again.
    start, stop, _ = slice(start_index, end_index).indices(len(text))

    if start >= stop:
        return text

    return text[:start] + placeholder * (stop - start) + text[stop:]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def ceasar_cipher(text: str, shift: int) -> str:
    """
    Encrypt or decrypt a string using the Caesar cipher.

    Shifts each ASCII letter (``a``-``z``, ``A``-``Z``) in ``text`` forward
    by ``shift`` positions in the alphabet, wrapping around at Z/z and
    preserving case. Every other character is left unchanged: digits,
    spaces, punctuation, and non-ASCII letters such as ``é``, which have
    no position in the 26-letter alphabet. To decrypt, pass the negated
    shift value (e.g. ``shift=-3``).

    Args:
        text: The string to encrypt or decrypt.
        shift: Number of positions to shift each letter. May be negative
            for a leftward shift (i.e. decryption with the positive key).

    Returns:
        A new string with each ASCII letter shifted by ``shift`` positions.

    Examples:
        >>> ceasar_cipher("Hello, World!", 3)
        'Khoor, Zruog!'
        >>> ceasar_cipher("Khoor, Zruog!", -3)
        'Hello, World!'
        >>> ceasar_cipher("abc", 26)
        'abc'
    """
    lower = "abcdefghijklmnopqrstuvwxyz"
    upper = lower.upper()

    # Python's % always yields 0..25 here, so negative shifts rotate left.
    offset = shift % 26

    # Only the 52 ASCII letters appear in the table, so translate() passes
    # every other character through untouched. This keeps the cipher
    # reversible for text containing accented or non-Latin letters.
    table = str.maketrans(
        lower + upper,
        lower[offset:] + lower[:offset] + upper[offset:] + upper[:offset],
    )
    return text.translate(table)
|
strutils/validation.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def is_email(text: str) -> bool:
    """
    Check whether a string looks like an email address.

    The whole string must match this shape: a local part made of word
    characters (letters, digits, underscore), dots, plus signs or hyphens;
    an ``@`` symbol; a domain made of word characters, dots or hyphens;
    and a final dot followed by a top-level domain of at least two ASCII
    letters. This is a shape check only, not full RFC validation.

    Args:
        text: The string to validate.

    Returns:
        ``True`` if ``text`` matches the email pattern, ``False`` otherwise.

    Examples:
        >>> is_email("user@example.com")
        True
        >>> is_email("invalid-email")
        False
        >>> is_email("user@sub.domain.org")
        True
    """
    return re.fullmatch(r"[\w.+-]+@[\w.-]+\.[a-zA-Z]{2,}", text) is not None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def is_url(text: str) -> bool:
    """
    Check whether a string looks like an HTTP or HTTPS URL.

    The whole string must start with ``http://`` or ``https://`` and be
    followed by at least one character drawn from word characters and the
    symbols ``. / ? = # & % : -``. Any other character (a space, for
    example) makes the check fail. This is a shape check only; the host
    and path are not otherwise validated.

    Args:
        text: The string to validate.

    Returns:
        ``True`` if ``text`` matches the URL pattern, ``False`` otherwise.

    Examples:
        >>> is_url("https://www.example.com")
        True
        >>> is_url("http://api.service.io/v1/users?id=42")
        True
        >>> is_url("ftp://files.example.com")
        False
        >>> is_url("not a url")
        False
    """
    return re.fullmatch(r"https?:\/\/[\w./?=#&%:-]+", text) is not None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def contains_only(text: str, allowed_chars: str) -> bool:
    """
    Check whether a string is composed entirely of allowed characters.

    Tests whether every character in ``text`` also occurs in
    ``allowed_chars``. When ``text`` is empty or otherwise falsy, that
    original falsy value (empty string, ``None``, etc.) is returned
    unchanged rather than a boolean, so callers can distinguish an empty
    input from a non-matching one.

    Args:
        text: The string to validate.
        allowed_chars: A string whose characters form the permitted set.
            Every character is taken literally, so passing ``"a-z"``
            permits only the characters ``a``, ``-``, and ``z`` — not a
            range. An empty string permits nothing.

    Returns:
        ``True`` if every character in ``text`` is in ``allowed_chars``,
        the original falsy value of ``text`` if ``text`` is empty/falsy,
        or ``False`` if any character falls outside the allowed set.

    Examples:
        >>> contains_only("hello", "helo")
        True
        >>> contains_only("hello!", "helo")
        False
        >>> contains_only("12345", "0123456789")
        True
        >>> contains_only("", "abc")
        ''
        >>> contains_only("abc", "")
        False
    """
    if not text:
        return text

    # Plain set membership treats every allowed character literally and
    # handles an empty ``allowed_chars`` (nothing permitted -> False),
    # which an empty regex character class ("[]+") cannot express.
    return set(text).issubset(allowed_chars)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def is_strong_password(text: str) -> bool:
    """
    Check whether a string meets strong-password requirements.

    A strong password must satisfy all of the following:

    - At least 8 characters long.
    - Contains at least one lowercase letter (``a``–``z``).
    - Contains at least one uppercase letter (``A``–``Z``).
    - Contains at least one digit (``0``–``9``).
    - Contains at least one special character from ``@$!%*?&``.

    Characters outside these classes are permitted but do not count
    towards any requirement. Because ``.`` in the pattern does not match a
    newline, a string containing a newline is rejected.

    Args:
        text: The password string to validate.

    Returns:
        ``True`` if ``text`` satisfies all password requirements,
        ``False`` otherwise.

    Examples:
        >>> is_strong_password("Passw0rd!")
        True
        >>> is_strong_password("weakpass")
        False
        >>> is_strong_password("NoSpecial1")
        False
        >>> is_strong_password("Short1!")
        False
    """
    # Each lookahead asserts one required character class; ``.{8,}`` then
    # consumes the whole string to enforce the minimum length.
    policy = r"(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&]).{8,}"

    # re.ASCII restricts ``\d`` to 0-9. Without it, any Unicode decimal
    # digit (e.g. Arabic-Indic digits) would satisfy the digit rule,
    # contradicting the documented ``0``–``9`` requirement.
    return re.fullmatch(policy, text, flags=re.ASCII) is not None
|