morphic 0.1.7__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {morphic-0.1.7 → morphic-0.2.0}/PKG-INFO +1 -1
- {morphic-0.1.7 → morphic-0.2.0}/docs/api/index.md +24 -2
- morphic-0.2.0/docs/api/string.md +3 -0
- morphic-0.2.0/docs/user-guide/string.md +535 -0
- {morphic-0.1.7 → morphic-0.2.0}/mkdocs.yml +2 -0
- {morphic-0.1.7 → morphic-0.2.0}/src/morphic/__init__.py +76 -0
- {morphic-0.1.7 → morphic-0.2.0}/src/morphic/function.py +7 -1
- {morphic-0.1.7 → morphic-0.2.0}/src/morphic/registry.py +33 -56
- morphic-0.2.0/src/morphic/string.py +1982 -0
- morphic-0.2.0/src/morphic/string_data.py +9094 -0
- {morphic-0.1.7 → morphic-0.2.0}/src/morphic/typed.py +1 -64
- morphic-0.2.0/tests/__init__.py +1 -0
- {morphic-0.1.7 → morphic-0.2.0}/tests/test_imports.py +48 -55
- {morphic-0.1.7 → morphic-0.2.0}/tests/test_registry.py +13 -15
- morphic-0.2.0/tests/test_string.py +1234 -0
- {morphic-0.1.7 → morphic-0.2.0}/tests/test_typed_registry_integration.py +4 -4
- morphic-0.1.7/tests/__init__.py +0 -1
- {morphic-0.1.7 → morphic-0.2.0}/.cursor/rules/morphic-standards.mdc +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/.cursor/rules/typed-registry-examples.mdc +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/.github/workflows/docs.yml +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/.github/workflows/linting.yml +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/.github/workflows/release.yml +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/.github/workflows/tests.yml +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/.gitignore +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/LICENSE +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/README.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/api/autoenum.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/api/registry.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/api/typed.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/examples.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/index.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/installation.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/stylesheets/extra.css +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/user-guide/autoenum.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/user-guide/getting-started.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/user-guide/registry.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/user-guide/typed-registry-integration.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/docs/user-guide/typed.md +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/pyproject.toml +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/src/morphic/autoenum.py +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/src/morphic/imports.py +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/src/morphic/structs.py +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/tests/test_autoenum.py +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/tests/test_function.py +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/tests/test_structs.py +0 -0
- {morphic-0.1.7 → morphic-0.2.0}/tests/test_typed.py +0 -0
|
@@ -30,6 +30,18 @@ Enhanced data modeling with validation and serialization.
|
|
|
30
30
|
**Key Classes:**
|
|
31
31
|
- `Typed` - Base class for data models with validation
|
|
32
32
|
|
|
33
|
+
### [String](string.md)
|
|
34
|
+
Comprehensive string manipulation, formatting, and validation utilities.
|
|
35
|
+
|
|
36
|
+
**Key Functions:**
|
|
37
|
+
- Text Normalization: `normalize()`, `punct_normalize()`, `whitespace_normalize()`
|
|
38
|
+
- Case Conversion: `detect_case()`, `convert_case()`
|
|
39
|
+
- Formatting: `readable_bytes()`, `readable_seconds()`, `readable_number()`
|
|
40
|
+
- Validation: `is_int()`, `is_float()`, `is_empty()`, `is_stream()`
|
|
41
|
+
- Utilities: `hash()`, `join_human()`, `random_name()`, `zfill()`
|
|
42
|
+
- DateTime: `parse_datetime()`, `now()`, `readable_datetime()`
|
|
43
|
+
- Matching: `fuzzy_match()`, `is_fuzzy_match()`
|
|
44
|
+
|
|
33
45
|
## Module Overview
|
|
34
46
|
|
|
35
47
|
The Morphic library is organized into focused modules:
|
|
@@ -38,8 +50,12 @@ The Morphic library is organized into focused modules:
|
|
|
38
50
|
morphic/
|
|
39
51
|
├── __init__.py # Main exports
|
|
40
52
|
├── registry.py # Registry system implementation
|
|
41
|
-
├── autoenum.py
|
|
42
|
-
|
|
53
|
+
├── autoenum.py # AutoEnum functionality
|
|
54
|
+
├── typed.py # Typed base class
|
|
55
|
+
├── string.py # String utilities
|
|
56
|
+
├── function.py # Function utilities
|
|
57
|
+
├── structs.py # Data structure utilities
|
|
58
|
+
└── imports.py # Import utilities
|
|
43
59
|
```
|
|
44
60
|
|
|
45
61
|
## Quick Reference
|
|
@@ -54,6 +70,12 @@ from morphic import Registry, AutoEnum, Typed
|
|
|
54
70
|
from morphic.registry import Registry
|
|
55
71
|
from morphic.autoenum import AutoEnum
|
|
56
72
|
from morphic.typed import Typed
|
|
73
|
+
|
|
74
|
+
# Import string utilities
|
|
75
|
+
from morphic.string import (
|
|
76
|
+
normalize, convert_case, readable_bytes,
|
|
77
|
+
hash, join_human, random_name
|
|
78
|
+
)
|
|
57
79
|
```
|
|
58
80
|
|
|
59
81
|
### Common Usage Patterns
|
|
@@ -0,0 +1,535 @@
|
|
|
1
|
+
# String Utilities
|
|
2
|
+
|
|
3
|
+
Morphic provides a comprehensive suite of string manipulation, formatting, and validation utilities through the `morphic.string` module. This guide covers the most commonly used functions, organized by category.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The string utilities cover:
|
|
8
|
+
|
|
9
|
+
- **Text Normalization** - Clean and standardize text
|
|
10
|
+
- **Case Conversion** - Convert between naming conventions
|
|
11
|
+
- **Formatting** - Human-readable output for numbers, bytes, time
|
|
12
|
+
- **Validation** - Type checking and validation
|
|
13
|
+
- **Hashing** - Generate consistent hashes
|
|
14
|
+
- **Name Generation** - Create human-readable random names
|
|
15
|
+
- **DateTime Utilities** - Format and parse dates
|
|
16
|
+
- **Fuzzy Matching** - Flexible string matching
|
|
17
|
+
|
|
18
|
+
## Text Normalization
|
|
19
|
+
|
|
20
|
+
### Basic Normalization
|
|
21
|
+
|
|
22
|
+
Remove separator characters (spaces, hyphens, and underscores by default) and convert to lowercase for consistent comparisons:
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from morphic.string import normalize
|
|
26
|
+
|
|
27
|
+
# Remove spaces, hyphens, underscores (default)
|
|
28
|
+
assert normalize("Hello World") == "helloworld"
|
|
29
|
+
assert normalize("snake_case") == "snakecase"
|
|
30
|
+
assert normalize("kebab-case") == "kebabcase"
|
|
31
|
+
|
|
32
|
+
# Custom removal characters
|
|
33
|
+
assert normalize("foo@bar.com", remove=("@", ".")) == "foobarcom"
|
|
34
|
+
|
|
35
|
+
# Only lowercase, no removal
|
|
36
|
+
assert normalize("CamelCase", remove=None) == "camelcase"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Punctuation Normalization
|
|
40
|
+
|
|
41
|
+
Remove punctuation with optional space and number removal:
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from morphic.string import punct_normalize
|
|
45
|
+
|
|
46
|
+
# Basic usage - removes punctuation, spaces, and lowercases
|
|
47
|
+
assert punct_normalize("Hello, World!") == "helloworld"
|
|
48
|
+
|
|
49
|
+
# Keep spaces
|
|
50
|
+
assert punct_normalize("Hello, World!", space=False) == "hello world"
|
|
51
|
+
|
|
52
|
+
# Keep case
|
|
53
|
+
assert punct_normalize("Hello, World!", lowercase=False) == "HelloWorld"
|
|
54
|
+
|
|
55
|
+
# Remove numbers too
|
|
56
|
+
assert punct_normalize("abc123def", numbers=True) == "abcdef"
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Whitespace Normalization
|
|
60
|
+
|
|
61
|
+
Clean up messy whitespace:
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from morphic.string import whitespace_normalize
|
|
65
|
+
|
|
66
|
+
# Collapse multiple spaces
|
|
67
|
+
assert whitespace_normalize("hello    world") == "hello world"
|
|
68
|
+
|
|
69
|
+
# Collapse multiple newlines
|
|
70
|
+
assert whitespace_normalize("line1\n\n\nline2") == "line1\nline2"
|
|
71
|
+
|
|
72
|
+
# Remove newlines entirely
|
|
73
|
+
assert whitespace_normalize("line1\nline2", remove_newlines=True) == "line1line2"
|
|
74
|
+
|
|
75
|
+
# Clean up messy text
|
|
76
|
+
messy = " hello world \n\n foo bar \n\n "
|
|
77
|
+
clean = whitespace_normalize(messy)
|
|
78
|
+
assert clean == "hello world\nfoo bar"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Case Conversion
|
|
82
|
+
|
|
83
|
+
Convert between different naming conventions:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from morphic.string import convert_case, detect_case
|
|
87
|
+
|
|
88
|
+
# Snake case
|
|
89
|
+
assert convert_case("CamelCase", "snake") == "camel_case"
|
|
90
|
+
assert convert_case("PascalCase", "snake") == "pascal_case"
|
|
91
|
+
|
|
92
|
+
# Camel case
|
|
93
|
+
assert convert_case("snake_case", "camel") == "snakeCase"
|
|
94
|
+
assert convert_case("kebab-case", "camel") == "kebabCase"
|
|
95
|
+
|
|
96
|
+
# Pascal case
|
|
97
|
+
assert convert_case("snake_case", "pascal") == "SnakeCase"
|
|
98
|
+
assert convert_case("camelCase", "pascal") == "CamelCase"
|
|
99
|
+
|
|
100
|
+
# Kebab case
|
|
101
|
+
assert convert_case("PascalCase", "kebab") == "pascal-case"
|
|
102
|
+
assert convert_case("snake_case", "kebab") == "snake-case"
|
|
103
|
+
|
|
104
|
+
# Detect current case
|
|
105
|
+
assert detect_case("snake_case") == "snake"
|
|
106
|
+
assert detect_case("camelCase") == "camel"
|
|
107
|
+
assert detect_case("PascalCase") == "pascal"
|
|
108
|
+
assert detect_case("kebab-case") == "kebab"
|
|
109
|
+
assert detect_case("SCREAMING_SNAKE") == "screaming_snake"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Human-Readable Formatting
|
|
113
|
+
|
|
114
|
+
### File Sizes
|
|
115
|
+
|
|
116
|
+
Format bytes into human-readable sizes:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from morphic.string import readable_bytes, convert_size_from_bytes, convert_size_to_bytes
|
|
120
|
+
|
|
121
|
+
# Automatic unit selection
|
|
122
|
+
assert "KB" in readable_bytes(5000)
|
|
123
|
+
assert "MB" in readable_bytes(5000000)
|
|
124
|
+
assert "GB" in readable_bytes(5000000000)
|
|
125
|
+
|
|
126
|
+
# Get all conversions
|
|
127
|
+
sizes = convert_size_from_bytes(1024)
|
|
128
|
+
assert sizes["B"] == 1024.0
|
|
129
|
+
assert sizes["KB"] == 1.0
|
|
130
|
+
|
|
131
|
+
# Get specific unit
|
|
132
|
+
kb_size = convert_size_from_bytes(2048, unit="KB")
|
|
133
|
+
assert kb_size == 2.0
|
|
134
|
+
|
|
135
|
+
# Parse human-readable sizes
|
|
136
|
+
assert convert_size_to_bytes("1 KB") == 1024
|
|
137
|
+
assert convert_size_to_bytes("1 MB") == 1024 * 1024
|
|
138
|
+
assert convert_size_to_bytes("1.5 KB") == int(1.5 * 1024)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Time Durations
|
|
142
|
+
|
|
143
|
+
Format seconds into human-readable durations:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
from morphic.string import readable_seconds, convert_time_from_seconds
|
|
147
|
+
from datetime import timedelta
|
|
148
|
+
|
|
149
|
+
# Automatic unit selection
|
|
150
|
+
result = readable_seconds(5)
|
|
151
|
+
assert "s" in result # "5.0 seconds" or "5s"
|
|
152
|
+
|
|
153
|
+
result = readable_seconds(120)
|
|
154
|
+
assert "min" in result # "2 mins" or "2min"
|
|
155
|
+
|
|
156
|
+
result = readable_seconds(7200)
|
|
157
|
+
assert "hr" in result or "hours" in result
|
|
158
|
+
|
|
159
|
+
# Short format
|
|
160
|
+
assert readable_seconds(5, short=True) in ["5.0s", "5s"]
|
|
161
|
+
|
|
162
|
+
# With timedelta
|
|
163
|
+
td = timedelta(seconds=30)
|
|
164
|
+
result = readable_seconds(td)
|
|
165
|
+
assert "30" in result
|
|
166
|
+
|
|
167
|
+
# Get all conversions
|
|
168
|
+
times = convert_time_from_seconds(60)
|
|
169
|
+
assert times["seconds"] == 60.0
|
|
170
|
+
assert times["mins"] == 1.0
|
|
171
|
+
|
|
172
|
+
# Short format keys
|
|
173
|
+
times = convert_time_from_seconds(5, short=True)
|
|
174
|
+
assert "s" in times
|
|
175
|
+
assert "ms" in times
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Numbers
|
|
179
|
+
|
|
180
|
+
Format large numbers with unit suffixes:
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
from morphic.string import readable_number, convert_number
|
|
184
|
+
|
|
185
|
+
# Large numbers with short units
|
|
186
|
+
assert readable_number(1000, short=True) == "1K"
|
|
187
|
+
assert readable_number(1000000, short=True) == "1M"
|
|
188
|
+
assert readable_number(1000000000, short=True) == "1B"
|
|
189
|
+
|
|
190
|
+
# Long format
|
|
191
|
+
assert readable_number(1000, short=False) == "1 thousand"
|
|
192
|
+
assert readable_number(1000000, short=False) == "1 million"
|
|
193
|
+
|
|
194
|
+
# Small numbers use scientific notation
|
|
195
|
+
result = readable_number(0.00123)
|
|
196
|
+
assert "e" in result
|
|
197
|
+
|
|
198
|
+
# Get all conversions
|
|
199
|
+
numbers = convert_number(1000000, short=True)
|
|
200
|
+
assert numbers["M"] == 1.0
|
|
201
|
+
assert numbers["K"] == 1000.0
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
## Validation Functions
|
|
205
|
+
|
|
206
|
+
Check types and validate inputs:
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
from morphic.string import is_int, is_float, is_empty, is_stream
|
|
210
|
+
|
|
211
|
+
# Integer validation
|
|
212
|
+
assert is_int("123") is True
|
|
213
|
+
assert is_int("-123") is True
|
|
214
|
+
assert is_int("123.0") is False # Has decimal point
|
|
215
|
+
assert is_int("abc") is False
|
|
216
|
+
|
|
217
|
+
# Float validation
|
|
218
|
+
assert is_float("123") is True
|
|
219
|
+
assert is_float("1.23") is True
|
|
220
|
+
assert is_float("123.0") is True
|
|
221
|
+
assert is_float("1e2") is True
|
|
222
|
+
assert is_float("nan") is True
|
|
223
|
+
assert is_float("inf") is True
|
|
224
|
+
|
|
225
|
+
# Empty string validation
|
|
226
|
+
assert is_empty("") is True
|
|
227
|
+
assert is_empty(" ") is True
|
|
228
|
+
assert is_empty("hello") is False
|
|
229
|
+
|
|
230
|
+
# Stream validation
|
|
231
|
+
import io
|
|
232
|
+
stream = io.StringIO("content")
|
|
233
|
+
assert is_stream(stream) is True
|
|
234
|
+
assert is_stream("string") is False
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## String Utilities
|
|
238
|
+
|
|
239
|
+
### Hashing
|
|
240
|
+
|
|
241
|
+
Generate consistent (deterministic) SHA-256 hashes of strings, numbers, and collections:
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
from morphic.string import hash
|
|
245
|
+
|
|
246
|
+
# Hash strings
|
|
247
|
+
h1 = hash("hello")
|
|
248
|
+
h2 = hash("hello")
|
|
249
|
+
assert h1 == h2 # Consistent
|
|
250
|
+
|
|
251
|
+
# Hash numbers
|
|
252
|
+
h = hash(42)
|
|
253
|
+
|
|
254
|
+
# Hash collections
|
|
255
|
+
h1 = hash([1, 2, 3])
|
|
256
|
+
h2 = hash([1, 2, 3])
|
|
257
|
+
assert h1 == h2
|
|
258
|
+
|
|
259
|
+
# Hash dictionaries (order-independent)
|
|
260
|
+
h1 = hash({"a": 1, "b": 2})
|
|
261
|
+
h2 = hash({"b": 2, "a": 1})
|
|
262
|
+
assert h1 == h2
|
|
263
|
+
|
|
264
|
+
# Different bases
|
|
265
|
+
h_base62 = hash("test", base=62) # Default
|
|
266
|
+
h_base36 = hash("test", base=36)
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
### Join Human-Readable Lists
|
|
270
|
+
|
|
271
|
+
Create grammatically correct lists:
|
|
272
|
+
|
|
273
|
+
```python
|
|
274
|
+
from morphic.string import join_human
|
|
275
|
+
|
|
276
|
+
# Basic usage
|
|
277
|
+
assert join_human(["apple", "banana", "cherry"]) == "apple, banana and cherry"
|
|
278
|
+
|
|
279
|
+
# With Oxford comma
|
|
280
|
+
assert join_human(["apple", "banana", "cherry"], oxford_comma=True) == "apple, banana, and cherry"
|
|
281
|
+
|
|
282
|
+
# Two items
|
|
283
|
+
assert join_human(["Alice", "Bob"]) == "Alice and Bob"
|
|
284
|
+
|
|
285
|
+
# Custom separator and conjunction
|
|
286
|
+
assert join_human([1, 2, 3], sep=";", final_join="or") == "1; 2 or 3"
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
### Random Name Generation
|
|
290
|
+
|
|
291
|
+
Generate human-readable random names:
|
|
292
|
+
|
|
293
|
+
```python
|
|
294
|
+
from morphic.string import random_name
|
|
295
|
+
|
|
296
|
+
# Single name
|
|
297
|
+
name = random_name() # e.g., "happy-running-dog"
|
|
298
|
+
|
|
299
|
+
# Multiple names
|
|
300
|
+
names = random_name(3)
|
|
301
|
+
# e.g., ["quick-jumping-fox", "lazy-sleeping-cat", "brave-flying-eagle"]
|
|
302
|
+
|
|
303
|
+
# Custom separator
|
|
304
|
+
name = random_name(sep="_") # e.g., "brave_flying_eagle"
|
|
305
|
+
|
|
306
|
+
# Custom order
|
|
307
|
+
name = random_name(order=("adjective", "noun")) # e.g., "blue-sky"
|
|
308
|
+
name = random_name(order=("verb", "noun")) # e.g., "running-tiger"
|
|
309
|
+
|
|
310
|
+
# Reproducible with seed
|
|
311
|
+
name1 = random_name(seed=42)
|
|
312
|
+
name2 = random_name(seed=42)
|
|
313
|
+
assert name1 == name2
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### Zero-Padding
|
|
317
|
+
|
|
318
|
+
Zero-pad numbers to a consistent width, derived from the largest value you expect:
|
|
319
|
+
|
|
320
|
+
```python
|
|
321
|
+
from morphic.string import zfill, get_num_zeros_to_pad
|
|
322
|
+
|
|
323
|
+
# Basic usage
|
|
324
|
+
assert zfill(5, 999) == "005"
|
|
325
|
+
assert zfill(42, 999) == "042"
|
|
326
|
+
assert zfill(999, 999) == "999"
|
|
327
|
+
|
|
328
|
+
# For filenames that sort correctly
|
|
329
|
+
for i in range(100):
|
|
330
|
+
filename = f"file_{zfill(i, 100)}.txt"
|
|
331
|
+
# Results in: file_000.txt, file_001.txt, ..., file_099.txt
|
|
332
|
+
|
|
333
|
+
# Calculate padding needed
|
|
334
|
+
assert get_num_zeros_to_pad(99) == 2
|
|
335
|
+
assert get_num_zeros_to_pad(100) == 3
|
|
336
|
+
assert get_num_zeros_to_pad(999) == 3
|
|
337
|
+
assert get_num_zeros_to_pad(1000) == 4
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
## DateTime Utilities
|
|
341
|
+
|
|
342
|
+
### Formatting
|
|
343
|
+
|
|
344
|
+
Format datetimes in various styles:
|
|
345
|
+
|
|
346
|
+
```python
|
|
347
|
+
from morphic.string import readable_datetime, now
|
|
348
|
+
from datetime import datetime
|
|
349
|
+
|
|
350
|
+
dt = datetime(2021, 1, 15, 14, 30, 45, 123456)
|
|
351
|
+
|
|
352
|
+
# ISO 8601 format (default)
|
|
353
|
+
result = readable_datetime(dt)
|
|
354
|
+
# "2021-01-15T14:30:45.123456+00:00"
|
|
355
|
+
|
|
356
|
+
# Human-readable format
|
|
357
|
+
result = readable_datetime(dt, human=True)
|
|
358
|
+
# "15Jan2021-14:30:45+UTC"
|
|
359
|
+
|
|
360
|
+
# Without microseconds
|
|
361
|
+
result = readable_datetime(dt, microsec=False)
|
|
362
|
+
# "2021-01-15T14:30:45+00:00"
|
|
363
|
+
|
|
364
|
+
# Without timezone
|
|
365
|
+
result = readable_datetime(dt, tz=False)
|
|
366
|
+
# "2021-01-15T14:30:45.123456"
|
|
367
|
+
|
|
368
|
+
# Get current time
|
|
369
|
+
current = now() # ISO format with microseconds and timezone
|
|
370
|
+
current = now(human=True) # Human-readable format
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
### Parsing
|
|
374
|
+
|
|
375
|
+
Parse datetimes from several kinds of input — existing `datetime` objects, Unix timestamps, and ISO 8601 strings:
|
|
376
|
+
|
|
377
|
+
```python
|
|
378
|
+
from morphic.string import parse_datetime
|
|
379
|
+
from datetime import datetime
|
|
380
|
+
|
|
381
|
+
# datetime object (pass-through)
|
|
382
|
+
dt = datetime.now()
|
|
383
|
+
assert parse_datetime(dt) == dt
|
|
384
|
+
|
|
385
|
+
# Unix timestamp (int)
|
|
386
|
+
dt = parse_datetime(1609459200)
|
|
387
|
+
assert dt.year == 2021
|
|
388
|
+
|
|
389
|
+
# Unix timestamp (float with microseconds)
|
|
390
|
+
dt = parse_datetime(1609459200.5)
|
|
391
|
+
assert dt.microsecond > 0
|
|
392
|
+
|
|
393
|
+
# ISO format string
|
|
394
|
+
dt = parse_datetime("2021-01-01T00:00:00")
|
|
395
|
+
assert dt.year == 2021 and dt.month == 1
|
|
396
|
+
|
|
397
|
+
# With timezone
|
|
398
|
+
dt = parse_datetime("2021-01-01T00:00:00+00:00")
|
|
399
|
+
assert dt.tzinfo is not None
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
## Fuzzy Matching
|
|
403
|
+
|
|
404
|
+
Match strings that differ only in their delimiters or letter case:
|
|
405
|
+
|
|
406
|
+
```python
|
|
407
|
+
from morphic.string import fuzzy_match, is_fuzzy_match
|
|
408
|
+
|
|
409
|
+
# Basic matching with different delimiters
|
|
410
|
+
assert fuzzy_match("my_file", ["my-file", "other"]) == "my-file"
|
|
411
|
+
assert fuzzy_match("my-file", ["my_file", "other"]) == "my_file"
|
|
412
|
+
assert fuzzy_match("my file", ["my_file", "other"]) == "my_file"
|
|
413
|
+
|
|
414
|
+
# Case-insensitive
|
|
415
|
+
assert fuzzy_match("MyFile", ["my_file"]) == "my_file"
|
|
416
|
+
assert fuzzy_match("MY-FILE", ["my_file"]) == "my_file"
|
|
417
|
+
|
|
418
|
+
# No match
|
|
419
|
+
assert fuzzy_match("notfound", ["my_file", "other"]) is None
|
|
420
|
+
|
|
421
|
+
# Check for match existence
|
|
422
|
+
assert is_fuzzy_match("my_file", ["my-file", "other"]) is True
|
|
423
|
+
assert is_fuzzy_match("notfound", ["my_file", "other"]) is False
|
|
424
|
+
|
|
425
|
+
# Custom replacements
|
|
426
|
+
assert fuzzy_match("a.b.c", ["a_b_c"], replacements=(".",)) == "a_b_c"
|
|
427
|
+
```
|
|
428
|
+
|
|
429
|
+
## Advanced Usage
|
|
430
|
+
|
|
431
|
+
### String Format Arguments
|
|
432
|
+
|
|
433
|
+
Extract the argument names referenced in a format string:
|
|
434
|
+
|
|
435
|
+
```python
|
|
436
|
+
from morphic.string import str_format_args
|
|
437
|
+
|
|
438
|
+
# Named arguments
|
|
439
|
+
assert str_format_args("Hello {name}!") == ["name"]
|
|
440
|
+
assert str_format_args("{first} {last}") == ["first", "last"]
|
|
441
|
+
|
|
442
|
+
# Positional arguments (with named_only=False)
|
|
443
|
+
assert str_format_args("Hello {0}!", named_only=False) == ["0"]
|
|
444
|
+
assert str_format_args("{0} {name} {1}", named_only=False) == ["0", "name", "1"]
|
|
445
|
+
assert str_format_args("{0} {name} {1}", named_only=True) == ["name"]
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
### Type Conversion
|
|
449
|
+
|
|
450
|
+
Convert string representations to Python types:
|
|
451
|
+
|
|
452
|
+
```python
|
|
453
|
+
from morphic.string import convert_str_to_type
|
|
454
|
+
|
|
455
|
+
# Basic types
|
|
456
|
+
assert convert_str_to_type("123", int) == 123
|
|
457
|
+
assert convert_str_to_type("12.5", float) == 12.5
|
|
458
|
+
assert convert_str_to_type("true", bool) is True
|
|
459
|
+
assert convert_str_to_type("False", bool) is False
|
|
460
|
+
|
|
461
|
+
# Collections
|
|
462
|
+
assert convert_str_to_type("[1, 2, 3]", list) == [1, 2, 3]
|
|
463
|
+
assert convert_str_to_type("(1, 2, 3)", tuple) == (1, 2, 3)
|
|
464
|
+
assert convert_str_to_type("{1, 2, 3}", set) == {1, 2, 3}
|
|
465
|
+
assert convert_str_to_type("{'a': 1}", dict) == {'a': 1}
|
|
466
|
+
|
|
467
|
+
# Type coercion
|
|
468
|
+
assert convert_str_to_type("5", float) == 5.0 # int to float
|
|
469
|
+
assert convert_str_to_type("5.0", int) == 5 # float to int (if whole)
|
|
470
|
+
assert convert_str_to_type("[1, 2]", tuple) == (1, 2) # list to tuple
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
## Best Practices
|
|
474
|
+
|
|
475
|
+
### Text Processing Pipeline
|
|
476
|
+
|
|
477
|
+
Combine multiple utilities for robust text processing:
|
|
478
|
+
|
|
479
|
+
```python
|
|
480
|
+
from morphic.string import normalize, punct_normalize, whitespace_normalize
|
|
481
|
+
|
|
482
|
+
def clean_text(text: str) -> str:
|
|
483
|
+
"""Clean text for comparison."""
|
|
484
|
+
# Remove extra whitespace
|
|
485
|
+
text = whitespace_normalize(text)
|
|
486
|
+
# Remove punctuation
|
|
487
|
+
text = punct_normalize(text, space=False)
|
|
488
|
+
# Normalize for comparison
|
|
489
|
+
text = normalize(text)
|
|
490
|
+
return text
|
|
491
|
+
|
|
492
|
+
# Use it
|
|
493
|
+
clean = clean_text(" Hello, World! \n How are you? ")
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
### Consistent Formatting
|
|
497
|
+
|
|
498
|
+
Morphic itself uses many of these string functions internally. Reuse them in your own code so that sizes and durations are formatted consistently:
|
|
499
|
+
|
|
500
|
+
```python
|
|
501
|
+
from morphic.string import readable_bytes, readable_seconds
|
|
502
|
+
|
|
503
|
+
def format_stats(bytes_used: int, time_elapsed: float) -> str:
|
|
504
|
+
"""Format statistics in a consistent way."""
|
|
505
|
+
return f"Used {readable_bytes(bytes_used)} in {readable_seconds(time_elapsed)}"
|
|
506
|
+
|
|
507
|
+
stats = format_stats(1024 * 1024 * 5, 12.5)
|
|
508
|
+
# "Used 5.0 MB in 12.5 seconds"
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
### Case Conversion for APIs
|
|
512
|
+
|
|
513
|
+
Handle different naming conventions:
|
|
514
|
+
|
|
515
|
+
```python
|
|
516
|
+
from morphic.string import convert_case
|
|
517
|
+
|
|
518
|
+
def api_response_to_python(data: dict) -> dict:
|
|
519
|
+
"""Convert API camelCase keys to Python snake_case."""
|
|
520
|
+
return {
|
|
521
|
+
convert_case(key, "snake"): value
|
|
522
|
+
for key, value in data.items()
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
api_data = {"firstName": "John", "lastName": "Doe"}
|
|
526
|
+
python_data = api_response_to_python(api_data)
|
|
527
|
+
# {"first_name": "John", "last_name": "Doe"}
|
|
528
|
+
```
|
|
529
|
+
|
|
530
|
+
## Next Steps
|
|
531
|
+
|
|
532
|
+
- See the [API Reference](../api/string.md) for detailed function signatures
|
|
533
|
+
- Check out [Examples](../examples.md#string-utilities) for more usage patterns
|
|
534
|
+
- Learn about [Typed](typed.md), which uses string utilities for validation
|
|
535
|
+
|
|
@@ -16,11 +16,13 @@ nav:
|
|
|
16
16
|
- AutoEnum: user-guide/autoenum.md
|
|
17
17
|
- Typed: user-guide/typed.md
|
|
18
18
|
- Typed + Registry Integration: user-guide/typed-registry-integration.md
|
|
19
|
+
- String Utilities: user-guide/string.md
|
|
19
20
|
- API Reference:
|
|
20
21
|
- Overview: api/index.md
|
|
21
22
|
- Registry: api/registry.md
|
|
22
23
|
- AutoEnum: api/autoenum.md
|
|
23
24
|
- Typed: api/typed.md
|
|
25
|
+
- String: api/string.md
|
|
24
26
|
- Examples: examples.md
|
|
25
27
|
- Contributing: contributing.md
|
|
26
28
|
- Changelog: changelog.md
|
|
@@ -9,6 +9,7 @@ from .function import (
|
|
|
9
9
|
get_current_fn_name,
|
|
10
10
|
get_fn_args,
|
|
11
11
|
get_fn_spec,
|
|
12
|
+
is_abstract,
|
|
12
13
|
is_function,
|
|
13
14
|
params_to_call_str,
|
|
14
15
|
parsed_fn_source,
|
|
@@ -18,6 +19,41 @@ from .function import (
|
|
|
18
19
|
# Import utilities organized by module
|
|
19
20
|
from .imports import optional_dependency
|
|
20
21
|
from .registry import Registry
|
|
22
|
+
|
|
23
|
+
# String utilities
|
|
24
|
+
from .string import (
|
|
25
|
+
BaseConverter,
|
|
26
|
+
convert_case,
|
|
27
|
+
convert_number,
|
|
28
|
+
convert_size_from_bytes,
|
|
29
|
+
convert_size_to_bytes,
|
|
30
|
+
convert_str_to_type,
|
|
31
|
+
convert_time_from_seconds,
|
|
32
|
+
detect_case,
|
|
33
|
+
format_exception_msg,
|
|
34
|
+
fuzzy_match,
|
|
35
|
+
get_num_zeros_to_pad,
|
|
36
|
+
hash,
|
|
37
|
+
is_empty,
|
|
38
|
+
is_float,
|
|
39
|
+
is_fuzzy_match,
|
|
40
|
+
is_int,
|
|
41
|
+
is_not_empty_bytes,
|
|
42
|
+
is_stream,
|
|
43
|
+
join_human,
|
|
44
|
+
normalize,
|
|
45
|
+
now,
|
|
46
|
+
parse_datetime,
|
|
47
|
+
punct_normalize,
|
|
48
|
+
random_name,
|
|
49
|
+
readable_bytes,
|
|
50
|
+
readable_datetime,
|
|
51
|
+
readable_number,
|
|
52
|
+
readable_seconds,
|
|
53
|
+
str_format_args,
|
|
54
|
+
whitespace_normalize,
|
|
55
|
+
zfill,
|
|
56
|
+
)
|
|
21
57
|
from .structs import (
|
|
22
58
|
all_are_false,
|
|
23
59
|
all_are_none,
|
|
@@ -99,6 +135,7 @@ __all__ = [
|
|
|
99
135
|
# Function utilities
|
|
100
136
|
"fn_str",
|
|
101
137
|
"get_current_fn_name",
|
|
138
|
+
"is_abstract",
|
|
102
139
|
"is_function",
|
|
103
140
|
"call_str_to_params",
|
|
104
141
|
"params_to_call_str",
|
|
@@ -108,4 +145,43 @@ __all__ = [
|
|
|
108
145
|
"get_fn_spec",
|
|
109
146
|
"get_fn_args",
|
|
110
147
|
"filter_kwargs",
|
|
148
|
+
# String utilities - Text normalization
|
|
149
|
+
"normalize",
|
|
150
|
+
"punct_normalize",
|
|
151
|
+
"whitespace_normalize",
|
|
152
|
+
# String utilities - Case conversion
|
|
153
|
+
"detect_case",
|
|
154
|
+
"convert_case",
|
|
155
|
+
# String utilities - Formatting
|
|
156
|
+
"readable_bytes",
|
|
157
|
+
"convert_size_from_bytes",
|
|
158
|
+
"convert_size_to_bytes",
|
|
159
|
+
"readable_seconds",
|
|
160
|
+
"convert_time_from_seconds",
|
|
161
|
+
"readable_number",
|
|
162
|
+
"convert_number",
|
|
163
|
+
# String utilities - Validation
|
|
164
|
+
"is_int",
|
|
165
|
+
"is_float",
|
|
166
|
+
"is_empty",
|
|
167
|
+
"is_not_empty_bytes",
|
|
168
|
+
"is_stream",
|
|
169
|
+
# String utilities - Core utilities
|
|
170
|
+
"hash",
|
|
171
|
+
"join_human",
|
|
172
|
+
"random_name",
|
|
173
|
+
"zfill",
|
|
174
|
+
"get_num_zeros_to_pad",
|
|
175
|
+
"BaseConverter",
|
|
176
|
+
# String utilities - DateTime
|
|
177
|
+
"parse_datetime",
|
|
178
|
+
"now",
|
|
179
|
+
"readable_datetime",
|
|
180
|
+
# String utilities - Matching
|
|
181
|
+
"fuzzy_match",
|
|
182
|
+
"is_fuzzy_match",
|
|
183
|
+
# String utilities - Advanced
|
|
184
|
+
"str_format_args",
|
|
185
|
+
"convert_str_to_type",
|
|
186
|
+
"format_exception_msg",
|
|
111
187
|
]
|