python-dateutil-rs 0.0.13__tar.gz → 0.0.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/Cargo.lock +1 -1
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/PKG-INFO +13 -6
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/README.md +12 -5
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/benches/benchmarks.rs +9 -9
- python_dateutil_rs-0.0.14/crates/dateutil-core/src/parser/parserinfo.rs +318 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/parser.rs +123 -104
- python_dateutil_rs-0.0.14/crates/dateutil-py/src/py/conv.rs +176 -0
- python_dateutil_rs-0.0.14/crates/dateutil-py/src/py/parser.rs +335 -0
- python_dateutil_rs-0.0.14/crates/dateutil-py/src/py/relativedelta.rs +330 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-py/src/py/tz.rs +139 -68
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-py/src/py.rs +1 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/Cargo.toml +1 -1
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/README.md +12 -5
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/pyproject.toml +1 -1
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/v1/__init__.py +2 -1
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/v1/_native.pyi +56 -8
- python_dateutil_rs-0.0.14/python/dateutil_rs/v1/parser.py +18 -0
- python_dateutil_rs-0.0.13/crates/dateutil-py/src/py/parser.rs +0 -63
- python_dateutil_rs-0.0.13/crates/dateutil-py/src/py/relativedelta.rs +0 -206
- python_dateutil_rs-0.0.13/python/dateutil_rs/v1/parser.py +0 -5
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/Cargo.toml +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/LICENSE +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/CLAUDE.md +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/Cargo.toml +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/common.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/easter.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/error.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/lib.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/parser/isoparser.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/parser/tokenizer.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/relativedelta.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/rrule/iter.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/rrule/parse.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/rrule/set.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/rrule.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/tz/file.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/tz/local.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/tz/offset.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/tz/utc.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/src/tz.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-py/Cargo.toml +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-py/src/lib.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-py/src/py/common.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-py/src/py/easter.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-py/src/py/rrule.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/LICENSE +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/benches/benchmarks.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/common.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/easter.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/lib.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/parser/isoparser.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/parser.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/relativedelta.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/rrule/iter.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/rrule.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/tz/file.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/tz/local.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/tz/offset.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/tz/range.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/tz/utc.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/tz.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-rs/src/utils.rs +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/__init__.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/_native.pyi +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/common.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/easter.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/parser.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/py.typed +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/relativedelta.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/rrule.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/tz.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/utils.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/v1/common.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/v1/easter.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/v1/py.typed +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/v1/relativedelta.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/v1/rrule.py +0 -0
- {python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/python/dateutil_rs/v1/tz.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-dateutil-rs
|
|
3
|
-
Version: 0.0.13
|
|
3
|
+
Version: 0.0.14
|
|
4
4
|
Classifier: Programming Language :: Python :: 3.10
|
|
5
5
|
Classifier: Programming Language :: Python :: 3.11
|
|
6
6
|
Classifier: Programming Language :: Python :: 3.12
|
|
@@ -84,6 +84,7 @@ from dateutil_rs.v1.relativedelta import relativedelta
|
|
|
84
84
|
from dateutil_rs.v1.rrule import rrule, rruleset, MONTHLY
|
|
85
85
|
from dateutil_rs.v1.easter import easter
|
|
86
86
|
from dateutil_rs.v1.common import MO, TU, WE, TH, FR, SA, SU
|
|
87
|
+
from dateutil_rs.v1.tz import gettz, tzutc
|
|
87
88
|
|
|
88
89
|
# Parse date strings (zero-copy tokenizer)
|
|
89
90
|
dt = parse("2024-01-15T10:30:00")
|
|
@@ -91,6 +92,10 @@ dt = parse("2024-01-15T10:30:00")
|
|
|
91
92
|
# Recurrence rules (buffer-reusing iterator)
|
|
92
93
|
monthly = rrule(MONTHLY, count=5, dtstart=dt)
|
|
93
94
|
dates = monthly.all()
|
|
95
|
+
|
|
96
|
+
# Timezones
|
|
97
|
+
tokyo = gettz("Asia/Tokyo")
|
|
98
|
+
utc = tzutc()
|
|
94
99
|
```
|
|
95
100
|
|
|
96
101
|
## Development
|
|
@@ -220,11 +225,12 @@ dateutil-rs/
|
|
|
220
225
|
│ │ ├── parser.rs # parse() + zero-copy tokenizer
|
|
221
226
|
│ │ ├── parser/ # isoparser
|
|
222
227
|
│ │ ├── rrule.rs # RRule + iterator
|
|
223
|
-
│ │
|
|
228
|
+
│ │ ├── rrule/ # set, parse (rrulestr), iter
|
|
229
|
+
│ │ └── tz/ # tzutc, tzoffset, tzfile, tzlocal
|
|
224
230
|
│ ├── dateutil-py/ # PyO3 bindings for v1 core
|
|
225
231
|
│ │ └── src/
|
|
226
232
|
│ │ ├── lib.rs # Module registration
|
|
227
|
-
│ │ └── py/ # Per-module bindings
|
|
233
|
+
│ │ └── py/ # Per-module bindings (common, easter, parser, relativedelta, rrule, tz)
|
|
228
234
|
│ └── dateutil-rs/ # v0: python-dateutil compat + unified native module
|
|
229
235
|
│ └── src/
|
|
230
236
|
│ ├── lib.rs # Crate root + #[pymodule] (v0 + v1)
|
|
@@ -251,7 +257,8 @@ dateutil-rs/
|
|
|
251
257
|
│ ├── easter.py # Easter
|
|
252
258
|
│ ├── parser.py # parse, isoparse
|
|
253
259
|
│ ├── relativedelta.py
|
|
254
|
-
│
|
|
260
|
+
│ ├── rrule.py # rrule, rruleset, rrulestr
|
|
261
|
+
│ └── tz.py # tzutc, tzoffset, tzfile, tzlocal, gettz
|
|
255
262
|
├── tests/ # Test suite (~13k lines)
|
|
256
263
|
├── benchmarks/ # pytest-benchmark comparisons
|
|
257
264
|
├── .github/workflows/ # CI (lint + test matrix)
|
|
@@ -284,7 +291,7 @@ dateutil-rs/
|
|
|
284
291
|
| relativedelta | ✅ | ✅ | |
|
|
285
292
|
| parser | ✅ | ✅ | Zero-copy tokenizer, PHF lookups |
|
|
286
293
|
| rrule | ✅ | ✅ | Bitflag filters, buffer reuse |
|
|
287
|
-
| tz |
|
|
294
|
+
| tz | ✅ | ✅ | tzutc, tzoffset, tzfile, tzlocal, gettz |
|
|
288
295
|
|
|
289
296
|
## Roadmap
|
|
290
297
|
|
|
@@ -293,7 +300,7 @@ dateutil-rs/
|
|
|
293
300
|
3. **~~Rust rrule~~** — Rewrite recurrence rules in Rust ✅
|
|
294
301
|
4. **~~Rust tz~~** — Rewrite timezone support in Rust (with gettz cache) ✅
|
|
295
302
|
5. **~~v1 optimized core~~** — common, easter, relativedelta, parser, rrule ✅
|
|
296
|
-
6.
|
|
303
|
+
6. **~~v1 timezone~~** — Rewrite tz module for v1 core (tzutc, tzoffset, tzfile, tzlocal, gettz) ✅
|
|
297
304
|
7. **Release** — Publish dateutil-core to crates.io and python-dateutil-rs 1.0 to PyPI
|
|
298
305
|
|
|
299
306
|
## License
|
|
@@ -62,6 +62,7 @@ from dateutil_rs.v1.relativedelta import relativedelta
|
|
|
62
62
|
from dateutil_rs.v1.rrule import rrule, rruleset, MONTHLY
|
|
63
63
|
from dateutil_rs.v1.easter import easter
|
|
64
64
|
from dateutil_rs.v1.common import MO, TU, WE, TH, FR, SA, SU
|
|
65
|
+
from dateutil_rs.v1.tz import gettz, tzutc
|
|
65
66
|
|
|
66
67
|
# Parse date strings (zero-copy tokenizer)
|
|
67
68
|
dt = parse("2024-01-15T10:30:00")
|
|
@@ -69,6 +70,10 @@ dt = parse("2024-01-15T10:30:00")
|
|
|
69
70
|
# Recurrence rules (buffer-reusing iterator)
|
|
70
71
|
monthly = rrule(MONTHLY, count=5, dtstart=dt)
|
|
71
72
|
dates = monthly.all()
|
|
73
|
+
|
|
74
|
+
# Timezones
|
|
75
|
+
tokyo = gettz("Asia/Tokyo")
|
|
76
|
+
utc = tzutc()
|
|
72
77
|
```
|
|
73
78
|
|
|
74
79
|
## Development
|
|
@@ -198,11 +203,12 @@ dateutil-rs/
|
|
|
198
203
|
│ │ ├── parser.rs # parse() + zero-copy tokenizer
|
|
199
204
|
│ │ ├── parser/ # isoparser
|
|
200
205
|
│ │ ├── rrule.rs # RRule + iterator
|
|
201
|
-
│ │
|
|
206
|
+
│ │ ├── rrule/ # set, parse (rrulestr), iter
|
|
207
|
+
│ │ └── tz/ # tzutc, tzoffset, tzfile, tzlocal
|
|
202
208
|
│ ├── dateutil-py/ # PyO3 bindings for v1 core
|
|
203
209
|
│ │ └── src/
|
|
204
210
|
│ │ ├── lib.rs # Module registration
|
|
205
|
-
│ │ └── py/ # Per-module bindings
|
|
211
|
+
│ │ └── py/ # Per-module bindings (common, easter, parser, relativedelta, rrule, tz)
|
|
206
212
|
│ └── dateutil-rs/ # v0: python-dateutil compat + unified native module
|
|
207
213
|
│ └── src/
|
|
208
214
|
│ ├── lib.rs # Crate root + #[pymodule] (v0 + v1)
|
|
@@ -229,7 +235,8 @@ dateutil-rs/
|
|
|
229
235
|
│ ├── easter.py # Easter
|
|
230
236
|
│ ├── parser.py # parse, isoparse
|
|
231
237
|
│ ├── relativedelta.py
|
|
232
|
-
│
|
|
238
|
+
│ ├── rrule.py # rrule, rruleset, rrulestr
|
|
239
|
+
│ └── tz.py # tzutc, tzoffset, tzfile, tzlocal, gettz
|
|
233
240
|
├── tests/ # Test suite (~13k lines)
|
|
234
241
|
├── benchmarks/ # pytest-benchmark comparisons
|
|
235
242
|
├── .github/workflows/ # CI (lint + test matrix)
|
|
@@ -262,7 +269,7 @@ dateutil-rs/
|
|
|
262
269
|
| relativedelta | ✅ | ✅ | |
|
|
263
270
|
| parser | ✅ | ✅ | Zero-copy tokenizer, PHF lookups |
|
|
264
271
|
| rrule | ✅ | ✅ | Bitflag filters, buffer reuse |
|
|
265
|
-
| tz |
|
|
272
|
+
| tz | ✅ | ✅ | tzutc, tzoffset, tzfile, tzlocal, gettz |
|
|
266
273
|
|
|
267
274
|
## Roadmap
|
|
268
275
|
|
|
@@ -271,7 +278,7 @@ dateutil-rs/
|
|
|
271
278
|
3. **~~Rust rrule~~** — Rewrite recurrence rules in Rust ✅
|
|
272
279
|
4. **~~Rust tz~~** — Rewrite timezone support in Rust (with gettz cache) ✅
|
|
273
280
|
5. **~~v1 optimized core~~** — common, easter, relativedelta, parser, rrule ✅
|
|
274
|
-
6.
|
|
281
|
+
6. **~~v1 timezone~~** — Rewrite tz module for v1 core (tzutc, tzoffset, tzfile, tzlocal, gettz) ✅
|
|
275
282
|
7. **Release** — Publish dateutil-core to crates.io and python-dateutil-rs 1.0 to PyPI
|
|
276
283
|
|
|
277
284
|
## License
|
{python_dateutil_rs-0.0.13 → python_dateutil_rs-0.0.14}/crates/dateutil-core/benches/benchmarks.rs
RENAMED
|
@@ -41,26 +41,26 @@ fn bench_tokenizer(c: &mut Criterion) {
|
|
|
41
41
|
fn bench_parser(c: &mut Criterion) {
|
|
42
42
|
c.bench_function("parse_iso_date", |b| {
|
|
43
43
|
b.iter(|| {
|
|
44
|
-
black_box(parser::parse(black_box("2024-01-15"), false, false, None).unwrap());
|
|
44
|
+
black_box(parser::parse(black_box("2024-01-15"), false, false, None, None).unwrap());
|
|
45
45
|
})
|
|
46
46
|
});
|
|
47
47
|
|
|
48
48
|
c.bench_function("parse_datetime", |b| {
|
|
49
49
|
b.iter(|| {
|
|
50
|
-
black_box(parser::parse(black_box("2024-01-15 10:30:45"), false, false, None).unwrap());
|
|
50
|
+
black_box(parser::parse(black_box("2024-01-15 10:30:45"), false, false, None, None).unwrap());
|
|
51
51
|
})
|
|
52
52
|
});
|
|
53
53
|
|
|
54
54
|
c.bench_function("parse_month_name", |b| {
|
|
55
55
|
b.iter(|| {
|
|
56
|
-
black_box(parser::parse(black_box("January 15, 2024"), false, false, None).unwrap());
|
|
56
|
+
black_box(parser::parse(black_box("January 15, 2024"), false, false, None, None).unwrap());
|
|
57
57
|
})
|
|
58
58
|
});
|
|
59
59
|
|
|
60
60
|
c.bench_function("parse_complex", |b| {
|
|
61
61
|
b.iter(|| {
|
|
62
62
|
black_box(
|
|
63
|
-
parser::parse(black_box("Monday, January 15, 2024 3:30:45.123456 PM UTC"), false, false, None)
|
|
63
|
+
parser::parse(black_box("Monday, January 15, 2024 3:30:45.123456 PM UTC"), false, false, None, None)
|
|
64
64
|
.unwrap(),
|
|
65
65
|
);
|
|
66
66
|
})
|
|
@@ -71,7 +71,7 @@ fn bench_parser(c: &mut Criterion) {
|
|
|
71
71
|
black_box(
|
|
72
72
|
parser::parse_to_result(
|
|
73
73
|
black_box("Monday, January 15, 2024 3:30:45.123456 PM EST -05:00"),
|
|
74
|
-
false, false,
|
|
74
|
+
false, false, None,
|
|
75
75
|
).unwrap(),
|
|
76
76
|
);
|
|
77
77
|
})
|
|
@@ -80,7 +80,7 @@ fn bench_parser(c: &mut Criterion) {
|
|
|
80
80
|
c.bench_function("parse_tz_positive_offset", |b| {
|
|
81
81
|
b.iter(|| {
|
|
82
82
|
black_box(
|
|
83
|
-
parser::parse_to_result(black_box("2024-01-15 10:30:45+05:30"), false, false)
|
|
83
|
+
parser::parse_to_result(black_box("2024-01-15 10:30:45+05:30"), false, false, None)
|
|
84
84
|
.unwrap(),
|
|
85
85
|
);
|
|
86
86
|
})
|
|
@@ -89,7 +89,7 @@ fn bench_parser(c: &mut Criterion) {
|
|
|
89
89
|
c.bench_function("parse_tz_negative_offset", |b| {
|
|
90
90
|
b.iter(|| {
|
|
91
91
|
black_box(
|
|
92
|
-
parser::parse_to_result(black_box("2024-01-15 10:30:45-0800"), false, false)
|
|
92
|
+
parser::parse_to_result(black_box("2024-01-15 10:30:45-0800"), false, false, None)
|
|
93
93
|
.unwrap(),
|
|
94
94
|
);
|
|
95
95
|
})
|
|
@@ -97,7 +97,7 @@ fn bench_parser(c: &mut Criterion) {
|
|
|
97
97
|
|
|
98
98
|
c.bench_function("parse_ampm", |b| {
|
|
99
99
|
b.iter(|| {
|
|
100
|
-
black_box(parser::parse(black_box("January 15, 2024 3:30 PM"), false, false, None).unwrap());
|
|
100
|
+
black_box(parser::parse(black_box("January 15, 2024 3:30 PM"), false, false, None, None).unwrap());
|
|
101
101
|
})
|
|
102
102
|
});
|
|
103
103
|
|
|
@@ -135,7 +135,7 @@ fn bench_parser_throughput(c: &mut Criterion) {
|
|
|
135
135
|
c.bench_function("parse_throughput_8_inputs", |b| {
|
|
136
136
|
b.iter(|| {
|
|
137
137
|
for input in &inputs {
|
|
138
|
-
black_box(parser::parse(black_box(input), false, false, None).unwrap());
|
|
138
|
+
black_box(parser::parse(black_box(input), false, false, None, None).unwrap());
|
|
139
139
|
}
|
|
140
140
|
})
|
|
141
141
|
});
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
use std::collections::{HashMap, HashSet};
|
|
2
|
+
|
|
3
|
+
use super::{lowercase_buf, lower_str};
|
|
4
|
+
use super::{lookup_jump, lookup_weekday, lookup_month, lookup_hms, lookup_ampm, lookup_pertain, lookup_utczone};
|
|
5
|
+
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// ParserInfo — custom lookup tables for non-default locale support
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
/// Custom parser configuration that overrides the default PHF lookup tables.
|
|
11
|
+
///
|
|
12
|
+
/// All string keys must be stored in **lowercase** for case-insensitive matching.
|
|
13
|
+
/// Use [`ParserInfo::default()`] to get the standard English tables as `HashMap`s.
|
|
14
|
+
pub struct ParserInfo {
|
|
15
|
+
/// Jump words — ignored during parsing (e.g. "at", "on", ",").
|
|
16
|
+
pub jump: HashSet<String>,
|
|
17
|
+
/// Weekday name → 0-based index (Mon=0 .. Sun=6).
|
|
18
|
+
pub weekdays: HashMap<String, usize>,
|
|
19
|
+
/// Month name → 1-based index (Jan=1 .. Dec=12).
|
|
20
|
+
pub months: HashMap<String, usize>,
|
|
21
|
+
/// HMS indicator → 0=hour, 1=minute, 2=second.
|
|
22
|
+
pub hms: HashMap<String, usize>,
|
|
23
|
+
/// AM/PM → 0=AM, 1=PM.
|
|
24
|
+
pub ampm: HashMap<String, usize>,
|
|
25
|
+
/// UTC-equivalent zone names.
|
|
26
|
+
pub utczone: HashSet<String>,
|
|
27
|
+
/// Pertain words (e.g. "of").
|
|
28
|
+
pub pertain: HashSet<String>,
|
|
29
|
+
/// Known timezone abbreviations → offset in seconds.
|
|
30
|
+
pub tzoffset: HashMap<String, i32>,
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
impl Default for ParserInfo {
|
|
34
|
+
fn default() -> Self {
|
|
35
|
+
let jump: HashSet<String> = [
|
|
36
|
+
" ", ".", ",", ";", "-", "/", "'",
|
|
37
|
+
"at", "on", "and", "ad", "m", "t", "of",
|
|
38
|
+
"st", "nd", "rd", "th",
|
|
39
|
+
].into_iter().map(String::from).collect();
|
|
40
|
+
|
|
41
|
+
let mut weekdays = HashMap::new();
|
|
42
|
+
for (i, names) in [
|
|
43
|
+
&["mon", "monday"][..], &["tue", "tuesday"], &["wed", "wednesday"],
|
|
44
|
+
&["thu", "thursday"], &["fri", "friday"], &["sat", "saturday"],
|
|
45
|
+
&["sun", "sunday"],
|
|
46
|
+
].iter().enumerate() {
|
|
47
|
+
for name in *names {
|
|
48
|
+
weekdays.insert(String::from(*name), i);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
let mut months = HashMap::new();
|
|
53
|
+
for (i, names) in [
|
|
54
|
+
&["jan", "january"][..], &["feb", "february"], &["mar", "march"],
|
|
55
|
+
&["apr", "april"], &["may"][..], &["jun", "june"],
|
|
56
|
+
&["jul", "july"], &["aug", "august"],
|
|
57
|
+
&["sep", "sept", "september"], &["oct", "october"],
|
|
58
|
+
&["nov", "november"], &["dec", "december"],
|
|
59
|
+
].iter().enumerate() {
|
|
60
|
+
for name in *names {
|
|
61
|
+
months.insert(String::from(*name), i + 1);
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
let mut hms = HashMap::new();
|
|
66
|
+
for (i, names) in [
|
|
67
|
+
&["h", "hour", "hours"][..],
|
|
68
|
+
&["m", "minute", "minutes"],
|
|
69
|
+
&["s", "second", "seconds"],
|
|
70
|
+
].iter().enumerate() {
|
|
71
|
+
for name in *names {
|
|
72
|
+
hms.insert(String::from(*name), i);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
let mut ampm = HashMap::new();
|
|
77
|
+
for (i, names) in [&["am", "a"][..], &["pm", "p"]].iter().enumerate() {
|
|
78
|
+
for name in *names {
|
|
79
|
+
ampm.insert(String::from(*name), i);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
let utczone: HashSet<String> =
|
|
84
|
+
["utc", "gmt", "z"].into_iter().map(String::from).collect();
|
|
85
|
+
let pertain: HashSet<String> =
|
|
86
|
+
["of"].into_iter().map(String::from).collect();
|
|
87
|
+
|
|
88
|
+
Self {
|
|
89
|
+
jump, weekdays, months, hms, ampm, utczone, pertain,
|
|
90
|
+
tzoffset: HashMap::new(),
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
impl ParserInfo {
|
|
96
|
+
#[inline]
|
|
97
|
+
pub fn jump(&self, s: &str) -> bool {
|
|
98
|
+
lowercase_buf(s).is_some_and(|buf| self.jump.contains(lower_str(s, &buf)))
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
#[inline]
|
|
102
|
+
pub fn weekday(&self, s: &str) -> Option<usize> {
|
|
103
|
+
let buf = lowercase_buf(s)?;
|
|
104
|
+
let low = lower_str(s, &buf);
|
|
105
|
+
if let Some(&v) = self.weekdays.get(low) {
|
|
106
|
+
return Some(v);
|
|
107
|
+
}
|
|
108
|
+
if s.len() >= 4 {
|
|
109
|
+
if let Some(&v) = self.weekdays.get(&low[..3]) {
|
|
110
|
+
return Some(v);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
None
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
#[inline]
|
|
117
|
+
pub fn month(&self, s: &str) -> Option<usize> {
|
|
118
|
+
let buf = lowercase_buf(s)?;
|
|
119
|
+
self.months.get(lower_str(s, &buf)).copied()
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
#[inline]
|
|
123
|
+
pub fn hms(&self, s: &str) -> Option<usize> {
|
|
124
|
+
let buf = lowercase_buf(s)?;
|
|
125
|
+
self.hms.get(lower_str(s, &buf)).copied()
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
#[inline]
|
|
129
|
+
pub fn ampm(&self, s: &str) -> Option<usize> {
|
|
130
|
+
let buf = lowercase_buf(s)?;
|
|
131
|
+
self.ampm.get(lower_str(s, &buf)).copied()
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
#[inline]
|
|
135
|
+
pub fn pertain(&self, s: &str) -> bool {
|
|
136
|
+
lowercase_buf(s).is_some_and(|buf| self.pertain.contains(lower_str(s, &buf)))
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
#[inline]
|
|
140
|
+
pub fn utczone(&self, s: &str) -> bool {
|
|
141
|
+
lowercase_buf(s).is_some_and(|buf| self.utczone.contains(lower_str(s, &buf)))
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/// Look up a known timezone abbreviation. Returns offset in seconds.
|
|
145
|
+
/// UTC-equivalent zones return `Some(0)`. Matching is case-insensitive.
|
|
146
|
+
/// Single `lowercase_buf` call covers both utczone and tzoffset lookups.
|
|
147
|
+
#[inline]
|
|
148
|
+
pub fn tzoffset(&self, name: &str) -> Option<i32> {
|
|
149
|
+
let buf = lowercase_buf(name)?;
|
|
150
|
+
let low = lower_str(name, &buf);
|
|
151
|
+
if self.utczone.contains(low) {
|
|
152
|
+
return Some(0);
|
|
153
|
+
}
|
|
154
|
+
self.tzoffset.get(low).copied()
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// ---------------------------------------------------------------------------
|
|
159
|
+
// Dispatch helpers — use ParserInfo when provided, PHF otherwise.
|
|
160
|
+
// ---------------------------------------------------------------------------
|
|
161
|
+
|
|
162
|
+
#[inline]
|
|
163
|
+
pub(super) fn do_jump(s: &str, info: Option<&ParserInfo>) -> bool {
|
|
164
|
+
match info {
|
|
165
|
+
Some(i) => i.jump(s),
|
|
166
|
+
None => lookup_jump(s),
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
#[inline]
|
|
171
|
+
pub(super) fn do_weekday(s: &str, info: Option<&ParserInfo>) -> Option<usize> {
|
|
172
|
+
match info {
|
|
173
|
+
Some(i) => i.weekday(s),
|
|
174
|
+
None => lookup_weekday(s),
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
#[inline]
|
|
179
|
+
pub(super) fn do_month(s: &str, info: Option<&ParserInfo>) -> Option<usize> {
|
|
180
|
+
match info {
|
|
181
|
+
Some(i) => i.month(s),
|
|
182
|
+
None => lookup_month(s),
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
#[inline]
|
|
187
|
+
pub(super) fn do_hms(s: &str, info: Option<&ParserInfo>) -> Option<usize> {
|
|
188
|
+
match info {
|
|
189
|
+
Some(i) => i.hms(s),
|
|
190
|
+
None => lookup_hms(s),
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
#[inline]
|
|
195
|
+
pub(super) fn do_ampm(s: &str, info: Option<&ParserInfo>) -> Option<usize> {
|
|
196
|
+
match info {
|
|
197
|
+
Some(i) => i.ampm(s),
|
|
198
|
+
None => lookup_ampm(s),
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
#[inline]
|
|
203
|
+
pub(super) fn do_pertain(s: &str, info: Option<&ParserInfo>) -> bool {
|
|
204
|
+
match info {
|
|
205
|
+
Some(i) => i.pertain(s),
|
|
206
|
+
None => lookup_pertain(s),
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
#[inline]
|
|
211
|
+
pub(super) fn do_utczone(s: &str, info: Option<&ParserInfo>) -> bool {
|
|
212
|
+
match info {
|
|
213
|
+
Some(i) => i.utczone(s),
|
|
214
|
+
None => lookup_utczone(s),
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
#[inline]
|
|
219
|
+
pub(super) fn do_tzoffset(name: &str, info: Option<&ParserInfo>) -> Option<i32> {
|
|
220
|
+
match info {
|
|
221
|
+
Some(i) => i.tzoffset(name),
|
|
222
|
+
None => None, // default PHF has no tzoffset map
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
#[cfg(test)]
|
|
227
|
+
mod tests {
|
|
228
|
+
use super::*;
|
|
229
|
+
|
|
230
|
+
#[test]
|
|
231
|
+
fn test_parserinfo_default_months() {
|
|
232
|
+
let info = ParserInfo::default();
|
|
233
|
+
assert_eq!(info.month("January"), Some(1));
|
|
234
|
+
assert_eq!(info.month("jan"), Some(1));
|
|
235
|
+
assert_eq!(info.month("DECEMBER"), Some(12));
|
|
236
|
+
assert_eq!(info.month("sept"), Some(9));
|
|
237
|
+
assert_eq!(info.month("xyz"), None);
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
#[test]
|
|
241
|
+
fn test_parserinfo_default_weekdays() {
|
|
242
|
+
let info = ParserInfo::default();
|
|
243
|
+
assert_eq!(info.weekday("Monday"), Some(0));
|
|
244
|
+
assert_eq!(info.weekday("fri"), Some(4));
|
|
245
|
+
assert_eq!(info.weekday("Frid"), Some(4)); // prefix match
|
|
246
|
+
assert_eq!(info.weekday("xyz"), None);
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
#[test]
|
|
250
|
+
fn test_parserinfo_default_jump() {
|
|
251
|
+
let info = ParserInfo::default();
|
|
252
|
+
assert!(info.jump("at"));
|
|
253
|
+
assert!(info.jump("on"));
|
|
254
|
+
assert!(info.jump(","));
|
|
255
|
+
assert!(!info.jump("foo"));
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
#[test]
|
|
259
|
+
fn test_parserinfo_default_utczone() {
|
|
260
|
+
let info = ParserInfo::default();
|
|
261
|
+
assert!(info.utczone("UTC"));
|
|
262
|
+
assert!(info.utczone("gmt"));
|
|
263
|
+
assert!(info.utczone("Z"));
|
|
264
|
+
assert!(!info.utczone("EST"));
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
#[test]
|
|
268
|
+
fn test_parserinfo_tzoffset() {
|
|
269
|
+
let mut info = ParserInfo::default();
|
|
270
|
+
info.tzoffset.insert("est".into(), -18000);
|
|
271
|
+
info.tzoffset.insert("cst".into(), -21600);
|
|
272
|
+
|
|
273
|
+
// Case-insensitive lookup
|
|
274
|
+
assert_eq!(info.tzoffset("EST"), Some(-18000));
|
|
275
|
+
assert_eq!(info.tzoffset("est"), Some(-18000));
|
|
276
|
+
assert_eq!(info.tzoffset("Est"), Some(-18000));
|
|
277
|
+
assert_eq!(info.tzoffset("CST"), Some(-21600));
|
|
278
|
+
assert_eq!(info.tzoffset("UTC"), Some(0)); // utczone fallback
|
|
279
|
+
assert_eq!(info.tzoffset("XYZ"), None);
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
#[test]
|
|
283
|
+
fn test_parserinfo_custom_months() {
|
|
284
|
+
let mut info = ParserInfo::default();
|
|
285
|
+
// Add German month names
|
|
286
|
+
info.months.insert("januar".into(), 1);
|
|
287
|
+
info.months.insert("februar".into(), 2);
|
|
288
|
+
info.months.insert("maerz".into(), 3);
|
|
289
|
+
|
|
290
|
+
assert_eq!(info.month("Januar"), Some(1));
|
|
291
|
+
assert_eq!(info.month("FEBRUAR"), Some(2));
|
|
292
|
+
assert_eq!(info.month("maerz"), Some(3));
|
|
293
|
+
// English still works
|
|
294
|
+
assert_eq!(info.month("January"), Some(1));
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
#[test]
|
|
298
|
+
fn test_dispatch_with_none_uses_phf() {
|
|
299
|
+
assert_eq!(do_month("January", None), Some(1));
|
|
300
|
+
assert_eq!(do_weekday("Monday", None), Some(0));
|
|
301
|
+
assert!(do_jump("at", None));
|
|
302
|
+
assert!(do_utczone("UTC", None));
|
|
303
|
+
assert_eq!(do_hms("hour", None), Some(0));
|
|
304
|
+
assert_eq!(do_ampm("AM", None), Some(0));
|
|
305
|
+
assert!(do_pertain("of", None));
|
|
306
|
+
assert_eq!(do_tzoffset("EST", None), None);
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
#[test]
|
|
310
|
+
fn test_dispatch_with_info_uses_custom() {
|
|
311
|
+
let mut info = ParserInfo::default();
|
|
312
|
+
info.tzoffset.insert("est".into(), -18000);
|
|
313
|
+
|
|
314
|
+
assert_eq!(do_tzoffset("EST", Some(&info)), Some(-18000));
|
|
315
|
+
assert_eq!(do_tzoffset("est", Some(&info)), Some(-18000));
|
|
316
|
+
assert_eq!(do_month("January", Some(&info)), Some(1));
|
|
317
|
+
}
|
|
318
|
+
}
|