resolvekit 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- resolvekit/README.md +134 -0
- resolvekit/__init__.py +67 -0
- resolvekit/api/README.md +165 -0
- resolvekit/api/__init__.py +10 -0
- resolvekit/api/convenience.py +53 -0
- resolvekit/api/resolver.py +457 -0
- resolvekit/builders/README.md +173 -0
- resolvekit/builders/__init__.py +0 -0
- resolvekit/calibration/README.md +351 -0
- resolvekit/calibration/__init__.py +12 -0
- resolvekit/calibration/calibrator.py +184 -0
- resolvekit/calibration/features.py +139 -0
- resolvekit/calibration/models.py +78 -0
- resolvekit/cli/README.md +215 -0
- resolvekit/cli/__init__.py +0 -0
- resolvekit/cli/main.py +18 -0
- resolvekit/config.py +128 -0
- resolvekit/constants.py +252 -0
- resolvekit/constraints/README.md +102 -0
- resolvekit/constraints/__init__.py +17 -0
- resolvekit/constraints/constraint_engine.py +111 -0
- resolvekit/constraints/hierarchy_validator.py +148 -0
- resolvekit/constraints/membership_validator.py +60 -0
- resolvekit/constraints/protocols.py +33 -0
- resolvekit/constraints/temporal_validator.py +43 -0
- resolvekit/constraints/type_validator.py +42 -0
- resolvekit/data/README.md +165 -0
- resolvekit/data/__init__.py +14 -0
- resolvekit/data/alias_repository.py +206 -0
- resolvekit/data/code_repository.py +85 -0
- resolvekit/data/context_filters.py +49 -0
- resolvekit/data/db_manager.py +196 -0
- resolvekit/data/entity_repository.py +466 -0
- resolvekit/data/membership_repository.py +107 -0
- resolvekit/data/query_builder.py +177 -0
- resolvekit/data/schema.py +122 -0
- resolvekit/disambiguation/README.md +72 -0
- resolvekit/disambiguation/__init__.py +0 -0
- resolvekit/extraction/README.md +204 -0
- resolvekit/extraction/__init__.py +0 -0
- resolvekit/matchers/README.md +77 -0
- resolvekit/matchers/__init__.py +65 -0
- resolvekit/matchers/alias_exact.py +65 -0
- resolvekit/matchers/canonical_name.py +62 -0
- resolvekit/matchers/cascade.py +127 -0
- resolvekit/matchers/code_validators.py +250 -0
- resolvekit/matchers/exact_code.py +177 -0
- resolvekit/matchers/fts_matcher.py +106 -0
- resolvekit/matchers/fuzzy_matcher.py +142 -0
- resolvekit/matchers/priorities.py +174 -0
- resolvekit/matchers/protocols.py +75 -0
- resolvekit/normalization/README.md +192 -0
- resolvekit/normalization/__init__.py +8 -0
- resolvekit/normalization/normalizer.py +164 -0
- resolvekit/overlays/README.md +226 -0
- resolvekit/overlays/__init__.py +0 -0
- resolvekit/types.py +534 -0
- resolvekit/utils/README.md +188 -0
- resolvekit/utils/__init__.py +48 -0
- resolvekit/utils/cache.py +109 -0
- resolvekit/utils/dates.py +339 -0
- resolvekit/utils/errors.py +145 -0
- resolvekit/utils/files.py +366 -0
- resolvekit/utils/logging.py +219 -0
- resolvekit/utils/text.py +475 -0
- resolvekit/utils/validation.py +301 -0
- resolvekit-0.0.1.dist-info/METADATA +36 -0
- resolvekit-0.0.1.dist-info/RECORD +70 -0
- resolvekit-0.0.1.dist-info/WHEEL +4 -0
- resolvekit-0.0.1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# Utils Module
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
The utils module contains shared utilities, helpers, and common functionality used across other modules.
|
|
6
|
+
|
|
7
|
+
## Components
|
|
8
|
+
|
|
9
|
+
### Core Utilities
|
|
10
|
+
|
|
11
|
+
1. **Logging** (`logging.py`)
|
|
12
|
+
- Structured logging configuration
|
|
13
|
+
- Log levels and formatters
|
|
14
|
+
- Privacy-aware logging (no query content by default)
|
|
15
|
+
|
|
16
|
+
2. **Validation** (`validation.py`)
|
|
17
|
+
- Input validation utilities
|
|
18
|
+
- Schema validation
|
|
19
|
+
- Error message generation
|
|
20
|
+
|
|
21
|
+
3. **Text Utils** (`text.py`)
|
|
22
|
+
- Common text processing functions
|
|
23
|
+
- Unicode utilities
|
|
24
|
+
- String similarity metrics
|
|
25
|
+
|
|
26
|
+
4. **Date Utils** (`dates.py`)
|
|
27
|
+
- Date parsing and formatting
|
|
28
|
+
- Temporal validity checks
|
|
29
|
+
- ISO date utilities
|
|
30
|
+
|
|
31
|
+
5. **File Utils** (`files.py`)
|
|
32
|
+
- File I/O helpers
|
|
33
|
+
- Path management
|
|
34
|
+
- Checksum computation
|
|
35
|
+
|
|
36
|
+
6. **Cache** (`cache.py`)
|
|
37
|
+
- LRU cache implementations
|
|
38
|
+
- Cache warming strategies
|
|
39
|
+
- Cache invalidation
|
|
40
|
+
|
|
41
|
+
### Data Structures
|
|
42
|
+
|
|
43
|
+
- `priority_queue.py`: Priority queue for candidate ranking
|
|
44
|
+
- `trie.py`: Trie for prefix matching
|
|
45
|
+
- `bloom_filter.py`: Bloom filter for quick existence checks
|
|
46
|
+
|
|
47
|
+
### Error Classes
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
# errors.py
|
|
51
|
+
|
|
52
|
+
class ResolvekitError(Exception):
|
|
53
|
+
"""Base exception for resolvekit."""
|
|
54
|
+
pass
|
|
55
|
+
|
|
56
|
+
class ConfigError(ResolvekitError):
|
|
57
|
+
"""Configuration error."""
|
|
58
|
+
pass
|
|
59
|
+
|
|
60
|
+
class DataPackError(ResolvekitError):
|
|
61
|
+
"""Data pack error."""
|
|
62
|
+
pass
|
|
63
|
+
|
|
64
|
+
class ResolutionError(ResolvekitError):
|
|
65
|
+
"""Resolution error."""
|
|
66
|
+
pass
|
|
67
|
+
|
|
68
|
+
class ValidationError(ResolvekitError):
|
|
69
|
+
"""Validation error."""
|
|
70
|
+
pass
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Metrics and Performance
|
|
74
|
+
|
|
75
|
+
- `metrics.py`: Performance metrics collection
|
|
76
|
+
- `profiling.py`: Profiling utilities
|
|
77
|
+
- `benchmarks.py`: Benchmark utilities
|
|
78
|
+
|
|
79
|
+
### Testing Utilities
|
|
80
|
+
|
|
81
|
+
- `test_helpers.py`: Test fixtures and helpers
|
|
82
|
+
- `mock_data.py`: Mock data generators
|
|
83
|
+
- `assertions.py`: Custom assertions
|
|
84
|
+
|
|
85
|
+
## Common Patterns
|
|
86
|
+
|
|
87
|
+
### Logging
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from resolvekit.utils.logging import get_logger
|
|
91
|
+
|
|
92
|
+
logger = get_logger(__name__)
|
|
93
|
+
|
|
94
|
+
logger.info("Resolver initialized", extra={
|
|
95
|
+
"data_pack_version": "1.2.0",
|
|
96
|
+
"overlays": 2
|
|
97
|
+
})
|
|
98
|
+
|
|
99
|
+
logger.debug("Candidate generation", extra={
|
|
100
|
+
"query": "[REDACTED]", # Privacy
|
|
101
|
+
"candidates_found": 5,
|
|
102
|
+
"stage": "fts"
|
|
103
|
+
})
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Validation
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from resolvekit.utils.validation import validate_dcid, validate_date
|
|
110
|
+
|
|
111
|
+
# Validate DCID format
|
|
112
|
+
if not validate_dcid(dcid):
|
|
113
|
+
raise ValidationError(f"Invalid DCID format: {dcid}")
|
|
114
|
+
|
|
115
|
+
# Validate date
|
|
116
|
+
try:
|
|
117
|
+
date_obj = validate_date(date_str)
|
|
118
|
+
except ValidationError as e:
|
|
119
|
+
logger.error(f"Invalid date: {e}")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Caching
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from resolvekit.utils.cache import lru_cache
|
|
126
|
+
|
|
127
|
+
@lru_cache(maxsize=10000)
|
|
128
|
+
def expensive_lookup(key: str) -> Entity | None:
|
|
129
|
+
"""Cached entity lookup."""
|
|
130
|
+
return database.get_entity(key)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Text Similarity
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from resolvekit.utils.text import (
|
|
137
|
+
edit_distance,
|
|
138
|
+
trigram_similarity,
|
|
139
|
+
jaccard_similarity
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
# Compute similarities
|
|
143
|
+
edit_dist = edit_distance("france", "frence") # 1
|
|
144
|
+
trigram_sim = trigram_similarity("germany", "germeny") # 0.85
|
|
145
|
+
jaccard_sim = jaccard_similarity("turkey", "türkiye") # 0.71
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Date Handling
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from resolvekit.utils.dates import parse_date, is_valid_at
|
|
152
|
+
from datetime import date
|
|
153
|
+
|
|
154
|
+
# Parse various date formats
|
|
155
|
+
d = parse_date("2025-01-01") # ISO
|
|
156
|
+
d = parse_date("2025-1-1") # Flexible
|
|
157
|
+
d = parse_date("01/01/2025") # US format
|
|
158
|
+
|
|
159
|
+
# Check validity
|
|
160
|
+
entity = get_entity("country/YUG") # Yugoslavia
|
|
161
|
+
is_valid = is_valid_at(date(1990, 1, 1), entity.valid_from, entity.valid_until) # True
|
|
162
|
+
is_valid = is_valid_at(date(2000, 1, 1), entity.valid_from, entity.valid_until) # False (dissolved)
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Checksums
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from resolvekit.utils.files import compute_checksum, verify_checksum
|
|
169
|
+
|
|
170
|
+
# Compute SHA-256 checksum
|
|
171
|
+
checksum = compute_checksum("base.sqlite")
|
|
172
|
+
|
|
173
|
+
# Verify checksum
|
|
174
|
+
if not verify_checksum("base.sqlite", expected_checksum):
|
|
175
|
+
raise DataPackError("Checksum mismatch - data may be corrupted")
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Design Principles
|
|
179
|
+
|
|
180
|
+
1. **DRY**: Shared code lives here, not duplicated
|
|
181
|
+
2. **Type-safe**: Full type annotations
|
|
182
|
+
3. **Tested**: High test coverage for utilities
|
|
183
|
+
4. **Documented**: Clear docstrings with examples
|
|
184
|
+
|
|
185
|
+
## Implementation Priority
|
|
186
|
+
|
|
187
|
+
**Phase A** - Core utilities (logging, validation, text, dates)
|
|
188
|
+
**Ongoing** - Add utilities as needed by other modules
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Utilities module for resolvekit.
|
|
2
|
+
|
|
3
|
+
For most utilities, import directly from submodules:
|
|
4
|
+
from resolvekit.utils.text import normalize_unicode
|
|
5
|
+
from resolvekit.utils.dates import parse_date
|
|
6
|
+
from resolvekit.utils.validation import validate_dcid
|
|
7
|
+
|
|
8
|
+
This module only re-exports commonly used items for convenience.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
# Re-export error classes (used throughout codebase)
|
|
12
|
+
from resolvekit.utils.errors import (
|
|
13
|
+
AmbiguousQueryError,
|
|
14
|
+
CalibrationError,
|
|
15
|
+
ChecksumMismatchError,
|
|
16
|
+
CodeFormatError,
|
|
17
|
+
ConfigError,
|
|
18
|
+
DatabaseError,
|
|
19
|
+
DataPackError,
|
|
20
|
+
EntityNotFoundError,
|
|
21
|
+
ExtractionError,
|
|
22
|
+
HierarchyError,
|
|
23
|
+
IncompatibleVersionError,
|
|
24
|
+
ResolvekitError,
|
|
25
|
+
OverlayError,
|
|
26
|
+
ResolutionError,
|
|
27
|
+
TemporalValidityError,
|
|
28
|
+
ValidationError,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
"AmbiguousQueryError",
|
|
33
|
+
"CalibrationError",
|
|
34
|
+
"ChecksumMismatchError",
|
|
35
|
+
"CodeFormatError",
|
|
36
|
+
"ConfigError",
|
|
37
|
+
"DataPackError",
|
|
38
|
+
"DatabaseError",
|
|
39
|
+
"EntityNotFoundError",
|
|
40
|
+
"ExtractionError",
|
|
41
|
+
"HierarchyError",
|
|
42
|
+
"IncompatibleVersionError",
|
|
43
|
+
"ResolvekitError",
|
|
44
|
+
"OverlayError",
|
|
45
|
+
"ResolutionError",
|
|
46
|
+
"TemporalValidityError",
|
|
47
|
+
"ValidationError",
|
|
48
|
+
]
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Caching utilities for resolvekit.
|
|
2
|
+
|
|
3
|
+
This module provides thin wrappers and utilities around Python's standard
|
|
4
|
+
library caching mechanisms. We use functools.lru_cache for function memoization
|
|
5
|
+
and simple dicts for data caching.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from collections.abc import Callable, Hashable
|
|
9
|
+
from functools import lru_cache as _stdlib_lru_cache
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
# Re-export standard library lru_cache
|
|
13
|
+
lru_cache = _stdlib_lru_cache
|
|
14
|
+
|
|
15
|
+
__all__ = ["DictCache", "lru_cache", "warm_cache"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DictCache:
    """
    Named, dictionary-backed cache for data that is loaded up front.

    The class is a small facade over a private ``dict``. It adds nothing a
    plain dict cannot do; it exists so call sites can say "cache" rather
    than "dict".

    Reach for it when:
    - A known set of data is cached once at startup
    - The code should distinguish a "cache" from an ordinary "dict"
    - Plain get/set access with an obvious intent is all that is required
    """

    def __init__(self, name: str = "cache"):
        """
        Create an empty cache.

        Args:
            name: Label identifying this cache
        """
        self.name = name
        self._data: dict[Hashable, Any] = {}

    def load(self, data: dict[Hashable, Any]) -> None:
        """
        Replace the cache contents with a copy of ``data``.

        Args:
            data: Dictionary whose ``copy()`` becomes the new cache contents
        """
        snapshot = data.copy()
        self._data = snapshot

    def get(self, key: Hashable, default: Any = None) -> Any:
        """Return the value stored under ``key``, or ``default`` if absent."""
        return self._data.get(key, default)

    def get_many(self, keys: list[Hashable]) -> dict[Hashable, Any]:
        """Return a dict of the requested keys that are present in the cache."""
        found: dict[Hashable, Any] = {}
        for key in keys:
            # Keys that are not cached are silently left out of the result.
            if key in self._data:
                found[key] = self._data[key]
        return found

    def set(self, key: Hashable, value: Any) -> None:
        """Store ``value`` under ``key``."""
        self._data[key] = value

    def update(self, data: dict[Hashable, Any]) -> None:
        """Merge ``data`` into the cache, overwriting existing keys."""
        self._data.update(data)

    def clear(self) -> None:
        """Remove every cached entry."""
        self._data.clear()

    def __contains__(self, key: Hashable) -> bool:
        """Support ``key in cache``."""
        return key in self._data

    def __len__(self) -> int:
        """Return how many entries are cached."""
        return len(self._data)

    def __getitem__(self, key: Hashable) -> Any:
        """Support ``cache[key]`` lookups on the underlying dict."""
        return self._data[key]

    def __setitem__(self, key: Hashable, value: Any) -> None:
        """Support ``cache[key] = value`` assignment."""
        self._data[key] = value
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def warm_cache(
    cache: DictCache | dict,
    loader: Callable[[], dict[Hashable, Any]],
) -> None:
    """
    Populate a cache with the data produced by ``loader``.

    A ``DictCache`` is filled through its ``load`` method, while a plain
    ``dict`` is filled through ``dict.update``.

    Args:
        cache: Cache to populate (DictCache or plain dict)
        loader: Zero-argument callable returning the data dictionary

    Raises:
        TypeError: If ``cache`` is neither a DictCache nor a dict

    Example:
        >>> entity_cache = {}
        >>> warm_cache(entity_cache, lambda: load_entities_from_db())
    """
    # The loader is invoked before the cache type is inspected.
    loaded = loader()

    if isinstance(cache, DictCache):
        cache.load(loaded)
        return

    if isinstance(cache, dict):
        cache.update(loaded)
        return

    raise TypeError(f"Unsupported cache type: {type(cache)}")
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""Date and temporal utilities for resolvekit.
|
|
2
|
+
|
|
3
|
+
Date parsing powered by python-dateutil for robust format handling.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from datetime import date, datetime
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from dateutil import parser as dateutil_parser
|
|
10
|
+
|
|
11
|
+
from resolvekit.utils.errors import ValidationError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def parse_date(date_input: str | date | datetime | None) -> date | None:
|
|
15
|
+
"""
|
|
16
|
+
Parse various date formats to date object.
|
|
17
|
+
|
|
18
|
+
Powered by python-dateutil for robust parsing of many formats:
|
|
19
|
+
- ISO format: "2025-01-01"
|
|
20
|
+
- Flexible: "2025-1-1", "Jan 1, 2025", "1st January 2025"
|
|
21
|
+
- Various formats: "01/02/2025", "2025/01/01", "20250101"
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
date_input: Date as string, date, datetime, or None
|
|
25
|
+
|
|
26
|
+
Returns:
|
|
27
|
+
date object or None
|
|
28
|
+
|
|
29
|
+
Raises:
|
|
30
|
+
ValidationError: If date string cannot be parsed
|
|
31
|
+
|
|
32
|
+
Examples:
|
|
33
|
+
>>> parse_date("2025-01-01")
|
|
34
|
+
date(2025, 1, 1)
|
|
35
|
+
>>> parse_date("Jan 1, 2025")
|
|
36
|
+
date(2025, 1, 1)
|
|
37
|
+
>>> parse_date("2025-1-1")
|
|
38
|
+
date(2025, 1, 1)
|
|
39
|
+
>>> parse_date(None)
|
|
40
|
+
None
|
|
41
|
+
"""
|
|
42
|
+
if date_input is None:
|
|
43
|
+
return None
|
|
44
|
+
|
|
45
|
+
if isinstance(date_input, date):
|
|
46
|
+
return date_input
|
|
47
|
+
|
|
48
|
+
if isinstance(date_input, datetime):
|
|
49
|
+
return date_input.date()
|
|
50
|
+
|
|
51
|
+
if isinstance(date_input, str):
|
|
52
|
+
try:
|
|
53
|
+
# Use dateutil for robust parsing
|
|
54
|
+
# dayfirst=False prefers MM/DD/YYYY (US format) for ambiguous dates
|
|
55
|
+
parsed = dateutil_parser.parse(date_input, dayfirst=False)
|
|
56
|
+
return parsed.date()
|
|
57
|
+
except (ValueError, TypeError, dateutil_parser.ParserError) as e:
|
|
58
|
+
raise ValidationError(
|
|
59
|
+
f"Invalid date format: '{date_input}'. Could not parse date string.",
|
|
60
|
+
details={"date_input": date_input, "error": str(e)},
|
|
61
|
+
) from e
|
|
62
|
+
|
|
63
|
+
raise ValidationError(
|
|
64
|
+
f"Invalid date type: {type(date_input).__name__}",
|
|
65
|
+
details={"type": type(date_input).__name__},
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def format_date(d: date | None, format_str: str = "%Y-%m-%d") -> str | None:
|
|
70
|
+
"""
|
|
71
|
+
Format date object to string.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
d: Date object or None
|
|
75
|
+
format_str: Format string (default: ISO format)
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
Formatted date string or None
|
|
79
|
+
|
|
80
|
+
Examples:
|
|
81
|
+
>>> from datetime import date
|
|
82
|
+
>>> format_date(date(2025, 1, 1))
|
|
83
|
+
'2025-01-01'
|
|
84
|
+
"""
|
|
85
|
+
if d is None:
|
|
86
|
+
return None
|
|
87
|
+
return d.strftime(format_str)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def is_valid_at(
|
|
91
|
+
as_of: date,
|
|
92
|
+
valid_from: date | None = None,
|
|
93
|
+
valid_until: date | None = None,
|
|
94
|
+
) -> bool:
|
|
95
|
+
"""
|
|
96
|
+
Check if entity/membership is valid at a given date.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
as_of: Date to check validity at
|
|
100
|
+
valid_from: Start of validity period (inclusive), None = always valid from past
|
|
101
|
+
valid_until: End of validity period (exclusive), None = still valid
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
True if valid at the given date
|
|
105
|
+
|
|
106
|
+
Examples:
|
|
107
|
+
>>> from datetime import date
|
|
108
|
+
>>> is_valid_at(date(2020, 1, 1), date(2019, 1, 1), date(2021, 1, 1))
|
|
109
|
+
True
|
|
110
|
+
>>> is_valid_at(date(2022, 1, 1), date(2019, 1, 1), date(2021, 1, 1))
|
|
111
|
+
False
|
|
112
|
+
"""
|
|
113
|
+
# Check valid_from (inclusive)
|
|
114
|
+
if valid_from is not None and as_of < valid_from:
|
|
115
|
+
return False
|
|
116
|
+
|
|
117
|
+
# Check valid_until (exclusive)
|
|
118
|
+
return not (valid_until is not None and as_of >= valid_until)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def get_current_date() -> date:
    """
    Return today's date as reported by ``date.today()``.

    Returns:
        The current date
    """
    today = date.today()
    return today
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def parse_year(year_input: str | int) -> int:
|
|
132
|
+
"""
|
|
133
|
+
Parse year from string or int.
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
year_input: Year as string or int
|
|
137
|
+
|
|
138
|
+
Returns:
|
|
139
|
+
Year as integer
|
|
140
|
+
|
|
141
|
+
Raises:
|
|
142
|
+
ValidationError: If year is invalid
|
|
143
|
+
|
|
144
|
+
Examples:
|
|
145
|
+
>>> parse_year("2025")
|
|
146
|
+
2025
|
|
147
|
+
>>> parse_year(2025)
|
|
148
|
+
2025
|
|
149
|
+
"""
|
|
150
|
+
try:
|
|
151
|
+
year = int(year_input)
|
|
152
|
+
if year < 1000 or year > 9999:
|
|
153
|
+
raise ValidationError(
|
|
154
|
+
f"Year out of range: {year}. Expected 1000-9999",
|
|
155
|
+
details={"year": year},
|
|
156
|
+
)
|
|
157
|
+
return year
|
|
158
|
+
except (ValueError, TypeError) as e:
|
|
159
|
+
raise ValidationError(
|
|
160
|
+
f"Invalid year: {year_input}",
|
|
161
|
+
details={"year_input": year_input, "error": str(e)},
|
|
162
|
+
) from e
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def date_range(start: date, end: date) -> list[date]:
    """
    Generate list of dates between start and end (inclusive).

    Args:
        start: Start date
        end: End date

    Returns:
        List of consecutive dates from ``start`` through ``end``; empty when
        ``start`` is after ``end``

    Examples:
        >>> from datetime import date
        >>> dates = date_range(date(2025, 1, 1), date(2025, 1, 3))
        >>> len(dates)
        3
    """
    from datetime import timedelta

    if start > end:
        return []

    # Build each date as an offset from start instead of stepping a cursor
    # past end; stepping beyond date.max raises OverflowError.
    span_days = (end - start).days
    return [start + timedelta(days=offset) for offset in range(span_days + 1)]
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def years_between(start: date, end: date) -> int:
    """
    Calculate number of whole calendar years between two dates.

    A year is counted once the anniversary of ``start`` has been reached,
    so leap days do not distort the result the way a fixed 365-day divisor
    does.

    Args:
        start: Start date
        end: End date

    Returns:
        Number of completed years. Negative when ``end`` is before
        ``start``, with the same magnitude as the swapped call.

    Examples:
        >>> from datetime import date
        >>> years_between(date(2020, 1, 1), date(2025, 1, 1))
        5
        >>> years_between(date(2020, 1, 1), date(2020, 12, 31))
        0
    """
    if end < start:
        # Mirror the forward calculation so the result is symmetric.
        return -years_between(end, start)

    years = end.year - start.year
    # The final year only counts once the anniversary has been reached.
    if (end.month, end.day) < (start.month, start.day):
        years -= 1
    return years
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def get_validity_status(
|
|
216
|
+
as_of: date | None = None,
|
|
217
|
+
valid_from: date | None = None,
|
|
218
|
+
valid_until: date | None = None,
|
|
219
|
+
) -> str:
|
|
220
|
+
"""
|
|
221
|
+
Get human-readable validity status.
|
|
222
|
+
|
|
223
|
+
Args:
|
|
224
|
+
as_of: Date to check (default: today)
|
|
225
|
+
valid_from: Start of validity
|
|
226
|
+
valid_until: End of validity
|
|
227
|
+
|
|
228
|
+
Returns:
|
|
229
|
+
Status string: "current", "historical", "future", or "always"
|
|
230
|
+
|
|
231
|
+
Examples:
|
|
232
|
+
>>> from datetime import date
|
|
233
|
+
>>> get_validity_status(date(2025, 1, 1), date(2020, 1, 1), date(2030, 1, 1))
|
|
234
|
+
'current'
|
|
235
|
+
"""
|
|
236
|
+
if as_of is None:
|
|
237
|
+
as_of = get_current_date()
|
|
238
|
+
|
|
239
|
+
if valid_from is None and valid_until is None:
|
|
240
|
+
return "always"
|
|
241
|
+
|
|
242
|
+
if valid_from and as_of < valid_from:
|
|
243
|
+
return "future"
|
|
244
|
+
|
|
245
|
+
if valid_until and as_of >= valid_until:
|
|
246
|
+
return "historical"
|
|
247
|
+
|
|
248
|
+
return "current"
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def compare_dates(d1: date | None, d2: date | None) -> int:
|
|
252
|
+
"""
|
|
253
|
+
Compare two dates.
|
|
254
|
+
|
|
255
|
+
Args:
|
|
256
|
+
d1: First date (None is treated as negative infinity)
|
|
257
|
+
d2: Second date (None is treated as negative infinity)
|
|
258
|
+
|
|
259
|
+
Returns:
|
|
260
|
+
-1 if d1 < d2, 0 if equal, 1 if d1 > d2
|
|
261
|
+
|
|
262
|
+
Examples:
|
|
263
|
+
>>> from datetime import date
|
|
264
|
+
>>> compare_dates(date(2025, 1, 1), date(2024, 1, 1))
|
|
265
|
+
1
|
|
266
|
+
>>> compare_dates(None, date(2024, 1, 1))
|
|
267
|
+
-1
|
|
268
|
+
"""
|
|
269
|
+
if d1 is None and d2 is None:
|
|
270
|
+
return 0
|
|
271
|
+
if d1 is None:
|
|
272
|
+
return -1
|
|
273
|
+
if d2 is None:
|
|
274
|
+
return 1
|
|
275
|
+
|
|
276
|
+
if d1 < d2:
|
|
277
|
+
return -1
|
|
278
|
+
elif d1 > d2:
|
|
279
|
+
return 1
|
|
280
|
+
else:
|
|
281
|
+
return 0
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def temporal_overlap(
|
|
285
|
+
range1_start: date | None,
|
|
286
|
+
range1_end: date | None,
|
|
287
|
+
range2_start: date | None,
|
|
288
|
+
range2_end: date | None,
|
|
289
|
+
) -> bool:
|
|
290
|
+
"""
|
|
291
|
+
Check if two temporal ranges overlap.
|
|
292
|
+
|
|
293
|
+
Args:
|
|
294
|
+
range1_start: Start of first range (inclusive)
|
|
295
|
+
range1_end: End of first range (exclusive)
|
|
296
|
+
range2_start: Start of second range (inclusive)
|
|
297
|
+
range2_end: End of second range (exclusive)
|
|
298
|
+
|
|
299
|
+
Returns:
|
|
300
|
+
True if ranges overlap
|
|
301
|
+
|
|
302
|
+
Examples:
|
|
303
|
+
>>> from datetime import date
|
|
304
|
+
>>> temporal_overlap(date(2020, 1, 1), date(2021, 1, 1),
|
|
305
|
+
... date(2020, 6, 1), date(2021, 6, 1))
|
|
306
|
+
True
|
|
307
|
+
>>> temporal_overlap(date(2020, 1, 1), date(2021, 1, 1),
|
|
308
|
+
... date(2021, 1, 1), date(2022, 1, 1))
|
|
309
|
+
False # Exclusive end
|
|
310
|
+
"""
|
|
311
|
+
# Convert None to infinities for comparison
|
|
312
|
+
# None for start means beginning of time (very old date)
|
|
313
|
+
# None for end means end of time (very future date)
|
|
314
|
+
min_date = date(1000, 1, 1)
|
|
315
|
+
max_date = date(9999, 12, 31)
|
|
316
|
+
|
|
317
|
+
start1 = range1_start if range1_start is not None else min_date
|
|
318
|
+
end1 = range1_end if range1_end is not None else max_date
|
|
319
|
+
start2 = range2_start if range2_start is not None else min_date
|
|
320
|
+
end2 = range2_end if range2_end is not None else max_date
|
|
321
|
+
|
|
322
|
+
# Ranges overlap if one starts before the other ends
|
|
323
|
+
return start1 < end2 and start2 < end1
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def get_date_or_none(value: Any) -> date | None:
    """
    Safely convert value to date or None.

    Delegates to ``parse_date`` and turns parsing failures into None.

    Args:
        value: Value to convert

    Returns:
        date object, or None when the value is None or cannot be parsed
    """
    try:
        return parse_date(value)
    # OverflowError is included because dateutil documents it for numeric
    # fields that are too large; without it this helper could still raise.
    except (ValidationError, ValueError, TypeError, OverflowError):
        return None
|