resolvekit 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- resolvekit/README.md +134 -0
- resolvekit/__init__.py +67 -0
- resolvekit/api/README.md +165 -0
- resolvekit/api/__init__.py +10 -0
- resolvekit/api/convenience.py +53 -0
- resolvekit/api/resolver.py +457 -0
- resolvekit/builders/README.md +173 -0
- resolvekit/builders/__init__.py +0 -0
- resolvekit/calibration/README.md +351 -0
- resolvekit/calibration/__init__.py +12 -0
- resolvekit/calibration/calibrator.py +184 -0
- resolvekit/calibration/features.py +139 -0
- resolvekit/calibration/models.py +78 -0
- resolvekit/cli/README.md +215 -0
- resolvekit/cli/__init__.py +0 -0
- resolvekit/cli/main.py +18 -0
- resolvekit/config.py +128 -0
- resolvekit/constants.py +252 -0
- resolvekit/constraints/README.md +102 -0
- resolvekit/constraints/__init__.py +17 -0
- resolvekit/constraints/constraint_engine.py +111 -0
- resolvekit/constraints/hierarchy_validator.py +148 -0
- resolvekit/constraints/membership_validator.py +60 -0
- resolvekit/constraints/protocols.py +33 -0
- resolvekit/constraints/temporal_validator.py +43 -0
- resolvekit/constraints/type_validator.py +42 -0
- resolvekit/data/README.md +165 -0
- resolvekit/data/__init__.py +14 -0
- resolvekit/data/alias_repository.py +206 -0
- resolvekit/data/code_repository.py +85 -0
- resolvekit/data/context_filters.py +49 -0
- resolvekit/data/db_manager.py +196 -0
- resolvekit/data/entity_repository.py +466 -0
- resolvekit/data/membership_repository.py +107 -0
- resolvekit/data/query_builder.py +177 -0
- resolvekit/data/schema.py +122 -0
- resolvekit/disambiguation/README.md +72 -0
- resolvekit/disambiguation/__init__.py +0 -0
- resolvekit/extraction/README.md +204 -0
- resolvekit/extraction/__init__.py +0 -0
- resolvekit/matchers/README.md +77 -0
- resolvekit/matchers/__init__.py +65 -0
- resolvekit/matchers/alias_exact.py +65 -0
- resolvekit/matchers/canonical_name.py +62 -0
- resolvekit/matchers/cascade.py +127 -0
- resolvekit/matchers/code_validators.py +250 -0
- resolvekit/matchers/exact_code.py +177 -0
- resolvekit/matchers/fts_matcher.py +106 -0
- resolvekit/matchers/fuzzy_matcher.py +142 -0
- resolvekit/matchers/priorities.py +174 -0
- resolvekit/matchers/protocols.py +75 -0
- resolvekit/normalization/README.md +192 -0
- resolvekit/normalization/__init__.py +8 -0
- resolvekit/normalization/normalizer.py +164 -0
- resolvekit/overlays/README.md +226 -0
- resolvekit/overlays/__init__.py +0 -0
- resolvekit/types.py +534 -0
- resolvekit/utils/README.md +188 -0
- resolvekit/utils/__init__.py +48 -0
- resolvekit/utils/cache.py +109 -0
- resolvekit/utils/dates.py +339 -0
- resolvekit/utils/errors.py +145 -0
- resolvekit/utils/files.py +366 -0
- resolvekit/utils/logging.py +219 -0
- resolvekit/utils/text.py +475 -0
- resolvekit/utils/validation.py +301 -0
- resolvekit-0.0.1.dist-info/METADATA +36 -0
- resolvekit-0.0.1.dist-info/RECORD +70 -0
- resolvekit-0.0.1.dist-info/WHEEL +4 -0
- resolvekit-0.0.1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Constraints Module
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
The constraints module enforces Knowledge Graph (KG) and temporal constraints on candidate entities, filtering out invalid matches based on type, hierarchy, temporal validity, and group memberships.
|
|
6
|
+
|
|
7
|
+
## Components
|
|
8
|
+
|
|
9
|
+
### Constraint Validators
|
|
10
|
+
|
|
11
|
+
1. **Type Validator** (`type_validator.py`)
|
|
12
|
+
- Enforces entity type constraints when provided
|
|
13
|
+
- Example: If `entity_type="country"`, filter out states/cities
|
|
14
|
+
|
|
15
|
+
2. **Hierarchy Validator** (`hierarchy_validator.py`)
|
|
16
|
+
- Checks parent-child containment relationships
|
|
17
|
+
- Example: If `parent="country/FRA"`, only keep French subdivisions
|
|
18
|
+
- Validates full hierarchy paths
|
|
19
|
+
|
|
20
|
+
3. **Temporal Validator** (`temporal_validator.py`)
|
|
21
|
+
- Checks validity ranges (valid_from, valid_until)
|
|
22
|
+
- Handles historical entity names
|
|
23
|
+
- Default: current date (pack build date)
|
|
24
|
+
- Supports `as_of` parameter for historical queries
|
|
25
|
+
|
|
26
|
+
4. **Membership Validator** (`membership_validator.py`)
|
|
27
|
+
- Validates group memberships at specific dates
|
|
28
|
+
- Example: Check if entity was EU member in 2005
|
|
29
|
+
- Handles joining/leaving events (Brexit, EU expansions)
|
|
30
|
+
|
|
31
|
+
### Helper Components
|
|
32
|
+
|
|
33
|
+
- `constraint_engine.py`: Orchestrates constraint validation
|
|
34
|
+
- `resolvekit/utils/dates.py`: Temporal validity utilities (`is_valid_at`), shared with the temporal validator
|
|
35
|
+
- Hierarchy traversal utilities: implemented inside `hierarchy_validator.py` (batched ancestor lookup over parent links)
|
|
36
|
+
|
|
37
|
+
## Constraint Application Flow
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
def apply_constraints(
    candidates: list[Candidate],
    entity_type: str | None,
    parent: str | None,
    at: date | None,
    group: str | None
) -> list[Candidate]:
    """Apply all constraints to filter candidates.

    Each constraint is only applied when its argument is provided. The
    membership constraint is evaluated at ``at`` when a date is given and
    at today's date otherwise, so passing ``group`` without ``at`` still
    filters by current membership instead of being silently ignored.

    Args:
        candidates: Candidates to filter.
        entity_type: Required entity type, or None for no type constraint.
        parent: Required ancestor identifier, or None for no hierarchy constraint.
        at: Reference date for temporal/membership checks, or None.
        group: Required group identifier, or None for no membership constraint.

    Returns:
        The candidates that satisfy every active constraint, with the
        ``type_valid``, ``parent_valid`` and ``date_valid`` features set.
    """
    # Type constraint
    if entity_type:
        candidates = [c for c in candidates if c.type == entity_type]

    # Hierarchy constraint
    if parent:
        candidates = [c for c in candidates if is_child_of(c.dcid, parent)]

    # Temporal constraint
    if at:
        candidates = [c for c in candidates if is_valid_at(c, at)]

    # Membership constraint (defaults to the current date when `at` is absent)
    if group:
        membership_date = at if at else date.today()
        candidates = [c for c in candidates
                      if is_member_at(c.dcid, group, membership_date)]

    # Mark validity in features
    for c in candidates:
        c.features.type_valid = (entity_type is None or c.type == entity_type)
        c.features.parent_valid = (parent is None or is_child_of(c.dcid, parent))
        c.features.date_valid = (at is None or is_valid_at(c, at))

    return candidates
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Design Principles
|
|
76
|
+
|
|
77
|
+
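1. **Additive filtering**: Each hard constraint (type, parent) narrows the candidate set; temporal and membership checks only annotate candidates with validity features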
|
|
78
|
+
2. **Feature tracking**: Record which constraints passed/failed for calibration
|
|
79
|
+
3. **Graceful degradation**: Missing temporal data doesn't crash queries
|
|
80
|
+
4. **Efficient lookups**: Pre-computed indexes for hierarchy/membership
|
|
81
|
+
|
|
82
|
+
## Temporal Validity Model
|
|
83
|
+
|
|
84
|
+
All temporal-aware tables include:
|
|
85
|
+
- `valid_from`: Start date (ISO format) or NULL (always valid)
|
|
86
|
+
- `valid_until`: End date (ISO format, exclusive) or NULL (still valid)
|
|
87
|
+
|
|
88
|
+
Validity check:
|
|
89
|
+
```python
|
|
90
|
+
def is_valid_at(entity, as_of: date) -> bool:
    """Return True when ``as_of`` lies inside the entity's validity window.

    The window is ``[valid_from, valid_until)``: the start is inclusive,
    the end is exclusive, and a missing bound leaves that side open.
    """
    # Before the start of the window -> not yet valid.
    if entity.valid_from and as_of < entity.valid_from:
        return False
    # At or past the (exclusive) end of the window -> no longer valid.
    return not (entity.valid_until and as_of >= entity.valid_until)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Implementation Priority
|
|
100
|
+
|
|
101
|
+
**Phase A** - Core resolver (type, hierarchy, basic temporal)
|
|
102
|
+
**Phase B** - Full membership queries with temporal support
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Constraint validation module."""
|
|
2
|
+
|
|
3
|
+
from resolvekit.constraints.constraint_engine import ConstraintEngine
|
|
4
|
+
from resolvekit.constraints.hierarchy_validator import HierarchyValidator
|
|
5
|
+
from resolvekit.constraints.membership_validator import MembershipValidator
|
|
6
|
+
from resolvekit.constraints.protocols import Validator
|
|
7
|
+
from resolvekit.constraints.temporal_validator import TemporalValidator
|
|
8
|
+
from resolvekit.constraints.type_validator import TypeValidator
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"ConstraintEngine",
|
|
12
|
+
"HierarchyValidator",
|
|
13
|
+
"MembershipValidator",
|
|
14
|
+
"TemporalValidator",
|
|
15
|
+
"TypeValidator",
|
|
16
|
+
"Validator",
|
|
17
|
+
]
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Constraint engine orchestrator."""
|
|
2
|
+
|
|
3
|
+
from resolvekit.constraints.hierarchy_validator import HierarchyValidator
|
|
4
|
+
from resolvekit.constraints.membership_validator import MembershipValidator
|
|
5
|
+
from resolvekit.constraints.temporal_validator import TemporalValidator
|
|
6
|
+
from resolvekit.constraints.type_validator import TypeValidator
|
|
7
|
+
from resolvekit.data.entity_repository import EntityRepository
|
|
8
|
+
from resolvekit.data.membership_repository import MembershipRepository
|
|
9
|
+
from resolvekit.types import Candidate, MatchContext
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ConstraintEngine:
    """
    Orchestrates constraint validation with hybrid filtering.

    Hard filtering (removes candidates):
    - Type constraint (when context.entity_type is set)
    - Parent constraint (when context.parent_dcid is set)

    Soft features (marks but doesn't filter):
    - Temporal validity (when context.as_of is set)
    - Membership validity (when context.group_dcid is set)

    Candidates returned without any active constraint (no context, or a
    context whose constraint fields are all None) are still annotated with
    the "no constraint" default features, so downstream consumers see the
    same feature keys on every path.
    """

    # Feature values meaning "no constraint was requested". They mirror what
    # each validator writes when its own context field is None.
    _UNCONSTRAINED_FEATURES = (
        ("type_valid", True),
        ("parent_valid", True),
        ("parent_depth", 0),
        ("date_valid", True),
        ("membership_valid", True),
    )

    def __init__(
        self,
        entity_repo: EntityRepository,
        membership_repo: MembershipRepository,
        hierarchy_depth: int = 3,
    ):
        """
        Initialize constraint engine.

        Args:
            entity_repo: Repository for entity lookups
            membership_repo: Repository for membership queries
            hierarchy_depth: Maximum depth for hierarchy traversal (default 3)
        """
        self.type_validator = TypeValidator()
        self.hierarchy_validator = HierarchyValidator(entity_repo, hierarchy_depth)
        self.temporal_validator = TemporalValidator()
        self.membership_validator = MembershipValidator(membership_repo)
        self.hierarchy_depth = hierarchy_depth

    def apply_constraints(
        self, candidates: list[Candidate], context: MatchContext | None
    ) -> list[Candidate]:
        """
        Apply all constraints with hybrid filtering.

        Args:
            candidates: List of candidates to validate
            context: Match context with optional constraints

        Returns:
            Validated/enriched candidates. The input list object is returned
            unchanged in identity on the fast paths; hard filters may return
            a new, shorter list.
        """
        # Fast path: empty candidate list
        if not candidates:
            return candidates

        # Fast path: no context, or a context with every constraint unset.
        # No validator needs to run, but the default features are still
        # written so these candidates look the same to calibration as
        # candidates that went through the validators with no constraints.
        if context is None or self._is_empty_context(context):
            return self._mark_unconstrained(candidates)

        # HARD FILTERS (reduce candidate set)
        # Order: cheapest first to minimize downstream work
        candidates = self.type_validator.validate(candidates, context)
        if not candidates:  # Early exit if all filtered
            return candidates

        # Skip hierarchy validation (a database query) if no parent constraint
        if context.parent_dcid is not None:
            candidates = self.hierarchy_validator.validate(candidates, context)
            if not candidates:  # Early exit if all filtered
                return candidates
        else:
            # Add default parent features even when constraint is absent.
            # This ensures calibration model receives consistent feature vectors -
            # all candidates have the same features regardless of which constraints
            # are active. Default values indicate "no constraint" rather than
            # "constraint failed".
            for candidate in candidates:
                candidate.features["parent_valid"] = True
                candidate.features["parent_depth"] = 0

        # SOFT FEATURES (enrich remaining candidates)
        candidates = self.temporal_validator.validate(candidates, context)
        candidates = self.membership_validator.validate(candidates, context)

        return candidates

    def _mark_unconstrained(self, candidates: list[Candidate]) -> list[Candidate]:
        """Write the "no constraint" default features onto every candidate.

        Args:
            candidates: Candidates to annotate in place

        Returns:
            The same list object that was passed in
        """
        for candidate in candidates:
            for feature_name, default_value in self._UNCONSTRAINED_FEATURES:
                candidate.features[feature_name] = default_value
        return candidates

    def _is_empty_context(self, context: MatchContext) -> bool:
        """
        Check whether the context carries no constraints at all.

        Args:
            context: Match context to check

        Returns:
            True if all constraint fields are None
        """
        return (
            context.entity_type is None
            and context.parent_dcid is None
            and context.as_of is None
            and context.group_dcid is None
        )
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Hierarchy constraint validator."""
|
|
2
|
+
|
|
3
|
+
from sqlalchemy import text
|
|
4
|
+
from sqlmodel import Session
|
|
5
|
+
|
|
6
|
+
from resolvekit.data.entity_repository import EntityRepository
|
|
7
|
+
from resolvekit.types import Candidate, MatchContext
|
|
8
|
+
from resolvekit.utils.logging import get_logger
|
|
9
|
+
|
|
10
|
+
logger = get_logger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class HierarchyValidator:
    """
    Validates parent-child hierarchy constraints.

    Hard filtering: drops candidates whose entity is not a descendant of
    context.parent_dcid (within the configured depth limit).
    Features: writes ``parent_valid`` and ``parent_depth`` into each
    candidate's ``features`` mapping.
    """

    def __init__(self, entity_repo: EntityRepository, depth_limit: int = 3):
        """
        Initialize hierarchy validator.

        Args:
            entity_repo: Repository whose ``db`` provides the engine and the
                overlay list used for hierarchy lookups
            depth_limit: Maximum number of parent links to follow upwards
        """
        self.entity_repo = entity_repo
        self.depth_limit = depth_limit

    def validate(
        self, candidates: list[Candidate], context: MatchContext
    ) -> list[Candidate]:
        """
        Validate hierarchy constraints.

        Each distinct entity DCID is looked up only once, however many
        candidates refer to it.

        Args:
            candidates: List of candidates to validate
            context: Match context with optional parent_dcid constraint

        Returns:
            The input list (all marked valid, depth 0) when no parent
            constraint is set; otherwise a new list holding only the
            candidates that descend from the parent. Rejected candidates
            are still annotated before being dropped.
        """
        required_parent = context.parent_dcid

        # No parent constraint: annotate with the neutral defaults, keep all.
        if required_parent is None:
            for item in candidates:
                item.features["parent_valid"] = True
                item.features["parent_depth"] = 0
            return candidates

        # One batched lookup covering every distinct entity.
        unique_dcids = list({item.entity.dcid for item in candidates})
        lookup = self._batch_check_descendants(unique_dcids, required_parent)

        # Annotate everything, collect only the descendants.
        survivors = []
        for item in candidates:
            descends, levels_up = lookup[item.entity.dcid]
            item.features["parent_valid"] = descends
            item.features["parent_depth"] = levels_up
            if descends:
                survivors.append(item)
        return survivors

    def _batch_check_descendants(
        self, entity_dcids: list[str], parent_dcid: str
    ) -> dict[str, tuple[bool, int]]:
        """
        Determine, in one query, which entities descend from ``parent_dcid``.

        A recursive CTE walks parent links upwards from every requested
        entity. When overlays are configured, the row with the highest
        precedence for a given dcid supplies the parent link, so an overlay
        can override the base parent. The reported depth is the number of
        parent links between the entity and the parent (1 = direct child);
        ancestors further away than ``depth_limit`` links are not matched.

        Args:
            entity_dcids: Entity DCIDs to check
            parent_dcid: Ancestor to look for

        Returns:
            Dict mapping entity_dcid -> (is_descendant, depth); entities that
            do not descend from the parent map to (False, 0)

        Raises:
            RuntimeError: If the repository's database has no engine
        """
        engine = self.entity_repo.db.engine
        if not engine:
            raise RuntimeError("Database not connected. Call connect() first.")

        if not entity_dcids:
            return {}

        # One named bind parameter per requested dcid for the IN clause.
        params = {}
        markers = []
        for position, dcid in enumerate(entity_dcids):
            key = f"dcid_{position}"
            markers.append(f":{key}")
            params[key] = dcid
        placeholders = ", ".join(markers)
        params["parent"] = parent_dcid
        params["depth_limit"] = self.depth_limit

        # Base table plus every overlay schema, each tagged with its
        # precedence so the CTE below can keep the winning row per dcid.
        layer_selects = ["SELECT dcid, parent_dcid, 0 AS precedence FROM main.entities"]
        layer_selects.extend(
            f"SELECT dcid, parent_dcid, {precedence} AS precedence FROM {schema_name}.entities"
            for schema_name, precedence in self.entity_repo.db.overlays
        )
        best_entities_query = "\nUNION ALL\n".join(layer_selects)

        # best_entities: ROW_NUMBER() keeps the highest-precedence row per dcid.
        # ancestors: carries the originating dcid through the upward walk.
        sql = f"""
            WITH RECURSIVE
            best_entities AS (
                SELECT dcid, parent_dcid
                FROM (
                    SELECT dcid, parent_dcid,
                    ROW_NUMBER() OVER (PARTITION BY dcid ORDER BY precedence DESC) as rn
                    FROM ({best_entities_query}) all_entities
                ) ranked
                WHERE rn = 1
            ),
            ancestors AS (
                SELECT dcid AS original_dcid, dcid AS current_dcid, parent_dcid, 0 AS depth
                FROM best_entities
                WHERE dcid IN ({placeholders})

                UNION ALL

                SELECT a.original_dcid, e.dcid AS current_dcid, e.parent_dcid, a.depth + 1
                FROM best_entities e
                INNER JOIN ancestors a ON e.dcid = a.parent_dcid
                WHERE a.depth < :depth_limit AND a.parent_dcid IS NOT NULL
            )
            SELECT original_dcid AS dcid, MIN(depth) + 1 AS depth
            FROM ancestors
            WHERE parent_dcid = :parent AND depth + 1 <= :depth_limit
            GROUP BY original_dcid
        """

        # Rows come back only for entities that reached the parent.
        confirmed = {}
        with Session(engine) as session:
            for row in session.execute(text(sql), params):
                confirmed[row.dcid] = (True, row.depth)

        # Anything without a row is not a descendant.
        outcome = {}
        for dcid in entity_dcids:
            outcome[dcid] = confirmed.get(dcid, (False, 0))
        return outcome
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Membership constraint validator."""
|
|
2
|
+
|
|
3
|
+
from datetime import date
|
|
4
|
+
|
|
5
|
+
from resolvekit.data.membership_repository import MembershipRepository
|
|
6
|
+
from resolvekit.types import Candidate, MatchContext
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class MembershipValidator:
    """
    Validates group membership constraints.

    Soft validation only: writes the ``membership_valid`` feature into each
    candidate's ``features`` mapping and never removes candidates.
    """

    def __init__(self, membership_repo: MembershipRepository):
        """
        Initialize membership validator.

        Args:
            membership_repo: Repository for membership queries
        """
        self.membership_repo = membership_repo

    def validate(
        self, candidates: list[Candidate], context: MatchContext
    ) -> list[Candidate]:
        """
        Validate membership constraints.

        Args:
            candidates: List of candidates to validate
            context: Match context with optional group_dcid constraint and
                optional as_of date

        Returns:
            The same candidates, each annotated with ``membership_valid``
            (no filtering)
        """
        # If no group constraint, mark all as valid (no constraint to check)
        if context.group_dcid is None:
            for candidate in candidates:
                candidate.features["membership_valid"] = True
            return candidates

        # Nothing to annotate: avoid a repository round trip with an empty
        # DCID list.
        if not candidates:
            return candidates

        # Use as_of date or default to today
        as_of = context.as_of if context.as_of is not None else date.today()

        # Batch query over distinct DCIDs only. Several candidates can share
        # one entity, and repeating a DCID in the lookup adds nothing.
        # dict.fromkeys keeps first-seen order.
        entity_dcids = list(dict.fromkeys(c.entity.dcid for c in candidates))
        members = self.membership_repo.check_memberships_batch(
            entity_dcids=entity_dcids,
            group_dcid=context.group_dcid,
            as_of=as_of,
        )

        # Mark membership validity
        for candidate in candidates:
            candidate.features["membership_valid"] = candidate.entity.dcid in members

        # Soft validation: return all candidates (don't filter)
        return candidates
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Protocols for constraint validators."""
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
from resolvekit.types import Candidate, MatchContext
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Validator(Protocol):
    """
    Structural interface shared by the constraint validators.

    An implementation either removes candidates that violate a constraint
    (hard filtering) or leaves the list intact and records the outcome as
    a feature for calibration to weigh (soft validation).
    """

    def validate(
        self, candidates: list[Candidate], context: MatchContext
    ) -> list[Candidate]:
        """
        Check candidates against the constraints carried by ``context``.

        Args:
            candidates: List of candidates to validate
            context: Match context with constraint parameters

        Returns:
            The candidates after filtering and/or feature enrichment
        """
        ...
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Temporal constraint validator."""
|
|
2
|
+
|
|
3
|
+
from resolvekit.types import Candidate, MatchContext
|
|
4
|
+
from resolvekit.utils.dates import is_valid_at
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TemporalValidator:
    """
    Validates temporal validity of entities.

    Soft validation only: writes the ``date_valid`` feature into each
    candidate's ``features`` mapping and never removes candidates.
    The date check itself is delegated to is_valid_at() from utils/dates.py.
    """

    def validate(
        self, candidates: list[Candidate], context: MatchContext
    ) -> list[Candidate]:
        """
        Validate temporal constraints.

        Args:
            candidates: List of candidates to validate
            context: Match context with optional as_of date

        Returns:
            The same candidates, each annotated with ``date_valid``
            (no filtering)
        """
        reference_date = context.as_of

        for item in candidates:
            if reference_date is None:
                # No as_of constraint: nothing to check, so mark as valid.
                item.features["date_valid"] = True
            else:
                entity = item.entity
                item.features["date_valid"] = is_valid_at(
                    as_of=reference_date,
                    valid_from=entity.valid_from,
                    valid_until=entity.valid_until,
                )

        # Soft validation: every candidate is returned.
        return candidates
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Type constraint validator."""
|
|
2
|
+
|
|
3
|
+
from resolvekit.types import Candidate, MatchContext
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TypeValidator:
    """
    Validates entity type constraints.

    Hard filtering: drops candidates whose entity type differs from
    context.entity_type.
    Feature: writes the boolean ``type_valid`` into every candidate's
    ``features`` mapping, including candidates that get dropped.
    """

    def validate(
        self, candidates: list[Candidate], context: MatchContext
    ) -> list[Candidate]:
        """
        Validate entity type constraints.

        Args:
            candidates: List of candidates to validate
            context: Match context with optional entity_type constraint

        Returns:
            The input list itself (all marked valid) when no type constraint
            is set; otherwise a new list containing only the matching
            candidates
        """
        required_type = context.entity_type

        # No type constraint: everything is valid and nothing is removed.
        if required_type is None:
            for item in candidates:
                item.features["type_valid"] = True
            return candidates

        # Type constraint present: annotate all, keep only the matches.
        kept = []
        for item in candidates:
            matches = item.entity.entity_type == required_type
            item.features["type_valid"] = matches
            if matches:
                kept.append(item)
        return kept
|