resolvekit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. resolvekit/README.md +134 -0
  2. resolvekit/__init__.py +67 -0
  3. resolvekit/api/README.md +165 -0
  4. resolvekit/api/__init__.py +10 -0
  5. resolvekit/api/convenience.py +53 -0
  6. resolvekit/api/resolver.py +457 -0
  7. resolvekit/builders/README.md +173 -0
  8. resolvekit/builders/__init__.py +0 -0
  9. resolvekit/calibration/README.md +351 -0
  10. resolvekit/calibration/__init__.py +12 -0
  11. resolvekit/calibration/calibrator.py +184 -0
  12. resolvekit/calibration/features.py +139 -0
  13. resolvekit/calibration/models.py +78 -0
  14. resolvekit/cli/README.md +215 -0
  15. resolvekit/cli/__init__.py +0 -0
  16. resolvekit/cli/main.py +18 -0
  17. resolvekit/config.py +128 -0
  18. resolvekit/constants.py +252 -0
  19. resolvekit/constraints/README.md +102 -0
  20. resolvekit/constraints/__init__.py +17 -0
  21. resolvekit/constraints/constraint_engine.py +111 -0
  22. resolvekit/constraints/hierarchy_validator.py +148 -0
  23. resolvekit/constraints/membership_validator.py +60 -0
  24. resolvekit/constraints/protocols.py +33 -0
  25. resolvekit/constraints/temporal_validator.py +43 -0
  26. resolvekit/constraints/type_validator.py +42 -0
  27. resolvekit/data/README.md +165 -0
  28. resolvekit/data/__init__.py +14 -0
  29. resolvekit/data/alias_repository.py +206 -0
  30. resolvekit/data/code_repository.py +85 -0
  31. resolvekit/data/context_filters.py +49 -0
  32. resolvekit/data/db_manager.py +196 -0
  33. resolvekit/data/entity_repository.py +466 -0
  34. resolvekit/data/membership_repository.py +107 -0
  35. resolvekit/data/query_builder.py +177 -0
  36. resolvekit/data/schema.py +122 -0
  37. resolvekit/disambiguation/README.md +72 -0
  38. resolvekit/disambiguation/__init__.py +0 -0
  39. resolvekit/extraction/README.md +204 -0
  40. resolvekit/extraction/__init__.py +0 -0
  41. resolvekit/matchers/README.md +77 -0
  42. resolvekit/matchers/__init__.py +65 -0
  43. resolvekit/matchers/alias_exact.py +65 -0
  44. resolvekit/matchers/canonical_name.py +62 -0
  45. resolvekit/matchers/cascade.py +127 -0
  46. resolvekit/matchers/code_validators.py +250 -0
  47. resolvekit/matchers/exact_code.py +177 -0
  48. resolvekit/matchers/fts_matcher.py +106 -0
  49. resolvekit/matchers/fuzzy_matcher.py +142 -0
  50. resolvekit/matchers/priorities.py +174 -0
  51. resolvekit/matchers/protocols.py +75 -0
  52. resolvekit/normalization/README.md +192 -0
  53. resolvekit/normalization/__init__.py +8 -0
  54. resolvekit/normalization/normalizer.py +164 -0
  55. resolvekit/overlays/README.md +226 -0
  56. resolvekit/overlays/__init__.py +0 -0
  57. resolvekit/types.py +534 -0
  58. resolvekit/utils/README.md +188 -0
  59. resolvekit/utils/__init__.py +48 -0
  60. resolvekit/utils/cache.py +109 -0
  61. resolvekit/utils/dates.py +339 -0
  62. resolvekit/utils/errors.py +145 -0
  63. resolvekit/utils/files.py +366 -0
  64. resolvekit/utils/logging.py +219 -0
  65. resolvekit/utils/text.py +475 -0
  66. resolvekit/utils/validation.py +301 -0
  67. resolvekit-0.0.1.dist-info/METADATA +36 -0
  68. resolvekit-0.0.1.dist-info/RECORD +70 -0
  69. resolvekit-0.0.1.dist-info/WHEEL +4 -0
  70. resolvekit-0.0.1.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,196 @@
1
+ """Database connection manager."""
2
+
3
+ from contextlib import AbstractContextManager, suppress
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ from sqlalchemy import create_engine, text
8
+ from sqlalchemy.engine import Engine
9
+ from sqlalchemy.pool import StaticPool
10
+
11
+ if TYPE_CHECKING:
12
+ from sqlalchemy.engine import Connection
13
+
14
+ from resolvekit.utils.logging import get_logger
15
+
16
+ logger = get_logger(__name__)
17
+
18
+
19
+ class DatabaseManager:
20
+ """
21
+ Manages SQLite database connections with overlay support.
22
+
23
+ Handles:
24
+ - Connection creation with SQLAlchemy
25
+ - Performance PRAGMA application
26
+ - Overlay database attachment
27
+ - Transaction management
28
+ """
29
+
30
+ def __init__(
31
+ self,
32
+ base_path: Path,
33
+ overlays: list[Path] | None = None,
34
+ read_only: bool = True,
35
+ ):
36
+ """
37
+ Initialize database manager.
38
+
39
+ Args:
40
+ base_path: Path to base database file
41
+ overlays: Optional list of overlay databases (max 5, ordered by precedence)
42
+ read_only: Whether database is read-only
43
+
44
+ Raises:
45
+ ValueError: If database paths are not unique or too many overlays
46
+ """
47
+ # Validate overlay count
48
+ if overlays and len(overlays) > 5:
49
+ raise ValueError(
50
+ f"Maximum 5 overlays supported, got {len(overlays)}. "
51
+ "Too many overlays can impact performance."
52
+ )
53
+
54
+ # Validate that all paths are unique
55
+ paths = [base_path]
56
+ if overlays:
57
+ paths.extend(overlays)
58
+
59
+ # Resolve paths to catch symbolic links and relative paths
60
+ resolved_paths = [p.resolve() for p in paths]
61
+
62
+ if len(resolved_paths) != len(set(resolved_paths)):
63
+ raise ValueError(
64
+ f"Database paths must be unique. Got duplicates in: {paths}"
65
+ )
66
+
67
+ self.base_path = base_path
68
+ self.overlay_paths = overlays or []
69
+ self.read_only = read_only
70
+ self.overlays: list[tuple[str, int]] = []
71
+ self.engine: Engine | None = None
72
+
73
+ def connect(self) -> None:
74
+ """Initialize connection with pragmas and overlay attachment."""
75
+ # Create SQLAlchemy engine
76
+ uri = f"sqlite:///{self.base_path}"
77
+ self.engine = create_engine(
78
+ uri,
79
+ connect_args={"check_same_thread": False},
80
+ poolclass=StaticPool,
81
+ echo=False,
82
+ )
83
+
84
+ # Apply pragmas
85
+ self._apply_pragmas()
86
+
87
+ # Attach overlays
88
+ self._attach_overlays()
89
+
90
+ def _apply_pragmas(self) -> None:
91
+ """Apply performance pragmas from CLAUDE.md."""
92
+ if not self.engine:
93
+ raise RuntimeError("Engine not initialized")
94
+
95
+ pragmas = [
96
+ "PRAGMA foreign_keys=ON",
97
+ "PRAGMA journal_mode=OFF",
98
+ "PRAGMA synchronous=OFF",
99
+ "PRAGMA temp_store=MEMORY",
100
+ "PRAGMA mmap_size=268435456",
101
+ "PRAGMA cache_size=-100000",
102
+ ]
103
+
104
+ # Add read-only enforcement if requested
105
+ if self.read_only:
106
+ pragmas.append("PRAGMA query_only=ON")
107
+
108
+ with self.engine.connect() as conn:
109
+ for pragma in pragmas:
110
+ conn.execute(text(pragma))
111
+ conn.commit()
112
+
113
+ def _attach_overlays(self) -> None:
114
+ """
115
+ Attach overlay databases with precedence tracking.
116
+
117
+ Precedence is assigned based on list order:
118
+ - First overlay: precedence 100 (highest)
119
+ - Second overlay: precedence 99
120
+ - Third overlay: precedence 98
121
+ - etc.
122
+ """
123
+ if not self.engine:
124
+ raise RuntimeError("Engine not initialized")
125
+
126
+ # Clear overlays list to prevent duplicates on reconnect
127
+ self.overlays.clear()
128
+
129
+ with self.engine.connect() as conn:
130
+ # Attach overlays in order, assigning descending precedence
131
+ for idx, overlay_path in enumerate(self.overlay_paths):
132
+ schema_name = f"overlay_{idx}"
133
+ precedence = 100 - idx # First=100, second=99, etc.
134
+
135
+ conn.execute(text(f"ATTACH DATABASE '{overlay_path}' AS {schema_name}"))
136
+ self.overlays.append((schema_name, precedence))
137
+
138
+ conn.commit()
139
+
140
+ def execute(self, query: str, params: dict[str, Any] | None = None) -> Any:
141
+ """
142
+ Execute a query.
143
+
144
+ Args:
145
+ query: SQL query string
146
+ params: Optional parameters dict
147
+
148
+ Returns:
149
+ List of Row objects for SELECT queries, empty list for non-SELECT
150
+ """
151
+ if not self.engine:
152
+ raise RuntimeError("Database not connected. Call connect() first.")
153
+
154
+ with self.engine.connect() as conn:
155
+ result = conn.execute(text(query), params or {})
156
+ conn.commit()
157
+ # Consume results before connection closes (if query returns rows)
158
+ if result.returns_rows:
159
+ return result.fetchall()
160
+ return []
161
+
162
+ def transaction(self) -> AbstractContextManager["Connection"]:
163
+ """
164
+ Get transaction context manager.
165
+
166
+ Returns:
167
+ Transaction context manager
168
+ """
169
+ if not self.engine:
170
+ raise RuntimeError("Database not connected. Call connect() first.")
171
+
172
+ return self.engine.begin()
173
+
174
+ def close(self) -> None:
175
+ """Close database connection and detach overlays."""
176
+ if self.engine:
177
+ # Detach overlays before disposing engine
178
+ if self.overlays:
179
+ try:
180
+ with self.engine.connect() as conn:
181
+ for overlay_name, _ in self.overlays:
182
+ conn.execute(text(f"DETACH DATABASE {overlay_name}"))
183
+ conn.commit()
184
+ except Exception as e:
185
+ # Log but don't fail - connection may already be closed
186
+ logger.debug(f"Error detaching overlays during close: {e}")
187
+ finally:
188
+ self.overlays.clear()
189
+
190
+ self.engine.dispose()
191
+ self.engine = None
192
+
193
+ def __del__(self) -> None:
194
+ """Cleanup: Close database connection when object is garbage collected."""
195
+ with suppress(Exception):
196
+ self.close()
@@ -0,0 +1,466 @@
1
+ """Entity repository."""
2
+
3
+ from datetime import date
4
+ from typing import Any, ClassVar, overload
5
+
6
+ from resolvekit.data.context_filters import ContextFilterBuilder
7
+ from resolvekit.data.db_manager import DatabaseManager
8
+ from resolvekit.data.query_builder import QueryBuilder
9
+ from resolvekit.types import Entity, EntityRow, MatchContext
10
+
11
+
12
class EntityRepository:
    """
    Repository for entity operations.

    Lookups read the base database (schema ``main``, treated as precedence 0)
    together with every overlay listed in ``db_manager.overlays``. The queries
    assembled directly in this class order by ``precedence DESC`` so the
    highest-precedence source wins; queries delegated to ``QueryBuilder`` rely
    on that builder for overlay handling.
    """

    # Derive column list from EntityRow model (single source of truth)
    ENTITY_COLUMNS: ClassVar[list[str]] = list(EntityRow.model_fields.keys())

    def __init__(self, db_manager: DatabaseManager):
        """
        Initialize repository.

        Args:
            db_manager: Database manager instance
        """
        self.db = db_manager
        self.query_builder = QueryBuilder(db_manager)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _dcid_in_clause(dcids: list[str]) -> tuple[str, dict[str, Any]]:
        """Build ``:dcid_0, :dcid_1, ...`` placeholders and their parameter dict."""
        params: dict[str, Any] = {f"dcid_{i}": dcid for i, dcid in enumerate(dcids)}
        placeholders = ", ".join(f":{name}" for name in params)
        return placeholders, params

    @staticmethod
    def _apply_context_filters(
        sql: str, params: dict[str, Any], context: MatchContext | None
    ) -> str:
        """
        Wrap ``sql`` in an outer SELECT that applies the context filters.

        ``params`` is handed to ``ContextFilterBuilder.build_filters``
        (presumably so it can register the values its filter fragments
        reference - confirm against that helper). When ``context`` is falsy
        the query is returned unchanged.
        """
        if not context:
            return sql

        # No table prefix - the columns of the wrapped subquery carry no alias.
        temporal_filter, type_filter, parent_filter = (
            ContextFilterBuilder.build_filters(context, params, table_prefix="")
        )
        return f"""
            SELECT * FROM ({sql}) AS filtered
            WHERE 1=1
            {temporal_filter}
            {type_filter}
            {parent_filter}
        """

    def _row_to_entity(self, row: Any) -> Entity:
        """Convert database row to Entity model via Pydantic validation."""
        # Pick only the entity columns; extra columns such as ``precedence``
        # or ``code_value`` are ignored.
        row_dict = {col: getattr(row, col) for col in self.ENTITY_COLUMNS}

        # Validate through EntityRow model
        entity_row = EntityRow.model_validate(row_dict)

        # Convert EntityRow to Entity; codes/provenance start out empty.
        return Entity(
            **entity_row.model_dump(),
            codes={},
            provenance={},
        )

    # ------------------------------------------------------------------
    # Lookup by DCID
    # ------------------------------------------------------------------

    @overload
    def find_by_dcid(
        self,
        dcid: str,
        as_of: date | None = None,
        include_codes: bool = False,
    ) -> Entity | None: ...

    @overload
    def find_by_dcid(
        self,
        dcid: list[str],
        as_of: date | None = None,
        include_codes: bool = False,
    ) -> dict[str, Entity]: ...

    def find_by_dcid(
        self,
        dcid: str | list[str],
        as_of: date | None = None,
        include_codes: bool = False,
    ) -> Entity | None | dict[str, Entity]:
        """
        Find entity by DCID.

        Args:
            dcid: Single DCID or list of DCIDs
            as_of: Optional date for temporal filtering
            include_codes: Whether to include code mappings

        Returns:
            For a single DCID: the entity, or None when nothing matches.
            For a list: dict mapping each found DCID to its entity (DCIDs
            without a match are simply absent).
        """
        if isinstance(dcid, str):
            return self._find_single_by_dcid(dcid, as_of, include_codes)
        return self._find_batch_by_dcid(dcid, as_of, include_codes)

    def _find_single_by_dcid(
        self,
        dcid: str,
        as_of: date | None = None,
        include_codes: bool = False,
    ) -> Entity | None:
        """Find single entity by DCID; the first returned row is used."""
        sql, params = self.query_builder.build_union_query(
            table="entities",
            columns=self.ENTITY_COLUMNS,
            where_clause="dcid = :dcid",
            params={"dcid": dcid},
            unique_key="dcid",
            as_of=as_of,
        )

        rows = self.db.execute(sql, params)
        if not rows:
            return None

        entity = self._row_to_entity(rows[0])

        # Load codes if requested
        if include_codes:
            self._load_codes_for_entities([entity])

        return entity

    def _find_batch_by_dcid(
        self,
        dcids: list[str],
        as_of: date | None = None,
        include_codes: bool = False,
    ) -> dict[str, Entity]:
        """Find multiple entities by DCID, keyed by DCID."""
        if not dcids:
            return {}

        placeholders, in_params = self._dcid_in_clause(dcids)

        sql, params = self.query_builder.build_union_query(
            table="entities",
            columns=self.ENTITY_COLUMNS,
            where_clause=f"dcid IN ({placeholders})",
            params=in_params,
            unique_key="dcid",
            as_of=as_of,
        )

        entities: dict[str, Entity] = {}
        for row in self.db.execute(sql, params):
            entity = self._row_to_entity(row)
            entities[entity.dcid] = entity

        # Load codes if requested
        if include_codes and entities:
            self._load_codes_for_entities(list(entities.values()))

        return entities

    # ------------------------------------------------------------------
    # Lookup by code
    # ------------------------------------------------------------------

    @overload
    def find_by_code(
        self, code_system: str, code_value: str, context: MatchContext | None = None
    ) -> Entity | None: ...

    @overload
    def find_by_code(
        self,
        code_system: str,
        code_value: list[str],
        context: MatchContext | None = None,
    ) -> dict[str, Entity]: ...

    def find_by_code(
        self,
        code_system: str,
        code_value: str | list[str],
        context: MatchContext | None = None,
    ) -> Entity | None | dict[str, Entity]:
        """
        Find entity by code.

        Args:
            code_system: Code system (e.g., "iso2", "iso3")
            code_value: Single code or list of codes
            context: Optional filtering context

        Returns:
            For a single code: the entity, or None when nothing matches.
            For a list: dict mapping each matched code value to its entity.
        """
        if isinstance(code_value, str):
            return self._find_single_by_code(code_system, code_value, context)
        return self._find_batch_by_code(code_system, code_value, context)

    def _find_single_by_code(
        self, code_system: str, code_value: str, context: MatchContext | None = None
    ) -> Entity | None:
        """Find single entity by code, preferring the highest-precedence source."""
        sql, params = self.query_builder.build_code_lookup_union(
            code_system=code_system,
            code_values=code_value,
            entity_columns=self.ENTITY_COLUMNS,
            include_code_value=False,
        )

        # Apply context filters if provided
        sql = self._apply_context_filters(sql, params, context)

        # Order by precedence DESC to respect overlay precedence
        sql += "\nORDER BY precedence DESC LIMIT 1"

        rows = self.db.execute(sql, params)
        if not rows:
            return None

        return self._row_to_entity(rows[0])

    def _find_batch_by_code(
        self,
        code_system: str,
        code_values: list[str],
        context: MatchContext | None = None,
    ) -> dict[str, Entity]:
        """Find multiple entities by code, keyed by code value."""
        if not code_values:
            return {}

        sql, params = self.query_builder.build_code_lookup_union(
            code_system=code_system,
            code_values=code_values,
            entity_columns=self.ENTITY_COLUMNS,
            include_code_value=True,
        )

        # Apply context filters if provided
        sql = self._apply_context_filters(sql, params, context)

        # Order by code_value and precedence to group overlays together
        sql += "\nORDER BY code_value, precedence DESC"

        # Rows arrive highest precedence first within each code_value, so the
        # first row seen for a code is the one to keep.
        entities: dict[str, Entity] = {}
        for row in self.db.execute(sql, params):
            code_val = row.code_value
            if code_val not in entities:
                entities[code_val] = self._row_to_entity(row)

        return entities

    # ------------------------------------------------------------------
    # Hierarchy
    # ------------------------------------------------------------------

    def get_parent(self, dcid: str, as_of: date | None = None) -> Entity | None:
        """
        Get parent entity.

        A single query joins the child row to its parent row for every
        combination of main/overlay source handled below and returns the
        highest-precedence match.

        Args:
            dcid: Entity DCID
            as_of: Optional date for temporal filtering

        Returns:
            Parent entity or None
        """
        entity_cols = ", ".join(f"parent.{col}" for col in self.ENTITY_COLUMNS)

        # Build temporal filter if needed
        temporal_filter = ""
        params: dict[str, Any] = {"dcid": dcid}
        if as_of is not None:
            # Apply temporal filter to BOTH child and parent to ensure correct
            # historical row selection
            temporal_filter = """
                AND (child.valid_from IS NULL OR child.valid_from <= :as_of)
                AND (child.valid_until IS NULL OR child.valid_until >= :as_of)
                AND (parent.valid_from IS NULL OR parent.valid_from <= :as_of)
                AND (parent.valid_until IS NULL OR parent.valid_until >= :as_of)
            """
            params["as_of"] = as_of.isoformat()

        def join_select(
            child_schema: str, parent_schema: str, rank: int, source: str
        ) -> str:
            # One child->parent join between the two given schemas.
            return f"""
                SELECT {entity_cols}, {rank} AS precedence, '{source}' AS source_db
                FROM {child_schema}.entities child
                INNER JOIN {parent_schema}.entities parent ON child.parent_dcid = parent.dcid
                WHERE child.dcid = :dcid{temporal_filter}
            """

        # Query from main database
        selects = [join_select("main", "main", 0, "main")]

        # NOTE(review): a child in one overlay whose parent lives in a
        # *different* overlay is not covered, and the overlay/overlay and
        # main/overlay branches share the same precedence value - confirm
        # both are intended.
        for schema_name, precedence in self.db.overlays:
            # Child in overlay, parent in overlay
            selects.append(join_select(schema_name, schema_name, precedence, schema_name))
            # Child in overlay, parent in main (ranked at half the overlay precedence)
            selects.append(join_select(schema_name, "main", precedence // 2, "mixed"))
            # Child in main, parent in overlay (respect overlay precedence)
            selects.append(join_select("main", schema_name, precedence, schema_name))

        # Order by precedence to get highest precedence result
        sql = "\nUNION ALL\n".join(selects) + "\nORDER BY precedence DESC LIMIT 1"

        rows = self.db.execute(sql, params)
        if not rows:
            return None

        return self._row_to_entity(rows[0])

    def get_children(self, dcid: str, as_of: date | None = None) -> list[Entity]:
        """
        Get child entities.

        Args:
            dcid: Parent entity DCID
            as_of: Optional date for temporal filtering

        Returns:
            List of child entities
        """
        sql, params = self.query_builder.build_union_query(
            table="entities",
            columns=self.ENTITY_COLUMNS,
            where_clause="parent_dcid = :parent_dcid",
            params={"parent_dcid": dcid},
            unique_key="dcid",
            as_of=as_of,
        )

        return [self._row_to_entity(row) for row in self.db.execute(sql, params)]

    # ------------------------------------------------------------------
    # Lookup by canonical name
    # ------------------------------------------------------------------

    def find_by_canonical_name(
        self, normalized_canonical: str, context: MatchContext | None = None
    ) -> Entity | None:
        """
        Find entity by normalized canonical name.

        Uses the indexed normalized_canonical column for efficient lookups.

        Args:
            normalized_canonical: Normalized canonical name
            context: Optional filtering context

        Returns:
            Entity if found, None otherwise
        """
        params: dict[str, Any] = {"normalized": normalized_canonical}

        # Build context filters using shared utility.
        # NOTE(review): unlike find_by_code, build_filters is called here even
        # when context is None - confirm the helper accepts None.
        temporal_filter, type_filter, parent_filter = (
            ContextFilterBuilder.build_filters(context, params, table_prefix="")
        )

        entity_cols = ", ".join(self.ENTITY_COLUMNS)

        # Main database first (precedence 0), then every attached overlay.
        sources = [("main", 0), *self.db.overlays]
        selects = [
            f"""
                SELECT {entity_cols}, {precedence} AS precedence
                FROM {schema_name}.entities
                WHERE normalized_canonical = :normalized
                {temporal_filter}
                {type_filter}
                {parent_filter}
            """
            for schema_name, precedence in sources
        ]

        # Order by precedence to get highest precedence result
        sql = "\nUNION ALL\n".join(selects) + "\nORDER BY precedence DESC LIMIT 1"

        rows = self.db.execute(sql, params)
        if not rows:
            return None

        return self._row_to_entity(rows[0])

    # ------------------------------------------------------------------
    # Codes
    # ------------------------------------------------------------------

    def _load_codes_for_entities(self, entities: list[Entity]) -> None:
        """
        Load codes for entities and populate their codes dict in place.

        Code rows come from the main database and every overlay. When several
        sources define the same code system for an entity, the value from the
        highest-precedence source is kept, matching the other lookups in this
        class. Entities without any code rows are left untouched.

        Args:
            entities: List of entities to load codes for
        """
        if not entities:
            return

        placeholders, params = self._dcid_in_clause([e.dcid for e in entities])

        # Main database has precedence 0; overlays carry their own precedence.
        sources = [("main", 0), *self.db.overlays]
        selects = [
            f"""
                SELECT entity_dcid, code_system, code_value, {precedence} AS precedence
                FROM {schema_name}.codes
                WHERE entity_dcid IN ({placeholders})
            """
            for schema_name, precedence in sources
        ]

        # Highest precedence first, so the first value seen per
        # (entity, code_system) is the one that must win.
        sql = "\nUNION ALL\n".join(selects) + "\nORDER BY precedence DESC"

        # Group codes by entity_dcid
        codes_by_dcid: dict[str, dict[str, str]] = {}
        for row in self.db.execute(sql, params):
            entity_codes = codes_by_dcid.setdefault(row.entity_dcid, {})
            entity_codes.setdefault(row.code_system, row.code_value)

        # Populate codes in entities
        for entity in entities:
            if entity.dcid in codes_by_dcid:
                entity.codes = codes_by_dcid[entity.dcid]