deped-primitives 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. deped_primitives/__init__.py +3 -0
  2. deped_primitives/access/__init__.py +34 -0
  3. deped_primitives/access/locks.py +60 -0
  4. deped_primitives/access/scope.py +445 -0
  5. deped_primitives/codes/__init__.py +87 -0
  6. deped_primitives/education/__init__.py +70 -0
  7. deped_primitives/filters/__init__.py +88 -0
  8. deped_primitives/filters/area_scope.py +236 -0
  9. deped_primitives/filters/cascade.py +474 -0
  10. deped_primitives/filters/membership.py +179 -0
  11. deped_primitives/filters/models.py +141 -0
  12. deped_primitives/filters/selections.py +176 -0
  13. deped_primitives/hierarchies/__init__.py +159 -0
  14. deped_primitives/hierarchies/aliases.py +27 -0
  15. deped_primitives/hierarchies/definitions.py +229 -0
  16. deped_primitives/hierarchies/graph.py +273 -0
  17. deped_primitives/hierarchies/labels.py +325 -0
  18. deped_primitives/legislative/__init__.py +198 -0
  19. deped_primitives/legislative/coverage.py +598 -0
  20. deped_primitives/legislative/data/legislative_coverage_rules.yml +1186 -0
  21. deped_primitives/legislative/linkage.py +234 -0
  22. deped_primitives/legislative/policy.py +311 -0
  23. deped_primitives/legislative/special_cases.py +96 -0
  24. deped_primitives/marimo/__init__.py +65 -0
  25. deped_primitives/psgc/__init__.py +85 -0
  26. deped_primitives/psgc/constants.py +89 -0
  27. deped_primitives/psgc/core.py +321 -0
  28. deped_primitives/psgc/exceptions.py +9 -0
  29. deped_primitives/psgc/lineage.py +247 -0
  30. deped_primitives/psgc/relations.py +147 -0
  31. deped_primitives/psgc/types.py +22 -0
  32. deped_primitives/region_groups/__init__.py +298 -0
  33. deped_primitives/regions/__init__.py +145 -0
  34. deped_primitives/school_sizes/__init__.py +110 -0
  35. deped_primitives/sql/__init__.py +77 -0
  36. deped_primitives/sql/builders.py +186 -0
  37. deped_primitives/sql/choices.py +155 -0
  38. deped_primitives/sql/clauses.py +233 -0
  39. deped_primitives/sql/labels.py +55 -0
  40. deped_primitives/sql/schema.py +73 -0
  41. deped_primitives-0.0.1.dist-info/METADATA +151 -0
  42. deped_primitives-0.0.1.dist-info/RECORD +43 -0
  43. deped_primitives-0.0.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,3 @@
1
+ from __future__ import annotations
2
+
3
+ __version__ = "0.0.1"
@@ -0,0 +1,34 @@
1
+ from deped_primitives.access.locks import ScopeLocks, scope_locks_from_access
2
+ from deped_primitives.access.scope import (
3
+ NATIONAL_ACCESS_SCOPE,
4
+ AccessScope,
5
+ access_scope_from_value,
6
+ access_scope_group_ids,
7
+ access_scope_options,
8
+ access_scope_stats_scope,
9
+ access_scope_territory_selections,
10
+ default_hierarchy_for_access_scope,
11
+ island_group_area_scope_selections,
12
+ island_group_for_region,
13
+ island_group_options,
14
+ island_group_region_ids,
15
+ island_group_stats_scope,
16
+ )
17
+
18
+ __all__ = [
19
+ "NATIONAL_ACCESS_SCOPE",
20
+ "AccessScope",
21
+ "ScopeLocks",
22
+ "access_scope_from_value",
23
+ "access_scope_group_ids",
24
+ "access_scope_options",
25
+ "access_scope_stats_scope",
26
+ "access_scope_territory_selections",
27
+ "default_hierarchy_for_access_scope",
28
+ "island_group_area_scope_selections",
29
+ "island_group_for_region",
30
+ "island_group_options",
31
+ "island_group_region_ids",
32
+ "island_group_stats_scope",
33
+ "scope_locks_from_access",
34
+ ]
@@ -0,0 +1,60 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from deped_primitives.access.scope import AccessScope, island_group_for_region
6
+
7
+
8
@dataclass(frozen=True)
class ScopeLocks:
    """Immutable UI lock state computed from an access permission.

    ``territory_selections`` and ``territory_labels`` are stored as
    ``(level, value)`` pairs; lookups by level go through a ``dict`` built
    from those pairs, so a repeated level resolves to its last pair.
    """

    island_group: str = ""
    territory_selections: tuple[tuple[str, str], ...] = ()
    territory_labels: tuple[tuple[str, str], ...] = ()

    @property
    def locked_levels(self) -> tuple[str, ...]:
        """Return the levels of the stored selections, in stored order."""
        return tuple(pair[0] for pair in self.territory_selections)

    @property
    def has_island_group(self) -> bool:
        """Return whether an island group is set (non-empty)."""
        return True if self.island_group else False

    def is_territory_locked(self, level: str) -> bool:
        """Return whether ``level`` is one of the locked levels."""
        wanted = str(level or "")
        return any(locked == wanted for locked in self.locked_levels)

    def group_id_for_level(self, level: str) -> str:
        """Return the locked group ID for ``level``, or ``""`` when absent."""
        by_level = dict(self.territory_selections)
        return by_level.get(str(level or ""), "")

    def label_for_level(self, level: str) -> str:
        """Return the display label for ``level``, or ``""`` when absent."""
        by_level = dict(self.territory_labels)
        return by_level.get(str(level or ""), "")
32
+
33
+
34
def scope_locks_from_access(
    access_scope: AccessScope,
    island_group: str = "",
) -> ScopeLocks:
    """Build the territory UI locks implied by an office access scope.

    A national scope yields an empty ``ScopeLocks``. A regional scope locks
    the region; a division scope locks the region and the division. The
    region label is the PSGC region ID itself, and the division label is the
    division name, falling back to the division group ID. The island group is
    derived from the scope's region and only falls back to the stripped
    ``island_group`` argument when no group is derived.
    """

    if access_scope.is_national:
        return ScopeLocks()

    locked: list[tuple[str, str]] = []
    captions: list[tuple[str, str]] = []

    regional = access_scope.is_regional
    # The division check is only consulted when the scope is not regional.
    divisional = (not regional) and access_scope.is_division

    if regional or divisional:
        region_pair = ("region", access_scope.psgc_region_id)
        locked.append(region_pair)
        captions.append(region_pair)
    if divisional:
        division_id = access_scope.division_group_id
        locked.append(("division", division_id))
        captions.append(("division", access_scope.division_name or division_id))

    fallback_group = str(island_group or "").strip()
    region_group = island_group_for_region(access_scope.psgc_region_id)
    return ScopeLocks(
        island_group=region_group or fallback_group,
        territory_selections=tuple(locked),
        territory_labels=tuple(captions),
    )
@@ -0,0 +1,445 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import polars as pl
6
+
7
+ from deped_primitives.filters.membership import base_rows, descendant_group_ids
8
+ from deped_primitives.hierarchies import (
9
+ hierarchy_graph,
10
+ hierarchy_internal_value,
11
+ split_group_id,
12
+ )
13
+ from deped_primitives.region_groups import (
14
+ region_group_area_scope_selections,
15
+ region_group_options,
16
+ region_group_region_ids,
17
+ )
18
+
19
+ ISLAND_GROUPS = ("luzon", "visayas", "mindanao")
20
+
21
+
22
@dataclass(frozen=True)
class AccessScope:
    """Office-level permission scope for territory-limited workbench users.

    The ``is_*`` properties classify the scope by ``level`` and additionally
    require the identifying fields for that level to be non-empty.
    """

    level: str = "national"
    psgc_region_id: str = ""
    division_group_id: str = ""
    division_name: str = ""

    @property
    def is_national(self) -> bool:
        """Return whether the level is ``"national"``."""
        return self.level == "national"

    @property
    def is_regional(self) -> bool:
        """Return whether this is a regional scope with a region ID set."""
        if self.level != "regional":
            return False
        return bool(self.psgc_region_id)

    @property
    def is_division(self) -> bool:
        """Return whether this is a division scope with region, ID and name set."""
        if self.level != "division":
            return False
        identifying = (
            self.psgc_region_id,
            self.division_group_id,
            self.division_name,
        )
        return all(bool(field) for field in identifying)
47
+
48
+
49
+ NATIONAL_ACCESS_SCOPE = AccessScope()
50
+ _ACCESS_SCOPE_REGION_PREFIX = "regional:"
51
+ _ACCESS_SCOPE_DIVISION_PREFIX = "division:"
52
+
53
+
54
def island_group_options() -> dict[str, str]:
    """Return island-group options by delegating to ``region_group_options``."""

    options = region_group_options()
    return options
58
+
59
+
60
def island_group_region_ids(island_group: str) -> tuple[str, ...]:
    """Return the region IDs of an island group via ``region_group_region_ids``."""

    region_ids = region_group_region_ids(island_group)
    return region_ids
64
+
65
+
66
def island_group_area_scope_selections(
    island_group: str,
) -> tuple[tuple[str, str], ...]:
    """Return area-scope selections for an island group.

    Delegates to ``region_group_area_scope_selections``.
    """

    selections = region_group_area_scope_selections(island_group)
    return selections
72
+
73
+
74
def island_group_for_region(region_id: str) -> str:
    """Return the first island group whose region IDs include ``region_id``.

    Blank or missing input, and regions found in no group listed in
    ``ISLAND_GROUPS``, yield an empty string.
    """

    wanted = str(region_id or "").strip()
    if wanted == "":
        return ""
    containing_groups = (
        group for group in ISLAND_GROUPS if wanted in island_group_region_ids(group)
    )
    return next(containing_groups, "")
84
+
85
+
86
def island_group_stats_scope(
    stats: pl.DataFrame,
    *,
    school_year_id: int,
    island_group: str,
) -> pl.DataFrame:
    """Restrict area stats to rows under the regions of an island group.

    Returns an unfiltered clone of ``stats`` when the island group resolves to
    no region IDs or ``stats`` is empty, and an empty frame with the same
    schema when a required column is missing. Otherwise rows for
    ``school_year_id`` are kept only where ``group_id`` is among the IDs that
    ``descendant_group_ids`` reports for one of the group's regions at that
    row's hierarchy and level.
    """

    group_regions = island_group_region_ids(island_group)
    if stats.is_empty() or not group_regions:
        return stats.clone()

    needed_columns = {"school_year_id", "hierarchy", "level", "group_id"}
    if needed_columns - set(stats.columns):
        return stats.clear()

    year_rows = stats.filter(
        pl.col("school_year_id").cast(pl.Int64) == int(school_year_id)
    )
    if year_rows.is_empty():
        return year_rows

    hierarchy_col = pl.col("hierarchy").cast(pl.String)
    level_col = pl.col("level").cast(pl.String)
    group_col = pl.col("group_id").cast(pl.String)

    # Distinct (hierarchy, level) pairs in first-seen order; nulls become "".
    pairs = (
        year_rows.select(["hierarchy", "level"])
        .fill_null("")
        .cast(pl.String)
        .unique(maintain_order=True)
    )

    kept: list[pl.DataFrame] = []
    for pair_hierarchy, pair_level in pairs.iter_rows():
        hierarchy_name = str(pair_hierarchy)
        level_name = str(pair_level)
        visible_ids: set[str] = set()
        for region_id in group_regions:
            # The lookup receives the full ``stats`` frame, not the year slice.
            visible_ids.update(
                descendant_group_ids(
                    stats,
                    school_year_id=school_year_id,
                    hierarchy=hierarchy_name,
                    ancestor_level="region",
                    ancestor_group_id=region_id,
                    target_level=level_name,
                )
            )
        if not visible_ids:
            continue
        kept.append(
            year_rows.filter(
                (hierarchy_col == hierarchy_name)
                & (level_col == level_name)
                & (group_col.is_in(visible_ids))
            )
        )

    if kept:
        return pl.concat(kept, how="vertical").unique(maintain_order=True)
    return year_rows.clear()
138
+
139
+
140
def access_scope_options(stats: pl.DataFrame) -> dict[str, str]:
    """Return option labels mapped to encoded access-scope values.

    The mapping always starts with ``"All access" -> "national"``. When
    ``stats`` is non-empty, one ``"Region: ..."`` entry per governance region
    row and one ``"Division: ..."`` entry per governance division row follow,
    with values prefixed by the regional / division scope prefixes.
    """

    options = {"All access": "national"}
    if stats.is_empty():
        return options
    # Regions are listed before divisions; both use the same label rules.
    for title, level, prefix in (
        ("Region", "region", _ACCESS_SCOPE_REGION_PREFIX),
        ("Division", "division", _ACCESS_SCOPE_DIVISION_PREFIX),
    ):
        for row in _scope_rows(stats, "governance", level).iter_rows(named=True):
            group_id = str(row["group_id"])
            # A null label must read as "no label"; str(None) would otherwise
            # surface the literal text "None" in the option caption.
            label = _option_label(group_id, str(row["group_label"] or ""))
            options[f"{title}: {label}"] = f"{prefix}{group_id}"
    return options
155
+
156
+
157
def access_scope_from_value(stats: pl.DataFrame, value: object) -> AccessScope:
    """Decode an access-scope value and check it against governance stats.

    ``AccessScope`` instances are re-validated. Other values are converted to
    text: ``""`` and ``"national"`` give the national scope; a value with the
    regional prefix gives a regional scope when its ID appears among the
    governance region rows; a value with the division prefix is resolved by
    ``_division_access_scope``. Every other case returns
    ``NATIONAL_ACCESS_SCOPE``.

    NOTE(review): unrecognized or unmatched values resolve to the national
    scope rather than being rejected -- confirm callers intend this.
    """

    if isinstance(value, AccessScope):
        return _validate_access_scope(stats, value)

    encoded = str(value or "")
    if encoded in ("", "national"):
        return NATIONAL_ACCESS_SCOPE

    if encoded.startswith(_ACCESS_SCOPE_REGION_PREFIX):
        requested_region = encoded.removeprefix(_ACCESS_SCOPE_REGION_PREFIX)
        region_column = _scope_rows(stats, "governance", "region").get_column(
            "group_id"
        )
        known_regions = region_column.cast(pl.String).to_list()
        if requested_region in known_regions:
            return AccessScope(level="regional", psgc_region_id=requested_region)

    if encoded.startswith(_ACCESS_SCOPE_DIVISION_PREFIX):
        requested_division = encoded.removeprefix(_ACCESS_SCOPE_DIVISION_PREFIX)
        return _division_access_scope(stats, requested_division)

    return NATIONAL_ACCESS_SCOPE
180
+
181
+
182
def default_hierarchy_for_access_scope(
    access_scope: AccessScope,
    available_hierarchies: list[str] | tuple[str, ...],
    fallback: str,
) -> str:
    """Pick the default hierarchy for an access scope.

    Non-national scopes get ``"governance"`` when it is listed in
    ``available_hierarchies``; every other case returns ``fallback``.
    """

    if access_scope.is_national:
        return fallback
    return "governance" if "governance" in available_hierarchies else fallback
192
+
193
+
194
def access_scope_territory_selections(
    access_scope: AccessScope,
    hierarchy: str,
) -> tuple[tuple[str, str], ...]:
    """Return the ``(level, group_id)`` selections an access scope implies.

    National scopes imply nothing. The hierarchy's cascade path decides which
    levels may be selected; a hierarchy for which ``hierarchy_graph`` raises
    ``ValueError`` is treated as having no levels. Regional scopes select the
    region. Division scopes select region and division under the
    ``"governance"`` hierarchy and only the region under any other hierarchy.
    """

    if access_scope.is_national:
        return ()

    normalized_hierarchy = _normalize_hierarchy(hierarchy)
    try:
        cascade_levels = set(hierarchy_graph(normalized_hierarchy).cascade_path)
    except ValueError:
        cascade_levels = set()

    region_only: tuple[tuple[str, str], ...] = ()
    if "region" in cascade_levels:
        region_only = (("region", access_scope.psgc_region_id),)

    if access_scope.is_regional:
        return region_only
    if not access_scope.is_division:
        return ()
    if normalized_hierarchy != "governance" or "division" not in cascade_levels:
        return region_only
    return region_only + (("division", access_scope.division_group_id),)
219
+
220
+
221
def access_scope_group_ids(
    stats: pl.DataFrame,
    *,
    school_year_id: int,
    hierarchy: str,
    target_level: str,
    access_scope: AccessScope = NATIONAL_ACCESS_SCOPE,
    division_coverage: pl.DataFrame | None = None,
) -> set[str]:
    """Return target-level group IDs visible under an access scope.

    * National: every non-blank ``group_id`` that ``base_rows`` returns for
      the year, hierarchy and level.
    * Regional: the IDs ``descendant_group_ids`` reports under the scope's
      region.
    * Division, ``"governance"`` hierarchy: resolved by
      ``_governance_division_scope_group_ids``.
    * Division, other hierarchies: the scope's region for the ``"region"``
      level, otherwise whatever ``division_coverage`` lists for the division.

    A scope that is none of the above yields an empty set.
    """

    normalized_hierarchy = _normalize_hierarchy(hierarchy)
    if access_scope.is_national:
        rows = base_rows(
            stats,
            school_year_id=school_year_id,
            hierarchy=normalized_hierarchy,
            level=target_level,
        )
        if rows.is_empty() or "group_id" not in rows.columns:
            return set()
        # Drop nulls before building the set: otherwise ``None`` slips past
        # the blank-ID filter and breaks the ``set[str]`` contract.
        group_ids = rows.get_column("group_id").cast(pl.String).drop_nulls().to_list()
        return set(group_ids) - {""}
    if access_scope.is_regional:
        return descendant_group_ids(
            stats,
            school_year_id=school_year_id,
            hierarchy=normalized_hierarchy,
            ancestor_level="region",
            ancestor_group_id=access_scope.psgc_region_id,
            target_level=target_level,
        )
    if not access_scope.is_division:
        return set()
    if normalized_hierarchy == "governance":
        return _governance_division_scope_group_ids(
            stats, school_year_id, target_level, access_scope
        )
    if target_level == "region":
        return {access_scope.psgc_region_id}
    return _division_coverage_target_group_ids(
        division_coverage,
        school_year_id=school_year_id,
        access_scope=access_scope,
        hierarchy=normalized_hierarchy,
        target_level=target_level,
    )
267
+
268
+
269
def access_scope_stats_scope(
    stats: pl.DataFrame,
    *,
    school_year_id: int,
    access_scope: AccessScope = NATIONAL_ACCESS_SCOPE,
    hierarchy: str = "",
    division_coverage: pl.DataFrame | None = None,
) -> pl.DataFrame:
    """Filter area stats to rows visible under an access scope.

    National scopes and empty frames return an unfiltered clone of ``stats``.
    A frame missing any of the columns this filter reads returns an empty
    frame with the same schema, matching ``island_group_stats_scope``.
    Otherwise rows for ``school_year_id`` are kept per hierarchy and level
    when their ``group_id`` is among the IDs ``access_scope_group_ids``
    reports. Only ``hierarchy`` is scoped when it is given; otherwise every
    hierarchy present in the year's rows is scoped.
    """

    if access_scope.is_national or stats.is_empty():
        return stats.clone()
    # Guard the columns referenced below so a malformed frame yields no rows
    # for a restricted scope instead of raising on a missing column.
    required = {"school_year_id", "hierarchy", "level", "group_id"}
    if not required.issubset(stats.columns):
        return stats.clear()
    rows = stats.filter(pl.col("school_year_id").cast(pl.Int64) == int(school_year_id))
    if rows.is_empty():
        return rows

    scoped_hierarchies = (
        (_normalize_hierarchy(hierarchy),)
        if hierarchy
        else tuple(
            rows.get_column("hierarchy")
            .cast(pl.String)
            .drop_nulls()
            .unique(maintain_order=True)
            .to_list()
        )
    )
    scoped_parts: list[pl.DataFrame] = []
    for scoped_hierarchy in scoped_hierarchies:
        hierarchy_rows = rows.filter(
            pl.col("hierarchy").cast(pl.String) == scoped_hierarchy
        )
        target_levels = (
            hierarchy_rows.get_column("level")
            .cast(pl.String)
            .drop_nulls()
            .unique(maintain_order=True)
            .to_list()
        )
        for target_level in target_levels:
            # Visibility is resolved against the full ``stats`` frame.
            group_ids = access_scope_group_ids(
                stats,
                school_year_id=school_year_id,
                hierarchy=scoped_hierarchy,
                target_level=target_level,
                access_scope=access_scope,
                division_coverage=division_coverage,
            )
            if group_ids:
                scoped_parts.append(
                    hierarchy_rows.filter(
                        (pl.col("level").cast(pl.String) == target_level)
                        & (pl.col("group_id").cast(pl.String).is_in(group_ids))
                    )
                )
    if not scoped_parts:
        return rows.clear()
    return pl.concat(scoped_parts, how="vertical").unique(maintain_order=True)
326
+
327
+
328
def _validate_access_scope(stats: pl.DataFrame, scope: AccessScope) -> AccessScope:
    """Re-validate an existing scope by re-encoding it and decoding it again.

    Regional and division scopes are rebuilt through
    ``access_scope_from_value``; any other scope becomes
    ``NATIONAL_ACCESS_SCOPE``.
    """

    if scope.is_regional:
        encoded = f"{_ACCESS_SCOPE_REGION_PREFIX}{scope.psgc_region_id}"
    elif scope.is_division:
        encoded = f"{_ACCESS_SCOPE_DIVISION_PREFIX}{scope.division_group_id}"
    else:
        return NATIONAL_ACCESS_SCOPE
    return access_scope_from_value(stats, encoded)
338
+
339
+
340
def _division_access_scope(stats: pl.DataFrame, division_group_id: str) -> AccessScope:
    """Build a division scope for a group ID found in governance stats.

    Returns ``NATIONAL_ACCESS_SCOPE`` when the ID is not among the governance
    division rows or when ``split_group_id`` raises ``ValueError`` for it.
    """

    wanted_id = str(division_group_id)
    division_rows = _scope_rows(stats, "governance", "division")
    hits = division_rows.filter(pl.col("group_id").cast(pl.String) == wanted_id)
    if hits.is_empty():
        return NATIONAL_ACCESS_SCOPE

    try:
        parts = split_group_id("governance", "division", wanted_id)
    except ValueError:
        return NATIONAL_ACCESS_SCOPE

    return AccessScope(
        level="division",
        psgc_region_id=parts["psgc_region_id"],
        division_group_id=wanted_id,
        division_name=parts["division"],
    )
356
+
357
+
358
+ def _governance_division_scope_group_ids(
359
+ stats: pl.DataFrame,
360
+ school_year_id: int,
361
+ target_level: str,
362
+ access_scope: AccessScope,
363
+ ) -> set[str]:
364
+ if target_level == "region":
365
+ return {access_scope.psgc_region_id}
366
+ if target_level == "division":
367
+ return {access_scope.division_group_id}
368
+ if target_level == "school_district":
369
+ return descendant_group_ids(
370
+ stats,
371
+ school_year_id=school_year_id,
372
+ hierarchy="governance",
373
+ ancestor_level="division",
374
+ ancestor_group_id=access_scope.division_group_id,
375
+ target_level=target_level,
376
+ )
377
+ return set()
378
+
379
+
380
+ def _division_coverage_target_group_ids(
381
+ division_coverage: pl.DataFrame | None,
382
+ *,
383
+ school_year_id: int,
384
+ access_scope: AccessScope,
385
+ hierarchy: str,
386
+ target_level: str,
387
+ ) -> set[str]:
388
+ if division_coverage is None or division_coverage.is_empty():
389
+ return set()
390
+ required = {
391
+ "school_year_id",
392
+ "division_group_id",
393
+ "target_hierarchy",
394
+ "target_level",
395
+ "target_group_id",
396
+ }
397
+ if not required.issubset(division_coverage.columns):
398
+ return set()
399
+ rows = division_coverage.filter(
400
+ (pl.col("school_year_id").cast(pl.Int64) == int(school_year_id))
401
+ & (
402
+ pl.col("division_group_id").cast(pl.String)
403
+ == access_scope.division_group_id
404
+ )
405
+ & (pl.col("target_hierarchy").cast(pl.String) == hierarchy)
406
+ & (pl.col("target_level").cast(pl.String) == target_level)
407
+ )
408
+ if rows.is_empty():
409
+ return set()
410
+ return set(rows.get_column("target_group_id").cast(pl.String).to_list()) - {""}
411
+
412
+
413
def _scope_rows(stats: pl.DataFrame, hierarchy: str, level: str) -> pl.DataFrame:
    """Return distinct ``group_id`` / ``group_label`` rows for one level.

    Rows are filtered to ``hierarchy`` and ``level``, reduced to the first row
    per ``group_id``, and sorted by label then ID. An empty frame, or one
    missing a needed column, gives an empty two-column frame. A missing
    ``group_label`` column is replaced by empty-string labels.
    """

    needed = {"hierarchy", "level", "group_id"}
    if stats.is_empty() or not needed.issubset(stats.columns):
        return pl.DataFrame({"group_id": [], "group_label": []})

    if "group_label" in stats.columns:
        label_expr = pl.col("group_label").cast(pl.String)
    else:
        label_expr = pl.lit("", dtype=pl.String)

    matching = stats.filter(
        (pl.col("hierarchy").cast(pl.String) == hierarchy)
        & (pl.col("level").cast(pl.String) == level)
    )
    labelled = matching.with_columns(
        pl.col("group_id").cast(pl.String).alias("group_id"),
        label_expr.alias("group_label"),
    ).select(["group_id", "group_label"])
    deduplicated = labelled.unique(
        subset=["group_id"], keep="first", maintain_order=True
    )
    return deduplicated.sort(["group_label", "group_id"], maintain_order=True)
436
+
437
+
438
+ def _option_label(group_id: str, group_label: str) -> str:
439
+ label = str(group_label or "").strip()
440
+ return group_id if not label or label == group_id else f"{label} ({group_id})"
441
+
442
+
443
def _normalize_hierarchy(hierarchy: str) -> str:
    """Return the internal hierarchy value, or the stripped input as fallback.

    The fallback applies when ``hierarchy_internal_value`` returns a falsy
    result for the stripped input.
    """

    stripped = str(hierarchy or "").strip()
    internal = hierarchy_internal_value(stripped)
    return internal if internal else stripped
@@ -0,0 +1,87 @@
1
+ """Code and name normalization helpers for PSGC and PSA/NAMRIA ADM PCODEs."""
2
+
3
+ import re
4
+ import unicodedata
5
+
6
+ _PH_PCODE_RE = re.compile(r"^PH[0-9]*$")
7
+
8
+
9
def clean_text(value: object) -> str:
    """Return ``str(value)`` stripped of surrounding whitespace; ``None`` gives ``""``."""
    return "" if value is None else str(value).strip()
14
+
15
+
16
def normalize_psgc_id(value: object) -> str:
    """Normalize a PSGC ID to the canonical 10-character string shape.

    Blank input gives ``""``. Text made only of ASCII digits and at most 10
    characters long is left-padded with zeros to 10 characters. Anything else
    is returned cleaned but otherwise unchanged.
    """
    text = clean_text(value)
    if not text:
        return ""
    # str.isdigit() alone also accepts non-ASCII digits (e.g. "²"), which
    # would be zero-padded into an ID that is not a decimal number.
    is_decimal_id = text.isascii() and text.isdigit()
    if not is_decimal_id or len(text) > 10:
        return text
    return text.zfill(10)
24
+
25
+
26
def is_valid_boundary_pcode(pcode: object) -> bool:
    """Return whether the cleaned PCODE fully matches the PH-prefixed pattern."""
    candidate = clean_text(pcode)
    if not candidate:
        return False
    return _PH_PCODE_RE.fullmatch(candidate) is not None
30
+
31
+
32
def normalize_boundary_pcode_to_psgc_id(pcode: object) -> str:
    """Convert a PH-prefixed ADM PCODE into the direct 10-digit PSGC candidate.

    The ``PH`` prefix is removed and the remainder is right-padded with zeros
    to 10 characters. Invalid PCODEs give ``""``.
    """
    candidate = clean_text(pcode)
    if is_valid_boundary_pcode(candidate):
        digits = candidate.removeprefix("PH")
        return digits.ljust(10, "0")
    return ""
38
+
39
+
40
def normalize_legacy_code(value: object) -> str:
    """Normalize the PSGC legacy/common code column to a 9-digit candidate.

    Blank input gives ``""``. A trailing ``".0"`` is removed first. Text made
    only of ASCII digits is then left-padded with zeros to 9 characters.
    Anything else is returned as-is after the ``".0"`` removal.
    """
    text = clean_text(value)
    if not text:
        return ""
    if text.endswith(".0"):
        text = text[:-2]
    # str.isdigit() alone also accepts non-ASCII digits (e.g. "²"), which
    # would be zero-padded into a code that is not a decimal number.
    if not (text.isascii() and text.isdigit()):
        return text
    return text.zfill(9)
50
+
51
+
52
def normalize_boundary_pcode_to_legacy_code(pcode: object) -> str:
    """Convert a 2023 ADM PCODE into the 9-digit PSGC legacy code candidate.

    The 2023 ADM PCODEs retain older compact province/HUC code shapes. Current
    PSGC IDs can move after reorganizations, but the `cc` column keeps the
    older code needed for audited fallback matches.

    After the ``PH`` prefix is removed, a ``"0"`` in the third position is
    dropped when at least five digits are present; the result is right-padded
    with zeros to 9 characters. Invalid PCODEs give ``""``.
    """
    candidate = clean_text(pcode)
    if not is_valid_boundary_pcode(candidate):
        return ""
    numeric = candidate.removeprefix("PH")
    has_filler_zero = len(numeric) >= 5 and numeric[2] == "0"
    if has_filler_zero:
        numeric = f"{numeric[:2]}{numeric[3:]}"
    return numeric.ljust(9, "0")
66
+
67
+
68
def comparable_name(value: object) -> str:
    """Return the name case-folded with whitespace runs collapsed to one space."""
    words = clean_text(value).split()
    collapsed = " ".join(words)
    return collapsed.casefold()
71
+
72
+
73
def strip_parenthetical(value: object) -> str:
    """Drop every ``(...)`` chunk, with the whitespace before it, from a name."""
    cleaned = clean_text(value)
    without_chunks = re.sub(r"\s*\([^)]*\)", "", cleaned)
    return without_chunks.strip()
76
+
77
+
78
def name_key(value: object) -> str:
    """Reduce a name to a lowercase ASCII key for exact comparisons.

    The text is NFKD-normalized, non-ASCII characters are discarded, the
    result is case-folded, and every run of characters outside ``a-z0-9``
    becomes a single space before the final strip.
    """
    decomposed = unicodedata.normalize("NFKD", clean_text(value))
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    folded = ascii_only.casefold()
    return re.sub(r"[^a-z0-9]+", " ", folded).strip()
83
+
84
+
85
def alias_key(value: object) -> str:
    """Return the alias-matching key: ``name_key`` of the name without parentheticals."""
    base_name = strip_parenthetical(value)
    return name_key(base_name)
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import asdict, dataclass
4
+ from typing import Iterable
5
+
6
+ import polars as pl
7
+
8
+
9
@dataclass(frozen=True)
class GradeSpec:
    """Immutable pairing of a school grade label with its key-stage number."""

    # Grade label, e.g. "g1".
    label: str
    # Key-stage number the grade belongs to.
    key_stage: int
15
+
16
+
17
# Ordered grade specs, built from (label, key stage) pairs.
SCHOOL_GRADES: tuple[GradeSpec, ...] = tuple(
    GradeSpec(label, key_stage)
    for label, key_stage in (
        ("kinder", 1),
        ("esng", 1),
        ("g1", 1),
        ("g2", 1),
        ("g3", 1),
        ("g4", 2),
        ("g5", 2),
        ("g6", 2),
        ("g7", 3),
        ("g8", 3),
        ("g9", 3),
        ("g10", 3),
        ("jhsng", 3),
        ("g11", 4),
        ("g12", 4),
    )
)
# Lookup of the same specs keyed by their label.
SCHOOL_GRADES_BY_LABEL = {spec.label: spec for spec in SCHOOL_GRADES}
35
+
36
+
37
def grade_spec(label: str) -> GradeSpec | None:
    """Look up a grade spec by label, ignoring case; unknown labels give ``None``."""

    key = str(label or "").lower()
    return SCHOOL_GRADES_BY_LABEL.get(key)
41
+
42
+
43
def grade_key_stage(label: str) -> int | None:
    """Return the key-stage number for a grade label, or ``None`` if unknown."""

    spec = grade_spec(label)
    if spec is None:
        return None
    return spec.key_stage
48
+
49
+
50
def school_grades_frame(
    grades: Iterable[GradeSpec] = SCHOOL_GRADES,
    *,
    include_id: bool = True,
) -> pl.DataFrame:
    """Return grade specs as a Polars dataframe, one row per grade.

    With ``include_id`` set, a row-index column named ``id`` starting at 1 is
    added through ``with_row_index``.
    """

    records = list(map(asdict, grades))
    table = pl.DataFrame(records)
    if not include_id:
        return table
    return table.with_row_index(name="id", offset=1)
61
+
62
+
63
+ __all__ = [
64
+ "SCHOOL_GRADES",
65
+ "SCHOOL_GRADES_BY_LABEL",
66
+ "GradeSpec",
67
+ "grade_key_stage",
68
+ "grade_spec",
69
+ "school_grades_frame",
70
+ ]