etlplus 0.12.12__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. etlplus/README.md +2 -2
  2. etlplus/__init__.py +1 -26
  3. etlplus/api/README.md +2 -2
  4. etlplus/api/__init__.py +10 -0
  5. etlplus/api/config.py +36 -20
  6. etlplus/api/endpoint_client.py +3 -3
  7. etlplus/api/enums.py +51 -0
  8. etlplus/api/pagination/client.py +1 -1
  9. etlplus/api/rate_limiting/config.py +13 -1
  10. etlplus/api/rate_limiting/rate_limiter.py +8 -11
  11. etlplus/api/request_manager.py +11 -6
  12. etlplus/api/transport.py +14 -2
  13. etlplus/api/types.py +7 -6
  14. etlplus/{run_helpers.py → api/utils.py} +209 -153
  15. etlplus/cli/README.md +2 -2
  16. etlplus/cli/handlers.py +19 -9
  17. etlplus/config/README.md +31 -33
  18. etlplus/config/__init__.py +9 -32
  19. etlplus/config/types.py +0 -64
  20. etlplus/dag.py +103 -0
  21. etlplus/database/README.md +2 -2
  22. etlplus/enums.py +0 -32
  23. etlplus/file/README.md +2 -2
  24. etlplus/file/enums.py +1 -1
  25. etlplus/{validation → ops}/README.md +2 -2
  26. etlplus/ops/__init__.py +61 -0
  27. etlplus/{extract.py → ops/extract.py} +78 -94
  28. etlplus/{load.py → ops/load.py} +73 -93
  29. etlplus/{run.py → ops/run.py} +153 -118
  30. etlplus/{transform.py → ops/transform.py} +75 -68
  31. etlplus/{validation → ops}/utils.py +80 -15
  32. etlplus/{validate.py → ops/validate.py} +19 -9
  33. etlplus/templates/README.md +2 -2
  34. etlplus/types.py +2 -2
  35. etlplus/workflow/README.md +52 -0
  36. etlplus/workflow/__init__.py +43 -0
  37. etlplus/{config → workflow}/connector.py +17 -16
  38. etlplus/workflow/dag.py +105 -0
  39. etlplus/{config → workflow}/jobs.py +31 -15
  40. etlplus/{config → workflow}/pipeline.py +11 -3
  41. etlplus/{config → workflow}/profile.py +8 -5
  42. etlplus/workflow/types.py +115 -0
  43. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/METADATA +91 -60
  44. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/RECORD +49 -43
  45. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/WHEEL +1 -1
  46. etlplus/validation/__init__.py +0 -44
  47. etlplus/{config → workflow}/utils.py +0 -0
  48. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/entry_points.txt +0 -0
  49. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/licenses/LICENSE +0 -0
  50. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/top_level.txt +0 -0
etlplus/{run.py → ops/run.py}
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.run` module.
+:mod:`etlplus.ops.run` module.
 
 A module for running ETL jobs defined in YAML configurations.
 """
@@ -9,126 +9,78 @@ from __future__ import annotations
 from collections.abc import Mapping
 from typing import Any
 from typing import Final
-from typing import TypedDict
 from typing import cast
 from urllib.parse import urlsplit
 from urllib.parse import urlunsplit
 
-import requests  # type: ignore[import]
-
-from .api import EndpointClient  # noqa: F401 (re-exported for tests)
-from .api import PaginationConfigMap
-from .api import RequestOptions
-from .api import RetryPolicy
-from .api import Url
-from .config import load_pipeline_config
-from .enums import DataConnectorType
+from ..api import EndpointClient  # noqa: F401 (re-exported for tests)
+from ..api import HttpMethod
+from ..api import PaginationConfigMap
+from ..api import RequestOptions
+from ..api import compose_api_request_env
+from ..api import compose_api_target_env
+from ..api import paginate_with_client
+from ..enums import DataConnectorType
+from ..file import FileFormat
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import PipelineConfig
+from ..types import StrPath
+from ..types import Timeout
+from ..utils import print_json
+from ..workflow import load_pipeline_config
 from .extract import extract
 from .load import load
-from .run_helpers import compose_api_request_env
-from .run_helpers import compose_api_target_env
-from .run_helpers import paginate_with_client
 from .transform import transform
-from .types import JSONDict
-from .types import Timeout
-from .utils import print_json
+from .utils import maybe_validate
 from .validate import validate
-from .validation.utils import maybe_validate
 
 
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = ['run']
-
-
-# SECTION: TYPED DICTS ====================================================== #
-
+__all__ = [
+    # Functions
+    'run',
+    'run_pipeline',
+]
 
-class BaseApiHttpEnv(TypedDict, total=False):
-    """
-    Common HTTP request environment for API interactions.
-
-    Fields shared by both source-side and target-side API operations.
-    """
 
-    # Request details
-    url: Url | None
-    headers: dict[str, str]
-    timeout: Timeout
-
-    # Session
-    session: requests.Session | None
-
-
-class ApiRequestEnv(BaseApiHttpEnv, total=False):
-    """
-    Composed request environment for API sources.
+# SECTION: CONSTANTS ======================================================== #
 
-    Returned by ``compose_api_request_env`` (run_helpers) and consumed by the
-    API extract branch. Values are fully merged with endpoint/API defaults and
-    job-level overrides, preserving the original precedence and behavior.
-    """
 
-    # Client
-    use_endpoints: bool
-    base_url: str | None
-    base_path: str | None
-    endpoints_map: dict[str, str] | None
-    endpoint_key: str | None
+DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'
 
-    # Request
-    params: dict[str, Any]
-    pagination: PaginationConfigMap | None
-    sleep_seconds: float
 
-    # Reliability
-    retry: RetryPolicy | None
-    retry_network_errors: bool
+# SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
-class ApiTargetEnv(BaseApiHttpEnv, total=False):
-    """
-    Composed request environment for API targets.
-
-    Returned by ``compose_api_target_env`` (run_helpers) and consumed by the
-    API load branch. Values are merged from the target object, optional
-    API/endpoint reference, and job-level overrides, preserving original
-    precedence and behavior.
-
-    Notes
-    -----
-    - Precedence for inherited values matches original logic:
-      overrides -> target -> API profile defaults.
-    - Target composition does not include pagination/rate-limit/retry since
-      loads are single-request operations; only headers/timeout/session
-      apply.
+def _resolve_validation_config(
+    job_obj: Any,
+    cfg: Any,
+) -> tuple[bool, dict[str, Any], str, str]:
     """
+    Resolve validation settings for a job with safe defaults.
 
-    # Request
-    method: str | None
-
-
-class SessionConfig(TypedDict, total=False):
-    """
-    Minimal session configuration schema accepted by this runner.
+    Parameters
+    ----------
+    job_obj : Any
+        Job configuration object.
+    cfg : Any
+        Pipeline configuration object with validations.
 
-    Keys mirror common requests.Session options; all are optional.
+    Returns
+    -------
+    tuple[bool, dict[str, Any], str, str]
+        Tuple of (enabled, rules, severity, phase).
     """
+    val_ref = job_obj.validate
+    if val_ref is None:
+        return False, {}, 'error', 'before_transform'
 
-    headers: Mapping[str, Any]
-    params: Mapping[str, Any]
-    auth: Any  # (user, pass) tuple or requests-compatible auth object
-    verify: bool | str
-    cert: Any  # str or (cert, key)
-    proxies: Mapping[str, Any]
-    cookies: Mapping[str, Any]
-    trust_env: bool
-
-
-# SECTION: CONSTANTS ======================================================== #
-
-
-DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'
+    rules = cfg.validations.get(val_ref.ruleset, {})
+    severity = (val_ref.severity or 'error').lower()
+    phase = (val_ref.phase or 'before_transform').lower()
+    return True, rules, severity, phase
 
 
 # SECTION: FUNCTIONS ======================================================== #
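
For downstream code, the hunk above amounts to a package re-layout: the runner moved from etlplus.run to etlplus.ops.run, the run_helpers utilities now live under etlplus.api, and pipeline-config loading moved from etlplus.config to etlplus.workflow. A minimal caller-side migration sketch, assuming these names remain importable at the new locations shown in the imports above::

    # Before (0.12.12)
    from etlplus.run import run
    from etlplus.config import load_pipeline_config
    from etlplus.run_helpers import paginate_with_client

    # After (0.15.0)
    from etlplus.ops.run import run
    from etlplus.workflow import load_pipeline_config
    from etlplus.api import paginate_with_client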
@@ -207,12 +159,15 @@ def run(
             and env.get('endpoint_key')
         ):
             # Construct client using module-level EndpointClient so tests
-            # can monkeypatch this class on etlplus.run.
+            # can monkeypatch this class on etlplus.ops.run.
             ClientClass = EndpointClient  # noqa: N806
             client = ClientClass(
-                base_url=cast(str, env['base_url']),
+                base_url=cast(str, env.get('base_url')),
                 base_path=cast(str | None, env.get('base_path')),
-                endpoints=cast(dict[str, str], env['endpoints_map']),
+                endpoints=cast(
+                    dict[str, str],
+                    env.get('endpoints_map', {}),
+                ),
                 retry=env.get('retry'),
                 retry_network_errors=bool(
                     env.get('retry_network_errors', False),
@@ -221,7 +176,7 @@
             )
             data = paginate_with_client(
                 client,
-                cast(str, env['endpoint_key']),
+                cast(str, env.get('endpoint_key')),
                 env.get('params'),
                 env.get('headers'),
                 env.get('timeout'),
@@ -263,19 +218,10 @@ def run(
         # keep explicit guard for defensive programming.
         raise ValueError(f'Unsupported source type: {stype_raw}')
 
-    # DRY: unified validation helper (pre/post transform)
-    val_ref = job_obj.validate
-    enabled_validation = val_ref is not None
-    if enabled_validation:
-        # Type narrowing for static checkers
-        assert val_ref is not None
-        rules = cfg.validations.get(val_ref.ruleset, {})
-        severity = (val_ref.severity or 'error').lower()
-        phase = (val_ref.phase or 'before_transform').lower()
-    else:
-        rules = {}
-        severity = 'error'
-        phase = 'before_transform'
+    enabled_validation, rules, severity, phase = _resolve_validation_config(
+        job_obj,
+        cfg,
+    )
 
     # Pre-transform validation (if configured).
     data = maybe_validate(
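
With this refactor, the inline branch collapses into one tuple unpack. Per the _resolve_validation_config helper added earlier in this file, a job whose validate reference is unset resolves to disabled validation with safe defaults::

    enabled, rules, severity, phase = _resolve_validation_config(job_obj, cfg)
    # job_obj.validate is None  ->  (False, {}, 'error', 'before_transform')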
@@ -333,12 +279,14 @@ def run(
         if not url_t:
             raise ValueError('API target missing "url"')
         kwargs_t: dict[str, Any] = {}
-        if env_t.get('headers'):
-            kwargs_t['headers'] = cast(dict[str, str], env_t['headers'])
+        headers = env_t.get('headers')
+        if headers:
+            kwargs_t['headers'] = cast(dict[str, str], headers)
         if env_t.get('timeout') is not None:
-            kwargs_t['timeout'] = env_t['timeout']
-        if env_t.get('session') is not None:
-            kwargs_t['session'] = env_t['session']
+            kwargs_t['timeout'] = env_t.get('timeout')
+        session = env_t.get('session')
+        if session is not None:
+            kwargs_t['session'] = session
         result = load(
             data,
             'api',
@@ -361,3 +309,90 @@
     # Return the terminal load result directly; callers (e.g., CLI) can wrap
     # it in their own envelope when needed.
     return cast(JSONDict, result)
+
+
+def run_pipeline(
+    *,
+    source_type: DataConnectorType | str | None = None,
+    source: StrPath | JSONData | None = None,
+    operations: PipelineConfig | None = None,
+    target_type: DataConnectorType | str | None = None,
+    target: StrPath | None = None,
+    file_format: FileFormat | str | None = None,
+    method: HttpMethod | str | None = None,
+    **kwargs: Any,
+) -> JSONData:
+    """
+    Run a single extract-transform-load flow without a YAML config.
+
+    Parameters
+    ----------
+    source_type : DataConnectorType | str | None, optional
+        Connector type for extraction. When ``None``, ``source`` is assumed
+        to be pre-loaded data and extraction is skipped.
+    source : StrPath | JSONData | None, optional
+        Data source for extraction or the pre-loaded payload when
+        ``source_type`` is ``None``.
+    operations : PipelineConfig | None, optional
+        Transform configuration passed to :func:`etlplus.ops.transform`.
+    target_type : DataConnectorType | str | None, optional
+        Connector type for loading. When ``None``, load is skipped and the
+        transformed data is returned.
+    target : StrPath | None, optional
+        Target for loading (file path, connection string, or API URL).
+    file_format : FileFormat | str | None, optional
+        File format for file sources/targets (forwarded to extract/load).
+    method : HttpMethod | str | None, optional
+        HTTP method for API loads (forwarded to :func:`etlplus.ops.load`).
+    **kwargs : Any
+        Extra keyword arguments forwarded to extract/load for API options
+        (headers, timeout, session, etc.).
+
+    Returns
+    -------
+    JSONData
+        Transformed data or the load result payload.
+
+    Raises
+    ------
+    TypeError
+        Raised when extracted data is not a dict or list of dicts and no
+        target is specified.
+    ValueError
+        Raised when required source/target inputs are missing.
+    """
+    if source_type is None:
+        if source is None:
+            raise ValueError('source or source_type is required')
+        data = source
+    else:
+        if source is None:
+            raise ValueError('source is required when source_type is set')
+        data = extract(
+            source_type,
+            cast(StrPath, source),
+            file_format=file_format,
+            **kwargs,
+        )
+
+    if operations:
+        data = transform(data, operations)
+
+    if target_type is None:
+        if not isinstance(data, (dict, list)):
+            raise TypeError(
+                f'Expected data to be dict or list of dicts, '
+                f'got {type(data).__name__}',
+            )
+        return data
+    if target is None:
+        raise ValueError('target is required when target_type is set')
+
+    return load(
+        data,
+        target_type,
+        target,
+        file_format=file_format,
+        method=method,
+        **kwargs,
+    )
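
A minimal usage sketch for the new run_pipeline entry point, assuming in-memory records and no target (so the transformed data is returned directly); the operations spec follows the string-keyed form shown in the transform docstring::

    from etlplus.ops.run import run_pipeline

    records = [{'name': 'Ada', 'age': 36}, {'name': 'Bob', 'age': 17}]
    adults = run_pipeline(
        # source_type is None, so source is treated as pre-loaded data
        source=records,
        operations={'filter': {'field': 'age', 'op': 'gte', 'value': 18}},
    )
    # expected: the records with age >= 18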
etlplus/{transform.py → ops/transform.py}
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.transform` module.
+:mod:`etlplus.ops.transform` module.
 
 Helpers to filter, map/rename, select, sort, aggregate, and otherwise
 transform JSON-like records (dicts and lists of dicts).
@@ -24,7 +24,7 @@ Basic pipeline with strings::
 
 Using enums for keys and functions::
 
-    from .enums import PipelineStep, OperatorName, AggregateName
+    from etlplus.enums import PipelineStep, OperatorName, AggregateName
     ops = {
         PipelineStep.FILTER: {
             'field': 'age', 'op': OperatorName.GTE, 'value': 18
@@ -44,28 +44,28 @@ from collections.abc import Sequence
 from typing import Any
 from typing import cast
 
-from .enums import AggregateName
-from .enums import OperatorName
-from .enums import PipelineStep
+from ..enums import AggregateName
+from ..enums import OperatorName
+from ..enums import PipelineStep
+from ..types import AggregateFunc
+from ..types import AggregateSpec
+from ..types import FieldName
+from ..types import Fields
+from ..types import FilterSpec
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import JSONList
+from ..types import MapSpec
+from ..types import OperatorFunc
+from ..types import PipelineConfig
+from ..types import PipelineStepName
+from ..types import SortKey
+from ..types import StepApplier
+from ..types import StepOrSteps
+from ..types import StepSpec
+from ..types import StrPath
+from ..utils import to_number
 from .load import load_data
-from .types import AggregateFunc
-from .types import AggregateSpec
-from .types import FieldName
-from .types import Fields
-from .types import FilterSpec
-from .types import JSONData
-from .types import JSONDict
-from .types import JSONList
-from .types import MapSpec
-from .types import OperatorFunc
-from .types import PipelineConfig
-from .types import PipelineStepName
-from .types import SortKey
-from .types import StepApplier
-from .types import StepOrSteps
-from .types import StepSpec
-from .types import StrPath
-from .utils import to_number
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -730,15 +730,16 @@ def _is_plain_fields_list(obj: Any) -> bool:
 
 
 _PIPELINE_STEPS: tuple[PipelineStepName, ...] = (
+    'aggregate',
     'filter',
     'map',
     'select',
     'sort',
-    'aggregate',
 )
 
 
 _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
+    'aggregate': _apply_aggregate_step,
     'filter': _apply_filter_step,
     'map': _apply_map_step,
     'select': _apply_select_step,
@@ -746,7 +747,54 @@ _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
 }
 
 
-# SECTION: EXPORTS ========================================================== #
+# SECTION: FUNCTIONS ======================================================== #
+
+
+# -- Helpers -- #
+
+
+def apply_aggregate(
+    records: JSONList,
+    operation: AggregateSpec,
+) -> JSONDict:
+    """
+    Aggregate a numeric field or count presence.
+
+    Parameters
+    ----------
+    records : JSONList
+        Records to aggregate.
+    operation : AggregateSpec
+        Dict with keys ``field`` and ``func``. ``func`` is one of
+        ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
+        A callable may also be supplied for ``func``. Optionally, set
+        ``alias`` to control the output key name.
+
+    Returns
+    -------
+    JSONDict
+        A single-row result like ``{"sum_age": 42}``.
+
+    Notes
+    -----
+    Numeric operations ignore non-numeric values but count their presence
+    for ``'count'``.
+    """
+    field = operation.get('field')
+    func = operation.get('func')
+    alias = operation.get('alias')
+
+    if not field or func is None:
+        return {'error': 'Invalid aggregation operation'}
+
+    try:
+        aggregator = _resolve_aggregator(func)
+    except TypeError:
+        return {'error': f'Unknown aggregation function: {func}'}
+
+    nums, present = _collect_numeric_and_presence(records, field)
+    key_name = _derive_agg_key(func, field, alias)
+    return {key_name: aggregator(nums, present)}
 
 
 def apply_filter(
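
Per the docstring above, apply_aggregate reduces a record list to a single-row dict whose key defaults to a func_field combination (as in {"sum_age": 42}) unless an alias overrides it::

    rows = [{'age': 30}, {'age': 12}, {'age': 'n/a'}]
    apply_aggregate(rows, {'field': 'age', 'func': 'sum'})
    # -> {'sum_age': 42}  (the non-numeric 'n/a' is ignored)
    apply_aggregate(rows, {'field': 'age', 'func': 'count', 'alias': 'n'})
    # -> {'n': 3}  (count tallies presence, including 'n/a')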
@@ -894,48 +942,7 @@ def apply_sort(
     )
 
 
-def apply_aggregate(
-    records: JSONList,
-    operation: AggregateSpec,
-) -> JSONDict:
-    """
-    Aggregate a numeric field or count presence.
-
-    Parameters
-    ----------
-    records : JSONList
-        Records to aggregate.
-    operation : AggregateSpec
-        Dict with keys ``field`` and ``func``. ``func`` is one of
-        ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
-        A callable may also be supplied for ``func``. Optionally, set
-        ``alias`` to control the output key name.
-
-    Returns
-    -------
-    JSONDict
-        A single-row result like ``{"sum_age": 42}``.
-
-    Notes
-    -----
-    Numeric operations ignore non-numeric values but count their presence
-    for ``'count'``.
-    """
-    field = operation.get('field')
-    func = operation.get('func')
-    alias = operation.get('alias')
-
-    if not field or func is None:
-        return {'error': 'Invalid aggregation operation'}
-
-    try:
-        aggregator = _resolve_aggregator(func)
-    except TypeError:
-        return {'error': f'Unknown aggregation function: {func}'}
-
-    nums, present = _collect_numeric_and_presence(records, field)
-    key_name = _derive_agg_key(func, field, alias)
-    return {key_name: aggregator(nums, present)}
+# -- Orchestration -- #
 
 
 def transform(
@@ -982,7 +989,7 @@ def transform(
 
     Using enums for keys and functions::
 
-        from .enums import PipelineStep, OperatorName, AggregateName
+        from etlplus.enums import PipelineStep, OperatorName, AggregateName
         ops = {
             PipelineStep.FILTER: {
                 'field': 'age', 'op': OperatorName.GTE, 'value': 18
etlplus/{validation → ops}/utils.py
@@ -1,7 +1,7 @@
 """
-:mod:`etlplus.validation.utils` module.
+:mod:`etlplus.ops.utils` module.
 
-Utility helpers for conditional validation orchestration.
+Utility helpers for conditional data ops orchestration.
 
 The helpers defined here embrace a "high cohesion, low coupling" design by
 isolating normalization, configuration, and logging responsibilities. The
@@ -13,11 +13,14 @@ offloading ancillary concerns to composable helpers.
 from __future__ import annotations
 
 from collections.abc import Callable
+from collections.abc import Mapping
 from dataclasses import dataclass
+from types import MappingProxyType
 from typing import Any
 from typing import Literal
 from typing import Self
 from typing import TypedDict
+from typing import cast
 
 from ..types import StrAnyMap
 from ..utils import normalized_str
@@ -47,6 +50,30 @@ type ValidateFn = Callable[[Any, Ruleset], ValidationResult]
 type PrintFn = Callable[[Any], None]
 
 
+# SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+_PHASE_CHOICES = MappingProxyType(
+    {
+        'before_transform': 'before_transform',
+        'after_transform': 'after_transform',
+    },
+)
+_SEVERITY_CHOICES = MappingProxyType(
+    {
+        'warn': 'warn',
+        'error': 'error',
+    },
+)
+_WINDOW_CHOICES = MappingProxyType(
+    {
+        'before_transform': 'before_transform',
+        'after_transform': 'after_transform',
+        'both': 'both',
+    },
+)
+
+
 # SECTION: DATA CLASSES ===================================================== #
 
@@ -291,11 +318,14 @@ def _normalize_phase(
         Normalized validation phase. Defaults to ``"before_transform"`` when
         unspecified.
     """
-    match normalized_str(value):
-        case 'after_transform':
-            return 'after_transform'
-        case _:
-            return 'before_transform'
+    return cast(
+        ValidationPhase,
+        _normalize_choice(
+            value,
+            mapping=_PHASE_CHOICES,
+            default='before_transform',
+        ),
+    )
 
 
 def _normalize_severity(
@@ -314,7 +344,14 @@ def _normalize_severity(
     ValidationSeverity
         Normalized severity. Defaults to ``"error"`` when unspecified.
     """
-    return 'warn' if normalized_str(value) == 'warn' else 'error'
+    return cast(
+        ValidationSeverity,
+        _normalize_choice(
+            value,
+            mapping=_SEVERITY_CHOICES,
+            default='error',
+        ),
+    )
 
 
 def _normalize_window(
@@ -333,13 +370,41 @@ def _normalize_window(
     ValidationWindow
         Normalized validation window. Defaults to ``"both"`` when unspecified.
     """
-    match normalized_str(value):
-        case 'before_transform':
-            return 'before_transform'
-        case 'after_transform':
-            return 'after_transform'
-        case _:
-            return 'both'
+    return cast(
+        ValidationWindow,
+        _normalize_choice(
+            value,
+            mapping=_WINDOW_CHOICES,
+            default='both',
+        ),
+    )
+
+
+def _normalize_choice(
+    value: str | None,
+    *,
+    mapping: Mapping[str, str],
+    default: str,
+) -> str:
+    """
+    Normalize a text value against a mapping with a default fallback.
+
+    Parameters
+    ----------
+    value : str | None
+        Input text to normalize.
+    mapping : Mapping[str, str]
+        Mapping of accepted values to normalized outputs.
+    default : str
+        Default to return when input is missing or unrecognized.
+
+    Returns
+    -------
+    str
+        Normalized value.
+    """
+    normalized = normalized_str(value)
+    return mapping.get(normalized, default)
 
 
 def _rule_name(
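
The three normalizers above are now table-driven via _normalize_choice. A behavior sketch, per the defaults wired in each wrapper and assuming normalized_str lowercases and strips its input::

    _normalize_choice(
        ' AFTER_TRANSFORM ',
        mapping=_PHASE_CHOICES,
        default='before_transform',
    )
    # -> 'after_transform'
    _normalize_choice(None, mapping=_SEVERITY_CHOICES, default='error')
    # -> 'error'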