etlplus 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (120)
  1. etlplus/README.md +37 -0
  2. etlplus/__init__.py +1 -26
  3. etlplus/api/README.md +51 -3
  4. etlplus/api/__init__.py +10 -0
  5. etlplus/api/config.py +39 -28
  6. etlplus/api/endpoint_client.py +3 -3
  7. etlplus/api/enums.py +51 -0
  8. etlplus/api/pagination/client.py +1 -1
  9. etlplus/api/rate_limiting/config.py +13 -1
  10. etlplus/api/rate_limiting/rate_limiter.py +8 -11
  11. etlplus/api/request_manager.py +11 -6
  12. etlplus/api/transport.py +14 -2
  13. etlplus/api/types.py +96 -6
  14. etlplus/{run_helpers.py → api/utils.py} +209 -153
  15. etlplus/cli/README.md +40 -0
  16. etlplus/cli/commands.py +76 -43
  17. etlplus/cli/constants.py +1 -1
  18. etlplus/cli/handlers.py +40 -12
  19. etlplus/cli/io.py +2 -2
  20. etlplus/cli/main.py +1 -1
  21. etlplus/cli/state.py +4 -7
  22. etlplus/database/README.md +48 -0
  23. etlplus/database/ddl.py +1 -1
  24. etlplus/database/engine.py +19 -3
  25. etlplus/database/orm.py +2 -0
  26. etlplus/database/schema.py +1 -1
  27. etlplus/enums.py +1 -157
  28. etlplus/file/README.md +105 -0
  29. etlplus/file/__init__.py +25 -0
  30. etlplus/file/_imports.py +141 -0
  31. etlplus/file/_io.py +160 -0
  32. etlplus/file/accdb.py +78 -0
  33. etlplus/file/arrow.py +78 -0
  34. etlplus/file/avro.py +176 -0
  35. etlplus/file/bson.py +77 -0
  36. etlplus/file/cbor.py +78 -0
  37. etlplus/file/cfg.py +79 -0
  38. etlplus/file/conf.py +80 -0
  39. etlplus/file/core.py +322 -0
  40. etlplus/file/csv.py +79 -0
  41. etlplus/file/dat.py +78 -0
  42. etlplus/file/dta.py +77 -0
  43. etlplus/file/duckdb.py +78 -0
  44. etlplus/file/enums.py +343 -0
  45. etlplus/file/feather.py +111 -0
  46. etlplus/file/fwf.py +77 -0
  47. etlplus/file/gz.py +123 -0
  48. etlplus/file/hbs.py +78 -0
  49. etlplus/file/hdf5.py +78 -0
  50. etlplus/file/ini.py +79 -0
  51. etlplus/file/ion.py +78 -0
  52. etlplus/file/jinja2.py +78 -0
  53. etlplus/file/json.py +98 -0
  54. etlplus/file/log.py +78 -0
  55. etlplus/file/mat.py +78 -0
  56. etlplus/file/mdb.py +78 -0
  57. etlplus/file/msgpack.py +78 -0
  58. etlplus/file/mustache.py +78 -0
  59. etlplus/file/nc.py +78 -0
  60. etlplus/file/ndjson.py +108 -0
  61. etlplus/file/numbers.py +75 -0
  62. etlplus/file/ods.py +79 -0
  63. etlplus/file/orc.py +111 -0
  64. etlplus/file/parquet.py +113 -0
  65. etlplus/file/pb.py +78 -0
  66. etlplus/file/pbf.py +77 -0
  67. etlplus/file/properties.py +78 -0
  68. etlplus/file/proto.py +77 -0
  69. etlplus/file/psv.py +79 -0
  70. etlplus/file/rda.py +78 -0
  71. etlplus/file/rds.py +78 -0
  72. etlplus/file/sas7bdat.py +78 -0
  73. etlplus/file/sav.py +77 -0
  74. etlplus/file/sqlite.py +78 -0
  75. etlplus/file/stub.py +84 -0
  76. etlplus/file/sylk.py +77 -0
  77. etlplus/file/tab.py +81 -0
  78. etlplus/file/toml.py +78 -0
  79. etlplus/file/tsv.py +80 -0
  80. etlplus/file/txt.py +102 -0
  81. etlplus/file/vm.py +78 -0
  82. etlplus/file/wks.py +77 -0
  83. etlplus/file/xls.py +88 -0
  84. etlplus/file/xlsm.py +79 -0
  85. etlplus/file/xlsx.py +99 -0
  86. etlplus/file/xml.py +185 -0
  87. etlplus/file/xpt.py +78 -0
  88. etlplus/file/yaml.py +95 -0
  89. etlplus/file/zip.py +175 -0
  90. etlplus/file/zsav.py +77 -0
  91. etlplus/ops/README.md +50 -0
  92. etlplus/ops/__init__.py +61 -0
  93. etlplus/{extract.py → ops/extract.py} +81 -99
  94. etlplus/{load.py → ops/load.py} +78 -101
  95. etlplus/{run.py → ops/run.py} +159 -127
  96. etlplus/{transform.py → ops/transform.py} +75 -68
  97. etlplus/{validation → ops}/utils.py +53 -17
  98. etlplus/{validate.py → ops/validate.py} +22 -12
  99. etlplus/templates/README.md +46 -0
  100. etlplus/types.py +5 -4
  101. etlplus/utils.py +136 -2
  102. etlplus/workflow/README.md +52 -0
  103. etlplus/{config → workflow}/__init__.py +10 -23
  104. etlplus/{config → workflow}/connector.py +58 -44
  105. etlplus/workflow/dag.py +105 -0
  106. etlplus/{config → workflow}/jobs.py +105 -32
  107. etlplus/{config → workflow}/pipeline.py +59 -51
  108. etlplus/{config → workflow}/profile.py +8 -5
  109. etlplus/workflow/types.py +115 -0
  110. {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/METADATA +210 -17
  111. etlplus-0.9.2.dist-info/RECORD +134 -0
  112. {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/WHEEL +1 -1
  113. etlplus/config/types.py +0 -204
  114. etlplus/config/utils.py +0 -120
  115. etlplus/file.py +0 -657
  116. etlplus/validation/__init__.py +0 -44
  117. etlplus-0.9.1.dist-info/RECORD +0 -65
  118. {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/entry_points.txt +0 -0
  119. {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/licenses/LICENSE +0 -0
  120. {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/top_level.txt +0 -0
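For consumers upgrading from 0.9.1, the bulk of this release is a re-packaging: the flat `run`/`extract`/`transform`/`load`/`validate` modules move under `etlplus.ops`, `etlplus.config` becomes `etlplus.workflow`, `run_helpers` is folded into `etlplus/api/utils.py`, `validation/utils.py` becomes `etlplus/ops/utils.py`, and the single `file.py` module becomes the `etlplus.file` package. A minimal sketch of the corresponding import changes, assuming the old paths are not re-exported for backwards compatibility:

```python
# etlplus 0.9.1 (old module layout)
# from etlplus.run import run
# from etlplus.config import load_pipeline_config
# from etlplus.validation.utils import maybe_validate

# etlplus 0.9.2 (new module layout)
from etlplus.ops.run import run, run_pipeline
from etlplus.workflow import load_pipeline_config
from etlplus.ops.utils import maybe_validate
```

The hunks below show the three relocated modules whose contents changed most: `ops/run.py`, `ops/transform.py`, and `ops/utils.py`.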
etlplus/{run.py → ops/run.py}
@@ -1,5 +1,5 @@
  """
- :mod:`etlplus.run` module.
+ :mod:`etlplus.ops.run` module.

  A module for running ETL jobs defined in YAML configurations.
  """
@@ -9,127 +9,78 @@ from __future__ import annotations
  from collections.abc import Mapping
  from typing import Any
  from typing import Final
- from typing import TypedDict
  from typing import cast
  from urllib.parse import urlsplit
  from urllib.parse import urlunsplit

- import requests # type: ignore[import]
-
- from .api import EndpointClient # noqa: F401 (re-exported for tests)
- from .api import PaginationConfigMap
- from .api import RequestOptions
- from .api import RetryPolicy
- from .api import Url
- from .config import load_pipeline_config
- from .enums import DataConnectorType
- from .enums import coerce_data_connector_type
+ from ..api import EndpointClient # noqa: F401 (re-exported for tests)
+ from ..api import HttpMethod
+ from ..api import PaginationConfigMap
+ from ..api import RequestOptions
+ from ..api import compose_api_request_env
+ from ..api import compose_api_target_env
+ from ..api import paginate_with_client
+ from ..enums import DataConnectorType
+ from ..file import FileFormat
+ from ..types import JSONData
+ from ..types import JSONDict
+ from ..types import PipelineConfig
+ from ..types import StrPath
+ from ..types import Timeout
+ from ..utils import print_json
+ from ..workflow import load_pipeline_config
  from .extract import extract
  from .load import load
- from .run_helpers import compose_api_request_env
- from .run_helpers import compose_api_target_env
- from .run_helpers import paginate_with_client
  from .transform import transform
- from .types import JSONDict
- from .types import Timeout
- from .utils import print_json
+ from .utils import maybe_validate
  from .validate import validate
- from .validation.utils import maybe_validate

  # SECTION: EXPORTS ========================================================== #


- __all__ = ['run']
-
-
- # SECTION: TYPED DICTS ====================================================== #
-
+ __all__ = [
+ # Functions
+ 'run',
+ 'run_pipeline',
+ ]

- class BaseApiHttpEnv(TypedDict, total=False):
- """
- Common HTTP request environment for API interactions.
-
- Fields shared by both source-side and target-side API operations.
- """

- # Request details
- url: Url | None
- headers: dict[str, str]
- timeout: Timeout
-
- # Session
- session: requests.Session | None
-
-
- class ApiRequestEnv(BaseApiHttpEnv, total=False):
- """
- Composed request environment for API sources.
+ # SECTION: CONSTANTS ======================================================== #

- Returned by ``compose_api_request_env`` (run_helpers) and consumed by the
- API extract branch. Values are fully merged with endpoint/API defaults and
- job-level overrides, preserving the original precedence and behavior.
- """

- # Client
- use_endpoints: bool
- base_url: str | None
- base_path: str | None
- endpoints_map: dict[str, str] | None
- endpoint_key: str | None
+ DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'

- # Request
- params: dict[str, Any]
- pagination: PaginationConfigMap | None
- sleep_seconds: float

- # Reliability
- retry: RetryPolicy | None
- retry_network_errors: bool
+ # SECTION: INTERNAL FUNCTIONS =============================================== #


- class ApiTargetEnv(BaseApiHttpEnv, total=False):
- """
- Composed request environment for API targets.
-
- Returned by ``compose_api_target_env`` (run_helpers) and consumed by the
- API load branch. Values are merged from the target object, optional
- API/endpoint reference, and job-level overrides, preserving original
- precedence and behavior.
-
- Notes
- -----
- - Precedence for inherited values matches original logic:
- overrides -> target -> API profile defaults.
- - Target composition does not include pagination/rate-limit/retry since
- loads are single-request operations; only headers/timeout/session
- apply.
+ def _resolve_validation_config(
+ job_obj: Any,
+ cfg: Any,
+ ) -> tuple[bool, dict[str, Any], str, str]:
  """
+ Resolve validation settings for a job with safe defaults.

- # Request
- method: str | None
-
-
- class SessionConfig(TypedDict, total=False):
- """
- Minimal session configuration schema accepted by this runner.
+ Parameters
+ ----------
+ job_obj : Any
+ Job configuration object.
+ cfg : Any
+ Pipeline configuration object with validations.

- Keys mirror common requests.Session options; all are optional.
+ Returns
+ -------
+ tuple[bool, dict[str, Any], str, str]
+ Tuple of (enabled, rules, severity, phase).
  """
+ val_ref = job_obj.validate
+ if val_ref is None:
+ return False, {}, 'error', 'before_transform'

- headers: Mapping[str, Any]
- params: Mapping[str, Any]
- auth: Any # (user, pass) tuple or requests-compatible auth object
- verify: bool | str
- cert: Any # str or (cert, key)
- proxies: Mapping[str, Any]
- cookies: Mapping[str, Any]
- trust_env: bool
-
-
- # SECTION: CONSTANTS ======================================================== #
-
-
- DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'
+ rules = cfg.validations.get(val_ref.ruleset, {})
+ severity = (val_ref.severity or 'error').lower()
+ phase = (val_ref.phase or 'before_transform').lower()
+ return True, rules, severity, phase


  # SECTION: FUNCTIONS ======================================================== #
@@ -185,8 +136,7 @@ def run(

  data: Any
  stype_raw = getattr(source_obj, 'type', None)
- stype = coerce_data_connector_type(stype_raw or '')
- match stype:
+ match DataConnectorType.coerce(stype_raw or ''):
  case DataConnectorType.FILE:
  path = getattr(source_obj, 'path', None)
  fmt = ex_opts.get('format') or getattr(
@@ -209,12 +159,15 @@ def run(
  and env.get('endpoint_key')
  ):
  # Construct client using module-level EndpointClient so tests
- # can monkeypatch this class on etlplus.run.
+ # can monkeypatch this class on etlplus.ops.run.
  ClientClass = EndpointClient # noqa: N806
  client = ClientClass(
- base_url=cast(str, env['base_url']),
+ base_url=cast(str, env.get('base_url')),
  base_path=cast(str | None, env.get('base_path')),
- endpoints=cast(dict[str, str], env['endpoints_map']),
+ endpoints=cast(
+ dict[str, str],
+ env.get('endpoints_map', {}),
+ ),
  retry=env.get('retry'),
  retry_network_errors=bool(
  env.get('retry_network_errors', False),
@@ -223,7 +176,7 @@
  )
  data = paginate_with_client(
  client,
- cast(str, env['endpoint_key']),
+ cast(str, env.get('endpoint_key')),
  env.get('params'),
  env.get('headers'),
  env.get('timeout'),
@@ -261,23 +214,14 @@
  sleep_seconds=cast(float, env.get('sleep_seconds', 0.0)),
  )
  case _:
- # ``coerce_data_connector_type`` already raises for invalid
- # connector types; this branch is defensive only.
+ # :meth:`coerce` already raises for invalid connector types, but
+ # keep explicit guard for defensive programming.
  raise ValueError(f'Unsupported source type: {stype_raw}')

- # DRY: unified validation helper (pre/post transform)
- val_ref = job_obj.validate
- enabled_validation = val_ref is not None
- if enabled_validation:
- # Type narrowing for static checkers
- assert val_ref is not None
- rules = cfg.validations.get(val_ref.ruleset, {})
- severity = (val_ref.severity or 'error').lower()
- phase = (val_ref.phase or 'before_transform').lower()
- else:
- rules = {}
- severity = 'error'
- phase = 'before_transform'
+ enabled_validation, rules, severity, phase = _resolve_validation_config(
+ job_obj,
+ cfg,
+ )

  # Pre-transform validation (if configured).
  data = maybe_validate(
@@ -318,8 +262,7 @@ def run(
  overrides = job_obj.load.overrides or {}

  ttype_raw = getattr(target_obj, 'type', None)
- ttype = coerce_data_connector_type(ttype_raw or '')
- match ttype:
+ match DataConnectorType.coerce(ttype_raw or ''):
  case DataConnectorType.FILE:
  path = overrides.get('path') or getattr(target_obj, 'path', None)
  fmt = overrides.get('format') or getattr(
@@ -336,12 +279,14 @@ def run(
  if not url_t:
  raise ValueError('API target missing "url"')
  kwargs_t: dict[str, Any] = {}
- if env_t.get('headers'):
- kwargs_t['headers'] = cast(dict[str, str], env_t['headers'])
+ headers = env_t.get('headers')
+ if headers:
+ kwargs_t['headers'] = cast(dict[str, str], headers)
  if env_t.get('timeout') is not None:
- kwargs_t['timeout'] = env_t['timeout']
- if env_t.get('session') is not None:
- kwargs_t['session'] = env_t['session']
+ kwargs_t['timeout'] = env_t.get('timeout')
+ session = env_t.get('session')
+ if session is not None:
+ kwargs_t['session'] = session
  result = load(
  data,
  'api',
@@ -357,10 +302,97 @@
  )
  result = load(data, 'database', str(conn))
  case _:
- # ``coerce_data_connector_type`` already raises for invalid
- # connector types; this branch is defensive only.
+ # :meth:`coerce` already raises for invalid connector types, but
+ # keep explicit guard for defensive programming.
  raise ValueError(f'Unsupported target type: {ttype_raw}')

  # Return the terminal load result directly; callers (e.g., CLI) can wrap
  # it in their own envelope when needed.
  return cast(JSONDict, result)
+
+
+ def run_pipeline(
+ *,
+ source_type: DataConnectorType | str | None = None,
+ source: StrPath | JSONData | None = None,
+ operations: PipelineConfig | None = None,
+ target_type: DataConnectorType | str | None = None,
+ target: StrPath | None = None,
+ file_format: FileFormat | str | None = None,
+ method: HttpMethod | str | None = None,
+ **kwargs: Any,
+ ) -> JSONData:
+ """
+ Run a single extract-transform-load flow without a YAML config.
+
+ Parameters
+ ----------
+ source_type : DataConnectorType | str | None, optional
+ Connector type for extraction. When ``None``, ``source`` is assumed
+ to be pre-loaded data and extraction is skipped.
+ source : StrPath | JSONData | None, optional
+ Data source for extraction or the pre-loaded payload when
+ ``source_type`` is ``None``.
+ operations : PipelineConfig | None, optional
+ Transform configuration passed to :func:`etlplus.ops.transform`.
+ target_type : DataConnectorType | str | None, optional
+ Connector type for loading. When ``None``, load is skipped and the
+ transformed data is returned.
+ target : StrPath | None, optional
+ Target for loading (file path, connection string, or API URL).
+ file_format : FileFormat | str | None, optional
+ File format for file sources/targets (forwarded to extract/load).
+ method : HttpMethod | str | None, optional
+ HTTP method for API loads (forwarded to :func:`etlplus.ops.load`).
+ **kwargs : Any
+ Extra keyword arguments forwarded to extract/load for API options
+ (headers, timeout, session, etc.).
+
+ Returns
+ -------
+ JSONData
+ Transformed data or the load result payload.
+
+ Raises
+ ------
+ TypeError
+ Raised when extracted data is not a dict or list of dicts and no
+ target is specified.
+ ValueError
+ Raised when required source/target inputs are missing.
+ """
+ if source_type is None:
+ if source is None:
+ raise ValueError('source or source_type is required')
+ data = source
+ else:
+ if source is None:
+ raise ValueError('source is required when source_type is set')
+ data = extract(
+ source_type,
+ cast(StrPath, source),
+ file_format=file_format,
+ **kwargs,
+ )
+
+ if operations:
+ data = transform(data, operations)
+
+ if target_type is None:
+ if not isinstance(data, (dict, list)):
+ raise TypeError(
+ f'Expected data to be dict or list of dicts, '
+ f'got {type(data).__name__}',
+ )
+ return data
+ if target is None:
+ raise ValueError('target is required when target_type is set')
+
+ return load(
+ data,
+ target_type,
+ target,
+ file_format=file_format,
+ method=method,
+ **kwargs,
+ )
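The new `run_pipeline` helper covers the ad-hoc case where no YAML pipeline exists. A hypothetical invocation based on the signature above (the record values are illustrative, and the string operator `'gte'` is assumed to mirror `OperatorName.GTE`):

```python
from etlplus.ops.run import run_pipeline

records = [
    {'name': 'Ada', 'age': 36},
    {'name': 'Linus', 'age': 12},
]

# With no source_type/target_type, ``records`` is treated as pre-loaded
# data, the transform operations are applied, and the result is returned
# directly instead of being loaded anywhere.
adults = run_pipeline(
    source=records,
    operations={'filter': {'field': 'age', 'op': 'gte', 'value': 18}},
)
```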
etlplus/{transform.py → ops/transform.py}
@@ -1,5 +1,5 @@
  """
- :mod:`etlplus.transform` module.
+ :mod:`etlplus.ops.transform` module.

  Helpers to filter, map/rename, select, sort, aggregate, and otherwise
  transform JSON-like records (dicts and lists of dicts).
@@ -24,7 +24,7 @@ Basic pipeline with strings::

  Using enums for keys and functions::

- from .enums import PipelineStep, OperatorName, AggregateName
+ from etlplus.enums import PipelineStep, OperatorName, AggregateName
  ops = {
  PipelineStep.FILTER: {
  'field': 'age', 'op': OperatorName.GTE, 'value': 18
@@ -44,28 +44,28 @@ from collections.abc import Sequence
  from typing import Any
  from typing import cast

- from .enums import AggregateName
- from .enums import OperatorName
- from .enums import PipelineStep
+ from ..enums import AggregateName
+ from ..enums import OperatorName
+ from ..enums import PipelineStep
+ from ..types import AggregateFunc
+ from ..types import AggregateSpec
+ from ..types import FieldName
+ from ..types import Fields
+ from ..types import FilterSpec
+ from ..types import JSONData
+ from ..types import JSONDict
+ from ..types import JSONList
+ from ..types import MapSpec
+ from ..types import OperatorFunc
+ from ..types import PipelineConfig
+ from ..types import PipelineStepName
+ from ..types import SortKey
+ from ..types import StepApplier
+ from ..types import StepOrSteps
+ from ..types import StepSpec
+ from ..types import StrPath
+ from ..utils import to_number
  from .load import load_data
- from .types import AggregateFunc
- from .types import AggregateSpec
- from .types import FieldName
- from .types import Fields
- from .types import FilterSpec
- from .types import JSONData
- from .types import JSONDict
- from .types import JSONList
- from .types import MapSpec
- from .types import OperatorFunc
- from .types import PipelineConfig
- from .types import PipelineStepName
- from .types import SortKey
- from .types import StepApplier
- from .types import StepOrSteps
- from .types import StepSpec
- from .types import StrPath
- from .utils import to_number

  # SECTION: EXPORTS ========================================================== #

@@ -730,15 +730,16 @@ def _is_plain_fields_list(obj: Any) -> bool:


  _PIPELINE_STEPS: tuple[PipelineStepName, ...] = (
+ 'aggregate',
  'filter',
  'map',
  'select',
  'sort',
- 'aggregate',
  )


  _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
+ 'aggregate': _apply_aggregate_step,
  'filter': _apply_filter_step,
  'map': _apply_map_step,
  'select': _apply_select_step,
@@ -746,7 +747,54 @@ _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
  }


- # SECTION: EXPORTS ========================================================== #
+ # SECTION: FUNCTIONS ======================================================== #
+
+
+ # -- Helpers -- #
+
+
+ def apply_aggregate(
+ records: JSONList,
+ operation: AggregateSpec,
+ ) -> JSONDict:
+ """
+ Aggregate a numeric field or count presence.
+
+ Parameters
+ ----------
+ records : JSONList
+ Records to aggregate.
+ operation : AggregateSpec
+ Dict with keys ``field`` and ``func``. ``func`` is one of
+ ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
+ A callable may also be supplied for ``func``. Optionally, set
+ ``alias`` to control the output key name.
+
+ Returns
+ -------
+ JSONDict
+ A single-row result like ``{"sum_age": 42}``.
+
+ Notes
+ -----
+ Numeric operations ignore non-numeric values but count their presence
+ for ``'count'``.
+ """
+ field = operation.get('field')
+ func = operation.get('func')
+ alias = operation.get('alias')
+
+ if not field or func is None:
+ return {'error': 'Invalid aggregation operation'}
+
+ try:
+ aggregator = _resolve_aggregator(func)
+ except TypeError:
+ return {'error': f'Unknown aggregation function: {func}'}
+
+ nums, present = _collect_numeric_and_presence(records, field)
+ key_name = _derive_agg_key(func, field, alias)
+ return {key_name: aggregator(nums, present)}


  def apply_filter(
@@ -894,48 +942,7 @@ def apply_sort(
  )


- def apply_aggregate(
- records: JSONList,
- operation: AggregateSpec,
- ) -> JSONDict:
- """
- Aggregate a numeric field or count presence.
-
- Parameters
- ----------
- records : JSONList
- Records to aggregate.
- operation : AggregateSpec
- Dict with keys ``field`` and ``func``. ``func`` is one of
- ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
- A callable may also be supplied for ``func``. Optionally, set
- ``alias`` to control the output key name.
-
- Returns
- -------
- JSONDict
- A single-row result like ``{"sum_age": 42}``.
-
- Notes
- -----
- Numeric operations ignore non-numeric values but count their presence
- for ``'count'``.
- """
- field = operation.get('field')
- func = operation.get('func')
- alias = operation.get('alias')
-
- if not field or func is None:
- return {'error': 'Invalid aggregation operation'}
-
- try:
- aggregator = _resolve_aggregator(func)
- except TypeError:
- return {'error': f'Unknown aggregation function: {func}'}
-
- nums, present = _collect_numeric_and_presence(records, field)
- key_name = _derive_agg_key(func, field, alias)
- return {key_name: aggregator(nums, present)}
+ # -- Orchestration -- #


  def transform(
@@ -982,7 +989,7 @@ def transform(

  Using enums for keys and functions::

- from .enums import PipelineStep, OperatorName, AggregateName
+ from etlplus.enums import PipelineStep, OperatorName, AggregateName
  ops = {
  PipelineStep.FILTER: {
  'field': 'age', 'op': OperatorName.GTE, 'value': 18
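`apply_aggregate` keeps its contract after the move; only its position (under the new "Helpers" banner) and the ordering of `'aggregate'` in `_PIPELINE_STEPS`/`_STEP_APPLIERS` change. A small illustration of that contract, assuming the function is imported from its new `etlplus.ops.transform` location and that numeric formatting follows the docstring above:

```python
from etlplus.ops.transform import apply_aggregate

records = [
    {'name': 'Ada', 'age': 36},
    {'name': 'Grace', 'age': 45},
]

# Output key is derived as '<func>_<field>' unless 'alias' is supplied.
print(apply_aggregate(records, {'field': 'age', 'func': 'sum'}))
# {'sum_age': 81}

print(apply_aggregate(records, {'field': 'age', 'func': 'avg', 'alias': 'mean_age'}))
# {'mean_age': 40.5}
```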
etlplus/{validation → ops}/utils.py
@@ -1,26 +1,27 @@
  """
- :mod:`etlplus.validation.utils` module.
+ :mod:`etlplus.ops.utils` module.

- Utility helpers for conditional validation orchestration.
+ Utility helpers for conditional data ops orchestration.

  The helpers defined here embrace a "high cohesion, low coupling" design by
  isolating normalization, configuration, and logging responsibilities. The
  resulting surface keeps ``maybe_validate`` focused on orchestration while
  offloading ancillary concerns to composable helpers.
-
  """

  from __future__ import annotations

  from collections.abc import Callable
  from dataclasses import dataclass
+ from types import MappingProxyType
  from typing import Any
  from typing import Literal
  from typing import Self
  from typing import TypedDict
+ from typing import cast

  from ..types import StrAnyMap
- from ..utils import normalized_str
+ from ..utils import normalize_choice

  # SECTION: TYPED DICTIONARIES =============================================== #

@@ -47,6 +48,30 @@ type ValidateFn = Callable[[Any, Ruleset], ValidationResult]
  type PrintFn = Callable[[Any], None]


+ # SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+ _PHASE_CHOICES = MappingProxyType(
+ {
+ 'before_transform': 'before_transform',
+ 'after_transform': 'after_transform',
+ },
+ )
+ _SEVERITY_CHOICES = MappingProxyType(
+ {
+ 'warn': 'warn',
+ 'error': 'error',
+ },
+ )
+ _WINDOW_CHOICES = MappingProxyType(
+ {
+ 'before_transform': 'before_transform',
+ 'after_transform': 'after_transform',
+ 'both': 'both',
+ },
+ )
+
+

  # SECTION: DATA CLASSES ===================================================== #

@@ -291,11 +316,14 @@ def _normalize_phase(
  Normalized validation phase. Defaults to ``"before_transform"`` when
  unspecified.
  """
- match normalized_str(value):
- case 'after_transform':
- return 'after_transform'
- case _:
- return 'before_transform'
+ return cast(
+ ValidationPhase,
+ normalize_choice(
+ value,
+ mapping=_PHASE_CHOICES,
+ default='before_transform',
+ ),
+ )


  def _normalize_severity(
@@ -314,7 +342,14 @@ def _normalize_severity(
  ValidationSeverity
  Normalized severity. Defaults to ``"error"`` when unspecified.
  """
- return 'warn' if normalized_str(value) == 'warn' else 'error'
+ return cast(
+ ValidationSeverity,
+ normalize_choice(
+ value,
+ mapping=_SEVERITY_CHOICES,
+ default='error',
+ ),
+ )


  def _normalize_window(
@@ -333,13 +368,14 @@ def _normalize_window(
  ValidationWindow
  Normalized validation window. Defaults to ``"both"`` when unspecified.
  """
- match normalized_str(value):
- case 'before_transform':
- return 'before_transform'
- case 'after_transform':
- return 'after_transform'
- case _:
- return 'both'
+ return cast(
+ ValidationWindow,
+ normalize_choice(
+ value,
+ mapping=_WINDOW_CHOICES,
+ default='both',
+ ),
+ )


  def _rule_name(
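The three `_normalize_*` helpers above now delegate to a shared `normalize_choice` utility added to `etlplus/utils.py` in this release. Its implementation is not shown in this diff; a hypothetical equivalent that matches the call sites (`normalize_choice(value, mapping=..., default=...)`) might look like the following sketch, where the normalization rules are assumptions rather than the library's actual behavior:

```python
from collections.abc import Mapping
from typing import Any


def normalize_choice(
    value: Any,
    *,
    mapping: Mapping[str, str],
    default: str,
) -> str:
    """Map a free-form value onto a known choice, falling back to a default.

    Hypothetical sketch only: the real ``etlplus.utils.normalize_choice``
    may differ in its casing rules, type handling, and error behavior.
    """
    if value is None:
        return default
    key = str(value).strip().lower()
    return mapping.get(key, default)


# Mirrors the severity normalizer: 'WARN' -> 'warn', None -> 'error'.
assert normalize_choice('WARN', mapping={'warn': 'warn', 'error': 'error'}, default='error') == 'warn'
assert normalize_choice(None, mapping={'warn': 'warn', 'error': 'error'}, default='error') == 'error'
```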