etlplus 0.9.2__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +26 -1
- etlplus/api/README.md +3 -51
- etlplus/api/__init__.py +0 -10
- etlplus/api/config.py +28 -39
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +1 -13
- etlplus/api/rate_limiting/rate_limiter.py +11 -8
- etlplus/api/request_manager.py +6 -11
- etlplus/api/transport.py +2 -14
- etlplus/api/types.py +6 -96
- etlplus/cli/commands.py +43 -76
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +12 -40
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +7 -4
- etlplus/{workflow → config}/__init__.py +23 -10
- etlplus/{workflow → config}/connector.py +44 -58
- etlplus/{workflow → config}/jobs.py +32 -105
- etlplus/{workflow → config}/pipeline.py +51 -59
- etlplus/{workflow → config}/profile.py +5 -8
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +3 -19
- etlplus/database/orm.py +0 -2
- etlplus/database/schema.py +1 -1
- etlplus/enums.py +266 -0
- etlplus/{ops/extract.py → extract.py} +99 -81
- etlplus/file.py +652 -0
- etlplus/{ops/load.py → load.py} +101 -78
- etlplus/{ops/run.py → run.py} +127 -159
- etlplus/{api/utils.py → run_helpers.py} +153 -209
- etlplus/{ops/transform.py → transform.py} +68 -75
- etlplus/types.py +4 -5
- etlplus/utils.py +2 -136
- etlplus/{ops/validate.py → validate.py} +12 -22
- etlplus/validation/__init__.py +44 -0
- etlplus/{ops → validation}/utils.py +17 -53
- {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/METADATA +17 -210
- etlplus-0.10.1.dist-info/RECORD +65 -0
- {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/WHEEL +1 -1
- etlplus/README.md +0 -37
- etlplus/api/enums.py +0 -51
- etlplus/cli/README.md +0 -40
- etlplus/database/README.md +0 -48
- etlplus/file/README.md +0 -105
- etlplus/file/__init__.py +0 -25
- etlplus/file/_imports.py +0 -141
- etlplus/file/_io.py +0 -160
- etlplus/file/accdb.py +0 -78
- etlplus/file/arrow.py +0 -78
- etlplus/file/avro.py +0 -176
- etlplus/file/bson.py +0 -77
- etlplus/file/cbor.py +0 -78
- etlplus/file/cfg.py +0 -79
- etlplus/file/conf.py +0 -80
- etlplus/file/core.py +0 -322
- etlplus/file/csv.py +0 -79
- etlplus/file/dat.py +0 -78
- etlplus/file/dta.py +0 -77
- etlplus/file/duckdb.py +0 -78
- etlplus/file/enums.py +0 -343
- etlplus/file/feather.py +0 -111
- etlplus/file/fwf.py +0 -77
- etlplus/file/gz.py +0 -123
- etlplus/file/hbs.py +0 -78
- etlplus/file/hdf5.py +0 -78
- etlplus/file/ini.py +0 -79
- etlplus/file/ion.py +0 -78
- etlplus/file/jinja2.py +0 -78
- etlplus/file/json.py +0 -98
- etlplus/file/log.py +0 -78
- etlplus/file/mat.py +0 -78
- etlplus/file/mdb.py +0 -78
- etlplus/file/msgpack.py +0 -78
- etlplus/file/mustache.py +0 -78
- etlplus/file/nc.py +0 -78
- etlplus/file/ndjson.py +0 -108
- etlplus/file/numbers.py +0 -75
- etlplus/file/ods.py +0 -79
- etlplus/file/orc.py +0 -111
- etlplus/file/parquet.py +0 -113
- etlplus/file/pb.py +0 -78
- etlplus/file/pbf.py +0 -77
- etlplus/file/properties.py +0 -78
- etlplus/file/proto.py +0 -77
- etlplus/file/psv.py +0 -79
- etlplus/file/rda.py +0 -78
- etlplus/file/rds.py +0 -78
- etlplus/file/sas7bdat.py +0 -78
- etlplus/file/sav.py +0 -77
- etlplus/file/sqlite.py +0 -78
- etlplus/file/stub.py +0 -84
- etlplus/file/sylk.py +0 -77
- etlplus/file/tab.py +0 -81
- etlplus/file/toml.py +0 -78
- etlplus/file/tsv.py +0 -80
- etlplus/file/txt.py +0 -102
- etlplus/file/vm.py +0 -78
- etlplus/file/wks.py +0 -77
- etlplus/file/xls.py +0 -88
- etlplus/file/xlsm.py +0 -79
- etlplus/file/xlsx.py +0 -99
- etlplus/file/xml.py +0 -185
- etlplus/file/xpt.py +0 -78
- etlplus/file/yaml.py +0 -95
- etlplus/file/zip.py +0 -175
- etlplus/file/zsav.py +0 -77
- etlplus/ops/README.md +0 -50
- etlplus/ops/__init__.py +0 -61
- etlplus/templates/README.md +0 -46
- etlplus/workflow/README.md +0 -52
- etlplus/workflow/dag.py +0 -105
- etlplus/workflow/types.py +0 -115
- etlplus-0.9.2.dist-info/RECORD +0 -134
- {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.transform` module.
 
 Helpers to filter, map/rename, select, sort, aggregate, and otherwise
 transform JSON-like records (dicts and lists of dicts).
@@ -24,7 +24,7 @@ Basic pipeline with strings::
 
 Using enums for keys and functions::
 
-    from
+    from .enums import PipelineStep, OperatorName, AggregateName
     ops = {
        PipelineStep.FILTER: {
            'field': 'age', 'op': OperatorName.GTE, 'value': 18
@@ -44,28 +44,28 @@ from collections.abc import Sequence
 from typing import Any
 from typing import cast
 
-from
-from
-from
-from ..types import AggregateFunc
-from ..types import AggregateSpec
-from ..types import FieldName
-from ..types import Fields
-from ..types import FilterSpec
-from ..types import JSONData
-from ..types import JSONDict
-from ..types import JSONList
-from ..types import MapSpec
-from ..types import OperatorFunc
-from ..types import PipelineConfig
-from ..types import PipelineStepName
-from ..types import SortKey
-from ..types import StepApplier
-from ..types import StepOrSteps
-from ..types import StepSpec
-from ..types import StrPath
-from ..utils import to_number
+from .enums import AggregateName
+from .enums import OperatorName
+from .enums import PipelineStep
 from .load import load_data
+from .types import AggregateFunc
+from .types import AggregateSpec
+from .types import FieldName
+from .types import Fields
+from .types import FilterSpec
+from .types import JSONData
+from .types import JSONDict
+from .types import JSONList
+from .types import MapSpec
+from .types import OperatorFunc
+from .types import PipelineConfig
+from .types import PipelineStepName
+from .types import SortKey
+from .types import StepApplier
+from .types import StepOrSteps
+from .types import StepSpec
+from .types import StrPath
+from .utils import to_number
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -730,16 +730,15 @@ def _is_plain_fields_list(obj: Any) -> bool:
 
 
 _PIPELINE_STEPS: tuple[PipelineStepName, ...] = (
-    'aggregate',
     'filter',
     'map',
     'select',
     'sort',
+    'aggregate',
 )
 
 
 _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
-    'aggregate': _apply_aggregate_step,
     'filter': _apply_filter_step,
     'map': _apply_map_step,
     'select': _apply_select_step,
@@ -747,54 +746,7 @@ _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
 }
 
 
-# SECTION:
-
-
-# -- Helpers -- #
-
-
-def apply_aggregate(
-    records: JSONList,
-    operation: AggregateSpec,
-) -> JSONDict:
-    """
-    Aggregate a numeric field or count presence.
-
-    Parameters
-    ----------
-    records : JSONList
-        Records to aggregate.
-    operation : AggregateSpec
-        Dict with keys ``field`` and ``func``. ``func`` is one of
-        ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
-        A callable may also be supplied for ``func``. Optionally, set
-        ``alias`` to control the output key name.
-
-    Returns
-    -------
-    JSONDict
-        A single-row result like ``{"sum_age": 42}``.
-
-    Notes
-    -----
-    Numeric operations ignore non-numeric values but count their presence
-    for ``'count'``.
-    """
-    field = operation.get('field')
-    func = operation.get('func')
-    alias = operation.get('alias')
-
-    if not field or func is None:
-        return {'error': 'Invalid aggregation operation'}
-
-    try:
-        aggregator = _resolve_aggregator(func)
-    except TypeError:
-        return {'error': f'Unknown aggregation function: {func}'}
-
-    nums, present = _collect_numeric_and_presence(records, field)
-    key_name = _derive_agg_key(func, field, alias)
-    return {key_name: aggregator(nums, present)}
+# SECTION: EXPORTS ========================================================== #
 
 
 def apply_filter(
@@ -942,7 +894,48 @@ def apply_sort(
     )
 
 
-
+def apply_aggregate(
+    records: JSONList,
+    operation: AggregateSpec,
+) -> JSONDict:
+    """
+    Aggregate a numeric field or count presence.
+
+    Parameters
+    ----------
+    records : JSONList
+        Records to aggregate.
+    operation : AggregateSpec
+        Dict with keys ``field`` and ``func``. ``func`` is one of
+        ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
+        A callable may also be supplied for ``func``. Optionally, set
+        ``alias`` to control the output key name.
+
+    Returns
+    -------
+    JSONDict
+        A single-row result like ``{"sum_age": 42}``.
+
+    Notes
+    -----
+    Numeric operations ignore non-numeric values but count their presence
+    for ``'count'``.
+    """
+    field = operation.get('field')
+    func = operation.get('func')
+    alias = operation.get('alias')
+
+    if not field or func is None:
+        return {'error': 'Invalid aggregation operation'}
+
+    try:
+        aggregator = _resolve_aggregator(func)
+    except TypeError:
+        return {'error': f'Unknown aggregation function: {func}'}
+
+    nums, present = _collect_numeric_and_presence(records, field)
+    key_name = _derive_agg_key(func, field, alias)
+    return {key_name: aggregator(nums, present)}
 
 
 def transform(
@@ -989,7 +982,7 @@ def transform(
 
 Using enums for keys and functions::
 
-    from
+    from .enums import PipelineStep, OperatorName, AggregateName
     ops = {
        PipelineStep.FILTER: {
            'field': 'age', 'op': OperatorName.GTE, 'value': 18
etlplus/types.py CHANGED
@@ -11,9 +11,8 @@ Notes
 
 See Also
 --------
-- :mod:`etlplus.api.types` for HTTP-specific aliases
-- :mod:`etlplus.
-  surfaces
+- :mod:`etlplus.api.types` for HTTP-specific aliases
+- :mod:`etlplus.config.types` for TypedDict surfaces
 
 Examples
 --------
@@ -194,8 +193,8 @@ type AggregateSpec = StrAnyMap
 
 # -- Pipelines-- #
 
-# Unified pipeline step spec consumed by :mod:`etlplus.
-type StepSpec =
+# Unified pipeline step spec consumed by :mod:`etlplus.transform`.
+type StepSpec = FilterSpec | MapSpec | SelectSpec | SortSpec | AggregateSpec
 
 # Collections of steps
 
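For context, the widened ``StepSpec`` alias accepts any of the per-step mapping shapes, so plain dicts qualify. A hypothetical sketch (the concrete key and operator strings such as ``'gte'`` are assumptions, not taken from this diff):

    from etlplus.types import StepSpec

    filter_step: StepSpec = {'field': 'age', 'op': 'gte', 'value': 18}
    aggregate_step: StepSpec = {'field': 'age', 'func': 'avg', 'alias': 'avg_age'}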
etlplus/utils.py CHANGED
@@ -8,7 +8,6 @@ from __future__ import annotations
 
 import json
 from collections.abc import Callable
-from collections.abc import Iterable
 from collections.abc import Mapping
 from typing import Any
 from typing import TypeVar
@@ -26,7 +25,6 @@ __all__ = [
     # Mapping utilities
     'cast_str_dict',
     'coerce_dict',
-    'deep_substitute',
     'maybe_mapping',
     # Float coercion
     'to_float',
@@ -41,8 +39,7 @@ __all__ = [
     # Generic number coercion
     'to_number',
     # Text processing
-    '
-    'normalize_str',
+    'normalized_str',
 ]
 
 
@@ -59,52 +56,6 @@ Num = TypeVar('Num', int, float)
 # -- Data Utilities -- #
 
 
-def deep_substitute(
-    value: Any,
-    vars_map: StrAnyMap | None,
-    env_map: Mapping[str, str] | None,
-) -> Any:
-    """
-    Recursively substitute ``${VAR}`` tokens in nested structures.
-
-    Only strings are substituted; other types are returned as-is.
-
-    Parameters
-    ----------
-    value : Any
-        The value to perform substitutions on.
-    vars_map : StrAnyMap | None
-        Mapping of variable names to replacement values (lower precedence).
-    env_map : Mapping[str, str] | None
-        Mapping of environment variables overriding ``vars_map`` values
-        (higher precedence).
-
-    Returns
-    -------
-    Any
-        New structure with substitutions applied where tokens were found.
-    """
-    substitutions = _prepare_substitutions(vars_map, env_map)
-
-    def _apply(node: Any) -> Any:
-        match node:
-            case str():
-                return _replace_tokens(node, substitutions)
-            case Mapping():
-                return {k: _apply(v) for k, v in node.items()}
-            case list() | tuple() as seq:
-                apply = [_apply(item) for item in seq]
-                return apply if isinstance(seq, list) else tuple(apply)
-            case set():
-                return {_apply(item) for item in node}
-            case frozenset():
-                return frozenset(_apply(item) for item in node)
-            case _:
-                return node
-
-    return _apply(value)
-
-
 def cast_str_dict(
     mapping: StrAnyMap | None,
 ) -> dict[str, str]:
@@ -421,7 +372,7 @@ def to_number(
 # -- Text Processing -- #
 
 
-def
+def normalized_str(
     value: str | None,
 ) -> str:
     """
@@ -441,36 +392,6 @@ def normalize_str(
     return (value or '').strip().lower()
 
 
-def normalize_choice(
-    value: str | None,
-    *,
-    mapping: Mapping[str, str],
-    default: str,
-    normalize: Callable[[str | None], str] = normalize_str,
-) -> str:
-    """
-    Normalize a string choice using a mapping and fallback.
-
-    Parameters
-    ----------
-    value : str | None
-        Input value to normalize.
-    mapping : Mapping[str, str]
-        Mapping of acceptable normalized inputs to output values.
-    default : str
-        Default return value when input is missing or unrecognized.
-    normalize : Callable[[str | None], str], optional
-        Normalization function applied to *value*. Defaults to
-        :func:`normalize_str`.
-
-    Returns
-    -------
-    str
-        Normalized mapped value or ``default``.
-    """
-    return mapping.get(normalize(value), default)
-
-
 # SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
@@ -504,61 +425,6 @@ def _clamp(
     return value
 
 
-def _prepare_substitutions(
-    vars_map: StrAnyMap | None,
-    env_map: Mapping[str, Any] | None,
-) -> tuple[tuple[str, Any], ...]:
-    """
-    Merge variable and environment maps into an ordered substitutions list.
-
-    Parameters
-    ----------
-    vars_map : StrAnyMap | None
-        Mapping of variable names to replacement values (lower precedence).
-    env_map : Mapping[str, Any] | None
-        Environment-backed values that override entries from ``vars_map``.
-
-    Returns
-    -------
-    tuple[tuple[str, Any], ...]
-        Immutable sequence of ``(name, value)`` pairs suitable for token
-        replacement.
-    """
-    if not vars_map and not env_map:
-        return ()
-    merged: dict[str, Any] = {**(vars_map or {}), **(env_map or {})}
-    return tuple(merged.items())
-
-
-def _replace_tokens(
-    text: str,
-    substitutions: Iterable[tuple[str, Any]],
-) -> str:
-    """
-    Replace ``${VAR}`` tokens in ``text`` using ``substitutions``.
-
-    Parameters
-    ----------
-    text : str
-        Input string that may contain ``${VAR}`` tokens.
-    substitutions : Iterable[tuple[str, Any]]
-        Sequence of ``(name, value)`` pairs used for token replacement.
-
-    Returns
-    -------
-    str
-        Updated text with replacements applied.
-    """
-    if not substitutions:
-        return text
-    out = text
-    for name, replacement in substitutions:
-        token = f'${{{name}}}'
-        if token in out:
-            out = out.replace(token, str(replacement))
-    return out
-
-
 def _coerce_float(
     value: object,
 ) -> float | None:
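The renamed ``normalized_str`` keeps the trim-and-lowercase behavior shown above, and the removed ``normalize_choice`` pattern can be reproduced inline where a mapping-with-fallback is still wanted. A small sketch (``level`` is a hypothetical input variable):

    from etlplus.utils import normalized_str

    normalized_str('  Before_Transform  ')  # -> 'before_transform'
    normalized_str(None)                    # -> ''

    # Inline stand-in for the removed normalize_choice(value, mapping=..., default=...):
    level = 'WARN'
    severity = {'warn': 'warn', 'error': 'error'}.get(normalized_str(level), 'error')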
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.validation` module.
 
 Validate dicts and lists of dicts using simple, schema-like rules.
 
@@ -11,8 +11,8 @@ Highlights
 ----------
 - Centralized type map and helpers for clarity and reuse.
 - Consistent error wording; field and item paths like ``[2].email``.
-- Small, focused public API with
-
+- Small, focused public API with ``load_data``, ``validate_field``,
+  ``validate``.
 
 Examples
 --------
@@ -34,11 +34,11 @@ from typing import Final
 from typing import Literal
 from typing import TypedDict
 
-from ..types import JSONData
-from ..types import Record
-from ..types import StrAnyMap
-from ..types import StrPath
 from .load import load_data
+from .types import JSONData
+from .types import Record
+from .types import StrAnyMap
+from .types import StrPath
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -66,7 +66,7 @@ TYPE_MAP: Final[dict[str, type | tuple[type, ...]]] = {
 }
 
 
-# SECTION:
+# SECTION: CLASSES ========================================================== #
 
 
 class FieldRules(TypedDict, total=False):
@@ -279,15 +279,11 @@ def _type_matches(
     bool
         ``True`` if the value matches the expected type; ``False`` if not.
     """
-    if expected == 'number':
-        return _is_number(value)
-    if expected == 'integer':
-        return isinstance(value, int) and not isinstance(value, bool)
-    if expected == 'boolean':
-        return isinstance(value, bool)
-
     py_type = TYPE_MAP.get(expected)
-
+    if py_type:
+        return isinstance(value, py_type)
+
+    return False
 
 
 def _validate_record(
@@ -334,9 +330,6 @@ def _validate_record(
 # SECTION: FUNCTIONS ======================================================== #
 
 
-# -- Helpers -- #
-
-
 def validate_field(
     value: Any,
     rules: StrAnyMap | FieldRules,
@@ -432,9 +425,6 @@ def validate_field(
     return {'valid': len(errors) == 0, 'errors': errors}
 
 
-# -- Orchestration -- #
-
-
 def validate(
     source: StrPath | JSONData,
     rules: RulesMap | None = None,
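The ``_type_matches`` change above drops the special cases for ``'number'``, ``'integer'``, and ``'boolean'`` in favor of a single ``TYPE_MAP`` lookup. A standalone sketch of that lookup-only check (the ``TYPE_MAP`` contents here are assumed for illustration and are not taken from this diff):

    from typing import Any

    TYPE_MAP: dict[str, type | tuple[type, ...]] = {
        'string': str,
        'integer': int,
        'number': (int, float),
        'boolean': bool,
    }

    def type_matches(value: Any, expected: str) -> bool:
        # Unknown type names simply fail to match.
        py_type = TYPE_MAP.get(expected)
        if py_type:
            return isinstance(value, py_type)
        return False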
@@ -0,0 +1,44 @@
+"""
+:mod:`etlplus.validation` package.
+
+Conditional validation utilities used across the ETL pipeline.
+
+The package intentionally exposes a single helper, :func:`maybe_validate`, to
+keep the public API compact and predictable. Supporting logic lives in
+``etlplus.validation.utils`` where validation configuration is normalized,
+reducing the likelihood of phase/option mismatches.
+
+Examples
+--------
+>>> from etlplus.validation import maybe_validate
+>>> payload = {'name': 'Alice'}
+>>> rules = {'required': ['name']}
+>>> def validator(data, config):
+...     missing = [field for field in config['required'] if field not in data]
+...     return {'valid': not missing, 'errors': missing, 'data': data}
+>>> maybe_validate(
+...     payload,
+...     when='both',
+...     enabled=True,
+...     rules=rules,
+...     phase='before_transform',
+...     severity='warn',
+...     validate_fn=validator,
+...     print_json_fn=lambda message: message,
+... )
+{'name': 'Alice'}
+
+See Also
+--------
+- :mod:`etlplus.validation.utils` for implementation details and helper
+  utilities.
+"""
+
+from __future__ import annotations
+
+from .utils import maybe_validate
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = ['maybe_validate']
@@ -1,27 +1,26 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.validation.utils` module.
 
-Utility helpers for conditional
+Utility helpers for conditional validation orchestration.
 
 The helpers defined here embrace a "high cohesion, low coupling" design by
 isolating normalization, configuration, and logging responsibilities. The
 resulting surface keeps ``maybe_validate`` focused on orchestration while
 offloading ancillary concerns to composable helpers.
+
 """
 
 from __future__ import annotations
 
 from collections.abc import Callable
 from dataclasses import dataclass
-from types import MappingProxyType
 from typing import Any
 from typing import Literal
 from typing import Self
 from typing import TypedDict
-from typing import cast
 
 from ..types import StrAnyMap
-from ..utils import
+from ..utils import normalized_str
 
 # SECTION: TYPED DICTIONARIES =============================================== #
 
@@ -48,30 +47,6 @@ type ValidateFn = Callable[[Any, Ruleset], ValidationResult]
 type PrintFn = Callable[[Any], None]
 
 
-# SECTION: INTERNAL CONSTANTS ============================================== #
-
-
-_PHASE_CHOICES = MappingProxyType(
-    {
-        'before_transform': 'before_transform',
-        'after_transform': 'after_transform',
-    },
-)
-_SEVERITY_CHOICES = MappingProxyType(
-    {
-        'warn': 'warn',
-        'error': 'error',
-    },
-)
-_WINDOW_CHOICES = MappingProxyType(
-    {
-        'before_transform': 'before_transform',
-        'after_transform': 'after_transform',
-        'both': 'both',
-    },
-)
-
-
 # SECTION: DATA CLASSES ===================================================== #
 
 
@@ -316,14 +291,11 @@ def _normalize_phase(
        Normalized validation phase. Defaults to ``"before_transform"`` when
        unspecified.
    """
-
-
-
-
-
-            default='before_transform',
-        ),
-    )
+    match normalized_str(value):
+        case 'after_transform':
+            return 'after_transform'
+        case _:
+            return 'before_transform'
 
 
 def _normalize_severity(
@@ -342,14 +314,7 @@ def _normalize_severity(
    ValidationSeverity
        Normalized severity. Defaults to ``"error"`` when unspecified.
    """
-    return
-        ValidationSeverity,
-        normalize_choice(
-            value,
-            mapping=_SEVERITY_CHOICES,
-            default='error',
-        ),
-    )
+    return 'warn' if normalized_str(value) == 'warn' else 'error'
 
 
 def _normalize_window(
@@ -368,14 +333,13 @@ def _normalize_window(
    ValidationWindow
        Normalized validation window. Defaults to ``"both"`` when unspecified.
    """
-
-
-
-
-
-
-
-    )
+    match normalized_str(value):
+        case 'before_transform':
+            return 'before_transform'
+        case 'after_transform':
+            return 'after_transform'
+        case _:
+            return 'both'
 
 
 def _rule_name(