etlplus 0.15.0__py3-none-any.whl → 0.16.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +25 -3
- etlplus/__init__.py +2 -0
- etlplus/api/README.md +31 -0
- etlplus/api/__init__.py +14 -14
- etlplus/api/auth.py +10 -7
- etlplus/api/config.py +8 -13
- etlplus/api/endpoint_client.py +20 -20
- etlplus/api/errors.py +4 -4
- etlplus/api/pagination/__init__.py +6 -6
- etlplus/api/pagination/config.py +12 -10
- etlplus/api/pagination/paginator.py +6 -7
- etlplus/api/rate_limiting/__init__.py +2 -2
- etlplus/api/rate_limiting/config.py +14 -14
- etlplus/api/rate_limiting/rate_limiter.py +3 -3
- etlplus/api/request_manager.py +4 -4
- etlplus/api/retry_manager.py +8 -8
- etlplus/api/transport.py +11 -11
- etlplus/api/types.py +131 -11
- etlplus/api/utils.py +50 -50
- etlplus/cli/commands.py +93 -60
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +43 -26
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +2 -2
- etlplus/cli/state.py +4 -7
- etlplus/{workflow/pipeline.py → config.py} +62 -99
- etlplus/connector/__init__.py +43 -0
- etlplus/connector/api.py +161 -0
- etlplus/connector/connector.py +26 -0
- etlplus/connector/core.py +132 -0
- etlplus/connector/database.py +122 -0
- etlplus/connector/enums.py +52 -0
- etlplus/connector/file.py +120 -0
- etlplus/connector/types.py +40 -0
- etlplus/connector/utils.py +122 -0
- etlplus/database/ddl.py +2 -2
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/enums.py +36 -200
- etlplus/file/_imports.py +1 -0
- etlplus/file/_io.py +52 -4
- etlplus/file/accdb.py +3 -2
- etlplus/file/arrow.py +3 -2
- etlplus/file/avro.py +3 -2
- etlplus/file/bson.py +3 -2
- etlplus/file/cbor.py +3 -2
- etlplus/file/cfg.py +3 -2
- etlplus/file/conf.py +3 -2
- etlplus/file/core.py +11 -8
- etlplus/file/csv.py +3 -2
- etlplus/file/dat.py +3 -2
- etlplus/file/dta.py +3 -2
- etlplus/file/duckdb.py +3 -2
- etlplus/file/enums.py +1 -1
- etlplus/file/feather.py +3 -2
- etlplus/file/fwf.py +3 -2
- etlplus/file/gz.py +3 -2
- etlplus/file/hbs.py +3 -2
- etlplus/file/hdf5.py +3 -2
- etlplus/file/ini.py +3 -2
- etlplus/file/ion.py +3 -2
- etlplus/file/jinja2.py +3 -2
- etlplus/file/json.py +5 -16
- etlplus/file/log.py +3 -2
- etlplus/file/mat.py +3 -2
- etlplus/file/mdb.py +3 -2
- etlplus/file/msgpack.py +3 -2
- etlplus/file/mustache.py +3 -2
- etlplus/file/nc.py +3 -2
- etlplus/file/ndjson.py +3 -2
- etlplus/file/numbers.py +3 -2
- etlplus/file/ods.py +3 -2
- etlplus/file/orc.py +3 -2
- etlplus/file/parquet.py +3 -2
- etlplus/file/pb.py +3 -2
- etlplus/file/pbf.py +3 -2
- etlplus/file/properties.py +3 -2
- etlplus/file/proto.py +3 -2
- etlplus/file/psv.py +3 -2
- etlplus/file/rda.py +3 -2
- etlplus/file/rds.py +3 -2
- etlplus/file/sas7bdat.py +3 -2
- etlplus/file/sav.py +3 -2
- etlplus/file/sqlite.py +3 -2
- etlplus/file/stub.py +1 -0
- etlplus/file/sylk.py +3 -2
- etlplus/file/tab.py +3 -2
- etlplus/file/toml.py +3 -2
- etlplus/file/tsv.py +3 -2
- etlplus/file/txt.py +4 -3
- etlplus/file/vm.py +3 -2
- etlplus/file/wks.py +3 -2
- etlplus/file/xls.py +3 -2
- etlplus/file/xlsm.py +3 -2
- etlplus/file/xlsx.py +3 -2
- etlplus/file/xml.py +9 -3
- etlplus/file/xpt.py +3 -2
- etlplus/file/yaml.py +5 -16
- etlplus/file/zip.py +3 -2
- etlplus/file/zsav.py +3 -2
- etlplus/ops/__init__.py +1 -0
- etlplus/ops/enums.py +173 -0
- etlplus/ops/extract.py +222 -23
- etlplus/ops/load.py +155 -36
- etlplus/ops/run.py +92 -107
- etlplus/ops/transform.py +48 -29
- etlplus/ops/types.py +147 -0
- etlplus/ops/utils.py +11 -40
- etlplus/ops/validate.py +16 -16
- etlplus/types.py +6 -102
- etlplus/utils.py +163 -29
- etlplus/workflow/README.md +0 -24
- etlplus/workflow/__init__.py +2 -15
- etlplus/workflow/dag.py +23 -1
- etlplus/workflow/jobs.py +83 -39
- etlplus/workflow/profile.py +4 -2
- {etlplus-0.15.0.dist-info → etlplus-0.16.6.dist-info}/METADATA +4 -4
- etlplus-0.16.6.dist-info/RECORD +143 -0
- {etlplus-0.15.0.dist-info → etlplus-0.16.6.dist-info}/WHEEL +1 -1
- etlplus/config/README.md +0 -50
- etlplus/config/__init__.py +0 -33
- etlplus/config/types.py +0 -140
- etlplus/dag.py +0 -103
- etlplus/workflow/connector.py +0 -373
- etlplus/workflow/types.py +0 -115
- etlplus/workflow/utils.py +0 -120
- etlplus-0.15.0.dist-info/RECORD +0 -139
- {etlplus-0.15.0.dist-info → etlplus-0.16.6.dist-info}/entry_points.txt +0 -0
- {etlplus-0.15.0.dist-info → etlplus-0.16.6.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.15.0.dist-info → etlplus-0.16.6.dist-info}/top_level.txt +0 -0
etlplus/ops/validate.py
CHANGED
|
@@ -11,8 +11,8 @@ Highlights
|
|
|
11
11
|
----------
|
|
12
12
|
- Centralized type map and helpers for clarity and reuse.
|
|
13
13
|
- Consistent error wording; field and item paths like ``[2].email``.
|
|
14
|
-
- Small, focused public API with
|
|
15
|
-
|
|
14
|
+
- Small, focused public API with :func:`load_data`, :func:`validate_field`,
|
|
15
|
+
:func:`validate`.
|
|
16
16
|
|
|
17
17
|
Examples
|
|
18
18
|
--------
|
|
@@ -44,9 +44,9 @@ from .load import load_data
|
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
__all__ = [
|
|
47
|
-
'
|
|
48
|
-
'
|
|
49
|
-
'
|
|
47
|
+
'FieldRulesDict',
|
|
48
|
+
'FieldValidationDict',
|
|
49
|
+
'ValidationDict',
|
|
50
50
|
'validate_field',
|
|
51
51
|
'validate',
|
|
52
52
|
]
|
|
@@ -66,10 +66,10 @@ TYPE_MAP: Final[dict[str, type | tuple[type, ...]]] = {
|
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
|
|
69
|
-
# SECTION:
|
|
69
|
+
# SECTION: TYPED DICTS ====================================================== #
|
|
70
70
|
|
|
71
71
|
|
|
72
|
-
class
|
|
72
|
+
class FieldRulesDict(TypedDict, total=False):
|
|
73
73
|
"""
|
|
74
74
|
Validation rules for a single field.
|
|
75
75
|
|
|
@@ -93,7 +93,7 @@ class FieldRules(TypedDict, total=False):
|
|
|
93
93
|
enum: list[Any]
|
|
94
94
|
|
|
95
95
|
|
|
96
|
-
class
|
|
96
|
+
class FieldValidationDict(TypedDict):
|
|
97
97
|
"""
|
|
98
98
|
Validation result for a single field.
|
|
99
99
|
|
|
@@ -109,7 +109,7 @@ class FieldValidation(TypedDict):
|
|
|
109
109
|
errors: list[str]
|
|
110
110
|
|
|
111
111
|
|
|
112
|
-
class
|
|
112
|
+
class ValidationDict(TypedDict):
|
|
113
113
|
"""
|
|
114
114
|
Validation result for a complete data structure.
|
|
115
115
|
|
|
@@ -134,7 +134,7 @@ class Validation(TypedDict):
|
|
|
134
134
|
# SECTION: TYPE ALIASES ===================================================== #
|
|
135
135
|
|
|
136
136
|
|
|
137
|
-
type RulesMap = Mapping[str,
|
|
137
|
+
type RulesMap = Mapping[str, FieldRulesDict]
|
|
138
138
|
|
|
139
139
|
|
|
140
140
|
# SECTION: INTERNAL FUNCTIONS ============================================== #
|
|
@@ -339,8 +339,8 @@ def _validate_record(
|
|
|
339
339
|
|
|
340
340
|
def validate_field(
|
|
341
341
|
value: Any,
|
|
342
|
-
rules: StrAnyMap |
|
|
343
|
-
) ->
|
|
342
|
+
rules: StrAnyMap | FieldRulesDict,
|
|
343
|
+
) -> FieldValidationDict:
|
|
344
344
|
"""
|
|
345
345
|
Validate a single value against field rules.
|
|
346
346
|
|
|
@@ -348,14 +348,14 @@ def validate_field(
|
|
|
348
348
|
----------
|
|
349
349
|
value : Any
|
|
350
350
|
The value to validate. ``None`` is treated as missing.
|
|
351
|
-
rules : StrAnyMap |
|
|
351
|
+
rules : StrAnyMap | FieldRulesDict
|
|
352
352
|
Rule dictionary. Supported keys include ``required``, ``type``,
|
|
353
353
|
``min``, ``max``, ``minLength``, ``maxLength``, ``pattern``, and
|
|
354
354
|
``enum``.
|
|
355
355
|
|
|
356
356
|
Returns
|
|
357
357
|
-------
|
|
358
|
-
|
|
358
|
+
FieldValidationDict
|
|
359
359
|
Result with ``valid`` and a list of ``errors``.
|
|
360
360
|
|
|
361
361
|
Notes
|
|
@@ -438,7 +438,7 @@ def validate_field(
|
|
|
438
438
|
def validate(
|
|
439
439
|
source: StrPath | JSONData,
|
|
440
440
|
rules: RulesMap | None = None,
|
|
441
|
-
) ->
|
|
441
|
+
) -> ValidationDict:
|
|
442
442
|
"""
|
|
443
443
|
Validate data against rules.
|
|
444
444
|
|
|
@@ -452,7 +452,7 @@ def validate(
|
|
|
452
452
|
|
|
453
453
|
Returns
|
|
454
454
|
-------
|
|
455
|
-
|
|
455
|
+
ValidationDict
|
|
456
456
|
Structured result with keys ``valid``, ``errors``, ``field_errors``,
|
|
457
457
|
and ``data``. If loading fails, ``data`` is ``None`` and an error is
|
|
458
458
|
reported in ``errors``.
|
etlplus/types.py
CHANGED
|
@@ -11,12 +11,13 @@ Notes
|
|
|
11
11
|
|
|
12
12
|
See Also
|
|
13
13
|
--------
|
|
14
|
-
- :mod:`etlplus.api.types` for HTTP-specific aliases
|
|
15
|
-
- :mod:`etlplus.
|
|
14
|
+
- :mod:`etlplus.api.types` for HTTP-specific aliases and data classes
|
|
15
|
+
- :mod:`etlplus.connector.types` for connector-specific aliases
|
|
16
16
|
|
|
17
17
|
Examples
|
|
18
18
|
--------
|
|
19
|
-
>>> from etlplus.types import JSONDict
|
|
19
|
+
>>> from etlplus.types import JSONDict
|
|
20
|
+
>>> from etlplus.ops.types import PipelineConfig
|
|
20
21
|
>>> payload: JSONDict = {'id': 1, 'name': 'Ada'}
|
|
21
22
|
>>> isinstance(payload, dict)
|
|
22
23
|
True
|
|
@@ -53,33 +54,15 @@ __all__ = [
|
|
|
53
54
|
'JSONRecords',
|
|
54
55
|
# Type Aliases (File System)
|
|
55
56
|
'StrPath',
|
|
56
|
-
# Type Aliases (Functions)
|
|
57
|
-
'AggregateFunc',
|
|
58
|
-
'OperatorFunc',
|
|
59
|
-
# Type Aliases (Records & Fields)
|
|
60
|
-
'FieldName',
|
|
61
|
-
'Fields',
|
|
62
57
|
# Type Aliases (Transform Specs)
|
|
63
58
|
'StrAnyMap',
|
|
64
59
|
'StrSeqMap',
|
|
65
60
|
'StrStrMap',
|
|
66
|
-
'AggregateSpec',
|
|
67
|
-
'FilterSpec',
|
|
68
|
-
'MapSpec',
|
|
69
|
-
'SelectSpec',
|
|
70
|
-
'SortSpec',
|
|
71
|
-
# Type Aliases (Pipelines)
|
|
72
|
-
'StepOrSteps',
|
|
73
|
-
'StepSeq',
|
|
74
|
-
'StepSpec',
|
|
75
|
-
'PipelineStepName',
|
|
76
|
-
'PipelineConfig',
|
|
77
|
-
# Type Aliases (Helpers)
|
|
78
|
-
'StepApplier',
|
|
79
|
-
'SortKey',
|
|
80
61
|
# Type Aliases (Networking / Runtime)
|
|
81
62
|
'Sleeper',
|
|
82
63
|
'Timeout',
|
|
64
|
+
# Type Aliases (Templates)
|
|
65
|
+
'TemplateKey',
|
|
83
66
|
]
|
|
84
67
|
|
|
85
68
|
|
|
@@ -124,22 +107,6 @@ type JSONRecords = list[JSONRecord]
|
|
|
124
107
|
# Path-like inputs accepted by file helpers.
|
|
125
108
|
type StrPath = str | Path | PathLike[str]
|
|
126
109
|
|
|
127
|
-
# -- Functions -- #
|
|
128
|
-
|
|
129
|
-
# Callable reducing numeric collections into a summary value.
|
|
130
|
-
type AggregateFunc = Callable[[list[float], int], Any]
|
|
131
|
-
|
|
132
|
-
# Binary predicate consumed by filter operations.
|
|
133
|
-
type OperatorFunc = Callable[[Any, Any], bool]
|
|
134
|
-
|
|
135
|
-
# -- Records & Fields -- #
|
|
136
|
-
|
|
137
|
-
# Individual field identifier referenced inside specs.
|
|
138
|
-
type FieldName = str
|
|
139
|
-
|
|
140
|
-
# Ordered list of :data:`FieldName` entries preserving projection order.
|
|
141
|
-
type Fields = list[FieldName]
|
|
142
|
-
|
|
143
110
|
# -- Transform Specs -- #
|
|
144
111
|
|
|
145
112
|
# Kept intentionally broad for runtime-friendly validation in transform.py.
|
|
@@ -155,69 +122,6 @@ type StrStrMap = Mapping[str, str]
|
|
|
155
122
|
# Mapping whose values are homogeneous sequences.
|
|
156
123
|
type StrSeqMap = Mapping[str, Sequence[Any]]
|
|
157
124
|
|
|
158
|
-
# Transform step specifications
|
|
159
|
-
|
|
160
|
-
# Filtering spec expecting ``field``, ``op``, and ``value`` keys.
|
|
161
|
-
type FilterSpec = StrAnyMap
|
|
162
|
-
|
|
163
|
-
# Field renaming instructions mapping old keys to new ones.
|
|
164
|
-
type MapSpec = StrStrMap
|
|
165
|
-
|
|
166
|
-
# Projection spec as a field list or mapping with metadata.
|
|
167
|
-
#
|
|
168
|
-
# Examples
|
|
169
|
-
# --------
|
|
170
|
-
# >>> from etlplus.types import SelectSpec
|
|
171
|
-
# >>> spec1: SelectSpec = ['a','b']
|
|
172
|
-
# >>> spec2: SelectSpec = {'fields': [...]}
|
|
173
|
-
type SelectSpec = Fields | StrSeqMap
|
|
174
|
-
|
|
175
|
-
# Sort directive expressed as a field string or mapping with flags.
|
|
176
|
-
#
|
|
177
|
-
# Examples
|
|
178
|
-
# --------
|
|
179
|
-
# >>> from etlplus.types import SortSpec
|
|
180
|
-
# >>> spec1: SortSpec = 'field'
|
|
181
|
-
# >>> spec2: SortSpec = {'field': 'x', 'reverse': True}
|
|
182
|
-
type SortSpec = str | StrAnyMap
|
|
183
|
-
|
|
184
|
-
# Aggregate instruction covering ``field``, ``func``, and optional alias.
|
|
185
|
-
#
|
|
186
|
-
# Supported functions: ``avg``, ``count``, ``max``, ``min``, and ``sum``.
|
|
187
|
-
# Examples
|
|
188
|
-
# --------
|
|
189
|
-
# >>> from etlplus.types import AggregateSpec
|
|
190
|
-
# >>> spec: AggregateSpec = \
|
|
191
|
-
# ... {'field': 'x', 'func': 'sum' | 'avg' | ..., 'alias'?: '...'}
|
|
192
|
-
type AggregateSpec = StrAnyMap
|
|
193
|
-
|
|
194
|
-
# -- Pipelines-- #
|
|
195
|
-
|
|
196
|
-
# Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
|
|
197
|
-
type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
|
|
198
|
-
|
|
199
|
-
# Collections of steps
|
|
200
|
-
|
|
201
|
-
# Ordered collection of :data:`StepSpec` entries.
|
|
202
|
-
type StepSeq = Sequence[StepSpec]
|
|
203
|
-
|
|
204
|
-
# Accepts either a single :data:`StepSpec` or a sequence of them.
|
|
205
|
-
type StepOrSteps = StepSpec | StepSeq
|
|
206
|
-
|
|
207
|
-
# Canonical literal names for supported transform stages.
|
|
208
|
-
type PipelineStepName = Literal['filter', 'map', 'select', 'sort', 'aggregate']
|
|
209
|
-
|
|
210
|
-
# Mapping from step name to its associated specification payload.
|
|
211
|
-
type PipelineConfig = Mapping[PipelineStepName, StepOrSteps]
|
|
212
|
-
|
|
213
|
-
# -- Helpers -- #
|
|
214
|
-
|
|
215
|
-
# Callable that applies step configuration to a batch of records.
|
|
216
|
-
type StepApplier = Callable[[JSONList, Any], JSONList]
|
|
217
|
-
|
|
218
|
-
# Tuple combining stable sort index and computed sort value.
|
|
219
|
-
type SortKey = tuple[int, Any]
|
|
220
|
-
|
|
221
125
|
# -- Networking / Runtime -- #
|
|
222
126
|
|
|
223
127
|
# Sleep function used by retry helpers.
|
etlplus/utils.py
CHANGED
|
@@ -8,6 +8,7 @@ from __future__ import annotations
|
|
|
8
8
|
|
|
9
9
|
import json
|
|
10
10
|
from collections.abc import Callable
|
|
11
|
+
from collections.abc import Iterable
|
|
11
12
|
from collections.abc import Mapping
|
|
12
13
|
from typing import Any
|
|
13
14
|
from typing import TypeVar
|
|
@@ -25,6 +26,7 @@ __all__ = [
|
|
|
25
26
|
# Mapping utilities
|
|
26
27
|
'cast_str_dict',
|
|
27
28
|
'coerce_dict',
|
|
29
|
+
'deep_substitute',
|
|
28
30
|
'maybe_mapping',
|
|
29
31
|
# Float coercion
|
|
30
32
|
'to_float',
|
|
@@ -39,7 +41,8 @@ __all__ = [
|
|
|
39
41
|
# Generic number coercion
|
|
40
42
|
'to_number',
|
|
41
43
|
# Text processing
|
|
42
|
-
'
|
|
44
|
+
'normalize_choice',
|
|
45
|
+
'normalize_str',
|
|
43
46
|
]
|
|
44
47
|
|
|
45
48
|
|
|
@@ -56,6 +59,52 @@ Num = TypeVar('Num', int, float)
|
|
|
56
59
|
# -- Data Utilities -- #
|
|
57
60
|
|
|
58
61
|
|
|
62
|
+
def deep_substitute(
|
|
63
|
+
value: Any,
|
|
64
|
+
vars_map: StrAnyMap | None,
|
|
65
|
+
env_map: Mapping[str, str] | None,
|
|
66
|
+
) -> Any:
|
|
67
|
+
"""
|
|
68
|
+
Recursively substitute ``${VAR}`` tokens in nested structures.
|
|
69
|
+
|
|
70
|
+
Only strings are substituted; other types are returned as-is.
|
|
71
|
+
|
|
72
|
+
Parameters
|
|
73
|
+
----------
|
|
74
|
+
value : Any
|
|
75
|
+
The value to perform substitutions on.
|
|
76
|
+
vars_map : StrAnyMap | None
|
|
77
|
+
Mapping of variable names to replacement values (lower precedence).
|
|
78
|
+
env_map : Mapping[str, str] | None
|
|
79
|
+
Mapping of environment variables overriding *vars_map* values (higher
|
|
80
|
+
precedence).
|
|
81
|
+
|
|
82
|
+
Returns
|
|
83
|
+
-------
|
|
84
|
+
Any
|
|
85
|
+
New structure with substitutions applied where tokens were found.
|
|
86
|
+
"""
|
|
87
|
+
substitutions = _prepare_substitutions(vars_map, env_map)
|
|
88
|
+
|
|
89
|
+
def _apply(node: Any) -> Any:
|
|
90
|
+
match node:
|
|
91
|
+
case str():
|
|
92
|
+
return _replace_tokens(node, substitutions)
|
|
93
|
+
case Mapping():
|
|
94
|
+
return {k: _apply(v) for k, v in node.items()}
|
|
95
|
+
case list() | tuple() as seq:
|
|
96
|
+
apply = [_apply(item) for item in seq]
|
|
97
|
+
return apply if isinstance(seq, list) else tuple(apply)
|
|
98
|
+
case set():
|
|
99
|
+
return {_apply(item) for item in node}
|
|
100
|
+
case frozenset():
|
|
101
|
+
return frozenset(_apply(item) for item in node)
|
|
102
|
+
case _:
|
|
103
|
+
return node
|
|
104
|
+
|
|
105
|
+
return _apply(value)
|
|
106
|
+
|
|
107
|
+
|
|
59
108
|
def cast_str_dict(
|
|
60
109
|
mapping: StrAnyMap | None,
|
|
61
110
|
) -> dict[str, str]:
|
|
@@ -81,7 +130,7 @@ def coerce_dict(
|
|
|
81
130
|
value: Any,
|
|
82
131
|
) -> dict[str, Any]:
|
|
83
132
|
"""
|
|
84
|
-
Return a ``dict`` copy when
|
|
133
|
+
Return a ``dict`` copy when *value* is mapping-like.
|
|
85
134
|
|
|
86
135
|
Parameters
|
|
87
136
|
----------
|
|
@@ -91,7 +140,7 @@ def coerce_dict(
|
|
|
91
140
|
Returns
|
|
92
141
|
-------
|
|
93
142
|
dict[str, Any]
|
|
94
|
-
Shallow copy of
|
|
143
|
+
Shallow copy of *value* converted to a standard ``dict``.
|
|
95
144
|
"""
|
|
96
145
|
return dict(value) if isinstance(value, Mapping) else {}
|
|
97
146
|
|
|
@@ -121,7 +170,7 @@ def maybe_mapping(
|
|
|
121
170
|
value: Any,
|
|
122
171
|
) -> StrAnyMap | None:
|
|
123
172
|
"""
|
|
124
|
-
Return
|
|
173
|
+
Return *value* when it is mapping-like; otherwise ``None``.
|
|
125
174
|
|
|
126
175
|
Parameters
|
|
127
176
|
----------
|
|
@@ -140,7 +189,7 @@ def print_json(
|
|
|
140
189
|
obj: Any,
|
|
141
190
|
) -> None:
|
|
142
191
|
"""
|
|
143
|
-
Pretty-print
|
|
192
|
+
Pretty-print *obj* as UTF-8 JSON without ASCII escaping.
|
|
144
193
|
|
|
145
194
|
Parameters
|
|
146
195
|
----------
|
|
@@ -165,12 +214,12 @@ def to_float(
|
|
|
165
214
|
maximum: float | None = None,
|
|
166
215
|
) -> float | None:
|
|
167
216
|
"""
|
|
168
|
-
Coerce
|
|
217
|
+
Coerce *value* to a float with optional fallback and bounds.
|
|
169
218
|
|
|
170
219
|
Notes
|
|
171
220
|
-----
|
|
172
221
|
For strings, leading/trailing whitespace is ignored. Returns ``None``
|
|
173
|
-
when coercion fails and no
|
|
222
|
+
when coercion fails and no *default* is provided.
|
|
174
223
|
"""
|
|
175
224
|
return _normalize_number(
|
|
176
225
|
_coerce_float,
|
|
@@ -186,7 +235,7 @@ def to_maximum_float(
|
|
|
186
235
|
default: float,
|
|
187
236
|
) -> float:
|
|
188
237
|
"""
|
|
189
|
-
Return the greater of
|
|
238
|
+
Return the greater of *default* and *value* after float coercion.
|
|
190
239
|
|
|
191
240
|
Parameters
|
|
192
241
|
----------
|
|
@@ -198,7 +247,7 @@ def to_maximum_float(
|
|
|
198
247
|
Returns
|
|
199
248
|
-------
|
|
200
249
|
float
|
|
201
|
-
|
|
250
|
+
*default* if coercion fails; else ``max(coerced, default)``.
|
|
202
251
|
"""
|
|
203
252
|
result = to_float(value, default)
|
|
204
253
|
return max(_value_or_default(result, default), default)
|
|
@@ -209,7 +258,7 @@ def to_minimum_float(
|
|
|
209
258
|
default: float,
|
|
210
259
|
) -> float:
|
|
211
260
|
"""
|
|
212
|
-
Return the lesser of
|
|
261
|
+
Return the lesser of *default* and *value* after float coercion.
|
|
213
262
|
|
|
214
263
|
Parameters
|
|
215
264
|
----------
|
|
@@ -221,7 +270,7 @@ def to_minimum_float(
|
|
|
221
270
|
Returns
|
|
222
271
|
-------
|
|
223
272
|
float
|
|
224
|
-
|
|
273
|
+
*default* if coercion fails; else ``min(coerced, default)``.
|
|
225
274
|
"""
|
|
226
275
|
result = to_float(value, default)
|
|
227
276
|
return min(_value_or_default(result, default), default)
|
|
@@ -257,12 +306,12 @@ def to_int(
|
|
|
257
306
|
maximum: int | None = None,
|
|
258
307
|
) -> int | None:
|
|
259
308
|
"""
|
|
260
|
-
Coerce
|
|
309
|
+
Coerce *value* to an integer with optional fallback and bounds.
|
|
261
310
|
|
|
262
311
|
Notes
|
|
263
312
|
-----
|
|
264
313
|
For strings, leading/trailing whitespace is ignored. Returns ``None``
|
|
265
|
-
when coercion fails and no
|
|
314
|
+
when coercion fails and no *default* is provided.
|
|
266
315
|
"""
|
|
267
316
|
return _normalize_number(
|
|
268
317
|
_coerce_int,
|
|
@@ -278,7 +327,7 @@ def to_maximum_int(
|
|
|
278
327
|
default: int,
|
|
279
328
|
) -> int:
|
|
280
329
|
"""
|
|
281
|
-
Return the greater of
|
|
330
|
+
Return the greater of *default* and *value* after integer coercion.
|
|
282
331
|
|
|
283
332
|
Parameters
|
|
284
333
|
----------
|
|
@@ -290,7 +339,7 @@ def to_maximum_int(
|
|
|
290
339
|
Returns
|
|
291
340
|
-------
|
|
292
341
|
int
|
|
293
|
-
|
|
342
|
+
*default* if coercion fails; else ``max(coerced, default)``.
|
|
294
343
|
"""
|
|
295
344
|
result = to_int(value, default)
|
|
296
345
|
return max(_value_or_default(result, default), default)
|
|
@@ -301,7 +350,7 @@ def to_minimum_int(
|
|
|
301
350
|
default: int,
|
|
302
351
|
) -> int:
|
|
303
352
|
"""
|
|
304
|
-
Return the lesser of
|
|
353
|
+
Return the lesser of *default* and *value* after integer coercion.
|
|
305
354
|
|
|
306
355
|
Parameters
|
|
307
356
|
----------
|
|
@@ -313,7 +362,7 @@ def to_minimum_int(
|
|
|
313
362
|
Returns
|
|
314
363
|
-------
|
|
315
364
|
int
|
|
316
|
-
|
|
365
|
+
*default* if coercion fails; else ``min(coerced, default)``.
|
|
317
366
|
"""
|
|
318
367
|
result = to_int(value, default)
|
|
319
368
|
return min(_value_or_default(result, default), default)
|
|
@@ -326,21 +375,21 @@ def to_positive_int(
|
|
|
326
375
|
minimum: int = 1,
|
|
327
376
|
) -> int:
|
|
328
377
|
"""
|
|
329
|
-
Return a positive integer, falling back to
|
|
378
|
+
Return a positive integer, falling back to *minimum* when needed.
|
|
330
379
|
|
|
331
380
|
Parameters
|
|
332
381
|
----------
|
|
333
382
|
value : Any
|
|
334
383
|
Candidate input coerced with :func:`to_int`.
|
|
335
384
|
default : int
|
|
336
|
-
Fallback value when coercion fails; clamped by
|
|
385
|
+
Fallback value when coercion fails; clamped by *minimum*.
|
|
337
386
|
minimum : int
|
|
338
387
|
Inclusive lower bound for the result. Defaults to ``1``.
|
|
339
388
|
|
|
340
389
|
Returns
|
|
341
390
|
-------
|
|
342
391
|
int
|
|
343
|
-
Positive integer respecting
|
|
392
|
+
Positive integer respecting *minimum*.
|
|
344
393
|
"""
|
|
345
394
|
result = to_int(value, default, minimum=minimum)
|
|
346
395
|
return _value_or_default(result, minimum)
|
|
@@ -353,7 +402,7 @@ def to_number(
|
|
|
353
402
|
value: object,
|
|
354
403
|
) -> float | None:
|
|
355
404
|
"""
|
|
356
|
-
Coerce
|
|
405
|
+
Coerce *value* to a ``float`` using the internal float coercer.
|
|
357
406
|
|
|
358
407
|
Parameters
|
|
359
408
|
----------
|
|
@@ -372,7 +421,7 @@ def to_number(
|
|
|
372
421
|
# -- Text Processing -- #
|
|
373
422
|
|
|
374
423
|
|
|
375
|
-
def
|
|
424
|
+
def normalize_str(
|
|
376
425
|
value: str | None,
|
|
377
426
|
) -> str:
|
|
378
427
|
"""
|
|
@@ -392,6 +441,36 @@ def normalized_str(
|
|
|
392
441
|
return (value or '').strip().lower()
|
|
393
442
|
|
|
394
443
|
|
|
444
|
+
def normalize_choice(
|
|
445
|
+
value: str | None,
|
|
446
|
+
*,
|
|
447
|
+
mapping: Mapping[str, str],
|
|
448
|
+
default: str,
|
|
449
|
+
normalize: Callable[[str | None], str] = normalize_str,
|
|
450
|
+
) -> str:
|
|
451
|
+
"""
|
|
452
|
+
Normalize a string choice using a mapping and fallback.
|
|
453
|
+
|
|
454
|
+
Parameters
|
|
455
|
+
----------
|
|
456
|
+
value : str | None
|
|
457
|
+
Input value to normalize.
|
|
458
|
+
mapping : Mapping[str, str]
|
|
459
|
+
Mapping of acceptable normalized inputs to output values.
|
|
460
|
+
default : str
|
|
461
|
+
Default return value when input is missing or unrecognized.
|
|
462
|
+
normalize : Callable[[str | None], str], optional
|
|
463
|
+
Normalization function applied to *value*. Defaults to
|
|
464
|
+
:func:`normalize_str`.
|
|
465
|
+
|
|
466
|
+
Returns
|
|
467
|
+
-------
|
|
468
|
+
str
|
|
469
|
+
Normalized mapped value or *default*.
|
|
470
|
+
"""
|
|
471
|
+
return mapping.get(normalize(value), default)
|
|
472
|
+
|
|
473
|
+
|
|
395
474
|
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
396
475
|
|
|
397
476
|
|
|
@@ -401,7 +480,7 @@ def _clamp(
|
|
|
401
480
|
maximum: Num | None,
|
|
402
481
|
) -> Num:
|
|
403
482
|
"""
|
|
404
|
-
Return
|
|
483
|
+
Return *value* constrained to the interval ``[minimum, maximum]``.
|
|
405
484
|
|
|
406
485
|
Parameters
|
|
407
486
|
----------
|
|
@@ -425,6 +504,61 @@ def _clamp(
|
|
|
425
504
|
return value
|
|
426
505
|
|
|
427
506
|
|
|
507
|
+
def _prepare_substitutions(
|
|
508
|
+
vars_map: StrAnyMap | None,
|
|
509
|
+
env_map: Mapping[str, Any] | None,
|
|
510
|
+
) -> tuple[tuple[str, Any], ...]:
|
|
511
|
+
"""
|
|
512
|
+
Merge variable and environment maps into an ordered substitutions list.
|
|
513
|
+
|
|
514
|
+
Parameters
|
|
515
|
+
----------
|
|
516
|
+
vars_map : StrAnyMap | None
|
|
517
|
+
Mapping of variable names to replacement values (lower precedence).
|
|
518
|
+
env_map : Mapping[str, Any] | None
|
|
519
|
+
Environment-backed values that override entries from *vars_map*.
|
|
520
|
+
|
|
521
|
+
Returns
|
|
522
|
+
-------
|
|
523
|
+
tuple[tuple[str, Any], ...]
|
|
524
|
+
Immutable sequence of ``(name, value)`` pairs suitable for token
|
|
525
|
+
replacement.
|
|
526
|
+
"""
|
|
527
|
+
if not vars_map and not env_map:
|
|
528
|
+
return ()
|
|
529
|
+
merged: dict[str, Any] = {**(vars_map or {}), **(env_map or {})}
|
|
530
|
+
return tuple(merged.items())
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def _replace_tokens(
|
|
534
|
+
text: str,
|
|
535
|
+
substitutions: Iterable[tuple[str, Any]],
|
|
536
|
+
) -> str:
|
|
537
|
+
"""
|
|
538
|
+
Replace ``${VAR}`` tokens in *text* using *substitutions*.
|
|
539
|
+
|
|
540
|
+
Parameters
|
|
541
|
+
----------
|
|
542
|
+
text : str
|
|
543
|
+
Input string that may contain ``${VAR}`` tokens.
|
|
544
|
+
substitutions : Iterable[tuple[str, Any]]
|
|
545
|
+
Sequence of ``(name, value)`` pairs used for token replacement.
|
|
546
|
+
|
|
547
|
+
Returns
|
|
548
|
+
-------
|
|
549
|
+
str
|
|
550
|
+
Updated text with replacements applied.
|
|
551
|
+
"""
|
|
552
|
+
if not substitutions:
|
|
553
|
+
return text
|
|
554
|
+
out = text
|
|
555
|
+
for name, replacement in substitutions:
|
|
556
|
+
token = f'${{{name}}}'
|
|
557
|
+
if token in out:
|
|
558
|
+
out = out.replace(token, str(replacement))
|
|
559
|
+
return out
|
|
560
|
+
|
|
561
|
+
|
|
428
562
|
def _coerce_float(
|
|
429
563
|
value: object,
|
|
430
564
|
) -> float | None:
|
|
@@ -502,7 +636,7 @@ def _integral_from_float(
|
|
|
502
636
|
candidate: float | None,
|
|
503
637
|
) -> int | None:
|
|
504
638
|
"""
|
|
505
|
-
Return ``int(candidate)`` when
|
|
639
|
+
Return ``int(candidate)`` when *candidate* is integral.
|
|
506
640
|
|
|
507
641
|
Parameters
|
|
508
642
|
----------
|
|
@@ -512,7 +646,7 @@ def _integral_from_float(
|
|
|
512
646
|
Returns
|
|
513
647
|
-------
|
|
514
648
|
int | None
|
|
515
|
-
Integer form of
|
|
649
|
+
Integer form of *candidate*; else ``None`` if not integral.
|
|
516
650
|
"""
|
|
517
651
|
if candidate is None or not candidate.is_integer():
|
|
518
652
|
return None
|
|
@@ -528,7 +662,7 @@ def _normalize_number(
|
|
|
528
662
|
maximum: Num | None = None,
|
|
529
663
|
) -> Num | None:
|
|
530
664
|
"""
|
|
531
|
-
Coerce
|
|
665
|
+
Coerce *value* with *coercer* and optionally clamp it.
|
|
532
666
|
|
|
533
667
|
Parameters
|
|
534
668
|
----------
|
|
@@ -561,7 +695,7 @@ def _validate_bounds(
|
|
|
561
695
|
maximum: Num | None,
|
|
562
696
|
) -> tuple[Num | None, Num | None]:
|
|
563
697
|
"""
|
|
564
|
-
Ensure
|
|
698
|
+
Ensure *minimum* does not exceed *maximum*.
|
|
565
699
|
|
|
566
700
|
Parameters
|
|
567
701
|
----------
|
|
@@ -590,7 +724,7 @@ def _value_or_default(
|
|
|
590
724
|
default: Num,
|
|
591
725
|
) -> Num:
|
|
592
726
|
"""
|
|
593
|
-
Return
|
|
727
|
+
Return *value* if not ``None``; else *default*.
|
|
594
728
|
|
|
595
729
|
Parameters
|
|
596
730
|
----------
|
|
@@ -602,6 +736,6 @@ def _value_or_default(
|
|
|
602
736
|
Returns
|
|
603
737
|
-------
|
|
604
738
|
Num
|
|
605
|
-
|
|
739
|
+
*value* or *default*.
|
|
606
740
|
"""
|
|
607
741
|
return default if value is None else value
|
etlplus/workflow/README.md
CHANGED
|
@@ -12,8 +12,6 @@ Back to project overview: see the top-level [README](../../README.md).
|
|
|
12
12
|
|
|
13
13
|
- [`etlplus.workflow` Subpackage](#etlplusworkflow-subpackage)
|
|
14
14
|
- [Supported Configuration Types](#supported-configuration-types)
|
|
15
|
-
- [Loading and Validating Configs](#loading-and-validating-configs)
|
|
16
|
-
- [Example: Loading a Pipeline Config](#example-loading-a-pipeline-config)
|
|
17
15
|
- [See Also](#see-also)
|
|
18
16
|
|
|
19
17
|
## Supported Configuration Types
|
|
@@ -23,28 +21,6 @@ Back to project overview: see the top-level [README](../../README.md).
|
|
|
23
21
|
- **Pipeline**: End-to-end pipeline configuration
|
|
24
22
|
- **Profile**: User or environment-specific settings
|
|
25
23
|
|
|
26
|
-
## Loading and Validating Configs
|
|
27
|
-
|
|
28
|
-
Use the provided classes to load and validate configuration files:
|
|
29
|
-
|
|
30
|
-
```python
|
|
31
|
-
from etlplus.workflow import PipelineConfig
|
|
32
|
-
|
|
33
|
-
cfg = PipelineConfig.from_yaml("pipeline.yml")
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
- Supports YAML and JSON formats
|
|
37
|
-
- Validates against expected schema
|
|
38
|
-
|
|
39
|
-
## Example: Loading a Pipeline Config
|
|
40
|
-
|
|
41
|
-
```python
|
|
42
|
-
from etlplus.workflow import PipelineConfig
|
|
43
|
-
|
|
44
|
-
pipeline = PipelineConfig.from_yaml("configs/pipeline.yml")
|
|
45
|
-
print(pipeline)
|
|
46
|
-
```
|
|
47
|
-
|
|
48
24
|
## See Also
|
|
49
25
|
|
|
50
26
|
- Top-level CLI and library usage in the main [README](../../README.md)
|