etlplus 0.16.2__py3-none-any.whl → 0.16.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/enums.py +35 -167
- etlplus/ops/__init__.py +1 -0
- etlplus/ops/enums.py +173 -0
- etlplus/ops/run.py +1 -1
- etlplus/ops/transform.py +16 -16
- etlplus/ops/types.py +147 -0
- etlplus/types.py +0 -99
- {etlplus-0.16.2.dist-info → etlplus-0.16.3.dist-info}/METADATA +1 -1
- {etlplus-0.16.2.dist-info → etlplus-0.16.3.dist-info}/RECORD +13 -11
- {etlplus-0.16.2.dist-info → etlplus-0.16.3.dist-info}/WHEEL +0 -0
- {etlplus-0.16.2.dist-info → etlplus-0.16.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.2.dist-info → etlplus-0.16.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.2.dist-info → etlplus-0.16.3.dist-info}/top_level.txt +0 -0
etlplus/enums.py
CHANGED
|
@@ -1,18 +1,14 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.enums` module.
|
|
3
3
|
|
|
4
|
-
Shared enumeration
|
|
4
|
+
Shared enumeration base class.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
import enum
|
|
10
|
-
import operator as _op
|
|
11
|
-
from statistics import fmean
|
|
12
10
|
from typing import Self
|
|
13
11
|
|
|
14
|
-
from .types import AggregateFunc
|
|
15
|
-
from .types import OperatorFunc
|
|
16
12
|
from .types import StrStrMap
|
|
17
13
|
|
|
18
14
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -20,10 +16,7 @@ from .types import StrStrMap
|
|
|
20
16
|
|
|
21
17
|
__all__ = [
|
|
22
18
|
# Enums
|
|
23
|
-
'AggregateName',
|
|
24
19
|
'CoercibleStrEnum',
|
|
25
|
-
'OperatorName',
|
|
26
|
-
'PipelineStep',
|
|
27
20
|
]
|
|
28
21
|
|
|
29
22
|
|
|
@@ -41,6 +34,7 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
41
34
|
Notes
|
|
42
35
|
-----
|
|
43
36
|
- Values are normalized via ``str(value).strip().casefold()``.
|
|
37
|
+
- If value matching fails, the raw string is tried as a member name.
|
|
44
38
|
- Error messages enumerate allowed values for easier debugging.
|
|
45
39
|
"""
|
|
46
40
|
|
|
@@ -56,7 +50,13 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
56
50
|
Returns
|
|
57
51
|
-------
|
|
58
52
|
StrStrMap
|
|
59
|
-
A mapping of alias
|
|
53
|
+
A mapping of alias strings to their corresponding enum member
|
|
54
|
+
values or names.
|
|
55
|
+
|
|
56
|
+
Notes
|
|
57
|
+
-----
|
|
58
|
+
- Alias keys are normalized via ``str(key).strip().casefold()``.
|
|
59
|
+
- Alias values should be member values or member names.
|
|
60
60
|
"""
|
|
61
61
|
return {}
|
|
62
62
|
|
|
@@ -80,7 +80,7 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
80
80
|
Parameters
|
|
81
81
|
----------
|
|
82
82
|
value : Self | str | object
|
|
83
|
-
An existing enum member or a
|
|
83
|
+
An existing enum member or a string-like value to normalize.
|
|
84
84
|
|
|
85
85
|
Returns
|
|
86
86
|
-------
|
|
@@ -95,10 +95,26 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
95
95
|
if isinstance(value, cls):
|
|
96
96
|
return value
|
|
97
97
|
try:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
98
|
+
raw = str(value).strip()
|
|
99
|
+
normalized = raw.casefold()
|
|
100
|
+
aliases = {
|
|
101
|
+
str(key).strip().casefold(): alias
|
|
102
|
+
for key, alias in cls.aliases().items()
|
|
103
|
+
}
|
|
104
|
+
resolved = aliases.get(normalized)
|
|
105
|
+
if resolved is None:
|
|
106
|
+
try:
|
|
107
|
+
return cls(normalized) # type: ignore[arg-type]
|
|
108
|
+
except (ValueError, TypeError):
|
|
109
|
+
return cls[raw] # type: ignore[index]
|
|
110
|
+
if isinstance(resolved, cls):
|
|
111
|
+
return resolved
|
|
112
|
+
try:
|
|
113
|
+
return cls(resolved) # type: ignore[arg-type]
|
|
114
|
+
except (ValueError, TypeError):
|
|
115
|
+
# Allow aliases to reference member names.
|
|
116
|
+
return cls[resolved] # type: ignore[index]
|
|
117
|
+
except (ValueError, TypeError, KeyError) as e:
|
|
102
118
|
allowed = ', '.join(cls.choices())
|
|
103
119
|
raise ValueError(
|
|
104
120
|
f'Invalid {cls.__name__} value: {value!r}. Allowed: {allowed}',
|
|
@@ -107,15 +123,15 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
107
123
|
@classmethod
|
|
108
124
|
def try_coerce(
|
|
109
125
|
cls,
|
|
110
|
-
value: object,
|
|
126
|
+
value: Self | str | object,
|
|
111
127
|
) -> Self | None:
|
|
112
128
|
"""
|
|
113
|
-
|
|
129
|
+
Attempt to coerce a value into the enum; return ``None`` on failure.
|
|
114
130
|
|
|
115
131
|
Parameters
|
|
116
132
|
----------
|
|
117
|
-
value : object
|
|
118
|
-
An existing enum member or a
|
|
133
|
+
value : Self | str | object
|
|
134
|
+
An existing enum member or a string-like value to normalize.
|
|
119
135
|
|
|
120
136
|
Returns
|
|
121
137
|
-------
|
|
@@ -124,153 +140,5 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
124
140
|
"""
|
|
125
141
|
try:
|
|
126
142
|
return cls.coerce(value)
|
|
127
|
-
except ValueError:
|
|
143
|
+
except (ValueError, TypeError, KeyError):
|
|
128
144
|
return None
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
# SECTION: ENUMS ============================================================ #
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
class AggregateName(CoercibleStrEnum):
|
|
135
|
-
"""Supported aggregations with helpers."""
|
|
136
|
-
|
|
137
|
-
# -- Constants -- #
|
|
138
|
-
|
|
139
|
-
AVG = 'avg'
|
|
140
|
-
COUNT = 'count'
|
|
141
|
-
MAX = 'max'
|
|
142
|
-
MIN = 'min'
|
|
143
|
-
SUM = 'sum'
|
|
144
|
-
|
|
145
|
-
# -- Class Methods -- #
|
|
146
|
-
|
|
147
|
-
@property
|
|
148
|
-
def func(self) -> AggregateFunc:
|
|
149
|
-
"""
|
|
150
|
-
Get the aggregation function for this aggregation type.
|
|
151
|
-
|
|
152
|
-
Returns
|
|
153
|
-
-------
|
|
154
|
-
AggregateFunc
|
|
155
|
-
The aggregation function corresponding to this aggregation type.
|
|
156
|
-
"""
|
|
157
|
-
if self is AggregateName.COUNT:
|
|
158
|
-
return lambda xs, n: n
|
|
159
|
-
if self is AggregateName.MAX:
|
|
160
|
-
return lambda xs, n: (max(xs) if xs else None)
|
|
161
|
-
if self is AggregateName.MIN:
|
|
162
|
-
return lambda xs, n: (min(xs) if xs else None)
|
|
163
|
-
if self is AggregateName.SUM:
|
|
164
|
-
return lambda xs, n: sum(xs)
|
|
165
|
-
|
|
166
|
-
# AVG
|
|
167
|
-
return lambda xs, n: (fmean(xs) if xs else 0.0)
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
class OperatorName(CoercibleStrEnum):
|
|
171
|
-
"""Supported comparison operators with helpers."""
|
|
172
|
-
|
|
173
|
-
# -- Constants -- #
|
|
174
|
-
|
|
175
|
-
EQ = 'eq'
|
|
176
|
-
NE = 'ne'
|
|
177
|
-
GT = 'gt'
|
|
178
|
-
GTE = 'gte'
|
|
179
|
-
LT = 'lt'
|
|
180
|
-
LTE = 'lte'
|
|
181
|
-
IN = 'in'
|
|
182
|
-
CONTAINS = 'contains'
|
|
183
|
-
|
|
184
|
-
# -- Getters -- #
|
|
185
|
-
|
|
186
|
-
@property
|
|
187
|
-
def func(self) -> OperatorFunc:
|
|
188
|
-
"""
|
|
189
|
-
Get the comparison function for this operator.
|
|
190
|
-
|
|
191
|
-
Returns
|
|
192
|
-
-------
|
|
193
|
-
OperatorFunc
|
|
194
|
-
The comparison function corresponding to this operator.
|
|
195
|
-
"""
|
|
196
|
-
match self:
|
|
197
|
-
case OperatorName.EQ:
|
|
198
|
-
return _op.eq
|
|
199
|
-
case OperatorName.NE:
|
|
200
|
-
return _op.ne
|
|
201
|
-
case OperatorName.GT:
|
|
202
|
-
return _op.gt
|
|
203
|
-
case OperatorName.GTE:
|
|
204
|
-
return _op.ge
|
|
205
|
-
case OperatorName.LT:
|
|
206
|
-
return _op.lt
|
|
207
|
-
case OperatorName.LTE:
|
|
208
|
-
return _op.le
|
|
209
|
-
case OperatorName.IN:
|
|
210
|
-
return lambda a, b: a in b
|
|
211
|
-
case OperatorName.CONTAINS:
|
|
212
|
-
return lambda a, b: b in a
|
|
213
|
-
|
|
214
|
-
# -- Class Methods -- #
|
|
215
|
-
|
|
216
|
-
@classmethod
|
|
217
|
-
def aliases(cls) -> StrStrMap:
|
|
218
|
-
"""
|
|
219
|
-
Return a mapping of common aliases for each enum member.
|
|
220
|
-
|
|
221
|
-
Returns
|
|
222
|
-
-------
|
|
223
|
-
StrStrMap
|
|
224
|
-
A mapping of alias names to their corresponding enum member names.
|
|
225
|
-
"""
|
|
226
|
-
return {
|
|
227
|
-
'==': 'eq',
|
|
228
|
-
'=': 'eq',
|
|
229
|
-
'!=': 'ne',
|
|
230
|
-
'<>': 'ne',
|
|
231
|
-
'>=': 'gte',
|
|
232
|
-
'≥': 'gte',
|
|
233
|
-
'<=': 'lte',
|
|
234
|
-
'≤': 'lte',
|
|
235
|
-
'>': 'gt',
|
|
236
|
-
'<': 'lt',
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
class PipelineStep(CoercibleStrEnum):
|
|
241
|
-
"""Pipeline step names as an enum for internal orchestration."""
|
|
242
|
-
|
|
243
|
-
# -- Constants -- #
|
|
244
|
-
|
|
245
|
-
FILTER = 'filter'
|
|
246
|
-
MAP = 'map'
|
|
247
|
-
SELECT = 'select'
|
|
248
|
-
SORT = 'sort'
|
|
249
|
-
AGGREGATE = 'aggregate'
|
|
250
|
-
|
|
251
|
-
# -- Getters -- #
|
|
252
|
-
|
|
253
|
-
@property
|
|
254
|
-
def order(self) -> int:
|
|
255
|
-
"""
|
|
256
|
-
Get the execution order of this pipeline step.
|
|
257
|
-
|
|
258
|
-
Returns
|
|
259
|
-
-------
|
|
260
|
-
int
|
|
261
|
-
The execution order of this pipeline step.
|
|
262
|
-
"""
|
|
263
|
-
return _PIPELINE_ORDER_INDEX[self]
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
# SECTION: INTERNAL CONSTANTS ============================================== #
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
# Precomputed order index for PipelineStep; avoids recomputing on each access.
|
|
270
|
-
_PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
|
|
271
|
-
PipelineStep.FILTER: 0,
|
|
272
|
-
PipelineStep.MAP: 1,
|
|
273
|
-
PipelineStep.SELECT: 2,
|
|
274
|
-
PipelineStep.SORT: 3,
|
|
275
|
-
PipelineStep.AGGREGATE: 4,
|
|
276
|
-
}
|
etlplus/ops/__init__.py
CHANGED
etlplus/ops/enums.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.ops.enums` module.
|
|
3
|
+
|
|
4
|
+
Operation-specific enums and helpers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import operator as _op
|
|
10
|
+
from statistics import fmean
|
|
11
|
+
|
|
12
|
+
from ..enums import CoercibleStrEnum
|
|
13
|
+
from ..types import StrStrMap
|
|
14
|
+
from .types import AggregateFunc
|
|
15
|
+
from .types import OperatorFunc
|
|
16
|
+
|
|
17
|
+
# SECTION: EXPORTS ========================================================= #
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
# Enums
|
|
22
|
+
'AggregateName',
|
|
23
|
+
'OperatorName',
|
|
24
|
+
'PipelineStep',
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# SECTION: ENUMS ============================================================ #
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class AggregateName(CoercibleStrEnum):
|
|
32
|
+
"""Supported aggregations with helpers."""
|
|
33
|
+
|
|
34
|
+
# -- Constants -- #
|
|
35
|
+
|
|
36
|
+
AVG = 'avg'
|
|
37
|
+
COUNT = 'count'
|
|
38
|
+
MAX = 'max'
|
|
39
|
+
MIN = 'min'
|
|
40
|
+
SUM = 'sum'
|
|
41
|
+
|
|
42
|
+
# -- Getters -- #
|
|
43
|
+
|
|
44
|
+
@property
|
|
45
|
+
def func(self) -> AggregateFunc:
|
|
46
|
+
"""
|
|
47
|
+
Get the aggregation function for this aggregation type.
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
AggregateFunc
|
|
52
|
+
The aggregation function corresponding to this aggregation type.
|
|
53
|
+
"""
|
|
54
|
+
if self is AggregateName.COUNT:
|
|
55
|
+
return lambda xs, n: n
|
|
56
|
+
if self is AggregateName.MAX:
|
|
57
|
+
return lambda xs, n: (max(xs) if xs else None)
|
|
58
|
+
if self is AggregateName.MIN:
|
|
59
|
+
return lambda xs, n: (min(xs) if xs else None)
|
|
60
|
+
if self is AggregateName.SUM:
|
|
61
|
+
return lambda xs, n: sum(xs)
|
|
62
|
+
|
|
63
|
+
# AVG
|
|
64
|
+
return lambda xs, n: (fmean(xs) if xs else 0.0)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class OperatorName(CoercibleStrEnum):
|
|
68
|
+
"""Supported comparison operators with helpers."""
|
|
69
|
+
|
|
70
|
+
# -- Constants -- #
|
|
71
|
+
|
|
72
|
+
EQ = 'eq'
|
|
73
|
+
NE = 'ne'
|
|
74
|
+
GT = 'gt'
|
|
75
|
+
GTE = 'gte'
|
|
76
|
+
LT = 'lt'
|
|
77
|
+
LTE = 'lte'
|
|
78
|
+
IN = 'in'
|
|
79
|
+
CONTAINS = 'contains'
|
|
80
|
+
|
|
81
|
+
# -- Getters -- #
|
|
82
|
+
|
|
83
|
+
@property
|
|
84
|
+
def func(self) -> OperatorFunc:
|
|
85
|
+
"""
|
|
86
|
+
Get the comparison function for this operator.
|
|
87
|
+
|
|
88
|
+
Returns
|
|
89
|
+
-------
|
|
90
|
+
OperatorFunc
|
|
91
|
+
The comparison function corresponding to this operator.
|
|
92
|
+
"""
|
|
93
|
+
match self:
|
|
94
|
+
case OperatorName.EQ:
|
|
95
|
+
return _op.eq
|
|
96
|
+
case OperatorName.NE:
|
|
97
|
+
return _op.ne
|
|
98
|
+
case OperatorName.GT:
|
|
99
|
+
return _op.gt
|
|
100
|
+
case OperatorName.GTE:
|
|
101
|
+
return _op.ge
|
|
102
|
+
case OperatorName.LT:
|
|
103
|
+
return _op.lt
|
|
104
|
+
case OperatorName.LTE:
|
|
105
|
+
return _op.le
|
|
106
|
+
case OperatorName.IN:
|
|
107
|
+
return lambda a, b: a in b
|
|
108
|
+
case OperatorName.CONTAINS:
|
|
109
|
+
return lambda a, b: b in a
|
|
110
|
+
|
|
111
|
+
# -- Class Methods -- #
|
|
112
|
+
|
|
113
|
+
@classmethod
|
|
114
|
+
def aliases(cls) -> StrStrMap:
|
|
115
|
+
"""
|
|
116
|
+
Return a mapping of common aliases for each enum member.
|
|
117
|
+
|
|
118
|
+
Returns
|
|
119
|
+
-------
|
|
120
|
+
StrStrMap
|
|
121
|
+
A mapping of alias strings to their corresponding enum member values.
|
|
122
|
+
"""
|
|
123
|
+
return {
|
|
124
|
+
'==': 'eq',
|
|
125
|
+
'=': 'eq',
|
|
126
|
+
'!=': 'ne',
|
|
127
|
+
'<>': 'ne',
|
|
128
|
+
'>=': 'gte',
|
|
129
|
+
'≥': 'gte',
|
|
130
|
+
'<=': 'lte',
|
|
131
|
+
'≤': 'lte',
|
|
132
|
+
'>': 'gt',
|
|
133
|
+
'<': 'lt',
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class PipelineStep(CoercibleStrEnum):
|
|
138
|
+
"""Pipeline step names as an enum for internal orchestration."""
|
|
139
|
+
|
|
140
|
+
# -- Constants -- #
|
|
141
|
+
|
|
142
|
+
FILTER = 'filter'
|
|
143
|
+
MAP = 'map'
|
|
144
|
+
SELECT = 'select'
|
|
145
|
+
SORT = 'sort'
|
|
146
|
+
AGGREGATE = 'aggregate'
|
|
147
|
+
|
|
148
|
+
# -- Getters -- #
|
|
149
|
+
|
|
150
|
+
@property
|
|
151
|
+
def order(self) -> int:
|
|
152
|
+
"""
|
|
153
|
+
Get the execution order of this pipeline step.
|
|
154
|
+
|
|
155
|
+
Returns
|
|
156
|
+
-------
|
|
157
|
+
int
|
|
158
|
+
The execution order of this pipeline step.
|
|
159
|
+
"""
|
|
160
|
+
return _PIPELINE_ORDER_INDEX[self]
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# SECTION: INTERNAL CONSTANTS ============================================== #
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# Precomputed order index for PipelineStep; avoids recomputing on each access.
|
|
167
|
+
_PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
|
|
168
|
+
PipelineStep.FILTER: 0,
|
|
169
|
+
PipelineStep.MAP: 1,
|
|
170
|
+
PipelineStep.SELECT: 2,
|
|
171
|
+
PipelineStep.SORT: 3,
|
|
172
|
+
PipelineStep.AGGREGATE: 4,
|
|
173
|
+
}
|
etlplus/ops/run.py
CHANGED
|
@@ -13,9 +13,9 @@ from typing import cast
|
|
|
13
13
|
from ..api import HttpMethod
|
|
14
14
|
from ..connector import DataConnectorType
|
|
15
15
|
from ..file import FileFormat
|
|
16
|
+
from ..ops.types import PipelineConfig
|
|
16
17
|
from ..types import JSONData
|
|
17
18
|
from ..types import JSONDict
|
|
18
|
-
from ..types import PipelineConfig
|
|
19
19
|
from ..types import StrPath
|
|
20
20
|
from ..utils import print_json
|
|
21
21
|
from ..workflow import load_pipeline_config
|
etlplus/ops/transform.py
CHANGED
|
@@ -44,28 +44,28 @@ from collections.abc import Sequence
|
|
|
44
44
|
from typing import Any
|
|
45
45
|
from typing import cast
|
|
46
46
|
|
|
47
|
-
from ..
|
|
48
|
-
from ..enums import OperatorName
|
|
49
|
-
from ..enums import PipelineStep
|
|
50
|
-
from ..types import AggregateFunc
|
|
51
|
-
from ..types import AggregateSpec
|
|
52
|
-
from ..types import FieldName
|
|
53
|
-
from ..types import Fields
|
|
54
|
-
from ..types import FilterSpec
|
|
47
|
+
from ..ops.types import PipelineConfig
|
|
55
48
|
from ..types import JSONData
|
|
56
49
|
from ..types import JSONDict
|
|
57
50
|
from ..types import JSONList
|
|
58
|
-
from ..types import MapSpec
|
|
59
|
-
from ..types import OperatorFunc
|
|
60
|
-
from ..types import PipelineConfig
|
|
61
|
-
from ..types import PipelineStepName
|
|
62
|
-
from ..types import SortKey
|
|
63
|
-
from ..types import StepApplier
|
|
64
|
-
from ..types import StepOrSteps
|
|
65
|
-
from ..types import StepSpec
|
|
66
51
|
from ..types import StrPath
|
|
67
52
|
from ..utils import to_number
|
|
53
|
+
from .enums import AggregateName
|
|
54
|
+
from .enums import OperatorName
|
|
55
|
+
from .enums import PipelineStep
|
|
68
56
|
from .load import load_data
|
|
57
|
+
from .types import AggregateFunc
|
|
58
|
+
from .types import AggregateSpec
|
|
59
|
+
from .types import FieldName
|
|
60
|
+
from .types import Fields
|
|
61
|
+
from .types import FilterSpec
|
|
62
|
+
from .types import MapSpec
|
|
63
|
+
from .types import OperatorFunc
|
|
64
|
+
from .types import PipelineStepName
|
|
65
|
+
from .types import SortKey
|
|
66
|
+
from .types import StepApplier
|
|
67
|
+
from .types import StepOrSteps
|
|
68
|
+
from .types import StepSpec
|
|
69
69
|
|
|
70
70
|
# SECTION: EXPORTS ========================================================== #
|
|
71
71
|
|
etlplus/ops/types.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.ops.types` module.
|
|
3
|
+
|
|
4
|
+
Shared type aliases leveraged across :mod:`etlplus.ops` modules.
|
|
5
|
+
|
|
6
|
+
Notes
|
|
7
|
+
-----
|
|
8
|
+
- Centralizes ops-focused aliases (functions, specs, and pipeline helpers).
|
|
9
|
+
- Relies on ``type`` statements (Python 3.12+) for readability and IDE support.
|
|
10
|
+
|
|
11
|
+
Examples
|
|
12
|
+
--------
|
|
13
|
+
>>> from etlplus.ops.types import AggregateFunc, OperatorFunc
|
|
14
|
+
>>> def total(xs: list[float], _: int) -> float:
|
|
15
|
+
... return sum(xs)
|
|
16
|
+
>>> agg: AggregateFunc = total
|
|
17
|
+
>>> op: OperatorFunc = lambda a, b: a == b
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from collections.abc import Callable
|
|
23
|
+
from collections.abc import Mapping
|
|
24
|
+
from collections.abc import Sequence
|
|
25
|
+
from typing import Any
|
|
26
|
+
from typing import Literal
|
|
27
|
+
|
|
28
|
+
from ..types import JSONList
|
|
29
|
+
from ..types import StrAnyMap
|
|
30
|
+
from ..types import StrSeqMap
|
|
31
|
+
from ..types import StrStrMap
|
|
32
|
+
|
|
33
|
+
# SECTION: EXPORTS ========================================================== #
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
# Type Aliases (Functions)
|
|
38
|
+
'AggregateFunc',
|
|
39
|
+
'OperatorFunc',
|
|
40
|
+
# Type Aliases (Records & Fields)
|
|
41
|
+
'FieldName',
|
|
42
|
+
'Fields',
|
|
43
|
+
# Type Aliases (Transform Specs)
|
|
44
|
+
'AggregateSpec',
|
|
45
|
+
'FilterSpec',
|
|
46
|
+
'MapSpec',
|
|
47
|
+
'SelectSpec',
|
|
48
|
+
'SortSpec',
|
|
49
|
+
# Type Aliases (Pipelines)
|
|
50
|
+
'StepOrSteps',
|
|
51
|
+
'StepSeq',
|
|
52
|
+
'StepSpec',
|
|
53
|
+
'PipelineConfig',
|
|
54
|
+
'PipelineStepName',
|
|
55
|
+
# Type Aliases (Helpers)
|
|
56
|
+
'StepApplier',
|
|
57
|
+
'SortKey',
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# SECTION: TYPE ALIASES ===================================================== #
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# -- Functions -- #
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# TODO: Consider redefining to use `functools.reduce` signature.
|
|
68
|
+
# TODO: Consider adding `**kwargs` to support richer aggregation functions.
|
|
69
|
+
# TODO: Consider constraining first argument to `Sequence[float]`.
|
|
70
|
+
# TODO: Consider constraining return type to `float | int | None`.
|
|
71
|
+
# Callable reducing numeric collections into a summary value.
|
|
72
|
+
type AggregateFunc = Callable[[list[float], int], Any]
|
|
73
|
+
|
|
74
|
+
# Binary predicate consumed by filter operations.
|
|
75
|
+
type OperatorFunc = Callable[[Any, Any], bool]
|
|
76
|
+
|
|
77
|
+
# -- Records & Fields -- #
|
|
78
|
+
|
|
79
|
+
# Individual field identifier referenced inside specs.
|
|
80
|
+
type FieldName = str
|
|
81
|
+
|
|
82
|
+
# Ordered list of :data:`FieldName` entries preserving projection order.
|
|
83
|
+
type Fields = list[FieldName]
|
|
84
|
+
|
|
85
|
+
# -- Transform Specs -- #
|
|
86
|
+
|
|
87
|
+
# Filtering spec expecting ``field``, ``op``, and ``value`` keys.
|
|
88
|
+
type FilterSpec = StrAnyMap
|
|
89
|
+
|
|
90
|
+
# Field renaming instructions mapping old keys to new ones.
|
|
91
|
+
type MapSpec = StrStrMap
|
|
92
|
+
|
|
93
|
+
# Projection spec as a field list or mapping with metadata.
|
|
94
|
+
#
|
|
95
|
+
# Examples
|
|
96
|
+
# --------
|
|
97
|
+
# >>> from etlplus.ops.types import SelectSpec
|
|
98
|
+
# >>> spec1: SelectSpec = ['a','b']
|
|
99
|
+
# >>> spec2: SelectSpec = {'fields': [...]}
|
|
100
|
+
type SelectSpec = Fields | StrSeqMap
|
|
101
|
+
|
|
102
|
+
# Sort directive expressed as a field string or mapping with flags.
|
|
103
|
+
#
|
|
104
|
+
# Examples
|
|
105
|
+
# --------
|
|
106
|
+
# >>> from etlplus.ops.types import SortSpec
|
|
107
|
+
# >>> spec1: SortSpec = 'field'
|
|
108
|
+
# >>> spec2: SortSpec = {'field': 'x', 'reverse': True}
|
|
109
|
+
type SortSpec = str | StrAnyMap
|
|
110
|
+
|
|
111
|
+
# Aggregate instruction covering ``field``, ``func``, and optional alias.
|
|
112
|
+
#
|
|
113
|
+
# Supported functions: ``avg``, ``count``, ``max``, ``min``, and ``sum``.
|
|
114
|
+
# Examples
|
|
115
|
+
# --------
|
|
116
|
+
# >>> from etlplus.ops.types import AggregateSpec
|
|
117
|
+
# >>> spec: AggregateSpec = \
|
|
118
|
+
# ... {'field': 'x', 'func': 'sum' | 'avg' | ..., 'alias'?: '...'}
|
|
119
|
+
type AggregateSpec = StrAnyMap
|
|
120
|
+
|
|
121
|
+
# -- Pipelines -- #
|
|
122
|
+
|
|
123
|
+
# Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
|
|
124
|
+
type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
|
|
125
|
+
|
|
126
|
+
# Collections of steps
|
|
127
|
+
|
|
128
|
+
# Ordered collection of :data:`StepSpec` entries.
|
|
129
|
+
type StepSeq = Sequence[StepSpec]
|
|
130
|
+
|
|
131
|
+
# Accepts either a single :data:`StepSpec` or a sequence of them.
|
|
132
|
+
type StepOrSteps = StepSpec | StepSeq
|
|
133
|
+
|
|
134
|
+
# Canonical literal names for supported transform stages.
|
|
135
|
+
type PipelineStepName = Literal['aggregate', 'filter', 'map', 'select', 'sort']
|
|
136
|
+
|
|
137
|
+
# Mapping from step name to its associated specification payload.
|
|
138
|
+
# TODO: Consider replacing with etlplus.workflow.types.PipelineConfig.
|
|
139
|
+
type PipelineConfig = Mapping[PipelineStepName, StepOrSteps]
|
|
140
|
+
|
|
141
|
+
# -- Helpers -- #
|
|
142
|
+
|
|
143
|
+
# Callable that applies step configuration to a batch of records.
|
|
144
|
+
type StepApplier = Callable[[JSONList, Any], JSONList]
|
|
145
|
+
|
|
146
|
+
# Tuple combining stable sort index and computed sort value.
|
|
147
|
+
type SortKey = tuple[int, Any]
|
etlplus/types.py
CHANGED
|
@@ -53,30 +53,10 @@ __all__ = [
|
|
|
53
53
|
'JSONRecords',
|
|
54
54
|
# Type Aliases (File System)
|
|
55
55
|
'StrPath',
|
|
56
|
-
# Type Aliases (Functions)
|
|
57
|
-
'AggregateFunc',
|
|
58
|
-
'OperatorFunc',
|
|
59
|
-
# Type Aliases (Records & Fields)
|
|
60
|
-
'FieldName',
|
|
61
|
-
'Fields',
|
|
62
56
|
# Type Aliases (Transform Specs)
|
|
63
57
|
'StrAnyMap',
|
|
64
58
|
'StrSeqMap',
|
|
65
59
|
'StrStrMap',
|
|
66
|
-
'AggregateSpec',
|
|
67
|
-
'FilterSpec',
|
|
68
|
-
'MapSpec',
|
|
69
|
-
'SelectSpec',
|
|
70
|
-
'SortSpec',
|
|
71
|
-
# Type Aliases (Pipelines)
|
|
72
|
-
'StepOrSteps',
|
|
73
|
-
'StepSeq',
|
|
74
|
-
'StepSpec',
|
|
75
|
-
'PipelineStepName',
|
|
76
|
-
'PipelineConfig',
|
|
77
|
-
# Type Aliases (Helpers)
|
|
78
|
-
'StepApplier',
|
|
79
|
-
'SortKey',
|
|
80
60
|
# Type Aliases (Networking / Runtime)
|
|
81
61
|
'Sleeper',
|
|
82
62
|
'Timeout',
|
|
@@ -126,22 +106,6 @@ type JSONRecords = list[JSONRecord]
|
|
|
126
106
|
# Path-like inputs accepted by file helpers.
|
|
127
107
|
type StrPath = str | Path | PathLike[str]
|
|
128
108
|
|
|
129
|
-
# -- Functions -- #
|
|
130
|
-
|
|
131
|
-
# Callable reducing numeric collections into a summary value.
|
|
132
|
-
type AggregateFunc = Callable[[list[float], int], Any]
|
|
133
|
-
|
|
134
|
-
# Binary predicate consumed by filter operations.
|
|
135
|
-
type OperatorFunc = Callable[[Any, Any], bool]
|
|
136
|
-
|
|
137
|
-
# -- Records & Fields -- #
|
|
138
|
-
|
|
139
|
-
# Individual field identifier referenced inside specs.
|
|
140
|
-
type FieldName = str
|
|
141
|
-
|
|
142
|
-
# Ordered list of :data:`FieldName` entries preserving projection order.
|
|
143
|
-
type Fields = list[FieldName]
|
|
144
|
-
|
|
145
109
|
# -- Transform Specs -- #
|
|
146
110
|
|
|
147
111
|
# Kept intentionally broad for runtime-friendly validation in transform.py.
|
|
@@ -157,69 +121,6 @@ type StrStrMap = Mapping[str, str]
|
|
|
157
121
|
# Mapping whose values are homogeneous sequences.
|
|
158
122
|
type StrSeqMap = Mapping[str, Sequence[Any]]
|
|
159
123
|
|
|
160
|
-
# Transform step specifications
|
|
161
|
-
|
|
162
|
-
# Filtering spec expecting ``field``, ``op``, and ``value`` keys.
|
|
163
|
-
type FilterSpec = StrAnyMap
|
|
164
|
-
|
|
165
|
-
# Field renaming instructions mapping old keys to new ones.
|
|
166
|
-
type MapSpec = StrStrMap
|
|
167
|
-
|
|
168
|
-
# Projection spec as a field list or mapping with metadata.
|
|
169
|
-
#
|
|
170
|
-
# Examples
|
|
171
|
-
# --------
|
|
172
|
-
# >>> from etlplus.types import SelectSpec
|
|
173
|
-
# >>> spec1: SelectSpec = ['a','b']
|
|
174
|
-
# >>> spec2: SelectSpec = {'fields': [...]}
|
|
175
|
-
type SelectSpec = Fields | StrSeqMap
|
|
176
|
-
|
|
177
|
-
# Sort directive expressed as a field string or mapping with flags.
|
|
178
|
-
#
|
|
179
|
-
# Examples
|
|
180
|
-
# --------
|
|
181
|
-
# >>> from etlplus.types import SortSpec
|
|
182
|
-
# >>> spec1: SortSpec = 'field'
|
|
183
|
-
# >>> spec2: SortSpec = {'field': 'x', 'reverse': True}
|
|
184
|
-
type SortSpec = str | StrAnyMap
|
|
185
|
-
|
|
186
|
-
# Aggregate instruction covering ``field``, ``func``, and optional alias.
|
|
187
|
-
#
|
|
188
|
-
# Supported functions: ``avg``, ``count``, ``max``, ``min``, and ``sum``.
|
|
189
|
-
# Examples
|
|
190
|
-
# --------
|
|
191
|
-
# >>> from etlplus.types import AggregateSpec
|
|
192
|
-
# >>> spec: AggregateSpec = \
|
|
193
|
-
# ... {'field': 'x', 'func': 'sum' | 'avg' | ..., 'alias'?: '...'}
|
|
194
|
-
type AggregateSpec = StrAnyMap
|
|
195
|
-
|
|
196
|
-
# -- Pipelines-- #
|
|
197
|
-
|
|
198
|
-
# Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
|
|
199
|
-
type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
|
|
200
|
-
|
|
201
|
-
# Collections of steps
|
|
202
|
-
|
|
203
|
-
# Ordered collection of :data:`StepSpec` entries.
|
|
204
|
-
type StepSeq = Sequence[StepSpec]
|
|
205
|
-
|
|
206
|
-
# Accepts either a single :data:`StepSpec` or a sequence of them.
|
|
207
|
-
type StepOrSteps = StepSpec | StepSeq
|
|
208
|
-
|
|
209
|
-
# Canonical literal names for supported transform stages.
|
|
210
|
-
type PipelineStepName = Literal['filter', 'map', 'select', 'sort', 'aggregate']
|
|
211
|
-
|
|
212
|
-
# Mapping from step name to its associated specification payload.
|
|
213
|
-
type PipelineConfig = Mapping[PipelineStepName, StepOrSteps]
|
|
214
|
-
|
|
215
|
-
# -- Helpers -- #
|
|
216
|
-
|
|
217
|
-
# Callable that applies step configuration to a batch of records.
|
|
218
|
-
type StepApplier = Callable[[JSONList, Any], JSONList]
|
|
219
|
-
|
|
220
|
-
# Tuple combining stable sort index and computed sort value.
|
|
221
|
-
type SortKey = tuple[int, Any]
|
|
222
|
-
|
|
223
124
|
# -- Networking / Runtime -- #
|
|
224
125
|
|
|
225
126
|
# Sleep function used by retry helpers.
|
|
@@ -2,10 +2,10 @@ etlplus/README.md,sha256=JaMSomnMsHrTruDnonHqe83Rv4K0-e7Wy46tMeVoleU,1468
|
|
|
2
2
|
etlplus/__init__.py,sha256=mgTP4PJmRmsEjTCAizzzdtzAmhuHtarmPzphzdjvLgM,277
|
|
3
3
|
etlplus/__main__.py,sha256=btoROneNiigyfBU7BSzPKZ1R9gzBMpxcpsbPwmuHwTM,479
|
|
4
4
|
etlplus/__version__.py,sha256=1E0GMK_yUWCMQFKxXjTvyMwofi0qT2k4CDNiHWiymWE,327
|
|
5
|
-
etlplus/enums.py,sha256=
|
|
5
|
+
etlplus/enums.py,sha256=MfQhy3XDpN7oqLrF7_WwZojl7n8cW3RAzsZGRnAbWgc,4073
|
|
6
6
|
etlplus/mixins.py,sha256=ifGpHwWv7U00yqGf-kN93vJax2IiK4jaGtTsPsO3Oak,1350
|
|
7
7
|
etlplus/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
etlplus/types.py,sha256=
|
|
8
|
+
etlplus/types.py,sha256=DC9424i5qNBuCVGrfNLY3Ha2sz1mU84I0btSRAH7vrc,3428
|
|
9
9
|
etlplus/utils.py,sha256=X-k_Y8i6oDjlE5aQu9sw3gPw7O2ikiSn4uoheVv_ERc,17091
|
|
10
10
|
etlplus/api/README.md,sha256=amxS_eIcsnNuVvD0x_w8nkyfedOTYbhlY0gGhaFg0DE,8705
|
|
11
11
|
etlplus/api/__init__.py,sha256=PK2lQv1FbsE7ZZS_ejevFZQSuOUHGApBc22YfHAzMqA,4615
|
|
@@ -116,11 +116,13 @@ etlplus/file/yaml.py,sha256=b_SxDSEQPVXQv9a9Ih4wAcI940pE5Ksy5pQE6K6ckhw,2062
|
|
|
116
116
|
etlplus/file/zip.py,sha256=8wnmnGW_pGTx65736CzAG67XIi5y98KxucRT8sNDeuQ,4195
|
|
117
117
|
etlplus/file/zsav.py,sha256=5hMuBjYeHw--UL2ZCCDn6TzJkr_YNhdQhvKI6nr3WW0,1674
|
|
118
118
|
etlplus/ops/README.md,sha256=8omi7DYZhelc26JKk8Cm8QR8I3OGwziysPj1ivx41iQ,1380
|
|
119
|
-
etlplus/ops/__init__.py,sha256=
|
|
119
|
+
etlplus/ops/__init__.py,sha256=r5_-pPhSLCD1nq1EbN0rQrLOGpudueeIxCH_JvT2bt0,1718
|
|
120
|
+
etlplus/ops/enums.py,sha256=dC_8CfaTiB2i83Az-oG-2hkjMuAfDADNbcMF2f94UeU,4014
|
|
120
121
|
etlplus/ops/extract.py,sha256=LOYiPrALRMF7JDBabnRF24_HKnnIcfTdfXesWdS3QZM,11020
|
|
121
122
|
etlplus/ops/load.py,sha256=yicciVwomUKkdbhuRqbavKBNpT2Hg813BnQzG6IgF4o,10811
|
|
122
|
-
etlplus/ops/run.py,sha256=
|
|
123
|
-
etlplus/ops/transform.py,sha256
|
|
123
|
+
etlplus/ops/run.py,sha256=2Z27ahRZGVULxDelHldHzUJ_vdbBCwlkwpm5KyKFP7U,11298
|
|
124
|
+
etlplus/ops/transform.py,sha256=-41uw_pwOGsMTUYxtXaeYOmTF_fTkN-L4Q9KT1OFe78,25671
|
|
125
|
+
etlplus/ops/types.py,sha256=Cvp8AJzJhJ1iYjyHd7j9ZLioxE2NdK__3g6fOI0qq6Q,4198
|
|
124
126
|
etlplus/ops/utils.py,sha256=lJmrO1KDob-xZU8Gc2SvZvMgdYLsVoaz-fTV42KkLVo,10835
|
|
125
127
|
etlplus/ops/validate.py,sha256=-OLAwQNNCmmDbmj0SB7zzYXDkJfcyBP_z9nTpqImLP0,13271
|
|
126
128
|
etlplus/templates/README.md,sha256=IfPXlj1TGVA-uFWosHJhE2rabFW-znxOlOMazO9Z5cE,1361
|
|
@@ -133,9 +135,9 @@ etlplus/workflow/dag.py,sha256=-f1x8N1eb-PUuiOwEvFLmJwfR7JaMDJihlCHlhrFhgE,2937
|
|
|
133
135
|
etlplus/workflow/jobs.py,sha256=5DmAzmEZV6XXQ-xzowkLxFzplIh8Eno3wuCmjy79xHw,8818
|
|
134
136
|
etlplus/workflow/pipeline.py,sha256=PA5zhcfrk--pAg3b3x4oBf29WMj5HqR8zOozz4oEmg8,9387
|
|
135
137
|
etlplus/workflow/profile.py,sha256=FQU3bzBZ9_yjKC9kCXKN1FQDS9zjNUjtWB1r3UL95_Q,1993
|
|
136
|
-
etlplus-0.16.
|
|
137
|
-
etlplus-0.16.
|
|
138
|
-
etlplus-0.16.
|
|
139
|
-
etlplus-0.16.
|
|
140
|
-
etlplus-0.16.
|
|
141
|
-
etlplus-0.16.
|
|
138
|
+
etlplus-0.16.3.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
|
|
139
|
+
etlplus-0.16.3.dist-info/METADATA,sha256=uV-JEWCscNgGsoF34kK-1TZWzWjYZKamLylPHGwhlwU,28114
|
|
140
|
+
etlplus-0.16.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
141
|
+
etlplus-0.16.3.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
|
|
142
|
+
etlplus-0.16.3.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
|
|
143
|
+
etlplus-0.16.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|