etlplus 0.16.0__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- etlplus/README.md +24 -2
- etlplus/__init__.py +2 -0
- etlplus/api/__init__.py +14 -14
- etlplus/api/auth.py +9 -6
- etlplus/api/config.py +6 -6
- etlplus/api/endpoint_client.py +16 -16
- etlplus/api/enums.py +2 -2
- etlplus/api/errors.py +4 -4
- etlplus/api/pagination/__init__.py +6 -6
- etlplus/api/pagination/config.py +11 -9
- etlplus/api/rate_limiting/__init__.py +2 -2
- etlplus/api/rate_limiting/config.py +10 -10
- etlplus/api/rate_limiting/rate_limiter.py +2 -2
- etlplus/api/request_manager.py +4 -4
- etlplus/api/retry_manager.py +6 -6
- etlplus/api/transport.py +10 -10
- etlplus/api/types.py +47 -26
- etlplus/api/utils.py +49 -49
- etlplus/cli/README.md +9 -7
- etlplus/cli/commands.py +22 -22
- etlplus/cli/handlers.py +12 -13
- etlplus/cli/main.py +1 -1
- etlplus/{workflow/pipeline.py → config.py} +54 -91
- etlplus/connector/__init__.py +6 -6
- etlplus/connector/api.py +7 -7
- etlplus/connector/database.py +3 -3
- etlplus/connector/file.py +3 -3
- etlplus/connector/types.py +2 -2
- etlplus/database/README.md +7 -7
- etlplus/enums.py +35 -167
- etlplus/file/README.md +7 -5
- etlplus/file/accdb.py +2 -1
- etlplus/file/arrow.py +2 -1
- etlplus/file/bson.py +2 -1
- etlplus/file/cbor.py +2 -1
- etlplus/file/cfg.py +1 -1
- etlplus/file/conf.py +1 -1
- etlplus/file/dat.py +1 -1
- etlplus/file/dta.py +1 -1
- etlplus/file/duckdb.py +2 -1
- etlplus/file/enums.py +1 -1
- etlplus/file/fwf.py +2 -1
- etlplus/file/hbs.py +2 -1
- etlplus/file/hdf5.py +2 -1
- etlplus/file/ini.py +2 -1
- etlplus/file/ion.py +1 -1
- etlplus/file/jinja2.py +2 -1
- etlplus/file/log.py +1 -1
- etlplus/file/mat.py +1 -1
- etlplus/file/mdb.py +2 -1
- etlplus/file/msgpack.py +2 -1
- etlplus/file/mustache.py +2 -1
- etlplus/file/nc.py +1 -1
- etlplus/file/numbers.py +2 -1
- etlplus/file/ods.py +2 -1
- etlplus/file/pb.py +2 -1
- etlplus/file/pbf.py +2 -1
- etlplus/file/properties.py +2 -1
- etlplus/file/proto.py +2 -1
- etlplus/file/psv.py +2 -1
- etlplus/file/rda.py +2 -1
- etlplus/file/rds.py +1 -1
- etlplus/file/sas7bdat.py +2 -1
- etlplus/file/sav.py +1 -1
- etlplus/file/sqlite.py +2 -1
- etlplus/file/sylk.py +2 -1
- etlplus/file/tab.py +2 -1
- etlplus/file/toml.py +2 -1
- etlplus/file/vm.py +2 -1
- etlplus/file/wks.py +2 -1
- etlplus/file/xls.py +1 -1
- etlplus/file/xlsm.py +2 -2
- etlplus/file/xpt.py +2 -1
- etlplus/file/zsav.py +2 -1
- etlplus/ops/README.md +10 -9
- etlplus/ops/__init__.py +1 -0
- etlplus/ops/enums.py +173 -0
- etlplus/ops/extract.py +209 -22
- etlplus/ops/load.py +140 -34
- etlplus/ops/run.py +88 -103
- etlplus/ops/transform.py +46 -27
- etlplus/ops/types.py +147 -0
- etlplus/ops/utils.py +5 -5
- etlplus/ops/validate.py +13 -13
- etlplus/templates/README.md +11 -9
- etlplus/types.py +5 -102
- etlplus/workflow/README.md +0 -24
- etlplus/workflow/__init__.py +2 -4
- etlplus/workflow/dag.py +23 -1
- etlplus/workflow/jobs.py +15 -28
- etlplus/workflow/profile.py +4 -2
- {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/METADATA +32 -28
- etlplus-0.16.7.dist-info/RECORD +143 -0
- etlplus-0.16.0.dist-info/RECORD +0 -141
- {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/WHEEL +0 -0
- {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/top_level.txt +0 -0
etlplus/enums.py
CHANGED
|
@@ -1,18 +1,14 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.enums` module.
|
|
3
3
|
|
|
4
|
-
Shared enumeration
|
|
4
|
+
Shared enumeration base class.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
import enum
|
|
10
|
-
import operator as _op
|
|
11
|
-
from statistics import fmean
|
|
12
10
|
from typing import Self
|
|
13
11
|
|
|
14
|
-
from .types import AggregateFunc
|
|
15
|
-
from .types import OperatorFunc
|
|
16
12
|
from .types import StrStrMap
|
|
17
13
|
|
|
18
14
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -20,10 +16,7 @@ from .types import StrStrMap
|
|
|
20
16
|
|
|
21
17
|
__all__ = [
|
|
22
18
|
# Enums
|
|
23
|
-
'AggregateName',
|
|
24
19
|
'CoercibleStrEnum',
|
|
25
|
-
'OperatorName',
|
|
26
|
-
'PipelineStep',
|
|
27
20
|
]
|
|
28
21
|
|
|
29
22
|
|
|
@@ -41,6 +34,7 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
41
34
|
Notes
|
|
42
35
|
-----
|
|
43
36
|
- Values are normalized via ``str(value).strip().casefold()``.
|
|
37
|
+
- If value matching fails, the raw string is tried as a member name.
|
|
44
38
|
- Error messages enumerate allowed values for easier debugging.
|
|
45
39
|
"""
|
|
46
40
|
|
|
@@ -56,7 +50,13 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
56
50
|
Returns
|
|
57
51
|
-------
|
|
58
52
|
StrStrMap
|
|
59
|
-
A mapping of alias
|
|
53
|
+
A mapping of alias strings to their corresponding enum member
|
|
54
|
+
values or names.
|
|
55
|
+
|
|
56
|
+
Notes
|
|
57
|
+
-----
|
|
58
|
+
- Alias keys are normalized via ``str(key).strip().casefold()``.
|
|
59
|
+
- Alias values should be member values or member names.
|
|
60
60
|
"""
|
|
61
61
|
return {}
|
|
62
62
|
|
|
@@ -80,7 +80,7 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
80
80
|
Parameters
|
|
81
81
|
----------
|
|
82
82
|
value : Self | str | object
|
|
83
|
-
An existing enum member or a
|
|
83
|
+
An existing enum member or a string-like value to normalize.
|
|
84
84
|
|
|
85
85
|
Returns
|
|
86
86
|
-------
|
|
@@ -95,10 +95,26 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
95
95
|
if isinstance(value, cls):
|
|
96
96
|
return value
|
|
97
97
|
try:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
98
|
+
raw = str(value).strip()
|
|
99
|
+
normalized = raw.casefold()
|
|
100
|
+
aliases = {
|
|
101
|
+
str(key).strip().casefold(): alias
|
|
102
|
+
for key, alias in cls.aliases().items()
|
|
103
|
+
}
|
|
104
|
+
resolved = aliases.get(normalized)
|
|
105
|
+
if resolved is None:
|
|
106
|
+
try:
|
|
107
|
+
return cls(normalized) # type: ignore[arg-type]
|
|
108
|
+
except (ValueError, TypeError):
|
|
109
|
+
return cls[raw] # type: ignore[index]
|
|
110
|
+
if isinstance(resolved, cls):
|
|
111
|
+
return resolved
|
|
112
|
+
try:
|
|
113
|
+
return cls(resolved) # type: ignore[arg-type]
|
|
114
|
+
except (ValueError, TypeError):
|
|
115
|
+
# Allow aliases to reference member names.
|
|
116
|
+
return cls[resolved] # type: ignore[index]
|
|
117
|
+
except (ValueError, TypeError, KeyError) as e:
|
|
102
118
|
allowed = ', '.join(cls.choices())
|
|
103
119
|
raise ValueError(
|
|
104
120
|
f'Invalid {cls.__name__} value: {value!r}. Allowed: {allowed}',
|
|
@@ -107,15 +123,15 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
107
123
|
@classmethod
|
|
108
124
|
def try_coerce(
|
|
109
125
|
cls,
|
|
110
|
-
value: object,
|
|
126
|
+
value: Self | str | object,
|
|
111
127
|
) -> Self | None:
|
|
112
128
|
"""
|
|
113
|
-
|
|
129
|
+
Attempt to coerce a value into the enum; return ``None`` on failure.
|
|
114
130
|
|
|
115
131
|
Parameters
|
|
116
132
|
----------
|
|
117
|
-
value : object
|
|
118
|
-
An existing enum member or a
|
|
133
|
+
value : Self | str | object
|
|
134
|
+
An existing enum member or a string-like value to normalize.
|
|
119
135
|
|
|
120
136
|
Returns
|
|
121
137
|
-------
|
|
@@ -124,153 +140,5 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
124
140
|
"""
|
|
125
141
|
try:
|
|
126
142
|
return cls.coerce(value)
|
|
127
|
-
except ValueError:
|
|
143
|
+
except (ValueError, TypeError, KeyError):
|
|
128
144
|
return None
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
# SECTION: ENUMS ============================================================ #
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
class AggregateName(CoercibleStrEnum):
|
|
135
|
-
"""Supported aggregations with helpers."""
|
|
136
|
-
|
|
137
|
-
# -- Constants -- #
|
|
138
|
-
|
|
139
|
-
AVG = 'avg'
|
|
140
|
-
COUNT = 'count'
|
|
141
|
-
MAX = 'max'
|
|
142
|
-
MIN = 'min'
|
|
143
|
-
SUM = 'sum'
|
|
144
|
-
|
|
145
|
-
# -- Class Methods -- #
|
|
146
|
-
|
|
147
|
-
@property
|
|
148
|
-
def func(self) -> AggregateFunc:
|
|
149
|
-
"""
|
|
150
|
-
Get the aggregation function for this aggregation type.
|
|
151
|
-
|
|
152
|
-
Returns
|
|
153
|
-
-------
|
|
154
|
-
AggregateFunc
|
|
155
|
-
The aggregation function corresponding to this aggregation type.
|
|
156
|
-
"""
|
|
157
|
-
if self is AggregateName.COUNT:
|
|
158
|
-
return lambda xs, n: n
|
|
159
|
-
if self is AggregateName.MAX:
|
|
160
|
-
return lambda xs, n: (max(xs) if xs else None)
|
|
161
|
-
if self is AggregateName.MIN:
|
|
162
|
-
return lambda xs, n: (min(xs) if xs else None)
|
|
163
|
-
if self is AggregateName.SUM:
|
|
164
|
-
return lambda xs, n: sum(xs)
|
|
165
|
-
|
|
166
|
-
# AVG
|
|
167
|
-
return lambda xs, n: (fmean(xs) if xs else 0.0)
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
class OperatorName(CoercibleStrEnum):
|
|
171
|
-
"""Supported comparison operators with helpers."""
|
|
172
|
-
|
|
173
|
-
# -- Constants -- #
|
|
174
|
-
|
|
175
|
-
EQ = 'eq'
|
|
176
|
-
NE = 'ne'
|
|
177
|
-
GT = 'gt'
|
|
178
|
-
GTE = 'gte'
|
|
179
|
-
LT = 'lt'
|
|
180
|
-
LTE = 'lte'
|
|
181
|
-
IN = 'in'
|
|
182
|
-
CONTAINS = 'contains'
|
|
183
|
-
|
|
184
|
-
# -- Getters -- #
|
|
185
|
-
|
|
186
|
-
@property
|
|
187
|
-
def func(self) -> OperatorFunc:
|
|
188
|
-
"""
|
|
189
|
-
Get the comparison function for this operator.
|
|
190
|
-
|
|
191
|
-
Returns
|
|
192
|
-
-------
|
|
193
|
-
OperatorFunc
|
|
194
|
-
The comparison function corresponding to this operator.
|
|
195
|
-
"""
|
|
196
|
-
match self:
|
|
197
|
-
case OperatorName.EQ:
|
|
198
|
-
return _op.eq
|
|
199
|
-
case OperatorName.NE:
|
|
200
|
-
return _op.ne
|
|
201
|
-
case OperatorName.GT:
|
|
202
|
-
return _op.gt
|
|
203
|
-
case OperatorName.GTE:
|
|
204
|
-
return _op.ge
|
|
205
|
-
case OperatorName.LT:
|
|
206
|
-
return _op.lt
|
|
207
|
-
case OperatorName.LTE:
|
|
208
|
-
return _op.le
|
|
209
|
-
case OperatorName.IN:
|
|
210
|
-
return lambda a, b: a in b
|
|
211
|
-
case OperatorName.CONTAINS:
|
|
212
|
-
return lambda a, b: b in a
|
|
213
|
-
|
|
214
|
-
# -- Class Methods -- #
|
|
215
|
-
|
|
216
|
-
@classmethod
|
|
217
|
-
def aliases(cls) -> StrStrMap:
|
|
218
|
-
"""
|
|
219
|
-
Return a mapping of common aliases for each enum member.
|
|
220
|
-
|
|
221
|
-
Returns
|
|
222
|
-
-------
|
|
223
|
-
StrStrMap
|
|
224
|
-
A mapping of alias names to their corresponding enum member names.
|
|
225
|
-
"""
|
|
226
|
-
return {
|
|
227
|
-
'==': 'eq',
|
|
228
|
-
'=': 'eq',
|
|
229
|
-
'!=': 'ne',
|
|
230
|
-
'<>': 'ne',
|
|
231
|
-
'>=': 'gte',
|
|
232
|
-
'≥': 'gte',
|
|
233
|
-
'<=': 'lte',
|
|
234
|
-
'≤': 'lte',
|
|
235
|
-
'>': 'gt',
|
|
236
|
-
'<': 'lt',
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
class PipelineStep(CoercibleStrEnum):
|
|
241
|
-
"""Pipeline step names as an enum for internal orchestration."""
|
|
242
|
-
|
|
243
|
-
# -- Constants -- #
|
|
244
|
-
|
|
245
|
-
FILTER = 'filter'
|
|
246
|
-
MAP = 'map'
|
|
247
|
-
SELECT = 'select'
|
|
248
|
-
SORT = 'sort'
|
|
249
|
-
AGGREGATE = 'aggregate'
|
|
250
|
-
|
|
251
|
-
# -- Getters -- #
|
|
252
|
-
|
|
253
|
-
@property
|
|
254
|
-
def order(self) -> int:
|
|
255
|
-
"""
|
|
256
|
-
Get the execution order of this pipeline step.
|
|
257
|
-
|
|
258
|
-
Returns
|
|
259
|
-
-------
|
|
260
|
-
int
|
|
261
|
-
The execution order of this pipeline step.
|
|
262
|
-
"""
|
|
263
|
-
return _PIPELINE_ORDER_INDEX[self]
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
# SECTION: INTERNAL CONSTANTS ============================================== #
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
# Precomputed order index for PipelineStep; avoids recomputing on each access.
|
|
270
|
-
_PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
|
|
271
|
-
PipelineStep.FILTER: 0,
|
|
272
|
-
PipelineStep.MAP: 1,
|
|
273
|
-
PipelineStep.SELECT: 2,
|
|
274
|
-
PipelineStep.SORT: 3,
|
|
275
|
-
PipelineStep.AGGREGATE: 4,
|
|
276
|
-
}
|
etlplus/file/README.md
CHANGED
|
@@ -4,7 +4,7 @@ Documentation for the `etlplus.file` subpackage: unified file format support and
|
|
|
4
4
|
and writing data files.
|
|
5
5
|
|
|
6
6
|
- Provides a consistent interface for reading and writing files in various formats
|
|
7
|
-
-
|
|
7
|
+
- Defines many formats in `FileFormat`; read/write support varies by format
|
|
8
8
|
- Includes helpers for inferring file format and compression from filenames, extensions, or MIME
|
|
9
9
|
types
|
|
10
10
|
- Exposes a `File` class with instance methods for reading and writing data
|
|
@@ -23,7 +23,8 @@ Back to project overview: see the top-level [README](../../README.md).
|
|
|
23
23
|
|
|
24
24
|
## Supported File Formats
|
|
25
25
|
|
|
26
|
-
The following formats are defined in `FileFormat` and supported for reading and writing:
|
|
26
|
+
The following formats are implemented for reading/writing (unless noted). For the full support
|
|
27
|
+
matrix across all `FileFormat` values, see the top-level [README](../../README.md).
|
|
27
28
|
|
|
28
29
|
| Format | Description |
|
|
29
30
|
|-----------|---------------------------------------------|
|
|
@@ -37,13 +38,14 @@ The following formats are defined in `FileFormat` and supported for reading and
|
|
|
37
38
|
| parquet | Apache Parquet columnar format |
|
|
38
39
|
| tsv | Tab-separated values text files |
|
|
39
40
|
| txt | Plain text files |
|
|
40
|
-
| xls | Microsoft Excel (legacy .xls) |
|
|
41
|
+
| xls | Microsoft Excel (legacy .xls; read-only) |
|
|
41
42
|
| xlsx | Microsoft Excel (modern .xlsx) |
|
|
42
43
|
| zip | ZIP-compressed files (see Compression) |
|
|
43
44
|
| xml | XML files |
|
|
44
45
|
| yaml | YAML files |
|
|
45
46
|
|
|
46
|
-
Compression formats (gz, zip) are also supported as wrappers for other formats.
|
|
47
|
+
Compression formats (gz, zip) are also supported as wrappers for other formats. Formats not listed
|
|
48
|
+
here are currently stubbed and will raise `NotImplementedError` on read/write.
|
|
47
49
|
|
|
48
50
|
## Inferring File Format and Compression
|
|
49
51
|
|
|
@@ -77,7 +79,7 @@ f.write(data)
|
|
|
77
79
|
```
|
|
78
80
|
|
|
79
81
|
- The `write()` method serializes and writes data in the appropriate format.
|
|
80
|
-
- Supports
|
|
82
|
+
- Supports the implemented formats listed above.
|
|
81
83
|
|
|
82
84
|
## File Instance Methods
|
|
83
85
|
|
etlplus/file/accdb.py
CHANGED
etlplus/file/arrow.py
CHANGED
etlplus/file/bson.py
CHANGED
etlplus/file/cbor.py
CHANGED
etlplus/file/cfg.py
CHANGED
etlplus/file/conf.py
CHANGED
etlplus/file/dat.py
CHANGED
etlplus/file/dta.py
CHANGED
etlplus/file/duckdb.py
CHANGED
etlplus/file/enums.py
CHANGED
etlplus/file/fwf.py
CHANGED
etlplus/file/hbs.py
CHANGED
etlplus/file/hdf5.py
CHANGED
etlplus/file/ini.py
CHANGED
etlplus/file/ion.py
CHANGED
etlplus/file/jinja2.py
CHANGED
etlplus/file/log.py
CHANGED
etlplus/file/mat.py
CHANGED
etlplus/file/mdb.py
CHANGED
etlplus/file/msgpack.py
CHANGED
etlplus/file/mustache.py
CHANGED
etlplus/file/nc.py
CHANGED
etlplus/file/numbers.py
CHANGED
etlplus/file/ods.py
CHANGED
etlplus/file/pb.py
CHANGED
etlplus/file/pbf.py
CHANGED
etlplus/file/properties.py
CHANGED
etlplus/file/proto.py
CHANGED
etlplus/file/psv.py
CHANGED
etlplus/file/rda.py
CHANGED
etlplus/file/rds.py
CHANGED
etlplus/file/sas7bdat.py
CHANGED