etlplus 0.9.0__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- etlplus/README.md +37 -0
- etlplus/__init__.py +1 -26
- etlplus/api/README.md +51 -3
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +39 -28
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +96 -6
- etlplus/{run_helpers.py → api/utils.py} +209 -153
- etlplus/cli/README.md +40 -0
- etlplus/cli/commands.py +94 -61
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +40 -12
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +4 -7
- etlplus/database/README.md +48 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/database/schema.py +1 -1
- etlplus/enums.py +1 -107
- etlplus/file/README.md +105 -0
- etlplus/file/__init__.py +25 -0
- etlplus/file/_imports.py +141 -0
- etlplus/file/_io.py +160 -0
- etlplus/file/accdb.py +78 -0
- etlplus/file/arrow.py +78 -0
- etlplus/file/avro.py +176 -0
- etlplus/file/bson.py +77 -0
- etlplus/file/cbor.py +78 -0
- etlplus/file/cfg.py +79 -0
- etlplus/file/conf.py +80 -0
- etlplus/file/core.py +322 -0
- etlplus/file/csv.py +79 -0
- etlplus/file/dat.py +78 -0
- etlplus/file/dta.py +77 -0
- etlplus/file/duckdb.py +78 -0
- etlplus/file/enums.py +343 -0
- etlplus/file/feather.py +111 -0
- etlplus/file/fwf.py +77 -0
- etlplus/file/gz.py +123 -0
- etlplus/file/hbs.py +78 -0
- etlplus/file/hdf5.py +78 -0
- etlplus/file/ini.py +79 -0
- etlplus/file/ion.py +78 -0
- etlplus/file/jinja2.py +78 -0
- etlplus/file/json.py +98 -0
- etlplus/file/log.py +78 -0
- etlplus/file/mat.py +78 -0
- etlplus/file/mdb.py +78 -0
- etlplus/file/msgpack.py +78 -0
- etlplus/file/mustache.py +78 -0
- etlplus/file/nc.py +78 -0
- etlplus/file/ndjson.py +108 -0
- etlplus/file/numbers.py +75 -0
- etlplus/file/ods.py +79 -0
- etlplus/file/orc.py +111 -0
- etlplus/file/parquet.py +113 -0
- etlplus/file/pb.py +78 -0
- etlplus/file/pbf.py +77 -0
- etlplus/file/properties.py +78 -0
- etlplus/file/proto.py +77 -0
- etlplus/file/psv.py +79 -0
- etlplus/file/rda.py +78 -0
- etlplus/file/rds.py +78 -0
- etlplus/file/sas7bdat.py +78 -0
- etlplus/file/sav.py +77 -0
- etlplus/file/sqlite.py +78 -0
- etlplus/file/stub.py +84 -0
- etlplus/file/sylk.py +77 -0
- etlplus/file/tab.py +81 -0
- etlplus/file/toml.py +78 -0
- etlplus/file/tsv.py +80 -0
- etlplus/file/txt.py +102 -0
- etlplus/file/vm.py +78 -0
- etlplus/file/wks.py +77 -0
- etlplus/file/xls.py +88 -0
- etlplus/file/xlsm.py +79 -0
- etlplus/file/xlsx.py +99 -0
- etlplus/file/xml.py +185 -0
- etlplus/file/xpt.py +78 -0
- etlplus/file/yaml.py +95 -0
- etlplus/file/zip.py +175 -0
- etlplus/file/zsav.py +77 -0
- etlplus/ops/README.md +50 -0
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +81 -99
- etlplus/{load.py → ops/load.py} +78 -101
- etlplus/{run.py → ops/run.py} +159 -127
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +53 -17
- etlplus/{validate.py → ops/validate.py} +22 -12
- etlplus/templates/README.md +46 -0
- etlplus/types.py +5 -4
- etlplus/utils.py +136 -2
- etlplus/workflow/README.md +52 -0
- etlplus/{config → workflow}/__init__.py +10 -23
- etlplus/{config → workflow}/connector.py +58 -44
- etlplus/workflow/dag.py +105 -0
- etlplus/{config → workflow}/jobs.py +105 -32
- etlplus/{config → workflow}/pipeline.py +59 -51
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/METADATA +210 -17
- etlplus-0.9.2.dist-info/RECORD +134 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/WHEEL +1 -1
- etlplus/config/types.py +0 -204
- etlplus/config/utils.py +0 -120
- etlplus/file.py +0 -657
- etlplus/validation/__init__.py +0 -44
- etlplus-0.9.0.dist-info/RECORD +0 -65
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/top_level.txt +0 -0
etlplus/file/xpt.py
ADDED
```diff
@@ -0,0 +1,78 @@
+"""
+:mod:`etlplus.file.xpt` module.
+
+Helpers for reading/writing SAS Transport (XPT) files.
+
+Notes
+-----
+- A SAS Transport (XPT) file is a standardized file format used to transfer
+  SAS datasets between different systems.
+- Common cases:
+    - Sharing datasets between different SAS installations.
+    - Archiving datasets in a platform-independent format.
+    - Importing/exporting data to/from statistical software that supports XPT.
+- Rule of thumb:
+    - If you need to work with XPT files, use this module for reading
+      and writing.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ..types import JSONData
+from ..types import JSONList
+from . import stub
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    'read',
+    'write',
+]
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def read(
+    path: Path,
+) -> JSONList:
+    """
+    Read XPT content from ``path``.
+
+    Parameters
+    ----------
+    path : Path
+        Path to the XPT file on disk.
+
+    Returns
+    -------
+    JSONList
+        The list of dictionaries read from the XPT file.
+    """
+    return stub.read(path, format_name='XPT')
+
+
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` to XPT file at ``path`` and return record count.
+
+    Parameters
+    ----------
+    path : Path
+        Path to the XPT file on disk.
+    data : JSONData
+        Data to write as XPT file. Should be a list of dictionaries or a
+        single dictionary.
+
+    Returns
+    -------
+    int
+        The number of rows written to the XPT file.
+    """
+    return stub.write(path, data, format_name='XPT')
```
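The `read`/`write` pair above simply forwards to `etlplus.file.stub` with a format label, a pattern shared by the other placeholder formats added in this release (`zsav` below, `sav`, `dta`, and similar). A minimal usage sketch, assuming a `data.xpt` file exists; note the diff does not show what `stub.read`/`stub.write` actually do for formats without a concrete backend, so whether these calls parse data or raise is an assumption to verify:

```python
from pathlib import Path

from etlplus.file import xpt  # stub-backed module added in 0.9.2

# Read rows (JSONList, i.e. a list of dicts) from an XPT file, then
# write them back out; write() returns the number of rows written.
rows = xpt.read(Path('data.xpt'))
written = xpt.write(Path('data_copy.xpt'), rows)
print(f'copied {written} rows')
```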
etlplus/file/yaml.py
ADDED
```diff
@@ -0,0 +1,95 @@
+"""
+:mod:`etlplus.file.yaml` module.
+
+Helpers for reading/writing YAML Ain't Markup Language (YAML) files.
+
+Notes
+-----
+- A YAML file is a human-readable data serialization format.
+- Common cases:
+    - Configuration files.
+    - Data exchange between languages with different data structures.
+    - Complex data storage.
+- Rule of thumb:
+    - If the file follows the YAML specification, use this module for
+      reading and writing.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ..types import JSONData
+from ..utils import count_records
+from ._imports import get_yaml
+from ._io import coerce_record_payload
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    'read',
+    'write',
+]
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def read(
+    path: Path,
+) -> JSONData:
+    """
+    Read YAML content from ``path``.
+
+    Validates that the YAML root is a dict or a list of dicts.
+
+    Parameters
+    ----------
+    path : Path
+        Path to the YAML file on disk.
+
+    Returns
+    -------
+    JSONData
+        The structured data read from the YAML file.
+
+    Raises
+    ------
+    TypeError
+        If the YAML root is not an object or an array of objects.
+    """
+    with path.open('r', encoding='utf-8') as handle:
+        loaded = get_yaml().safe_load(handle)
+
+    return coerce_record_payload(loaded, format_name='YAML')
+
+
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` as YAML to ``path`` and return record count.
+
+    Parameters
+    ----------
+    path : Path
+        Path to the YAML file on disk.
+    data : JSONData
+        Data to write as YAML.
+
+    Returns
+    -------
+    int
+        The number of records written.
+    """
+    with path.open('w', encoding='utf-8') as handle:
+        get_yaml().safe_dump(
+            data,
+            handle,
+            sort_keys=False,
+            allow_unicode=True,
+            default_flow_style=False,
+        )
+    return count_records(data)
```
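Unlike the stub-backed formats, `yaml` is fully wired up through the lazy `get_yaml()` import and `coerce_record_payload` validation, so a round trip works end to end. A short sketch, assuming PyYAML is available and that `count_records` counts one record per dict (a plausible reading of the name, not confirmed by this diff):

```python
from pathlib import Path

from etlplus.file import yaml as yaml_file

records = [{'id': 1, 'name': 'Ada'}, {'id': 2, 'name': 'Grace'}]

# write() dumps with sort_keys=False and returns the record count.
count = yaml_file.write(Path('records.yaml'), records)

# read() validates that the YAML root is a dict or a list of dicts.
assert yaml_file.read(Path('records.yaml')) == records
```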
etlplus/file/zip.py
ADDED
```diff
@@ -0,0 +1,175 @@
+"""
+:mod:`etlplus.file.zip` module.
+
+Helpers for reading/writing ZIP files.
+"""
+
+from __future__ import annotations
+
+import tempfile
+import zipfile
+from pathlib import Path
+
+from ..types import JSONData
+from ..types import JSONDict
+from .enums import CompressionFormat
+from .enums import FileFormat
+from .enums import infer_file_format_and_compression
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    'read',
+    'write',
+]
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _resolve_format(
+    filename: str,
+) -> FileFormat:
+    """
+    Resolve the inner file format from a filename.
+
+    Parameters
+    ----------
+    filename : str
+        The name of the file inside the ZIP archive.
+
+    Returns
+    -------
+    FileFormat
+        The inferred inner file format.
+
+    Raises
+    ------
+    ValueError
+        If the file format cannot be inferred from the filename.
+    """
+    fmt, compression = infer_file_format_and_compression(filename)
+    if compression is not None and compression is not CompressionFormat.ZIP:
+        raise ValueError(f'Unexpected compression in archive: {filename}')
+    if fmt is None:
+        raise ValueError(
+            f'Cannot infer file format from compressed file {filename!r}',
+        )
+    return fmt
+
+
+def _extract_payload(
+    entry: zipfile.ZipInfo,
+    archive: zipfile.ZipFile,
+) -> bytes:
+    """
+    Extract an archive entry into memory.
+
+    Parameters
+    ----------
+    entry : zipfile.ZipInfo
+        The ZIP archive entry.
+    archive : zipfile.ZipFile
+        The opened ZIP archive.
+
+    Returns
+    -------
+    bytes
+        The raw payload.
+    """
+    with archive.open(entry, 'r') as handle:
+        return handle.read()
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def read(
+    path: Path,
+) -> JSONData:
+    """
+    Read ZIP content from ``path`` and parse the inner payload(s).
+
+    Parameters
+    ----------
+    path : Path
+        Path to the ZIP file on disk.
+
+    Returns
+    -------
+    JSONData
+        Parsed payload.
+
+    Raises
+    ------
+    ValueError
+        If the ZIP archive is empty.
+    """
+    with zipfile.ZipFile(path, 'r') as archive:
+        entries = [entry for entry in archive.infolist() if not entry.is_dir()]
+        if not entries:
+            raise ValueError(f'ZIP archive is empty: {path}')
+
+        if len(entries) == 1:
+            entry = entries[0]
+            fmt = _resolve_format(entry.filename)
+            payload = _extract_payload(entry, archive)
+            with tempfile.TemporaryDirectory() as tmpdir:
+                tmp_path = Path(tmpdir) / Path(entry.filename).name
+                tmp_path.write_bytes(payload)
+                from .core import File
+
+                return File(tmp_path, fmt).read()
+
+        results: JSONDict = {}
+        for entry in entries:
+            fmt = _resolve_format(entry.filename)
+            payload = _extract_payload(entry, archive)
+            with tempfile.TemporaryDirectory() as tmpdir:
+                tmp_path = Path(tmpdir) / Path(entry.filename).name
+                tmp_path.write_bytes(payload)
+                from .core import File
+
+                results[entry.filename] = File(tmp_path, fmt).read()
+        return results
+
+
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` to ZIP at ``path`` and return record count.
+
+    Parameters
+    ----------
+    path : Path
+        Path to the ZIP file on disk.
+    data : JSONData
+        Data to write.
+
+    Returns
+    -------
+    int
+        Number of records written.
+    """
+    fmt = _resolve_format(path.name)
+    inner_name = Path(path.name).with_suffix('').name
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmp_path = Path(tmpdir) / inner_name
+        from .core import File
+
+        count = File(tmp_path, fmt).write(data)
+        payload = tmp_path.read_bytes()
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with zipfile.ZipFile(
+        path,
+        'w',
+        compression=zipfile.ZIP_DEFLATED,
+    ) as archive:
+        archive.writestr(inner_name, payload)
+
+    return count
```
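Two details of `zip.write` are worth calling out: the inner format is resolved from the archive's own filename, and the inner entry name is the archive name minus its final suffix, so `events.json.zip` produces an entry `events.json` written by the JSON handler. A sketch, assuming JSON is among the formats `infer_file_format_and_compression` recognizes (likely, given `etlplus/file/json.py` above):

```python
from pathlib import Path

from etlplus.file import zip as zip_file

rows = [{'id': 1}, {'id': 2}]

# Inner format inferred from the archive name: 'events.json.zip' -> JSON
# payload stored as the entry 'events.json'.
zip_file.write(Path('events.json.zip'), rows)

# A single-entry archive returns the parsed payload directly; a
# multi-entry archive returns {entry_filename: parsed_payload, ...}.
payload = zip_file.read(Path('events.json.zip'))
```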
etlplus/file/zsav.py
ADDED
```diff
@@ -0,0 +1,77 @@
+"""
+:mod:`etlplus.file.zsav` module.
+
+Helpers for reading/writing compressed SPSS (ZSAV) data files.
+
+Notes
+-----
+- A ZSAV file is a compressed binary file format used by SPSS to store
+  datasets, including variables, labels, and data types.
+- Common cases:
+    - Reading compressed data for analysis in Python.
+    - Writing processed data back to compressed SPSS format.
+- Rule of thumb:
+    - If you need to work with compressed SPSS data files, use this module for
+      reading and writing.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ..types import JSONData
+from ..types import JSONList
+from . import stub
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    'read',
+    'write',
+]
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def read(
+    path: Path,
+) -> JSONList:
+    """
+    Read ZSAV content from ``path``.
+
+    Parameters
+    ----------
+    path : Path
+        Path to the ZSAV file on disk.
+
+    Returns
+    -------
+    JSONList
+        The list of dictionaries read from the ZSAV file.
+    """
+    return stub.read(path, format_name='ZSAV')
+
+
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` to ZSAV file at ``path`` and return record count.
+
+    Parameters
+    ----------
+    path : Path
+        Path to the ZSAV file on disk.
+    data : JSONData
+        Data to write as ZSAV file. Should be a list of dictionaries or a
+        single dictionary.
+
+    Returns
+    -------
+    int
+        The number of rows written to the ZSAV file.
+    """
+    return stub.write(path, data, format_name='ZSAV')
```
etlplus/ops/README.md
ADDED
````diff
@@ -0,0 +1,50 @@
+# etlplus.ops subpackage
+
+Documentation for the `etlplus.ops` subpackage: data validation utilities and helpers.
+
+- Provides flexible data validation for ETL pipelines
+- Supports type checking, required fields, and custom rules
+- Includes utilities for rule definition and validation logic
+
+Back to project overview: see the top-level [README](../../README.md).
+
+- [etlplus.ops subpackage](#etlplusops-subpackage)
+  - [Validation Features](#validation-features)
+  - [Defining Validation Rules](#defining-validation-rules)
+  - [Example: Validating Data](#example-validating-data)
+  - [See Also](#see-also)
+
+## Validation Features
+
+- Type checking (string, number, boolean, etc.)
+- Required/optional fields
+- Enum and pattern validation
+- Custom rule support
+
+## Defining Validation Rules
+
+Validation rules are defined as dictionaries specifying field types, requirements, and constraints:
+
+```python
+rules = {
+    "name": {"type": "string", "required": True},
+    "age": {"type": "number", "min": 0, "max": 120},
+}
+```
+
+## Example: Validating Data
+
+```python
+from etlplus.ops import validate
+
+result = validate({"name": "Alice", "age": 30}, rules)
+if result["valid"]:
+    print("Data is valid!")
+else:
+    print(result["errors"])
+```
+
+## See Also
+
+- Top-level CLI and library usage in the main [README](../../README.md)
+- Validation utilities in [utils.py](utils.py)
````
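The README's rule example only exercises type, required, min, and max, while the feature list also claims enum and pattern validation. A hypothetical extension of the rules dict; the `enum` and `pattern` keys are assumptions inferred from the feature list, not confirmed by this diff:

```python
# "enum" and "pattern" keys are assumed from the feature list above;
# verify the exact rule schema against the etlplus documentation.
rules = {
    "status": {"type": "string", "required": True,
               "enum": ["active", "inactive"]},
    "email": {"type": "string", "pattern": r"^[^@\s]+@[^@\s]+$"},
}
```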
etlplus/ops/__init__.py
ADDED
```diff
@@ -0,0 +1,61 @@
+"""
+:mod:`etlplus.ops` package.
+
+Data operations helpers.
+
+Importing :mod:`etlplus.ops` exposes the coarse-grained helpers most users care
+about: ``extract``, ``transform``, ``load``, ``validate``, ``run``, and
+``run_pipeline``. Each helper delegates to the richer modules under
+``etlplus.ops.*`` while presenting a compact public API surface. Conditional
+validation orchestration is available via
+:func:`etlplus.ops.utils.maybe_validate`. The legacy compatibility module
+:mod:`etlplus.validation` is deprecated in favor of this package.
+
+Examples
+--------
+>>> from etlplus.ops import extract, transform
+>>> raw = extract('file', 'input.json')
+>>> curated = transform(raw, {'select': ['id', 'name']})
+
+>>> from etlplus.ops.utils import maybe_validate
+>>> payload = {'name': 'Alice'}
+>>> rules = {'required': ['name']}
+>>> def validator(data, config):
+...     missing = [field for field in config['required'] if field not in data]
+...     return {'valid': not missing, 'errors': missing, 'data': data}
+>>> maybe_validate(
...     payload,
+...     when='both',
+...     enabled=True,
+...     rules=rules,
+...     phase='before_transform',
+...     severity='warn',
+...     validate_fn=validator,
+...     print_json_fn=lambda message: message,
+... )
+{'name': 'Alice'}
+
+See Also
+--------
+:mod:`etlplus.ops.run`
+:mod:`etlplus.ops.utils`
+"""
+
+from .extract import extract
+from .load import load
+from .run import run
+from .run import run_pipeline
+from .transform import transform
+from .validate import validate
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    'extract',
+    'load',
+    'run',
+    'run_pipeline',
+    'transform',
+    'validate',
+]
```
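Putting the re-exports together, the compact surface reads like the sketch below. The `extract` and `transform` calls mirror the docstring's doctest; the `load` call is hypothetical, since this diff does not show the signature in `etlplus/ops/load.py`:

```python
from etlplus.ops import extract, load, transform

raw = extract('file', 'input.json')                   # as in the docstring
curated = transform(raw, {'select': ['id', 'name']})  # as in the docstring

# Hypothetical: argument order assumed to mirror extract(); check
# etlplus/ops/load.py before relying on this.
load(curated, 'file', 'output.json')
```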