etlplus 0.11.5__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +37 -0
- etlplus/api/README.md +20 -3
- etlplus/cli/README.md +40 -0
- etlplus/cli/handlers.py +1 -1
- etlplus/config/README.md +52 -0
- etlplus/database/README.md +48 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +1 -1
- etlplus/database/schema.py +1 -1
- etlplus/file/README.md +105 -0
- etlplus/file/avro.py +198 -0
- etlplus/file/core.py +105 -105
- etlplus/file/csv.py +12 -3
- etlplus/file/feather.py +144 -0
- etlplus/file/gz.py +123 -0
- etlplus/file/json.py +13 -2
- etlplus/file/ndjson.py +109 -0
- etlplus/file/orc.py +142 -0
- etlplus/file/parquet.py +146 -0
- etlplus/file/tsv.py +91 -0
- etlplus/file/txt.py +99 -0
- etlplus/file/xls.py +132 -0
- etlplus/file/xlsx.py +142 -0
- etlplus/file/xml.py +12 -3
- etlplus/file/yaml.py +13 -2
- etlplus/file/zip.py +175 -0
- etlplus/templates/README.md +46 -0
- etlplus/validation/README.md +50 -0
- {etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/METADATA +58 -14
- {etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/RECORD +34 -16
- {etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/WHEEL +0 -0
- {etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/entry_points.txt +0 -0
- {etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/top_level.txt +0 -0
etlplus/file/xlsx.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.xlsx` module.
|
|
3
|
+
|
|
4
|
+
Helpers for reading/writing Excel XLSX files.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
from typing import cast
|
|
12
|
+
|
|
13
|
+
from ..types import JSONData
|
|
14
|
+
from ..types import JSONDict
|
|
15
|
+
from ..types import JSONList
|
|
16
|
+
|
|
17
|
+
# SECTION: EXPORTS ========================================================== #
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
'read',
|
|
22
|
+
'write',
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
_PANDAS_CACHE: dict[str, Any] = {}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _get_pandas() -> Any:
|
|
36
|
+
"""
|
|
37
|
+
Return the pandas module, importing it on first use.
|
|
38
|
+
|
|
39
|
+
Raises an informative ImportError if the optional dependency is missing.
|
|
40
|
+
"""
|
|
41
|
+
mod = _PANDAS_CACHE.get('mod')
|
|
42
|
+
if mod is not None: # pragma: no cover - tiny branch
|
|
43
|
+
return mod
|
|
44
|
+
try:
|
|
45
|
+
_pd = __import__('pandas') # type: ignore[assignment]
|
|
46
|
+
except ImportError as e: # pragma: no cover
|
|
47
|
+
raise ImportError(
|
|
48
|
+
'XLSX support requires optional dependency "pandas".\n'
|
|
49
|
+
'Install with: pip install pandas',
|
|
50
|
+
) from e
|
|
51
|
+
_PANDAS_CACHE['mod'] = _pd
|
|
52
|
+
|
|
53
|
+
return _pd
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _normalize_records(data: JSONData) -> JSONList:
    """
    Coerce a JSON payload into a list of record dictionaries.

    A single mapping becomes a one-element list; a list is returned
    as-is after checking that every element is a dict.

    Raises
    ------
    TypeError
        If ``data`` is a list containing non-dict items.
    """
    if not isinstance(data, list):
        return [cast(JSONDict, data)]
    if any(not isinstance(item, dict) for item in data):
        raise TypeError('XLSX payloads must contain only objects (dicts)')
    return cast(JSONList, data)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def read(
    path: Path,
) -> JSONList:
    """
    Read XLSX content from ``path``.

    Parameters
    ----------
    path : Path
        Path to the XLSX file on disk.

    Returns
    -------
    JSONList
        The list of row dictionaries read from the XLSX file.

    Raises
    ------
    ImportError
        If optional dependencies for XLSX support are missing.
    """
    pd = _get_pandas()
    try:
        frame = pd.read_excel(path)
    except ImportError as e:  # pragma: no cover
        raise ImportError(
            'XLSX support requires optional dependency "openpyxl".\n'
            'Install with: pip install openpyxl',
        ) from e
    else:
        # One dict per worksheet row, keyed by column header.
        records = frame.to_dict(orient='records')
        return cast(JSONList, records)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Write ``data`` to XLSX at ``path`` and return record count.

    Parameters
    ----------
    path : Path
        Path to the XLSX file on disk.
    data : JSONData
        Data to write; a single dict or a list of dicts.

    Returns
    -------
    int
        Number of records written.

    Raises
    ------
    ImportError
        If optional dependencies for XLSX support are missing.
    """
    records = _normalize_records(data)
    count = len(records)
    if count == 0:
        # Nothing to serialize; do not create an empty workbook.
        return 0

    pd = _get_pandas()
    path.parent.mkdir(parents=True, exist_ok=True)
    frame = pd.DataFrame.from_records(records)
    try:
        frame.to_excel(path, index=False)
    except ImportError as e:  # pragma: no cover
        raise ImportError(
            'XLSX support requires optional dependency "openpyxl".\n'
            'Install with: pip install openpyxl',
        ) from e
    return count
|
etlplus/file/xml.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.xml` module.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
Helpers for reading/writing XML files.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
@@ -14,6 +14,15 @@ from ..types import JSONData
|
|
|
14
14
|
from ..types import JSONDict
|
|
15
15
|
from ..utils import count_records
|
|
16
16
|
|
|
17
|
+
# SECTION: EXPORTS ========================================================== #
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
'read',
|
|
22
|
+
'write',
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
|
|
17
26
|
# SECTION: CONSTANTS ======================================================== #
|
|
18
27
|
|
|
19
28
|
|
|
@@ -117,7 +126,7 @@ def read(
|
|
|
117
126
|
path: Path,
|
|
118
127
|
) -> JSONDict:
|
|
119
128
|
"""
|
|
120
|
-
|
|
129
|
+
Read XML content from ``path``.
|
|
121
130
|
|
|
122
131
|
Parameters
|
|
123
132
|
----------
|
|
@@ -137,7 +146,7 @@ def read(
|
|
|
137
146
|
|
|
138
147
|
def write(path: Path, data: JSONData, *, root_tag: str) -> int:
|
|
139
148
|
"""
|
|
140
|
-
Write ``data``
|
|
149
|
+
Write ``data`` to XML at ``path`` and return record count.
|
|
141
150
|
|
|
142
151
|
Parameters
|
|
143
152
|
----------
|
etlplus/file/yaml.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.file.yaml` module.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
Helpers for reading/writing YAML files.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
@@ -15,6 +15,15 @@ from ..types import JSONDict
|
|
|
15
15
|
from ..types import JSONList
|
|
16
16
|
from ..utils import count_records
|
|
17
17
|
|
|
18
|
+
# SECTION: EXPORTS ========================================================== #
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
'read',
|
|
23
|
+
'write',
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
|
|
18
27
|
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
19
28
|
|
|
20
29
|
|
|
@@ -59,7 +68,9 @@ def read(
|
|
|
59
68
|
path: Path,
|
|
60
69
|
) -> JSONData:
|
|
61
70
|
"""
|
|
62
|
-
|
|
71
|
+
Read YAML content from ``path``.
|
|
72
|
+
|
|
73
|
+
Validates that the YAML root is a dict or a list of dicts.
|
|
63
74
|
|
|
64
75
|
Parameters
|
|
65
76
|
----------
|
etlplus/file/zip.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.zip` module.
|
|
3
|
+
|
|
4
|
+
Helpers for reading/writing ZIP files.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import tempfile
|
|
10
|
+
import zipfile
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from ..types import JSONData
|
|
14
|
+
from ..types import JSONDict
|
|
15
|
+
from .enums import CompressionFormat
|
|
16
|
+
from .enums import FileFormat
|
|
17
|
+
from .enums import infer_file_format_and_compression
|
|
18
|
+
|
|
19
|
+
# SECTION: EXPORTS ========================================================== #
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
'read',
|
|
24
|
+
'write',
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _resolve_format(
    filename: str,
) -> FileFormat:
    """
    Resolve the inner file format from a filename.

    Parameters
    ----------
    filename : str
        The name of the file inside the ZIP archive.

    Returns
    -------
    FileFormat
        The inferred inner file format.

    Raises
    ------
    ValueError
        If the filename carries a compression suffix other than ZIP, or
        if the file format cannot be inferred from the filename.
    """
    fmt, compression = infer_file_format_and_compression(filename)
    if compression is not None and compression is not CompressionFormat.ZIP:
        # Bug fix: the original raised a placeholder-free f-string
        # ("... (unknown)"), hiding the offending details. Interpolate
        # the compression and filename so the error is actionable.
        raise ValueError(
            f'Unexpected compression {compression!r} in archive '
            f'member {filename!r}',
        )
    if fmt is None:
        raise ValueError(
            f'Cannot infer file format from compressed file {filename!r}',
        )
    return fmt
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _extract_payload(
|
|
63
|
+
entry: zipfile.ZipInfo,
|
|
64
|
+
archive: zipfile.ZipFile,
|
|
65
|
+
) -> bytes:
|
|
66
|
+
"""
|
|
67
|
+
Extract an archive entry into memory.
|
|
68
|
+
|
|
69
|
+
Parameters
|
|
70
|
+
----------
|
|
71
|
+
entry : zipfile.ZipInfo
|
|
72
|
+
The ZIP archive entry.
|
|
73
|
+
archive : zipfile.ZipFile
|
|
74
|
+
The opened ZIP archive.
|
|
75
|
+
|
|
76
|
+
Returns
|
|
77
|
+
-------
|
|
78
|
+
bytes
|
|
79
|
+
The raw payload.
|
|
80
|
+
"""
|
|
81
|
+
with archive.open(entry, 'r') as handle:
|
|
82
|
+
return handle.read()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def read(
    path: Path,
) -> JSONData:
    """
    Read ZIP content from ``path`` and parse the inner payload(s).

    A single-member archive returns that member's parsed payload
    directly; a multi-member archive returns a dict mapping member
    filenames to their parsed payloads.

    Parameters
    ----------
    path : Path
        Path to the ZIP file on disk.

    Returns
    -------
    JSONData
        Parsed payload, or a mapping of member filename to payload.

    Raises
    ------
    ValueError
        If the ZIP archive is empty.
    """
    # Imported lazily to avoid a circular import with .core, but hoisted
    # out of the per-entry loop so it executes once per call.
    from .core import File

    def _parse_entry(
        entry: zipfile.ZipInfo,
        archive: zipfile.ZipFile,
    ) -> JSONData:
        # Materialize the member as a temp file and delegate to the
        # format-specific reader inferred from its filename.
        fmt = _resolve_format(entry.filename)
        payload = _extract_payload(entry, archive)
        with tempfile.TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir) / Path(entry.filename).name
            tmp_path.write_bytes(payload)
            return File(tmp_path, fmt).read()

    with zipfile.ZipFile(path, 'r') as archive:
        entries = [entry for entry in archive.infolist() if not entry.is_dir()]
        if not entries:
            raise ValueError(f'ZIP archive is empty: {path}')

        if len(entries) == 1:
            return _parse_entry(entries[0], archive)

        results: JSONDict = {
            entry.filename: _parse_entry(entry, archive)
            for entry in entries
        }
        return results
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Write ``data`` to ZIP at ``path`` and return record count.

    The inner member name is derived from ``path`` by stripping the
    final (ZIP) suffix; its remaining extension selects the
    serialization format.

    Parameters
    ----------
    path : Path
        Path to the ZIP file on disk.
    data : JSONData
        Data to write.

    Returns
    -------
    int
        Number of records written.
    """
    inner_fmt = _resolve_format(path.name)
    member_name = Path(path.name).with_suffix('').name

    with tempfile.TemporaryDirectory() as tmpdir:
        from .core import File

        staging_path = Path(tmpdir) / member_name
        written = File(staging_path, inner_fmt).write(data)
        payload = staging_path.read_bytes()

        path.parent.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(
            path,
            'w',
            compression=zipfile.ZIP_DEFLATED,
        ) as archive:
            archive.writestr(member_name, payload)

    return written
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# etlplus.templates subpackage
|
|
2
|
+
|
|
3
|
+
Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpers.
|
|
4
|
+
|
|
5
|
+
- Provides Jinja2 templates for DDL and view generation
|
|
6
|
+
- Supports templated SQL for multiple database backends
|
|
7
|
+
- Includes helpers for rendering templates with schema metadata
|
|
8
|
+
|
|
9
|
+
Back to project overview: see the top-level [README](../../README.md).
|
|
10
|
+
|
|
11
|
+
- [etlplus.templates subpackage](#etlplustemplates-subpackage)
|
|
12
|
+
- [Available Templates](#available-templates)
|
|
13
|
+
- [Rendering Templates](#rendering-templates)
|
|
14
|
+
- [Example: Rendering a DDL Template](#example-rendering-a-ddl-template)
|
|
15
|
+
- [See Also](#see-also)
|
|
16
|
+
|
|
17
|
+
## Available Templates
|
|
18
|
+
|
|
19
|
+
- `ddl.sql.j2`: Generic DDL (CREATE TABLE) template
|
|
20
|
+
- `view.sql.j2`: Generic view creation template
|
|
21
|
+
|
|
22
|
+
## Rendering Templates
|
|
23
|
+
|
|
24
|
+
Use the helpers to render templates with your schema or table metadata:
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from etlplus.templates import render_template
|
|
28
|
+
|
|
29
|
+
sql = render_template("ddl.sql.j2", schema=my_schema)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Example: Rendering a DDL Template
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from etlplus.templates import render_template
|
|
36
|
+
|
|
37
|
+
schema = {"name": "users", "columns": [ ... ]}
|
|
38
|
+
sql = render_template("ddl.sql.j2", schema=schema)
|
|
39
|
+
print(sql)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## See Also
|
|
43
|
+
|
|
44
|
+
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
45
|
+
- DDL template in [ddl.sql.j2](ddl.sql.j2)
|
|
46
|
+
- View template in [view.sql.j2](view.sql.j2)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# etlplus.validation subpackage
|
|
2
|
+
|
|
3
|
+
Documentation for the `etlplus.validation` subpackage: data validation utilities and helpers.
|
|
4
|
+
|
|
5
|
+
- Provides flexible data validation for ETL pipelines
|
|
6
|
+
- Supports type checking, required fields, and custom rules
|
|
7
|
+
- Includes utilities for rule definition and validation logic
|
|
8
|
+
|
|
9
|
+
Back to project overview: see the top-level [README](../../README.md).
|
|
10
|
+
|
|
11
|
+
- [etlplus.validation subpackage](#etlplusvalidation-subpackage)
|
|
12
|
+
- [Validation Features](#validation-features)
|
|
13
|
+
- [Defining Validation Rules](#defining-validation-rules)
|
|
14
|
+
- [Example: Validating Data](#example-validating-data)
|
|
15
|
+
- [See Also](#see-also)
|
|
16
|
+
|
|
17
|
+
## Validation Features
|
|
18
|
+
|
|
19
|
+
- Type checking (string, number, boolean, etc.)
|
|
20
|
+
- Required/optional fields
|
|
21
|
+
- Enum and pattern validation
|
|
22
|
+
- Custom rule support
|
|
23
|
+
|
|
24
|
+
## Defining Validation Rules
|
|
25
|
+
|
|
26
|
+
Validation rules are defined as dictionaries specifying field types, requirements, and constraints:
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
rules = {
|
|
30
|
+
"name": {"type": "string", "required": True},
|
|
31
|
+
"age": {"type": "number", "min": 0, "max": 120},
|
|
32
|
+
}
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Example: Validating Data
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from etlplus.validation import validate
|
|
39
|
+
|
|
40
|
+
result = validate({"name": "Alice", "age": 30}, rules)
|
|
41
|
+
if result["valid"]:
|
|
42
|
+
print("Data is valid!")
|
|
43
|
+
else:
|
|
44
|
+
print(result["errors"])
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## See Also
|
|
48
|
+
|
|
49
|
+
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
50
|
+
- Validation utilities in [utils.py](utils.py)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.12.1
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -17,8 +17,11 @@ Classifier: Programming Language :: Python :: 3.14
|
|
|
17
17
|
Requires-Python: >=3.13,<3.15
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
|
+
Requires-Dist: fastavro>=1.12.1
|
|
20
21
|
Requires-Dist: jinja2>=3.1.6
|
|
22
|
+
Requires-Dist: openpyxl>=3.1.5
|
|
21
23
|
Requires-Dist: pyodbc>=5.3.0
|
|
24
|
+
Requires-Dist: pyarrow>=22.0.0
|
|
22
25
|
Requires-Dist: python-dotenv>=1.2.1
|
|
23
26
|
Requires-Dist: pandas>=2.3.3
|
|
24
27
|
Requires-Dist: pydantic>=2.12.5
|
|
@@ -26,6 +29,8 @@ Requires-Dist: PyYAML>=6.0.3
|
|
|
26
29
|
Requires-Dist: requests>=2.32.5
|
|
27
30
|
Requires-Dist: SQLAlchemy>=2.0.45
|
|
28
31
|
Requires-Dist: typer>=0.21.0
|
|
32
|
+
Requires-Dist: xlrd>=2.0.2
|
|
33
|
+
Requires-Dist: xlwt>=1.3.0
|
|
29
34
|
Provides-Extra: dev
|
|
30
35
|
Requires-Dist: black>=25.9.0; extra == "dev"
|
|
31
36
|
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
@@ -59,6 +64,7 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
|
|
|
59
64
|
package and command-line interface for data extraction, validation, transformation, and loading.
|
|
60
65
|
|
|
61
66
|
- [ETLPlus](#etlplus)
|
|
67
|
+
- [Getting Started](#getting-started)
|
|
62
68
|
- [Features](#features)
|
|
63
69
|
- [Installation](#installation)
|
|
64
70
|
- [Quickstart](#quickstart)
|
|
@@ -87,11 +93,27 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
87
93
|
- [Linting](#linting)
|
|
88
94
|
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
89
95
|
- [Releasing to PyPI](#releasing-to-pypi)
|
|
90
|
-
- [Links](#links)
|
|
91
96
|
- [License](#license)
|
|
92
97
|
- [Contributing](#contributing)
|
|
98
|
+
- [Documentation](#documentation)
|
|
99
|
+
- [Python Packages/Subpackage](#python-packagessubpackage)
|
|
100
|
+
- [Community Health](#community-health)
|
|
101
|
+
- [Other](#other)
|
|
93
102
|
- [Acknowledgments](#acknowledgments)
|
|
94
103
|
|
|
104
|
+
## Getting Started
|
|
105
|
+
|
|
106
|
+
ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
|
|
107
|
+
as a Python library or from the command line.
|
|
108
|
+
|
|
109
|
+
To get started:
|
|
110
|
+
|
|
111
|
+
- See [Installation](#installation) for setup instructions.
|
|
112
|
+
- Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
|
|
113
|
+
- Explore [Usage](#usage) for more detailed options and workflows.
|
|
114
|
+
|
|
115
|
+
ETLPlus supports Python 3.13 and above.
|
|
116
|
+
|
|
95
117
|
## Features
|
|
96
118
|
|
|
97
119
|
- **Check** data pipeline definitions before running them:
|
|
@@ -416,7 +438,7 @@ etlplus transform \
|
|
|
416
438
|
# 3. Validate transformed data
|
|
417
439
|
etlplus validate \
|
|
418
440
|
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
|
|
419
|
-
|
|
441
|
+
temp/sample_transformed.json
|
|
420
442
|
|
|
421
443
|
# 4. Load to CSV
|
|
422
444
|
cat temp/sample_transformed.json \
|
|
@@ -603,17 +625,6 @@ git push origin v1.4.0
|
|
|
603
625
|
If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
|
|
604
626
|
this exercises the same build path the workflow uses.
|
|
605
627
|
|
|
606
|
-
## Links
|
|
607
|
-
|
|
608
|
-
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
609
|
-
- Examples: [`examples/README.md`](examples/README.md)
|
|
610
|
-
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
611
|
-
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
612
|
-
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
613
|
-
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
614
|
-
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
615
|
-
- Additional references: [`REFERENCES.md`](`REFERENCES.md)
|
|
616
|
-
|
|
617
628
|
## License
|
|
618
629
|
|
|
619
630
|
This project is licensed under the [MIT License](LICENSE).
|
|
@@ -637,6 +648,39 @@ If you choose to be a code contributor, please first refer these documents:
|
|
|
637
648
|
- Typing philosophy (TypedDicts as editor hints, permissive runtime):
|
|
638
649
|
[`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
639
650
|
|
|
651
|
+
## Documentation
|
|
652
|
+
|
|
653
|
+
### Python Packages/Subpackage
|
|
654
|
+
|
|
655
|
+
Navigate to detailed documentation for each subpackage:
|
|
656
|
+
|
|
657
|
+
- [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
658
|
+
- [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
|
|
659
|
+
- [etlplus.config](etlplus/config/README.md): Configuration helpers for connectors, pipelines, jobs,
|
|
660
|
+
and profiles
|
|
661
|
+
- [etlplus.cli](etlplus/cli/README.md): Command-line interface for ETLPlus workflows
|
|
662
|
+
- [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
|
|
663
|
+
- [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
|
|
664
|
+
- [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
|
|
665
|
+
|
|
666
|
+
### Community Health
|
|
667
|
+
|
|
668
|
+
- [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
|
|
669
|
+
- [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
|
|
670
|
+
- [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
|
|
671
|
+
- [Support](SUPPORT.md): Where to get help
|
|
672
|
+
|
|
673
|
+
### Other
|
|
674
|
+
|
|
675
|
+
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
676
|
+
- Examples: [`examples/README.md`](examples/README.md)
|
|
677
|
+
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
678
|
+
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
679
|
+
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
680
|
+
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
681
|
+
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
682
|
+
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
683
|
+
|
|
640
684
|
## Acknowledgments
|
|
641
685
|
|
|
642
686
|
ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
|