etlplus 0.11.5__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/file/xlsx.py ADDED
@@ -0,0 +1,142 @@
1
+ """
2
+ :mod:`etlplus.file.xlsx` module.
3
+
4
+ Helpers for reading/writing Excel XLSX files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import Any
11
+ from typing import cast
12
+
13
+ from ..types import JSONData
14
+ from ..types import JSONDict
15
+ from ..types import JSONList
16
+
17
+ # SECTION: EXPORTS ========================================================== #
18
+
19
+
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
26
+ # SECTION: INTERNAL CONSTANTS =============================================== #
27
+
28
+
29
+ _PANDAS_CACHE: dict[str, Any] = {}
30
+
31
+
32
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
33
+
34
+
35
+ def _get_pandas() -> Any:
36
+ """
37
+ Return the pandas module, importing it on first use.
38
+
39
+ Raises an informative ImportError if the optional dependency is missing.
40
+ """
41
+ mod = _PANDAS_CACHE.get('mod')
42
+ if mod is not None: # pragma: no cover - tiny branch
43
+ return mod
44
+ try:
45
+ _pd = __import__('pandas') # type: ignore[assignment]
46
+ except ImportError as e: # pragma: no cover
47
+ raise ImportError(
48
+ 'XLSX support requires optional dependency "pandas".\n'
49
+ 'Install with: pip install pandas',
50
+ ) from e
51
+ _PANDAS_CACHE['mod'] = _pd
52
+
53
+ return _pd
54
+
55
+
56
+ def _normalize_records(data: JSONData) -> JSONList:
57
+ """
58
+ Normalize JSON payloads into a list of dictionaries.
59
+
60
+ Raises TypeError when payloads contain non-dict items.
61
+ """
62
+ if isinstance(data, list):
63
+ if not all(isinstance(item, dict) for item in data):
64
+ raise TypeError('XLSX payloads must contain only objects (dicts)')
65
+ return cast(JSONList, data)
66
+ return [cast(JSONDict, data)]
67
+
68
+
69
+ # SECTION: FUNCTIONS ======================================================== #
70
+
71
+
72
+ def read(
73
+ path: Path,
74
+ ) -> JSONList:
75
+ """
76
+ Read XLSX content from ``path``.
77
+
78
+ Parameters
79
+ ----------
80
+ path : Path
81
+ Path to the XLSX file on disk.
82
+
83
+ Returns
84
+ -------
85
+ JSONList
86
+ The list of dictionaries read from the XLSX file.
87
+
88
+ Raises
89
+ ------
90
+ ImportError
91
+ If optional dependencies for XLSX support are missing.
92
+ """
93
+ pandas = _get_pandas()
94
+ try:
95
+ frame = pandas.read_excel(path)
96
+ except ImportError as e: # pragma: no cover
97
+ raise ImportError(
98
+ 'XLSX support requires optional dependency "openpyxl".\n'
99
+ 'Install with: pip install openpyxl',
100
+ ) from e
101
+ return cast(JSONList, frame.to_dict(orient='records'))
102
+
103
+
104
+ def write(
105
+ path: Path,
106
+ data: JSONData,
107
+ ) -> int:
108
+ """
109
+ Write ``data`` to XLSX at ``path`` and return record count.
110
+
111
+ Parameters
112
+ ----------
113
+ path : Path
114
+ Path to the XLSX file on disk.
115
+ data : JSONData
116
+ Data to write.
117
+
118
+ Returns
119
+ -------
120
+ int
121
+ Number of records written.
122
+
123
+ Raises
124
+ ------
125
+ ImportError
126
+ If optional dependencies for XLSX support are missing.
127
+ """
128
+ records = _normalize_records(data)
129
+ if not records:
130
+ return 0
131
+
132
+ pandas = _get_pandas()
133
+ path.parent.mkdir(parents=True, exist_ok=True)
134
+ frame = pandas.DataFrame.from_records(records)
135
+ try:
136
+ frame.to_excel(path, index=False)
137
+ except ImportError as e: # pragma: no cover
138
+ raise ImportError(
139
+ 'XLSX support requires optional dependency "openpyxl".\n'
140
+ 'Install with: pip install openpyxl',
141
+ ) from e
142
+ return len(records)
etlplus/file/xml.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  :mod:`etlplus.file.xml` module.
3
3
 
4
- XML read/write helpers.
4
+ Helpers for reading/writing XML files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -14,6 +14,15 @@ from ..types import JSONData
14
14
  from ..types import JSONDict
15
15
  from ..utils import count_records
16
16
 
17
+ # SECTION: EXPORTS ========================================================== #
18
+
19
+
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
17
26
  # SECTION: CONSTANTS ======================================================== #
18
27
 
19
28
 
@@ -117,7 +126,7 @@ def read(
117
126
  path: Path,
118
127
  ) -> JSONDict:
119
128
  """
120
- Parse XML document at ``path`` into a nested dictionary.
129
+ Read XML content from ``path``.
121
130
 
122
131
  Parameters
123
132
  ----------
@@ -137,7 +146,7 @@ def read(
137
146
 
138
147
  def write(path: Path, data: JSONData, *, root_tag: str) -> int:
139
148
  """
140
- Write ``data`` as XML to ``path`` and return record count.
149
+ Write ``data`` to XML at ``path`` and return record count.
141
150
 
142
151
  Parameters
143
152
  ----------
etlplus/file/yaml.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  :mod:`etlplus.file.yaml` module.
3
3
 
4
- Optional YAML read/write helpers.
4
+ Helpers for reading/writing YAML files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -15,6 +15,15 @@ from ..types import JSONDict
15
15
  from ..types import JSONList
16
16
  from ..utils import count_records
17
17
 
18
+ # SECTION: EXPORTS ========================================================== #
19
+
20
+
21
+ __all__ = [
22
+ 'read',
23
+ 'write',
24
+ ]
25
+
26
+
18
27
  # SECTION: INTERNAL CONSTANTS =============================================== #
19
28
 
20
29
 
@@ -59,7 +68,9 @@ def read(
59
68
  path: Path,
60
69
  ) -> JSONData:
61
70
  """
62
- Load and validate YAML payloads from ``path``.
71
+ Read YAML content from ``path``.
72
+
73
+ Validates that the YAML root is a dict or a list of dicts.
63
74
 
64
75
  Parameters
65
76
  ----------
etlplus/file/zip.py ADDED
@@ -0,0 +1,175 @@
1
+ """
2
+ :mod:`etlplus.file.zip` module.
3
+
4
+ Helpers for reading/writing ZIP files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import tempfile
10
+ import zipfile
11
+ from pathlib import Path
12
+
13
+ from ..types import JSONData
14
+ from ..types import JSONDict
15
+ from .enums import CompressionFormat
16
+ from .enums import FileFormat
17
+ from .enums import infer_file_format_and_compression
18
+
19
+ # SECTION: EXPORTS ========================================================== #
20
+
21
+
22
+ __all__ = [
23
+ 'read',
24
+ 'write',
25
+ ]
26
+
27
+
28
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
29
+
30
+
31
+ def _resolve_format(
32
+ filename: str,
33
+ ) -> FileFormat:
34
+ """
35
+ Resolve the inner file format from a filename.
36
+
37
+ Parameters
38
+ ----------
39
+ filename : str
40
+ The name of the file inside the ZIP archive.
41
+
42
+ Returns
43
+ -------
44
+ FileFormat
45
+ The inferred inner file format.
46
+
47
+ Raises
48
+ ------
49
+ ValueError
50
+ If the file format cannot be inferred from the filename.
51
+ """
52
+ fmt, compression = infer_file_format_and_compression(filename)
53
+ if compression is not None and compression is not CompressionFormat.ZIP:
54
+ raise ValueError(f'Unexpected compression in archive: {compression!r}')
55
+ if fmt is None:
56
+ raise ValueError(
57
+ f'Cannot infer file format from compressed file {filename!r}',
58
+ )
59
+ return fmt
60
+
61
+
62
+ def _extract_payload(
63
+ entry: zipfile.ZipInfo,
64
+ archive: zipfile.ZipFile,
65
+ ) -> bytes:
66
+ """
67
+ Extract an archive entry into memory.
68
+
69
+ Parameters
70
+ ----------
71
+ entry : zipfile.ZipInfo
72
+ The ZIP archive entry.
73
+ archive : zipfile.ZipFile
74
+ The opened ZIP archive.
75
+
76
+ Returns
77
+ -------
78
+ bytes
79
+ The raw payload.
80
+ """
81
+ with archive.open(entry, 'r') as handle:
82
+ return handle.read()
83
+
84
+
85
+ # SECTION: FUNCTIONS ======================================================== #
86
+
87
+
88
+ def read(
89
+ path: Path,
90
+ ) -> JSONData:
91
+ """
92
+ Read ZIP content from ``path`` and parse the inner payload(s).
93
+
94
+ Parameters
95
+ ----------
96
+ path : Path
97
+ Path to the ZIP file on disk.
98
+
99
+ Returns
100
+ -------
101
+ JSONData
102
+ Parsed payload.
103
+
104
+ Raises
105
+ ------
106
+ ValueError
107
+ If the ZIP archive is empty.
108
+ """
109
+ with zipfile.ZipFile(path, 'r') as archive:
110
+ entries = [entry for entry in archive.infolist() if not entry.is_dir()]
111
+ if not entries:
112
+ raise ValueError(f'ZIP archive is empty: {path}')
113
+
114
+ if len(entries) == 1:
115
+ entry = entries[0]
116
+ fmt = _resolve_format(entry.filename)
117
+ payload = _extract_payload(entry, archive)
118
+ with tempfile.TemporaryDirectory() as tmpdir:
119
+ tmp_path = Path(tmpdir) / Path(entry.filename).name
120
+ tmp_path.write_bytes(payload)
121
+ from .core import File
122
+
123
+ return File(tmp_path, fmt).read()
124
+
125
+ results: JSONDict = {}
126
+ for entry in entries:
127
+ fmt = _resolve_format(entry.filename)
128
+ payload = _extract_payload(entry, archive)
129
+ with tempfile.TemporaryDirectory() as tmpdir:
130
+ tmp_path = Path(tmpdir) / Path(entry.filename).name
131
+ tmp_path.write_bytes(payload)
132
+ from .core import File
133
+
134
+ results[entry.filename] = File(tmp_path, fmt).read()
135
+ return results
136
+
137
+
138
+ def write(
139
+ path: Path,
140
+ data: JSONData,
141
+ ) -> int:
142
+ """
143
+ Write ``data`` to ZIP at ``path`` and return record count.
144
+
145
+ Parameters
146
+ ----------
147
+ path : Path
148
+ Path to the ZIP file on disk.
149
+ data : JSONData
150
+ Data to write.
151
+
152
+ Returns
153
+ -------
154
+ int
155
+ Number of records written.
156
+ """
157
+ fmt = _resolve_format(path.name)
158
+ inner_name = Path(path.name).with_suffix('').name
159
+
160
+ with tempfile.TemporaryDirectory() as tmpdir:
161
+ tmp_path = Path(tmpdir) / inner_name
162
+ from .core import File
163
+
164
+ count = File(tmp_path, fmt).write(data)
165
+ payload = tmp_path.read_bytes()
166
+
167
+ path.parent.mkdir(parents=True, exist_ok=True)
168
+ with zipfile.ZipFile(
169
+ path,
170
+ 'w',
171
+ compression=zipfile.ZIP_DEFLATED,
172
+ ) as archive:
173
+ archive.writestr(inner_name, payload)
174
+
175
+ return count
@@ -0,0 +1,46 @@
1
+ # etlplus.templates subpackage
2
+
3
+ Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpers.
4
+
5
+ - Provides Jinja2 templates for DDL and view generation
6
+ - Supports templated SQL for multiple database backends
7
+ - Includes helpers for rendering templates with schema metadata
8
+
9
+ Back to project overview: see the top-level [README](../../README.md).
10
+
11
+ - [etlplus.templates subpackage](#etlplustemplates-subpackage)
12
+ - [Available Templates](#available-templates)
13
+ - [Rendering Templates](#rendering-templates)
14
+ - [Example: Rendering a DDL Template](#example-rendering-a-ddl-template)
15
+ - [See Also](#see-also)
16
+
17
+ ## Available Templates
18
+
19
+ - `ddl.sql.j2`: Generic DDL (CREATE TABLE) template
20
+ - `view.sql.j2`: Generic view creation template
21
+
22
+ ## Rendering Templates
23
+
24
+ Use the helpers to render templates with your schema or table metadata:
25
+
26
+ ```python
27
+ from etlplus.templates import render_template
28
+
29
+ sql = render_template("ddl.sql.j2", schema=my_schema)
30
+ ```
31
+
32
+ ## Example: Rendering a DDL Template
33
+
34
+ ```python
35
+ from etlplus.templates import render_template
36
+
37
+ schema = {"name": "users", "columns": [ ... ]}
38
+ sql = render_template("ddl.sql.j2", schema=schema)
39
+ print(sql)
40
+ ```
41
+
42
+ ## See Also
43
+
44
+ - Top-level CLI and library usage in the main [README](../../README.md)
45
+ - DDL template in [ddl.sql.j2](ddl.sql.j2)
46
+ - View template in [view.sql.j2](view.sql.j2)
@@ -0,0 +1,50 @@
1
+ # etlplus.validation subpackage
2
+
3
+ Documentation for the `etlplus.validation` subpackage: data validation utilities and helpers.
4
+
5
+ - Provides flexible data validation for ETL pipelines
6
+ - Supports type checking, required fields, and custom rules
7
+ - Includes utilities for rule definition and validation logic
8
+
9
+ Back to project overview: see the top-level [README](../../README.md).
10
+
11
+ - [etlplus.validation subpackage](#etlplusvalidation-subpackage)
12
+ - [Validation Features](#validation-features)
13
+ - [Defining Validation Rules](#defining-validation-rules)
14
+ - [Example: Validating Data](#example-validating-data)
15
+ - [See Also](#see-also)
16
+
17
+ ## Validation Features
18
+
19
+ - Type checking (string, number, boolean, etc.)
20
+ - Required/optional fields
21
+ - Enum and pattern validation
22
+ - Custom rule support
23
+
24
+ ## Defining Validation Rules
25
+
26
+ Validation rules are defined as dictionaries specifying field types, requirements, and constraints:
27
+
28
+ ```python
29
+ rules = {
30
+ "name": {"type": "string", "required": True},
31
+ "age": {"type": "number", "min": 0, "max": 120},
32
+ }
33
+ ```
34
+
35
+ ## Example: Validating Data
36
+
37
+ ```python
38
+ from etlplus.validation import validate
39
+
40
+ result = validate({"name": "Alice", "age": 30}, rules)
41
+ if result["valid"]:
42
+ print("Data is valid!")
43
+ else:
44
+ print(result["errors"])
45
+ ```
46
+
47
+ ## See Also
48
+
49
+ - Top-level CLI and library usage in the main [README](../../README.md)
50
+ - Validation utilities in [utils.py](utils.py)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: etlplus
3
- Version: 0.11.5
3
+ Version: 0.12.1
4
4
  Summary: A Swiss Army knife for simple ETL operations
5
5
  Home-page: https://github.com/Dagitali/ETLPlus
6
6
  Author: ETLPlus Team
@@ -17,8 +17,11 @@ Classifier: Programming Language :: Python :: 3.14
17
17
  Requires-Python: >=3.13,<3.15
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
+ Requires-Dist: fastavro>=1.12.1
20
21
  Requires-Dist: jinja2>=3.1.6
22
+ Requires-Dist: openpyxl>=3.1.5
21
23
  Requires-Dist: pyodbc>=5.3.0
24
+ Requires-Dist: pyarrow>=22.0.0
22
25
  Requires-Dist: python-dotenv>=1.2.1
23
26
  Requires-Dist: pandas>=2.3.3
24
27
  Requires-Dist: pydantic>=2.12.5
@@ -26,6 +29,8 @@ Requires-Dist: PyYAML>=6.0.3
26
29
  Requires-Dist: requests>=2.32.5
27
30
  Requires-Dist: SQLAlchemy>=2.0.45
28
31
  Requires-Dist: typer>=0.21.0
32
+ Requires-Dist: xlrd>=2.0.2
33
+ Requires-Dist: xlwt>=1.3.0
29
34
  Provides-Extra: dev
30
35
  Requires-Dist: black>=25.9.0; extra == "dev"
31
36
  Requires-Dist: build>=1.2.2; extra == "dev"
@@ -59,6 +64,7 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
59
64
  package and command-line interface for data extraction, validation, transformation, and loading.
60
65
 
61
66
  - [ETLPlus](#etlplus)
67
+ - [Getting Started](#getting-started)
62
68
  - [Features](#features)
63
69
  - [Installation](#installation)
64
70
  - [Quickstart](#quickstart)
@@ -87,11 +93,27 @@ package and command-line interface for data extraction, validation, transformati
87
93
  - [Linting](#linting)
88
94
  - [Updating Demo Snippets](#updating-demo-snippets)
89
95
  - [Releasing to PyPI](#releasing-to-pypi)
90
- - [Links](#links)
91
96
  - [License](#license)
92
97
  - [Contributing](#contributing)
98
+ - [Documentation](#documentation)
99
+ - [Python Packages/Subpackage](#python-packagessubpackage)
100
+ - [Community Health](#community-health)
101
+ - [Other](#other)
93
102
  - [Acknowledgments](#acknowledgments)
94
103
 
104
+ ## Getting Started
105
+
106
+ ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
107
+ as a Python library or from the command line.
108
+
109
+ To get started:
110
+
111
+ - See [Installation](#installation) for setup instructions.
112
+ - Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
113
+ - Explore [Usage](#usage) for more detailed options and workflows.
114
+
115
+ ETLPlus supports Python 3.13 and above.
116
+
95
117
  ## Features
96
118
 
97
119
  - **Check** data pipeline definitions before running them:
@@ -416,7 +438,7 @@ etlplus transform \
416
438
  # 3. Validate transformed data
417
439
  etlplus validate \
418
440
  --rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
419
- temo/sample_transformed.json
441
+ temp/sample_transformed.json
420
442
 
421
443
  # 4. Load to CSV
422
444
  cat temp/sample_transformed.json \
@@ -603,17 +625,6 @@ git push origin v1.4.0
603
625
  If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
604
626
  this exercises the same build path the workflow uses.
605
627
 
606
- ## Links
607
-
608
- - API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
609
- - Examples: [`examples/README.md`](examples/README.md)
610
- - Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
611
- - Runner internals: [`docs/run-module.md`](docs/run-module.md)
612
- - Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
613
- - Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
614
- - Demo and walkthrough: [`DEMO.md`](DEMO.md)
615
- - Additional references: [`REFERENCES.md`](`REFERENCES.md)
616
-
617
628
  ## License
618
629
 
619
630
  This project is licensed under the [MIT License](LICENSE).
@@ -637,6 +648,39 @@ If you choose to be a code contributor, please first refer these documents:
637
648
  - Typing philosophy (TypedDicts as editor hints, permissive runtime):
638
649
  [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
639
650
 
651
+ ## Documentation
652
+
653
+ ### Python Packages/Subpackage
654
+
655
+ Navigate to detailed documentation for each subpackage:
656
+
657
+ - [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
658
+ - [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
659
+ - [etlplus.config](etlplus/config/README.md): Configuration helpers for connectors, pipelines, jobs,
660
+ and profiles
661
+ - [etlplus.cli](etlplus/cli/README.md): Command-line interface for ETLPlus workflows
662
+ - [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
663
+ - [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
664
+ - [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
665
+
666
+ ### Community Health
667
+
668
+ - [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
669
+ - [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
670
+ - [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
671
+ - [Support](SUPPORT.md): Where to get help
672
+
673
+ ### Other
674
+
675
+ - API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
676
+ - Examples: [`examples/README.md`](examples/README.md)
677
+ - Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
678
+ - Runner internals: [`docs/run-module.md`](docs/run-module.md)
679
+ - Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
680
+ - Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
681
+ - Demo and walkthrough: [`DEMO.md`](DEMO.md)
682
+ - Additional references: [`REFERENCES.md`](REFERENCES.md)
683
+
640
684
  ## Acknowledgments
641
685
 
642
686
  ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in