etlplus 0.11.5__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/file/xlsx.py ADDED
@@ -0,0 +1,142 @@
1
+ """
2
+ :mod:`etlplus.file.xlsx` module.
3
+
4
+ Helpers for reading/writing Excel XLSX files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ from typing import Any
11
+ from typing import cast
12
+
13
+ from ..types import JSONData
14
+ from ..types import JSONDict
15
+ from ..types import JSONList
16
+
17
+ # SECTION: EXPORTS ========================================================== #
18
+
19
+
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
26
+ # SECTION: INTERNAL CONSTANTS =============================================== #
27
+
28
+
29
+ _PANDAS_CACHE: dict[str, Any] = {}
30
+
31
+
32
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
33
+
34
+
35
+ def _get_pandas() -> Any:
36
+ """
37
+ Return the pandas module, importing it on first use.
38
+
39
+ Raises an informative ImportError if the optional dependency is missing.
40
+ """
41
+ mod = _PANDAS_CACHE.get('mod')
42
+ if mod is not None: # pragma: no cover - tiny branch
43
+ return mod
44
+ try:
45
+ _pd = __import__('pandas') # type: ignore[assignment]
46
+ except ImportError as e: # pragma: no cover
47
+ raise ImportError(
48
+ 'XLSX support requires optional dependency "pandas".\n'
49
+ 'Install with: pip install pandas',
50
+ ) from e
51
+ _PANDAS_CACHE['mod'] = _pd
52
+
53
+ return _pd
54
+
55
+
56
+ def _normalize_records(data: JSONData) -> JSONList:
57
+ """
58
+ Normalize JSON payloads into a list of dictionaries.
59
+
60
+ Raises TypeError when payloads contain non-dict items.
61
+ """
62
+ if isinstance(data, list):
63
+ if not all(isinstance(item, dict) for item in data):
64
+ raise TypeError('XLSX payloads must contain only objects (dicts)')
65
+ return cast(JSONList, data)
66
+ return [cast(JSONDict, data)]
67
+
68
+
69
+ # SECTION: FUNCTIONS ======================================================== #
70
+
71
+
72
+ def read(
73
+ path: Path,
74
+ ) -> JSONList:
75
+ """
76
+ Read XLSX content from ``path``.
77
+
78
+ Parameters
79
+ ----------
80
+ path : Path
81
+ Path to the XLSX file on disk.
82
+
83
+ Returns
84
+ -------
85
+ JSONList
86
+ The list of dictionaries read from the XLSX file.
87
+
88
+ Raises
89
+ ------
90
+ ImportError
91
+ If optional dependencies for XLSX support are missing.
92
+ """
93
+ pandas = _get_pandas()
94
+ try:
95
+ frame = pandas.read_excel(path)
96
+ except ImportError as e: # pragma: no cover
97
+ raise ImportError(
98
+ 'XLSX support requires optional dependency "openpyxl".\n'
99
+ 'Install with: pip install openpyxl',
100
+ ) from e
101
+ return cast(JSONList, frame.to_dict(orient='records'))
102
+
103
+
104
+ def write(
105
+ path: Path,
106
+ data: JSONData,
107
+ ) -> int:
108
+ """
109
+ Write ``data`` to XLSX at ``path`` and return record count.
110
+
111
+ Parameters
112
+ ----------
113
+ path : Path
114
+ Path to the XLSX file on disk.
115
+ data : JSONData
116
+ Data to write.
117
+
118
+ Returns
119
+ -------
120
+ int
121
+ Number of records written.
122
+
123
+ Raises
124
+ ------
125
+ ImportError
126
+ If optional dependencies for XLSX support are missing.
127
+ """
128
+ records = _normalize_records(data)
129
+ if not records:
130
+ return 0
131
+
132
+ pandas = _get_pandas()
133
+ path.parent.mkdir(parents=True, exist_ok=True)
134
+ frame = pandas.DataFrame.from_records(records)
135
+ try:
136
+ frame.to_excel(path, index=False)
137
+ except ImportError as e: # pragma: no cover
138
+ raise ImportError(
139
+ 'XLSX support requires optional dependency "openpyxl".\n'
140
+ 'Install with: pip install openpyxl',
141
+ ) from e
142
+ return len(records)
etlplus/file/xml.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  :mod:`etlplus.file.xml` module.
3
3
 
4
- XML read/write helpers.
4
+ Helpers for reading/writing XML files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -14,6 +14,15 @@ from ..types import JSONData
14
14
  from ..types import JSONDict
15
15
  from ..utils import count_records
16
16
 
17
+ # SECTION: EXPORTS ========================================================== #
18
+
19
+
20
+ __all__ = [
21
+ 'read',
22
+ 'write',
23
+ ]
24
+
25
+
17
26
  # SECTION: CONSTANTS ======================================================== #
18
27
 
19
28
 
@@ -117,7 +126,7 @@ def read(
117
126
  path: Path,
118
127
  ) -> JSONDict:
119
128
  """
120
- Parse XML document at ``path`` into a nested dictionary.
129
+ Read XML content from ``path``.
121
130
 
122
131
  Parameters
123
132
  ----------
@@ -137,7 +146,7 @@ def read(
137
146
 
138
147
  def write(path: Path, data: JSONData, *, root_tag: str) -> int:
139
148
  """
140
- Write ``data`` as XML to ``path`` and return record count.
149
+ Write ``data`` to XML at ``path`` and return record count.
141
150
 
142
151
  Parameters
143
152
  ----------
etlplus/file/yaml.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  :mod:`etlplus.file.yaml` module.
3
3
 
4
- Optional YAML read/write helpers.
4
+ Helpers for reading/writing YAML files.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -15,6 +15,15 @@ from ..types import JSONDict
15
15
  from ..types import JSONList
16
16
  from ..utils import count_records
17
17
 
18
+ # SECTION: EXPORTS ========================================================== #
19
+
20
+
21
+ __all__ = [
22
+ 'read',
23
+ 'write',
24
+ ]
25
+
26
+
18
27
  # SECTION: INTERNAL CONSTANTS =============================================== #
19
28
 
20
29
 
@@ -59,7 +68,9 @@ def read(
59
68
  path: Path,
60
69
  ) -> JSONData:
61
70
  """
62
- Load and validate YAML payloads from ``path``.
71
+ Read YAML content from ``path``.
72
+
73
+ Validates that the YAML root is a dict or a list of dicts.
63
74
 
64
75
  Parameters
65
76
  ----------
etlplus/file/zip.py ADDED
@@ -0,0 +1,175 @@
1
+ """
2
+ :mod:`etlplus.file.zip` module.
3
+
4
+ Helpers for reading/writing ZIP files.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import tempfile
10
+ import zipfile
11
+ from pathlib import Path
12
+
13
+ from ..types import JSONData
14
+ from ..types import JSONDict
15
+ from .enums import CompressionFormat
16
+ from .enums import FileFormat
17
+ from .enums import infer_file_format_and_compression
18
+
19
+ # SECTION: EXPORTS ========================================================== #
20
+
21
+
22
+ __all__ = [
23
+ 'read',
24
+ 'write',
25
+ ]
26
+
27
+
28
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
29
+
30
+
31
+ def _resolve_format(
32
+ filename: str,
33
+ ) -> FileFormat:
34
+ """
35
+ Resolve the inner file format from a filename.
36
+
37
+ Parameters
38
+ ----------
39
+ filename : str
40
+ The name of the file inside the ZIP archive.
41
+
42
+ Returns
43
+ -------
44
+ FileFormat
45
+ The inferred inner file format.
46
+
47
+ Raises
48
+ ------
49
+ ValueError
50
+ If the file format cannot be inferred from the filename.
51
+ """
52
+ fmt, compression = infer_file_format_and_compression(filename)
53
+ if compression is not None and compression is not CompressionFormat.ZIP:
54
+ raise ValueError(f'Unexpected compression in archive: {compression!r}')
55
+ if fmt is None:
56
+ raise ValueError(
57
+ f'Cannot infer file format from compressed file {filename!r}',
58
+ )
59
+ return fmt
60
+
61
+
62
+ def _extract_payload(
63
+ entry: zipfile.ZipInfo,
64
+ archive: zipfile.ZipFile,
65
+ ) -> bytes:
66
+ """
67
+ Extract an archive entry into memory.
68
+
69
+ Parameters
70
+ ----------
71
+ entry : zipfile.ZipInfo
72
+ The ZIP archive entry.
73
+ archive : zipfile.ZipFile
74
+ The opened ZIP archive.
75
+
76
+ Returns
77
+ -------
78
+ bytes
79
+ The raw payload.
80
+ """
81
+ with archive.open(entry, 'r') as handle:
82
+ return handle.read()
83
+
84
+
85
+ # SECTION: FUNCTIONS ======================================================== #
86
+
87
+
88
+ def read(
89
+ path: Path,
90
+ ) -> JSONData:
91
+ """
92
+ Read ZIP content from ``path`` and parse the inner payload(s).
93
+
94
+ Parameters
95
+ ----------
96
+ path : Path
97
+ Path to the ZIP file on disk.
98
+
99
+ Returns
100
+ -------
101
+ JSONData
102
+ Parsed payload.
103
+
104
+ Raises
105
+ ------
106
+ ValueError
107
+ If the ZIP archive is empty.
108
+ """
109
+ with zipfile.ZipFile(path, 'r') as archive:
110
+ entries = [entry for entry in archive.infolist() if not entry.is_dir()]
111
+ if not entries:
112
+ raise ValueError(f'ZIP archive is empty: {path}')
113
+
114
+ if len(entries) == 1:
115
+ entry = entries[0]
116
+ fmt = _resolve_format(entry.filename)
117
+ payload = _extract_payload(entry, archive)
118
+ with tempfile.TemporaryDirectory() as tmpdir:
119
+ tmp_path = Path(tmpdir) / Path(entry.filename).name
120
+ tmp_path.write_bytes(payload)
121
+ from .core import File
122
+
123
+ return File(tmp_path, fmt).read()
124
+
125
+ results: JSONDict = {}
126
+ for entry in entries:
127
+ fmt = _resolve_format(entry.filename)
128
+ payload = _extract_payload(entry, archive)
129
+ with tempfile.TemporaryDirectory() as tmpdir:
130
+ tmp_path = Path(tmpdir) / Path(entry.filename).name
131
+ tmp_path.write_bytes(payload)
132
+ from .core import File
133
+
134
+ results[entry.filename] = File(tmp_path, fmt).read()
135
+ return results
136
+
137
+
138
+ def write(
139
+ path: Path,
140
+ data: JSONData,
141
+ ) -> int:
142
+ """
143
+ Write ``data`` to ZIP at ``path`` and return record count.
144
+
145
+ Parameters
146
+ ----------
147
+ path : Path
148
+ Path to the ZIP file on disk.
149
+ data : JSONData
150
+ Data to write.
151
+
152
+ Returns
153
+ -------
154
+ int
155
+ Number of records written.
156
+ """
157
+ fmt = _resolve_format(path.name)
158
+ inner_name = Path(path.name).with_suffix('').name
159
+
160
+ with tempfile.TemporaryDirectory() as tmpdir:
161
+ tmp_path = Path(tmpdir) / inner_name
162
+ from .core import File
163
+
164
+ count = File(tmp_path, fmt).write(data)
165
+ payload = tmp_path.read_bytes()
166
+
167
+ path.parent.mkdir(parents=True, exist_ok=True)
168
+ with zipfile.ZipFile(
169
+ path,
170
+ 'w',
171
+ compression=zipfile.ZIP_DEFLATED,
172
+ ) as archive:
173
+ archive.writestr(inner_name, payload)
174
+
175
+ return count
@@ -0,0 +1,46 @@
1
+ # etlplus.templates subpackage
2
+
3
+ Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpers.
4
+
5
+ - Provides Jinja2 templates for DDL and view generation
6
+ - Supports templated SQL for multiple database backends
7
+ - Includes helpers for rendering templates with schema metadata
8
+
9
+ Back to project overview: see the top-level [README](../../README.md).
10
+
11
+ - [etlplus.templates subpackage](#etlplustemplates-subpackage)
12
+ - [Available Templates](#available-templates)
13
+ - [Rendering Templates](#rendering-templates)
14
+ - [Example: Rendering a DDL Template](#example-rendering-a-ddl-template)
15
+ - [See Also](#see-also)
16
+
17
+ ## Available Templates
18
+
19
+ - `ddl.sql.j2`: Generic DDL (CREATE TABLE) template
20
+ - `view.sql.j2`: Generic view creation template
21
+
22
+ ## Rendering Templates
23
+
24
+ Use the helpers to render templates with your schema or table metadata:
25
+
26
+ ```python
27
+ from etlplus.templates import render_template
28
+
29
+ sql = render_template("ddl.sql.j2", schema=my_schema)
30
+ ```
31
+
32
+ ## Example: Rendering a DDL Template
33
+
34
+ ```python
35
+ from etlplus.templates import render_template
36
+
37
+ schema = {"name": "users", "columns": [ ... ]}
38
+ sql = render_template("ddl.sql.j2", schema=schema)
39
+ print(sql)
40
+ ```
41
+
42
+ ## See Also
43
+
44
+ - Top-level CLI and library usage in the main [README](../../README.md)
45
+ - DDL template in [ddl.sql.j2](ddl.sql.j2)
46
+ - View template in [view.sql.j2](view.sql.j2)
@@ -0,0 +1,50 @@
1
+ # etlplus.validation subpackage
2
+
3
+ Documentation for the `etlplus.validation` subpackage: data validation utilities and helpers.
4
+
5
+ - Provides flexible data validation for ETL pipelines
6
+ - Supports type checking, required fields, and custom rules
7
+ - Includes utilities for rule definition and validation logic
8
+
9
+ Back to project overview: see the top-level [README](../../README.md).
10
+
11
+ - [etlplus.validation subpackage](#etlplusvalidation-subpackage)
12
+ - [Validation Features](#validation-features)
13
+ - [Defining Validation Rules](#defining-validation-rules)
14
+ - [Example: Validating Data](#example-validating-data)
15
+ - [See Also](#see-also)
16
+
17
+ ## Validation Features
18
+
19
+ - Type checking (string, number, boolean, etc.)
20
+ - Required/optional fields
21
+ - Enum and pattern validation
22
+ - Custom rule support
23
+
24
+ ## Defining Validation Rules
25
+
26
+ Validation rules are defined as dictionaries specifying field types, requirements, and constraints:
27
+
28
+ ```python
29
+ rules = {
30
+ "name": {"type": "string", "required": True},
31
+ "age": {"type": "number", "min": 0, "max": 120},
32
+ }
33
+ ```
34
+
35
+ ## Example: Validating Data
36
+
37
+ ```python
38
+ from etlplus.validation import validate
39
+
40
+ result = validate({"name": "Alice", "age": 30}, rules)
41
+ if result["valid"]:
42
+ print("Data is valid!")
43
+ else:
44
+ print(result["errors"])
45
+ ```
46
+
47
+ ## See Also
48
+
49
+ - Top-level CLI and library usage in the main [README](../../README.md)
50
+ - Validation utilities in [utils.py](utils.py)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: etlplus
3
- Version: 0.11.5
3
+ Version: 0.12.1
4
4
  Summary: A Swiss Army knife for simple ETL operations
5
5
  Home-page: https://github.com/Dagitali/ETLPlus
6
6
  Author: ETLPlus Team
@@ -17,8 +17,11 @@ Classifier: Programming Language :: Python :: 3.14
17
17
  Requires-Python: >=3.13,<3.15
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
+ Requires-Dist: fastavro>=1.12.1
20
21
  Requires-Dist: jinja2>=3.1.6
22
+ Requires-Dist: openpyxl>=3.1.5
21
23
  Requires-Dist: pyodbc>=5.3.0
24
+ Requires-Dist: pyarrow>=22.0.0
22
25
  Requires-Dist: python-dotenv>=1.2.1
23
26
  Requires-Dist: pandas>=2.3.3
24
27
  Requires-Dist: pydantic>=2.12.5
@@ -26,6 +29,8 @@ Requires-Dist: PyYAML>=6.0.3
26
29
  Requires-Dist: requests>=2.32.5
27
30
  Requires-Dist: SQLAlchemy>=2.0.45
28
31
  Requires-Dist: typer>=0.21.0
32
+ Requires-Dist: xlrd>=2.0.2
33
+ Requires-Dist: xlwt>=1.3.0
29
34
  Provides-Extra: dev
30
35
  Requires-Dist: black>=25.9.0; extra == "dev"
31
36
  Requires-Dist: build>=1.2.2; extra == "dev"
@@ -59,6 +64,7 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
59
64
  package and command-line interface for data extraction, validation, transformation, and loading.
60
65
 
61
66
  - [ETLPlus](#etlplus)
67
+ - [Getting Started](#getting-started)
62
68
  - [Features](#features)
63
69
  - [Installation](#installation)
64
70
  - [Quickstart](#quickstart)
@@ -87,11 +93,27 @@ package and command-line interface for data extraction, validation, transformati
87
93
  - [Linting](#linting)
88
94
  - [Updating Demo Snippets](#updating-demo-snippets)
89
95
  - [Releasing to PyPI](#releasing-to-pypi)
90
- - [Links](#links)
91
96
  - [License](#license)
92
97
  - [Contributing](#contributing)
98
+ - [Documentation](#documentation)
99
+ - [Python Packages/Subpackage](#python-packagessubpackage)
100
+ - [Community Health](#community-health)
101
+ - [Other](#other)
93
102
  - [Acknowledgments](#acknowledgments)
94
103
 
104
+ ## Getting Started
105
+
106
+ ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
107
+ as a Python library or from the command line.
108
+
109
+ To get started:
110
+
111
+ - See [Installation](#installation) for setup instructions.
112
+ - Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
113
+ - Explore [Usage](#usage) for more detailed options and workflows.
114
+
115
+ ETLPlus supports Python 3.13 and above.
116
+
95
117
  ## Features
96
118
 
97
119
  - **Check** data pipeline definitions before running them:
@@ -416,7 +438,7 @@ etlplus transform \
416
438
  # 3. Validate transformed data
417
439
  etlplus validate \
418
440
  --rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
419
- temo/sample_transformed.json
441
+ temp/sample_transformed.json
420
442
 
421
443
  # 4. Load to CSV
422
444
  cat temp/sample_transformed.json \
@@ -603,17 +625,6 @@ git push origin v1.4.0
603
625
  If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
604
626
  this exercises the same build path the workflow uses.
605
627
 
606
- ## Links
607
-
608
- - API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
609
- - Examples: [`examples/README.md`](examples/README.md)
610
- - Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
611
- - Runner internals: [`docs/run-module.md`](docs/run-module.md)
612
- - Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
613
- - Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
614
- - Demo and walkthrough: [`DEMO.md`](DEMO.md)
615
- - Additional references: [`REFERENCES.md`](`REFERENCES.md)
616
-
617
628
  ## License
618
629
 
619
630
  This project is licensed under the [MIT License](LICENSE).
@@ -637,6 +648,39 @@ If you choose to be a code contributor, please first refer these documents:
637
648
  - Typing philosophy (TypedDicts as editor hints, permissive runtime):
638
649
  [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
639
650
 
651
+ ## Documentation
652
+
653
+ ### Python Packages/Subpackage
654
+
655
+ Navigate to detailed documentation for each subpackage:
656
+
657
+ - [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
658
+ - [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
659
+ - [etlplus.config](etlplus/config/README.md): Configuration helpers for connectors, pipelines, jobs,
660
+ and profiles
661
+ - [etlplus.cli](etlplus/cli/README.md): Command-line interface for ETLPlus workflows
662
+ - [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
663
+ - [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
664
+ - [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
665
+
666
+ ### Community Health
667
+
668
+ - [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
669
+ - [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
670
+ - [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
671
+ - [Support](SUPPORT.md): Where to get help
672
+
673
+ ### Other
674
+
675
+ - API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
676
+ - Examples: [`examples/README.md`](examples/README.md)
677
+ - Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
678
+ - Runner internals: [`docs/run-module.md`](docs/run-module.md)
679
+ - Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
680
+ - Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
681
+ - Demo and walkthrough: [`DEMO.md`](DEMO.md)
682
+ - Additional references: [`REFERENCES.md`](REFERENCES.md)
683
+
640
684
  ## Acknowledgments
641
685
 
642
686
  ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in