PyPI - etlplus - Versions diffs - 0.11.5__py3-none-any.whl → 0.12.1__py3-none-any.whl - Mend

etlplus 0.11.5__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

etlplus/README.md +37 -0
etlplus/api/README.md +20 -3
etlplus/cli/README.md +40 -0
etlplus/cli/handlers.py +1 -1
etlplus/config/README.md +52 -0
etlplus/database/README.md +48 -0
etlplus/database/ddl.py +1 -1
etlplus/database/engine.py +1 -1
etlplus/database/schema.py +1 -1
etlplus/file/README.md +105 -0
etlplus/file/avro.py +198 -0
etlplus/file/core.py +105 -105
etlplus/file/csv.py +12 -3
etlplus/file/feather.py +144 -0
etlplus/file/gz.py +123 -0
etlplus/file/json.py +13 -2
etlplus/file/ndjson.py +109 -0
etlplus/file/orc.py +142 -0
etlplus/file/parquet.py +146 -0
etlplus/file/tsv.py +91 -0
etlplus/file/txt.py +99 -0
etlplus/file/xls.py +132 -0
etlplus/file/xlsx.py +142 -0
etlplus/file/xml.py +12 -3
etlplus/file/yaml.py +13 -2
etlplus/file/zip.py +175 -0
etlplus/templates/README.md +46 -0
etlplus/validation/README.md +50 -0
{etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/METADATA +58 -14
{etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/RECORD +34 -16
{etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/WHEEL +0 -0
{etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/entry_points.txt +0 -0
{etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/licenses/LICENSE +0 -0
{etlplus-0.11.5.dist-info → etlplus-0.12.1.dist-info}/top_level.txt +0 -0

etlplus/file/core.py CHANGED Viewed

@@ -11,11 +11,21 @@ from dataclasses import dataclass
 from pathlib import Path
 from ..types import JSONData
-from ..types import StrPath
+from . import avro
 from . import csv
+from . import feather
+from . import gz
 from . import json
+from . import ndjson
+from . import orc
+from . import parquet
+from . import tsv
+from . import txt
+from . import xls
+from . import xlsx
 from . import xml
 from . import yaml
+from . import zip
 from .enums import FileFormat
 from .enums import infer_file_format_and_compression
@@ -43,7 +53,15 @@ class File:
         Path to the file on disk.
     file_format : FileFormat | None, optional
         Explicit format. If omitted, the format is inferred from the file
-        extension (``.csv``, ``.json``, or ``.xml``).
+        extension (``.csv``, ``.json``, etc.).
+    Parameters
+    ----------
+    path : StrPath
+        Path to the file on disk.
+    file_format : FileFormat | str | None, optional
+        Explicit format. If omitted, the format is inferred from the file
+        extension (``.csv``, ``.json``, etc.).
     """
     # -- Attributes -- #
@@ -62,16 +80,10 @@ class File:
         extension is unknown, the attribute is left as ``None`` and will be
         validated later by :meth:`_ensure_format`.
         """
-        # Normalize incoming path (allow str in constructor) to Path.
-        if isinstance(self.path, str):
-            self.path = Path(self.path)
+        self.path = Path(self.path)
+        self.file_format = self._coerce_format(self.file_format)
         if self.file_format is None:
-            try:
-                self.file_format = self._guess_format()
-            except ValueError:
-                # Leave as None; _ensure_format() will raise on use if needed.
-                pass
+            self.file_format = self._maybe_guess_format()
     # -- Internal Instance Methods -- #
@@ -84,6 +96,28 @@ class File:
         if not self.path.exists():
             raise FileNotFoundError(f'File not found: {self.path}')
+    def _coerce_format(
+        self,
+        file_format: FileFormat | str | None,
+    ) -> FileFormat | None:
+        """
+        Normalize the file format input.
+        Parameters
+        ----------
+        file_format : FileFormat | str | None
+            File format specifier. Strings are coerced into
+            :class:`FileFormat`.
+        Returns
+        -------
+        FileFormat | None
+            A normalized file format, or ``None`` when unspecified.
+        """
+        if file_format is None or isinstance(file_format, FileFormat):
+            return file_format
+        return FileFormat.coerce(file_format)
     def _ensure_format(self) -> FileFormat:
         """
         Resolve the active format, guessing from extension if needed.
@@ -125,7 +159,22 @@ class File:
             f'Cannot infer file format from extension {self.path.suffix!r}',
         )
-    # -- Instance Methods (Generic API) -- #
+    def _maybe_guess_format(self) -> FileFormat | None:
+        """
+        Try to infer the format, returning ``None`` if it cannot be inferred.
+        Returns
+        -------
+        FileFormat | None
+            The inferred format, or ``None`` if inference fails.
+        """
+        try:
+            return self._guess_format()
+        except ValueError:
+            # Leave as None; _ensure_format() will raise on use if needed.
+            return None
+    # -- Instance Methods -- #
     def read(self) -> JSONData:
         """
@@ -144,14 +193,36 @@ class File:
         self._assert_exists()
         fmt = self._ensure_format()
         match fmt:
+            case FileFormat.AVRO:
+                return avro.read(self.path)
             case FileFormat.CSV:
                 return csv.read(self.path)
+            case FileFormat.FEATHER:
+                return feather.read(self.path)
+            case FileFormat.GZ:
+                return gz.read(self.path)
             case FileFormat.JSON:
                 return json.read(self.path)
+            case FileFormat.NDJSON:
+                return ndjson.read(self.path)
+            case FileFormat.ORC:
+                return orc.read(self.path)
+            case FileFormat.PARQUET:
+                return parquet.read(self.path)
+            case FileFormat.TSV:
+                return tsv.read(self.path)
+            case FileFormat.TXT:
+                return txt.read(self.path)
+            case FileFormat.XLS:
+                return xls.read(self.path)
+            case FileFormat.XLSX:
+                return xlsx.read(self.path)
             case FileFormat.XML:
                 return xml.read(self.path)
             case FileFormat.YAML:
                 return yaml.read(self.path)
+            case FileFormat.ZIP:
+                return zip.read(self.path)
         raise ValueError(f'Unsupported format: {fmt}')
     def write(
@@ -183,105 +254,34 @@ class File:
         """
         fmt = self._ensure_format()
         match fmt:
+            case FileFormat.AVRO:
+                return avro.write(self.path, data)
             case FileFormat.CSV:
                 return csv.write(self.path, data)
+            case FileFormat.FEATHER:
+                return feather.write(self.path, data)
+            case FileFormat.GZ:
+                return gz.write(self.path, data)
             case FileFormat.JSON:
                 return json.write(self.path, data)
+            case FileFormat.NDJSON:
+                return ndjson.write(self.path, data)
+            case FileFormat.ORC:
+                return orc.write(self.path, data)
+            case FileFormat.PARQUET:
+                return parquet.write(self.path, data)
+            case FileFormat.TSV:
+                return tsv.write(self.path, data)
+            case FileFormat.TXT:
+                return txt.write(self.path, data)
+            case FileFormat.XLS:
+                return xls.write(self.path, data)
+            case FileFormat.XLSX:
+                return xlsx.write(self.path, data)
             case FileFormat.XML:
                 return xml.write(self.path, data, root_tag=root_tag)
             case FileFormat.YAML:
                 return yaml.write(self.path, data)
+            case FileFormat.ZIP:
+                return zip.write(self.path, data)
         raise ValueError(f'Unsupported format: {fmt}')
-    # -- Class Methods -- #
-    @classmethod
-    def from_path(
-        cls,
-        path: StrPath,
-        *,
-        file_format: FileFormat | str | None = None,
-    ) -> File:
-        """
-        Create a :class:`File` from any path-like and optional format.
-        Parameters
-        ----------
-        path : StrPath
-            Path to the file on disk.
-        file_format : FileFormat | str | None, optional
-            Explicit format. If omitted, the format is inferred from the file
-            extension (``.csv``, ``.json``, or ``.xml``).
-        Returns
-        -------
-        File
-            The constructed :class:`File` instance.
-        """
-        resolved = Path(path)
-        ff: FileFormat | None
-        if isinstance(file_format, str):
-            ff = FileFormat.coerce(file_format)
-        else:
-            ff = file_format
-        return cls(resolved, ff)
-    @classmethod
-    def read_file(
-        cls,
-        path: StrPath,
-        file_format: FileFormat | str | None = None,
-    ) -> JSONData:
-        """
-        Read structured data.
-        Parameters
-        ----------
-        path : StrPath
-            Path to the file on disk.
-        file_format : FileFormat | str | None, optional
-            Explicit format. If omitted, the format is inferred from the file
-            extension (``.csv``, ``.json``, or ``.xml``).
-        Returns
-        -------
-        JSONData
-            The structured data read from the file.
-        """
-        return cls.from_path(path, file_format=file_format).read()
-    @classmethod
-    def write_file(
-        cls,
-        path: StrPath,
-        data: JSONData,
-        file_format: FileFormat | str | None = None,
-        *,
-        root_tag: str = xml.DEFAULT_XML_ROOT,
-    ) -> int:
-        """
-        Write structured data and count written records.
-        Parameters
-        ----------
-        path : StrPath
-            Path to the file on disk.
-        data : JSONData
-            Data to write to the file.
-        file_format : FileFormat | str | None, optional
-            Explicit format. If omitted, the format is inferred from the file
-            extension (``.csv``, ``.json``, or ``.xml``).
-        root_tag : str, optional
-            Root tag name to use when writing XML files. Defaults to
-            ``'root'``.
-        Returns
-        -------
-        int
-            The number of records written to the file.
-        """
-        return cls.from_path(path, file_format=file_format).write(
-            data,
-            root_tag=root_tag,
-        )

etlplus/file/csv.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 :mod:`etlplus.file.csv` module.
-CSV read/write helpers.
+Helpers for reading/writing CSV files.
 """
 from __future__ import annotations
@@ -14,6 +14,15 @@ from ..types import JSONData
 from ..types import JSONDict
 from ..types import JSONList
+# SECTION: EXPORTS ========================================================== #
+__all__ = [
+    'read',
+    'write',
+]
 # SECTION: FUNCTIONS ======================================================== #
@@ -21,7 +30,7 @@ def read(
     path: Path,
 ) -> JSONList:
     """
-    Load CSV content as a list of dictionaries.
+    Read CSV content from ``path``.
     Parameters
     ----------
@@ -48,7 +57,7 @@ def write(
     data: JSONData,
 ) -> int:
     """
-    Write CSV rows to ``path`` and return the number of rows.
+    Write ``data`` to CSV at ``path`` and return record count.
     Parameters
     ----------

etlplus/file/feather.py ADDED Viewed

@@ -0,0 +1,144 @@
+"""
+:mod:`etlplus.file.feather` module.
+Helpers for reading/writing Feather files.
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+from typing import cast
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import JSONList
+# SECTION: EXPORTS ========================================================== #
+__all__ = [
+    'read',
+    'write',
+]
+# SECTION: INTERNAL CONSTANTS =============================================== #
+_PANDAS_CACHE: dict[str, Any] = {}
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+def _get_pandas() -> Any:
+    """
+    Return the pandas module, importing it on first use.
+    Raises an informative ImportError if the optional dependency is missing.
+    """
+    mod = _PANDAS_CACHE.get('mod')
+    if mod is not None:  # pragma: no cover - tiny branch
+        return mod
+    try:
+        _pd = __import__('pandas')  # type: ignore[assignment]
+    except ImportError as e:  # pragma: no cover
+        raise ImportError(
+            'Feather support requires optional dependency "pandas".\n'
+            'Install with: pip install pandas',
+        ) from e
+    _PANDAS_CACHE['mod'] = _pd
+    return _pd
+def _normalize_records(data: JSONData) -> JSONList:
+    """
+    Normalize JSON payloads into a list of dictionaries.
+    Raises TypeError when payloads contain non-dict items.
+    """
+    if isinstance(data, list):
+        if not all(isinstance(item, dict) for item in data):
+            raise TypeError(
+                'Feather payloads must contain only objects (dicts)',
+            )
+        return cast(JSONList, data)
+    return [cast(JSONDict, data)]
+# SECTION: FUNCTIONS ======================================================== #
+def read(
+    path: Path,
+) -> JSONList:
+    """
+    Read Feather content from ``path``.
+    Parameters
+    ----------
+    path : Path
+        Path to the Feather file on disk.
+    Returns
+    -------
+    JSONList
+        The list of dictionaries read from the Feather file.
+    Raises
+    ------
+    ImportError
+        When optional dependency "pyarrow" is missing.
+    """
+    pandas = _get_pandas()
+    try:
+        frame = pandas.read_feather(path)
+    except ImportError as e:  # pragma: no cover
+        raise ImportError(
+            'Feather support requires optional dependency "pyarrow".\n'
+            'Install with: pip install pyarrow',
+        ) from e
+    return cast(JSONList, frame.to_dict(orient='records'))
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` to Feather at ``path`` and return record count.
+    Parameters
+    ----------
+    path : Path
+        Path to the Feather file on disk.
+    data : JSONData
+        Data to write.
+    Returns
+    -------
+    int
+        Number of records written.
+    Raises
+    ------
+    ImportError
+        When optional dependency "pyarrow" is missing.
+    """
+    records = _normalize_records(data)
+    if not records:
+        return 0
+    pandas = _get_pandas()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    frame = pandas.DataFrame.from_records(records)
+    try:
+        frame.to_feather(path)
+    except ImportError as e:  # pragma: no cover
+        raise ImportError(
+            'Feather support requires optional dependency "pyarrow".\n'
+            'Install with: pip install pyarrow',
+        ) from e
+    return len(records)

etlplus/file/gz.py ADDED Viewed

@@ -0,0 +1,123 @@
+"""
+:mod:`etlplus.file.gz` module.
+Helpers for reading/writing GZ files.
+"""
+from __future__ import annotations
+import gzip
+import tempfile
+from pathlib import Path
+from ..types import JSONData
+from .enums import CompressionFormat
+from .enums import FileFormat
+from .enums import infer_file_format_and_compression
+# SECTION: EXPORTS ========================================================== #
+__all__ = [
+    'read',
+    'write',
+]
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+def _resolve_format(
+    path: Path,
+) -> FileFormat:
+    """
+    Resolve the inner file format from a .gz filename.
+    Parameters
+    ----------
+    path : Path
+        Path to the GZ file on disk.
+    Returns
+    -------
+    FileFormat
+        The inferred inner file format.
+    Raises
+    ------
+    ValueError
+        If the file format cannot be inferred from the filename.
+    """
+    fmt, compression = infer_file_format_and_compression(path)
+    if compression is not CompressionFormat.GZ:
+        raise ValueError(f'Not a gzip file: {path}')
+    if fmt is None:
+        raise ValueError(
+            f'Cannot infer file format from compressed file {path!r}',
+        )
+    return fmt
+# SECTION: FUNCTIONS ======================================================== #
+def read(
+    path: Path,
+) -> JSONData:
+    """
+    Read GZ content from ``path`` and parse the inner payload.
+    Parameters
+    ----------
+    path : Path
+        Path to the GZ file on disk.
+    Returns
+    -------
+    JSONData
+        Parsed payload.
+    """
+    fmt = _resolve_format(path)
+    with gzip.open(path, 'rb') as handle:
+        payload = handle.read()
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmp_path = Path(tmpdir) / f'payload.{fmt.value}'
+        tmp_path.write_bytes(payload)
+        from .core import File
+        return File(tmp_path, fmt).read()
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` to GZ at ``path`` and return record count.
+    Parameters
+    ----------
+    path : Path
+        Path to the GZ file on disk.
+    data : JSONData
+        Data to write.
+    Returns
+    -------
+    int
+        Number of records written.
+    """
+    fmt = _resolve_format(path)
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmp_path = Path(tmpdir) / f'payload.{fmt.value}'
+        from .core import File
+        count = File(tmp_path, fmt).write(data)
+        payload = tmp_path.read_bytes()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with gzip.open(path, 'wb') as handle:
+        handle.write(payload)
+    return count

etlplus/file/json.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 :mod:`etlplus.file.json` module.
-JSON read/write helpers.
+Helpers for reading/writing JSON files.
 """
 from __future__ import annotations
@@ -15,6 +15,15 @@ from ..types import JSONDict
 from ..types import JSONList
 from ..utils import count_records
+# SECTION: EXPORTS ========================================================== #
+__all__ = [
+    'read',
+    'write',
+]
 # SECTION: FUNCTIONS ======================================================== #
@@ -22,7 +31,9 @@ def read(
     path: Path,
 ) -> JSONData:
     """
-    Load and validate JSON payloads from ``path``.
+    Read JSON content from ``path``.
+    Validates that the JSON root is a dict or a list of dicts.
     Parameters
     ----------

etlplus 0.11.5__py3-none-any.whl → 0.12.1__py3-none-any.whl

etlplus 0.11.5__py3-none-any.whl → 0.12.1__py3-none-any.whl