etlplus 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- etlplus/file/_imports.py +35 -20
- etlplus/file/_io.py +138 -15
- etlplus/file/_r.py +48 -0
- etlplus/file/_sql.py +224 -0
- etlplus/file/accdb.py +7 -6
- etlplus/file/arrow.py +13 -24
- etlplus/file/avro.py +13 -10
- etlplus/file/bson.py +61 -22
- etlplus/file/cbor.py +13 -25
- etlplus/file/cfg.py +7 -6
- etlplus/file/conf.py +7 -6
- etlplus/file/core.py +1 -1
- etlplus/file/csv.py +8 -7
- etlplus/file/dat.py +9 -6
- etlplus/file/dta.py +15 -30
- etlplus/file/duckdb.py +29 -122
- etlplus/file/feather.py +15 -30
- etlplus/file/fwf.py +16 -14
- etlplus/file/gz.py +12 -7
- etlplus/file/hbs.py +7 -6
- etlplus/file/hdf5.py +31 -6
- etlplus/file/ini.py +17 -24
- etlplus/file/ion.py +7 -6
- etlplus/file/jinja2.py +7 -6
- etlplus/file/json.py +10 -11
- etlplus/file/log.py +7 -6
- etlplus/file/mat.py +7 -6
- etlplus/file/mdb.py +7 -6
- etlplus/file/msgpack.py +13 -25
- etlplus/file/mustache.py +7 -6
- etlplus/file/nc.py +30 -21
- etlplus/file/ndjson.py +10 -6
- etlplus/file/numbers.py +7 -6
- etlplus/file/ods.py +10 -6
- etlplus/file/orc.py +15 -30
- etlplus/file/parquet.py +10 -6
- etlplus/file/pb.py +22 -23
- etlplus/file/pbf.py +7 -6
- etlplus/file/properties.py +15 -29
- etlplus/file/proto.py +14 -20
- etlplus/file/psv.py +8 -7
- etlplus/file/rda.py +19 -51
- etlplus/file/rds.py +19 -51
- etlplus/file/sas7bdat.py +10 -30
- etlplus/file/sav.py +13 -24
- etlplus/file/sqlite.py +25 -83
- etlplus/file/stub.py +8 -6
- etlplus/file/sylk.py +7 -6
- etlplus/file/tab.py +8 -7
- etlplus/file/toml.py +14 -17
- etlplus/file/tsv.py +8 -7
- etlplus/file/txt.py +10 -7
- etlplus/file/vm.py +7 -6
- etlplus/file/wks.py +7 -6
- etlplus/file/xls.py +8 -5
- etlplus/file/xlsm.py +10 -6
- etlplus/file/xlsx.py +10 -6
- etlplus/file/xml.py +11 -9
- etlplus/file/xpt.py +13 -33
- etlplus/file/yaml.py +10 -11
- etlplus/file/zip.py +10 -5
- etlplus/file/zsav.py +7 -6
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/METADATA +1 -1
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/RECORD +68 -66
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
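A pattern repeats across nearly every per-file diff below: read/write helpers now accept a StrPath (a string or os.PathLike) rather than requiring a ready-made pathlib.Path, normalize it with a new coerce_path helper, and call ensure_parent_dir before writing so missing output directories are created on demand. Both helpers come from etlplus/file/_io.py (+138 -15 in the listing above), whose source is not included in this diff. The following is a minimal sketch of what the call sites imply; the alias definition and both function bodies are assumptions, not the actual implementation.

# Hypothetical reconstruction of the _io helpers; inferred from call
# sites in the diffs below, not from the actual _io.py source.
# Assumes Python 3.10+ for the `|` union syntax.
import os
from pathlib import Path

# Assumption: etlplus.types.StrPath is an alias along these lines.
StrPath = str | os.PathLike[str]


def coerce_path(path: StrPath) -> Path:
    # Accept str or PathLike input and normalize it to a pathlib.Path.
    return Path(path)


def ensure_parent_dir(path: Path) -> None:
    # Create the destination's parent directory tree if it is missing.
    path.parent.mkdir(parents=True, exist_ok=True)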
etlplus/file/nc.py
CHANGED

@@ -18,14 +18,15 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-from typing import Any
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from …
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -41,18 +42,24 @@ __all__ = [
 # SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
-def …
-…
-…
-…
-…
-        'NC support requires optional dependency "xarray".\n'
-        'Install with: pip install xarray'
-    ),
-)
-
+def _raise_engine_error(
+    err: ImportError,
+) -> None:
+    """
+    Raise a consistent ImportError for missing NetCDF engine support.
 
-def _raise_engine_error(err: ImportError) -> None:
+    Parameters
+    ----------
+    err : ImportError
+        The original ImportError raised when trying to use NetCDF support
+        without the required dependency.
+
+    Raises
+    ------
+    ImportError
+        Consistent ImportError indicating that NetCDF support requires
+        optional dependencies.
+    """
     raise ImportError(
         'NC support requires optional dependency "netCDF4" or "h5netcdf".\n'
         'Install with: pip install netCDF4',
@@ -63,14 +70,14 @@ def _raise_engine_error(err: ImportError) -> None:
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read NC content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NC file on disk.
 
     Returns
@@ -78,7 +85,8 @@ def read(
     JSONList
         The list of dictionaries read from the NC file.
     """
-…
+    path = coerce_path(path)
+    xarray = get_dependency('xarray', format_name='NC')
     try:
         dataset = xarray.open_dataset(path)
     except ImportError as err:  # pragma: no cover
@@ -93,7 +101,7 @@ def read(
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -101,7 +109,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NC file on disk.
     data : JSONData
         Data to write as NC file. Should be a list of dictionaries or a
@@ -112,15 +120,16 @@ def write(
     int
         The number of rows written to the NC file.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'NC')
     if not records:
         return 0
 
-    xarray = …
+    xarray = get_dependency('xarray', format_name='NC')
     pandas = get_pandas('NC')
     frame = pandas.DataFrame.from_records(records)
     dataset = xarray.Dataset.from_dataframe(frame)
-    path…
+    ensure_parent_dir(path)
     try:
         dataset.to_netcdf(path)
     except ImportError as err:  # pragma: no cover
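nc.py also replaces its hand-rolled xarray import guard with get_dependency from _imports.py (+35 -20 above, not shown in this diff). The call sites, get_dependency('xarray', format_name='NC'), suggest an importlib wrapper that fails with the same style of message as _raise_engine_error. A plausible sketch, not the actual implementation:

# Speculative reconstruction of get_dependency based solely on its call
# sites; the real etlplus/file/_imports.py source is not in this diff.
from importlib import import_module
from types import ModuleType


def get_dependency(name: str, *, format_name: str) -> ModuleType:
    # Import an optional dependency, or fail with a uniform, actionable
    # message that names the file format needing it.
    try:
        return import_module(name)
    except ImportError as err:
        raise ImportError(
            f'{format_name} support requires optional dependency "{name}".\n'
            f'Install with: pip install {name}',
        ) from err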
etlplus/file/ndjson.py
CHANGED

@@ -18,13 +18,15 @@ Notes
 from __future__ import annotations
 
 import json
-from pathlib import Path
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONDict
 from ..types import JSONList
+from ..types import StrPath
 from ..utils import count_records
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -41,14 +43,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read NDJSON content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NDJSON file on disk.
 
     Returns
@@ -61,6 +63,7 @@ def read(
     TypeError
         If any line in the NDJSON file is not a JSON object (dict).
     """
+    path = coerce_path(path)
     rows: JSONList = []
     with path.open('r', encoding='utf-8') as handle:
         for idx, line in enumerate(handle, start=1):
@@ -77,7 +80,7 @@ def read(
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -85,7 +88,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NDJSON file on disk.
     data : JSONData
         Data to write.
@@ -95,12 +98,13 @@ def write(
     int
         Number of records written.
     """
+    path = coerce_path(path)
     rows = normalize_records(data, 'NDJSON')
 
     if not rows:
         return 0
 
-    path…
+    ensure_parent_dir(path)
     with path.open('w', encoding='utf-8') as handle:
         for row in rows:
             handle.write(json.dumps(row, ensure_ascii=False))
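The caller-visible effect of the ndjson.py change is that plain string paths are now accepted and the output directory no longer has to exist in advance. A hypothetical usage; the module path is taken from the file listing above and the data values are illustrative:

# Assumed usage of the 0.17.3 API.
from etlplus.file import ndjson

rows = [{'id': 1}, {'id': 2}]

# The string is coerced to a Path internally, and out/ is created
# by ensure_parent_dir if it does not exist yet.
written = ndjson.write('out/events.ndjson', rows)
assert written == 2
assert ndjson.read('out/events.ndjson') == rows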
etlplus/file/numbers.py
CHANGED

@@ -16,11 +16,11 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -36,14 +36,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read NUMBERS content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NUMBERS file on disk.
 
     Returns
@@ -55,7 +55,7 @@
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -63,7 +63,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the NUMBERS file on disk.
     data : JSONData
         Data to write as NUMBERS file. Should be a list of dictionaries or a
@@ -74,4 +74,5 @@ def write(
     int
         The number of rows written to the NUMBERS file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='NUMBERS')
etlplus/file/ods.py
CHANGED

@@ -19,12 +19,14 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -41,14 +43,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read ODS content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the ODS file on disk.
 
     Returns
@@ -61,6 +63,7 @@ def read(
     ImportError
         If optional dependencies for ODS support are missing.
     """
+    path = coerce_path(path)
     pandas = get_pandas('ODS')
     try:
         frame = pandas.read_excel(path, engine='odf')
@@ -73,7 +76,7 @@ def read(
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -81,7 +84,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the ODS file on disk.
     data : JSONData
         Data to write as ODS. Should be a list of dictionaries or a
@@ -97,12 +100,13 @@ def write(
     ImportError
         If optional dependencies for ODS support are missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'ODS')
     if not records:
         return 0
 
     pandas = get_pandas('ODS')
-    path…
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
     try:
         frame.to_excel(path, index=False, engine='odf')
etlplus/file/orc.py
CHANGED

@@ -18,12 +18,15 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -40,39 +43,30 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read ORC content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the ORC file on disk.
 
     Returns
     -------
     JSONList
         The list of dictionaries read from the ORC file.
-
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
+    get_dependency('pyarrow', format_name='ORC')
     pandas = get_pandas('ORC')
-…
-        frame = pandas.read_orc(path)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'ORC support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame = pandas.read_orc(path)
     return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -80,7 +74,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the ORC file on disk.
     data : JSONData
         Data to write.
@@ -89,24 +83,15 @@ def write(
     -------
     int
         Number of records written.
-
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'ORC')
     if not records:
         return 0
 
+    get_dependency('pyarrow', format_name='ORC')
     pandas = get_pandas('ORC')
-    path…
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
-…
-        frame.to_orc(path, index=False)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'ORC support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame.to_orc(path, index=False)
     return len(records)
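Beyond the StrPath change, orc.py shifts its error handling: the try/except ImportError wrappers around each pandas call are gone, replaced by one upfront get_dependency('pyarrow', format_name='ORC') probe. Note the ordering in write(): the empty-input short circuit runs before both the dependency probe and ensure_parent_dir, so writing an empty list touches neither pyarrow nor the filesystem. A hypothetical usage, assuming pandas and pyarrow are installed:

# Assumed usage; behavior follows the write() hunks above.
from etlplus.file import orc

# Empty input returns 0 before the pyarrow probe or any directory creation.
assert orc.write('out/empty.orc', []) == 0

# Non-empty input probes pyarrow once, ensures out/ exists, then writes.
rows = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]
assert orc.write('out/rows.orc', rows) == 2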
etlplus/file/parquet.py
CHANGED

@@ -18,12 +18,14 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -40,14 +42,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read Parquet content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PARQUET file on disk.
 
     Returns
@@ -60,6 +62,7 @@ def read(
     ImportError
         If optional dependencies for Parquet support are missing.
     """
+    path = coerce_path(path)
     pandas = get_pandas('Parquet')
     try:
         frame = pandas.read_parquet(path)
@@ -73,7 +76,7 @@ def read(
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -81,7 +84,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PARQUET file on disk.
     data : JSONData
         Data to write.
@@ -96,12 +99,13 @@ def write(
     ImportError
         If optional dependencies for Parquet support are missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'Parquet')
     if not records:
         return 0
 
     pandas = get_pandas('Parquet')
-    path…
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
     try:
         frame.to_parquet(path, index=False)
etlplus/file/pb.py
CHANGED

@@ -5,7 +5,7 @@ Helpers for reading/writing Protocol Buffers binary (PB) files.
 
 Notes
 -----
-- A PB file contains Protocol Buffers (…
+- A PB file contains Protocol Buffers (Protobuf) binary-encoded messages.
 - Common cases:
     - Serialized payloads emitted by services or SDKs.
     - Binary payload dumps for debugging or transport.
@@ -16,9 +16,13 @@ Notes
 from __future__ import annotations
 
 import base64
-from pathlib import Path
 
 from ..types import JSONData
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import require_dict_payload
+from ._io import require_str_key
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -34,14 +38,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONData:
     """
     Read PB content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PB file on disk.
 
     Returns
@@ -49,13 +53,14 @@ def read(
     JSONData
         The structured data read from the PB file.
     """
+    path = coerce_path(path)
     payload = path.read_bytes()
     encoded = base64.b64encode(payload).decode('ascii')
     return {'payload_base64': encoded}
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -63,7 +68,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PB file on disk.
     data : JSONData
         Data to write as PB. Should be a dictionary with ``payload_base64``.
@@ -72,22 +77,16 @@ def write(
     -------
     int
         The number of records written to the PB file.
-
-    Raises
-    ------
-    TypeError
-        If *data* is not a dictionary or missing ``payload_base64``.
     """
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-    path.…
-    path.write_bytes(payload)
+    path = coerce_path(path)
+    payload = require_dict_payload(data, format_name='PB')
+    payload_base64 = require_str_key(
+        payload,
+        format_name='PB',
+        key='payload_base64',
+    )
+
+    decoded = base64.b64decode(payload_base64.encode('ascii'))
+    ensure_parent_dir(path)
+    path.write_bytes(decoded)
     return 1
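pb.py swaps roughly ten lines of inline payload validation for two new _io helpers. Their implementations are not shown, but the call sites plus the docstring section they replace ("Raises TypeError if *data* is not a dictionary or missing ``payload_base64``") pin down the contract. A sketch consistent with that contract, not the actual source:

# Speculative reconstruction of the validation helpers used by pb.py;
# signatures inferred from the call sites above, bodies are assumptions.
from typing import Any


def require_dict_payload(data: Any, *, format_name: str) -> dict:
    # Reject any payload that is not a JSON object.
    if not isinstance(data, dict):
        raise TypeError(f'{format_name} payload must be a dictionary')
    return data


def require_str_key(payload: dict, *, format_name: str, key: str) -> str:
    # Require a string value under *key* (here, 'payload_base64').
    value = payload.get(key)
    if not isinstance(value, str):
        raise TypeError(f'{format_name} payload must include string key {key!r}')
    return value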
etlplus/file/pbf.py
CHANGED

@@ -18,11 +18,11 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -38,14 +38,14 @@ __all__ = [
 
 
 def read(
-    path: …
+    path: StrPath,
 ) -> JSONList:
     """
     Read PBF content from *path*.
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PBF file on disk.
 
     Returns
@@ -57,7 +57,7 @@
 
 
 def write(
-    path: …
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +65,7 @@ def write(
 
     Parameters
     ----------
-    path : …
+    path : StrPath
         Path to the PBF file on disk.
     data : JSONData
         Data to write as PBF. Should be a list of dictionaries or a
@@ -76,4 +76,5 @@ def write(
     int
         The number of rows written to the PBF file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='PBF')