PyPI - etlplus - Versions diffs - 0.16.10__py3-none-any.whl → 0.17.3__py3-none-any.whl - Mend

etlplus 0.16.10__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

etlplus/file/README.md +33 -0
etlplus/file/_imports.py +35 -20
etlplus/file/_io.py +138 -15
etlplus/file/_r.py +48 -0
etlplus/file/_sql.py +224 -0
etlplus/file/accdb.py +7 -6
etlplus/file/arrow.py +29 -10
etlplus/file/avro.py +13 -10
etlplus/file/bson.py +94 -10
etlplus/file/cbor.py +29 -17
etlplus/file/cfg.py +7 -6
etlplus/file/conf.py +7 -6
etlplus/file/core.py +1 -1
etlplus/file/csv.py +8 -7
etlplus/file/dat.py +52 -11
etlplus/file/dta.py +36 -16
etlplus/file/duckdb.py +72 -11
etlplus/file/enums.py +29 -0
etlplus/file/feather.py +15 -30
etlplus/file/fwf.py +44 -10
etlplus/file/gz.py +12 -7
etlplus/file/hbs.py +7 -6
etlplus/file/hdf5.py +71 -8
etlplus/file/ini.py +60 -17
etlplus/file/ion.py +7 -6
etlplus/file/jinja2.py +7 -6
etlplus/file/json.py +10 -11
etlplus/file/log.py +7 -6
etlplus/file/mat.py +7 -6
etlplus/file/mdb.py +7 -6
etlplus/file/msgpack.py +27 -15
etlplus/file/mustache.py +7 -6
etlplus/file/nc.py +69 -11
etlplus/file/ndjson.py +10 -6
etlplus/file/numbers.py +7 -6
etlplus/file/ods.py +48 -11
etlplus/file/orc.py +15 -30
etlplus/file/parquet.py +10 -6
etlplus/file/pb.py +36 -24
etlplus/file/pbf.py +7 -6
etlplus/file/properties.py +44 -18
etlplus/file/proto.py +24 -18
etlplus/file/psv.py +12 -11
etlplus/file/rda.py +57 -15
etlplus/file/rds.py +50 -14
etlplus/file/sas7bdat.py +26 -16
etlplus/file/sav.py +34 -16
etlplus/file/sqlite.py +70 -10
etlplus/file/stub.py +8 -6
etlplus/file/sylk.py +7 -6
etlplus/file/tab.py +13 -13
etlplus/file/toml.py +56 -17
etlplus/file/tsv.py +8 -7
etlplus/file/txt.py +10 -7
etlplus/file/vm.py +7 -6
etlplus/file/wks.py +7 -6
etlplus/file/xls.py +8 -5
etlplus/file/xlsm.py +48 -10
etlplus/file/xlsx.py +10 -6
etlplus/file/xml.py +11 -9
etlplus/file/xpt.py +46 -10
etlplus/file/yaml.py +10 -11
etlplus/file/zip.py +10 -5
etlplus/file/zsav.py +7 -6
{etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/METADATA +44 -26
{etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/RECORD +70 -68
{etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
{etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
{etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
{etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0

etlplus/file/msgpack.py CHANGED Viewed

@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.msgpack` module.
-Stub helpers for reading/writing MessagePack (MSGPACK) files (not implemented
-yet).
+Helpers for reading/writing MessagePack (MSGPACK) files.
 Notes
 -----
@@ -19,11 +18,13 @@ Notes
 from __future__ import annotations
-from pathlib import Path
 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ..types import StrPath
+from ._imports import get_dependency
+from ._io import coerce_path
+from ._io import coerce_record_payload
+from ._io import ensure_parent_dir
+from ._io import normalize_records
 # SECTION: EXPORTS ========================================================== #
@@ -39,26 +40,30 @@ __all__ = [
 def read(
-    path: Path,
-) -> JSONList:
+    path: StrPath,
+) -> JSONData:
     """
     Read MsgPack content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the MsgPack file on disk.
     Returns
     -------
-    JSONList
-        The list of dictionaries read from the MsgPack file.
+    JSONData
+        The structured data read from the MsgPack file.
     """
-    return stub.read(path, format_name='MSGPACK')
+    path = coerce_path(path)
+    msgpack = get_dependency('msgpack', format_name='MSGPACK')
+    with path.open('rb') as handle:
+        payload = msgpack.unpackb(handle.read(), raw=False)
+    return coerce_record_payload(payload, format_name='MSGPACK')
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +71,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the MsgPack file on disk.
     data : JSONData
         Data to write as MsgPack. Should be a list of dictionaries or a
@@ -77,4 +82,11 @@ def write(
     int
         The number of rows written to the MsgPack file.
     """
-    return stub.write(path, data, format_name='MSGPACK')
+    path = coerce_path(path)
+    msgpack = get_dependency('msgpack', format_name='MSGPACK')
+    records = normalize_records(data, 'MSGPACK')
+    payload: JSONData = records if isinstance(data, list) else records[0]
+    ensure_parent_dir(path)
+    with path.open('wb') as handle:
+        handle.write(msgpack.packb(payload, use_bin_type=True))
+    return len(records)

etlplus/file/mustache.py CHANGED Viewed

@@ -19,11 +19,11 @@ Notes
 from __future__ import annotations
-from pathlib import Path
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path
 # SECTION: EXPORTS ========================================================== #
@@ -39,14 +39,14 @@ __all__ = [
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read MUSTACHE content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the MUSTACHE file on disk.
     Returns
@@ -58,7 +58,7 @@ def read(
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +66,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the MUSTACHE file on disk.
     data : JSONData
         Data to write as MUSTACHE file. Should be a list of dictionaries or a
@@ -77,4 +77,5 @@ def write(
     int
         The number of rows written to the MUSTACHE file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='MUSTACHE')

etlplus/file/nc.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 :mod:`etlplus.file.nc` module.
-Stub helpers for reading/writing NetCDF (NC) data files (not implemented yet).
+Helpers for reading/writing NetCDF (NC) data files.
 Notes
 -----
@@ -12,17 +12,22 @@ Notes
     - Sharing large datasets in research communities.
     - Efficient data access and manipulation.
 - Rule of thumb:
-    - If the file follows the NetCDF standard, use this module for
-        reading and writing.
+    - If the file follows the NetCDF standard, use this module for reading and
+        writing.
 """
 from __future__ import annotations
-from pathlib import Path
+from typing import cast
 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ..types import StrPath
+from ._imports import get_dependency
+from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import normalize_records
 # SECTION: EXPORTS ========================================================== #
@@ -34,18 +39,45 @@ __all__ = [
 ]
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+def _raise_engine_error(
+    err: ImportError,
+) -> None:
+    """
+    Raise a consistent ImportError for missing NetCDF engine support.
+    Parameters
+    ----------
+    err : ImportError
+        The original ImportError raised when trying to use NetCDF support
+        without the required dependency.
+    Raises
+    ------
+    ImportError
+        Consistent ImportError indicating that NetCDF support requires
+        optional dependencies.
+    """
+    raise ImportError(
+        'NC support requires optional dependency "netCDF4" or "h5netcdf".\n'
+        'Install with: pip install netCDF4',
+    ) from err
 # SECTION: FUNCTIONS ======================================================== #
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read NC content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the NC file on disk.
     Returns
@@ -53,11 +85,23 @@ def read(
     JSONList
         The list of dictionaries read from the NC file.
     """
-    return stub.read(path, format_name='NC')
+    path = coerce_path(path)
+    xarray = get_dependency('xarray', format_name='NC')
+    try:
+        dataset = xarray.open_dataset(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_engine_error(err)
+    with dataset:
+        frame = dataset.to_dataframe().reset_index()
+    if 'index' in frame.columns:
+        values = list(frame['index'])
+        if values == list(range(len(values))):
+            frame = frame.drop(columns=['index'])
+    return cast(JSONList, frame.to_dict(orient='records'))
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +109,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the NC file on disk.
     data : JSONData
         Data to write as NC file. Should be a list of dictionaries or a
@@ -76,4 +120,18 @@ def write(
     int
         The number of rows written to the NC file.
     """
-    return stub.write(path, data, format_name='NC')
+    path = coerce_path(path)
+    records = normalize_records(data, 'NC')
+    if not records:
+        return 0
+    xarray = get_dependency('xarray', format_name='NC')
+    pandas = get_pandas('NC')
+    frame = pandas.DataFrame.from_records(records)
+    dataset = xarray.Dataset.from_dataframe(frame)
+    ensure_parent_dir(path)
+    try:
+        dataset.to_netcdf(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_engine_error(err)
+    return len(records)

etlplus/file/ndjson.py CHANGED Viewed

@@ -18,13 +18,15 @@ Notes
 from __future__ import annotations
 import json
-from pathlib import Path
 from typing import cast
 from ..types import JSONData
 from ..types import JSONDict
 from ..types import JSONList
+from ..types import StrPath
 from ..utils import count_records
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 # SECTION: EXPORTS ========================================================== #
@@ -41,14 +43,14 @@ __all__ = [
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read NDJSON content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the NDJSON file on disk.
     Returns
@@ -61,6 +63,7 @@ def read(
     TypeError
         If any line in the NDJSON file is not a JSON object (dict).
     """
+    path = coerce_path(path)
     rows: JSONList = []
     with path.open('r', encoding='utf-8') as handle:
         for idx, line in enumerate(handle, start=1):
@@ -77,7 +80,7 @@ def read(
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -85,7 +88,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the NDJSON file on disk.
     data : JSONData
         Data to write.
@@ -95,12 +98,13 @@ def write(
     int
         Number of records written.
     """
+    path = coerce_path(path)
     rows = normalize_records(data, 'NDJSON')
     if not rows:
         return 0
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     with path.open('w', encoding='utf-8') as handle:
         for row in rows:
             handle.write(json.dumps(row, ensure_ascii=False))

etlplus/file/numbers.py CHANGED Viewed

@@ -16,11 +16,11 @@ Notes
 from __future__ import annotations
-from pathlib import Path
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path
 # SECTION: EXPORTS ========================================================== #
@@ -36,14 +36,14 @@ __all__ = [
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read NUMBERS content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the NUMBERS file on disk.
     Returns
@@ -55,7 +55,7 @@ def read(
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -63,7 +63,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the NUMBERS file on disk.
     data : JSONData
         Data to write as NUMBERS file. Should be a list of dictionaries or a
@@ -74,4 +74,5 @@ def write(
     int
         The number of rows written to the NUMBERS file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='NUMBERS')

etlplus/file/ods.py CHANGED Viewed

@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.ods` module.
-Stub helpers for reading/writing OpenDocument (ODS) spreadsheet files (not
-implemented yet).
+Helpers for reading/writing OpenDocument (ODS) spreadsheet files.
 Notes
 -----
@@ -20,11 +19,15 @@ Notes
 from __future__ import annotations
-from pathlib import Path
+from typing import cast
 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ..types import StrPath
+from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import normalize_records
 # SECTION: EXPORTS ========================================================== #
@@ -40,26 +43,40 @@ __all__ = [
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read ODS content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the ODS file on disk.
     Returns
     -------
     JSONList
         The list of dictionaries read from the ODS file.
+    Raises
+    ------
+    ImportError
+        If optional dependencies for ODS support are missing.
     """
-    return stub.read(path, format_name='ODS')
+    path = coerce_path(path)
+    pandas = get_pandas('ODS')
+    try:
+        frame = pandas.read_excel(path, engine='odf')
+    except ImportError as err:  # pragma: no cover
+        raise ImportError(
+            'ODS support requires optional dependency "odfpy".\n'
+            'Install with: pip install odfpy',
+        ) from err
+    return cast(JSONList, frame.to_dict(orient='records'))
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -67,15 +84,35 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the ODS file on disk.
     data : JSONData
-        Data to write as ODS file. Should be a list of dictionaries or a
+        Data to write as ODS. Should be a list of dictionaries or a
         single dictionary.
     Returns
     -------
     int
         The number of rows written to the ODS file.
+    Raises
+    ------
+    ImportError
+        If optional dependencies for ODS support are missing.
     """
-    return stub.write(path, data, format_name='ODS')
+    path = coerce_path(path)
+    records = normalize_records(data, 'ODS')
+    if not records:
+        return 0
+    pandas = get_pandas('ODS')
+    ensure_parent_dir(path)
+    frame = pandas.DataFrame.from_records(records)
+    try:
+        frame.to_excel(path, index=False, engine='odf')
+    except ImportError as err:  # pragma: no cover
+        raise ImportError(
+            'ODS support requires optional dependency "odfpy".\n'
+            'Install with: pip install odfpy',
+        ) from err
+    return len(records)

etlplus/file/orc.py CHANGED Viewed

@@ -18,12 +18,15 @@ Notes
 from __future__ import annotations
-from pathlib import Path
 from typing import cast
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 # SECTION: EXPORTS ========================================================== #
@@ -40,39 +43,30 @@ __all__ = [
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read ORC content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the ORC file on disk.
     Returns
     -------
     JSONList
         The list of dictionaries read from the ORC file.
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
+    get_dependency('pyarrow', format_name='ORC')
     pandas = get_pandas('ORC')
-    try:
-        frame = pandas.read_orc(path)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'ORC support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame = pandas.read_orc(path)
     return cast(JSONList, frame.to_dict(orient='records'))
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -80,7 +74,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the ORC file on disk.
     data : JSONData
         Data to write.
@@ -89,24 +83,15 @@ def write(
     -------
     int
         Number of records written.
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'ORC')
     if not records:
         return 0
+    get_dependency('pyarrow', format_name='ORC')
     pandas = get_pandas('ORC')
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
-    try:
-        frame.to_orc(path, index=False)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'ORC support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame.to_orc(path, index=False)
     return len(records)

etlplus/file/parquet.py CHANGED Viewed

@@ -18,12 +18,14 @@ Notes
 from __future__ import annotations
-from pathlib import Path
 from typing import cast
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 # SECTION: EXPORTS ========================================================== #
@@ -40,14 +42,14 @@ __all__ = [
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read Parquet content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the PARQUET file on disk.
     Returns
@@ -60,6 +62,7 @@ def read(
     ImportError
         If optional dependencies for Parquet support are missing.
     """
+    path = coerce_path(path)
     pandas = get_pandas('Parquet')
     try:
         frame = pandas.read_parquet(path)
@@ -73,7 +76,7 @@ def read(
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -81,7 +84,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the PARQUET file on disk.
     data : JSONData
         Data to write.
@@ -96,12 +99,13 @@ def write(
     ImportError
         If optional dependencies for Parquet support are missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'Parquet')
     if not records:
         return 0
     pandas = get_pandas('Parquet')
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
     try:
         frame.to_parquet(path, index=False)

etlplus 0.16.10__py3-none-any.whl → 0.17.3__py3-none-any.whl

etlplus 0.16.10__py3-none-any.whl → 0.17.3__py3-none-any.whl