etlplus 0.16.9__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/file/enums.py CHANGED
@@ -199,19 +199,48 @@ class FileFormat(CoercibleStrEnum):
         'yml': 'yaml',
         # File extensions
         '.avro': 'avro',
+        '.arrow': 'arrow',
         '.csv': 'csv',
+        '.duckdb': 'duckdb',
+        '.dat': 'dat',
         '.feather': 'feather',
+        '.fwf': 'fwf',
         '.gz': 'gz',
+        '.hdf': 'hdf5',
+        '.hdf5': 'hdf5',
+        '.h5': 'hdf5',
+        '.ini': 'ini',
         '.json': 'json',
         '.jsonl': 'ndjson',
+        '.bson': 'bson',
+        '.cbor': 'cbor',
+        '.msgpack': 'msgpack',
         '.ndjson': 'ndjson',
+        '.ods': 'ods',
         '.orc': 'orc',
         '.parquet': 'parquet',
         '.pq': 'parquet',
+        '.pb': 'pb',
+        '.proto': 'proto',
+        '.psv': 'psv',
+        '.sqlite': 'sqlite',
+        '.sqlite3': 'sqlite',
         '.stub': 'stub',
+        '.tab': 'tab',
+        '.dta': 'dta',
+        '.sas7bdat': 'sas7bdat',
+        '.xpt': 'xpt',
+        '.rds': 'rds',
+        '.rda': 'rda',
+        '.nc': 'nc',
+        '.sav': 'sav',
+        '.properties': 'properties',
+        '.prop': 'properties',
+        '.toml': 'toml',
         '.tsv': 'tsv',
         '.txt': 'txt',
         '.xls': 'xls',
+        '.xlsm': 'xlsm',
         '.xlsx': 'xlsx',
         '.zip': 'zip',
         '.xml': 'xml',
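
The mapping above aliases both bare names ('yml') and dotted extensions ('.h5') to canonical format values, with several extensions collapsing onto one format. A minimal sketch of the lookup idea (the dict below is a hypothetical excerpt; this diff does not show how CoercibleStrEnum consumes the table):

    from pathlib import Path

    # Hypothetical excerpt of the alias table added above: several
    # extensions resolve to the same canonical format string.
    ALIASES = {'.hdf': 'hdf5', '.hdf5': 'hdf5', '.h5': 'hdf5', '.pq': 'parquet'}

    def detect_format(path: Path) -> str:
        # Fall back to the bare suffix when no alias is registered.
        suffix = path.suffix.lower()
        return ALIASES.get(suffix, suffix.lstrip('.'))

    print(detect_format(Path('metrics.h5')))  # -> 'hdf5'
    print(detect_format(Path('data.pq')))     # -> 'parquet'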
etlplus/file/fwf.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.fwf` module.

-Stub helpers for reading/writing Fixed-Width Fields (FWF) files (not
-implemented yet).
+Helpers for reading/writing Fixed-Width Fields (FWF) files.

 Notes
 -----
@@ -19,10 +18,13 @@ Notes
 from __future__ import annotations

 from pathlib import Path
+from typing import Any
+from typing import cast

 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ._imports import get_pandas
+from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #

@@ -53,7 +55,9 @@ def read(
     JSONList
         The list of dictionaries read from the FWF file.
     """
-    return stub.read(path, format_name='FWF')
+    pandas = get_pandas('FWF')
+    frame = pandas.read_fwf(path)
+    return cast(JSONList, frame.to_dict(orient='records'))


 def write(
@@ -76,4 +80,32 @@ def write(
     int
         The number of rows written to the FWF file.
     """
-    return stub.write(path, data, format_name='FWF')
+    records = normalize_records(data, 'FWF')
+    if not records:
+        return 0
+
+    fieldnames = sorted({key for row in records for key in row})
+    if not fieldnames:
+        return 0
+
+    def stringify(value: Any) -> str:
+        if value is None:
+            return ''
+        return str(value)
+
+    widths: dict[str, int] = {name: len(name) for name in fieldnames}
+    for row in records:
+        for name in fieldnames:
+            widths[name] = max(widths[name], len(stringify(row.get(name))))
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        header = ' '.join(name.ljust(widths[name]) for name in fieldnames)
+        handle.write(header + '\n')
+        for row in records:
+            line = ' '.join(
+                stringify(row.get(name)).ljust(widths[name])
+                for name in fieldnames
+            )
+            handle.write(line + '\n')
+    return len(records)
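
The new writer left-pads every column to its widest value and joins columns with single spaces, a layout that pandas.read_fwf can usually re-infer on the way back in. A round-trip sketch (assuming the package is installed with pandas available; read-back value types follow pandas' inference):

    from pathlib import Path

    from etlplus.file import fwf

    rows = [
        {'name': 'ada', 'score': 91},
        {'name': 'grace', 'score': 88},
    ]
    written = fwf.write(Path('scores.fwf'), rows)  # pads columns, returns 2
    print(fwf.read(Path('scores.fwf')))            # values re-typed by pandas.read_fwf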
etlplus/file/hdf5.py CHANGED
@@ -1,8 +1,8 @@
 """
 :mod:`etlplus.file.hdf5` module.

-Stub helpers for reading/writing Hierarchical Data Format (HDF5) files (not
-implemented yet).
+Helpers for reading Hierarchical Data Format (HDF5) files. Stub helpers for
+writing such files (not implemented yet).

 Notes
 -----
@@ -20,10 +20,12 @@ Notes
 from __future__ import annotations

 from pathlib import Path
+from typing import cast

 from ..types import JSONData
 from ..types import JSONList
 from . import stub
+from ._imports import get_pandas

 # SECTION: EXPORTS ========================================================== #

@@ -35,6 +37,22 @@ __all__ = [
 ]


+# SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+DEFAULT_KEY = 'data'
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _raise_tables_error(err: ImportError) -> None:
+    raise ImportError(
+        'HDF5 support requires optional dependency "tables".\n'
+        'Install with: pip install tables',
+    ) from err
+
+
 # SECTION: FUNCTIONS ======================================================== #


@@ -54,7 +72,27 @@ def read(
     JSONList
         The list of dictionaries read from the HDF5 file.
     """
-    return stub.read(path, format_name='HDF5')
+    pandas = get_pandas('HDF5')
+    try:
+        store = pandas.HDFStore(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_tables_error(err)
+
+    with store:
+        keys = [key.lstrip('/') for key in store.keys()]
+        if not keys:
+            return []
+        if DEFAULT_KEY in keys:
+            key = DEFAULT_KEY
+        elif len(keys) == 1:
+            key = keys[0]
+        else:
+            raise ValueError(
+                'Multiple datasets found in HDF5 file; expected "data" or '
+                'a single dataset',
+            )
+        frame = store.get(key)
+        return cast(JSONList, frame.to_dict(orient='records'))


 def write(
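
Reading resolves the dataset key in two steps: prefer a dataset named 'data' (DEFAULT_KEY), otherwise accept a lone dataset, otherwise raise ValueError. A sketch of producing a compatible file with plain pandas (requires the optional "tables" dependency; writing through etlplus itself remains a stub):

    from pathlib import Path

    import pandas as pd

    from etlplus.file import hdf5

    frame = pd.DataFrame([{'id': 1}, {'id': 2}])
    frame.to_hdf('events.h5', key='data')  # 'data' matches DEFAULT_KEY above
    print(hdf5.read(Path('events.h5')))    # [{'id': 1}, {'id': 2}]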
etlplus/file/ini.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.ini` module.

-Stub helpers for reading/writing initialization (INI) files (not implemented
-yet).
+Helpers for reading/writing initialization (INI) files.

 Notes
 -----
@@ -20,11 +19,12 @@ Notes

 from __future__ import annotations

+import configparser
 from pathlib import Path
+from typing import Any

 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ..types import JSONDict

 # SECTION: EXPORTS ========================================================== #

@@ -36,12 +36,22 @@ __all__ = [
 ]


+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _stringify(value: Any) -> str:
+    """Normalize INI values into strings."""
+    if value is None:
+        return ''
+    return str(value)
+
+
 # SECTION: FUNCTIONS ======================================================== #


 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read INI content from *path*.

@@ -52,10 +62,22 @@ def read(

     Returns
     -------
-    JSONList
-        The list of dictionaries read from the INI file.
+    JSONData
+        The structured data read from the INI file.
     """
-    return stub.read(path, format_name='INI')
+    parser = configparser.ConfigParser()
+    parser.read(path, encoding='utf-8')
+
+    payload: JSONDict = {}
+    if parser.defaults():
+        payload['DEFAULT'] = dict(parser.defaults())
+    defaults = dict(parser.defaults())
+    for section in parser.sections():
+        raw_section = dict(parser.items(section))
+        for key in defaults:
+            raw_section.pop(key, None)
+        payload[section] = raw_section
+    return payload


 def write(
@@ -70,12 +92,40 @@ def write(
     path : Path
         Path to the INI file on disk.
     data : JSONData
-        Data to write as INI. Should be a list of dictionaries or a
-        single dictionary.
+        Data to write as INI. Should be a dictionary.

     Returns
     -------
     int
-        The number of rows written to the INI file.
+        The number of records written to the INI file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary.
     """
-    return stub.write(path, data, format_name='INI')
+    if isinstance(data, list):
+        raise TypeError('INI payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('INI payloads must be a dict')
+
+    parser = configparser.ConfigParser()
+    for section, values in data.items():
+        if section == 'DEFAULT':
+            if isinstance(values, dict):
+                parser['DEFAULT'] = {
+                    key: _stringify(value) for key, value in values.items()
+                }
+            else:
+                raise TypeError('INI DEFAULT section must be a dict')
+            continue
+        if not isinstance(values, dict):
+            raise TypeError('INI sections must map to dicts')
+        parser[section] = {
+            key: _stringify(value) for key, value in values.items()
+        }
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        parser.write(handle)
+    return 1
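
Note the asymmetry configparser bakes in: values are written through _stringify and always come back as strings, and read() subtracts DEFAULT-section keys from every named section so inherited defaults are not duplicated. A round-trip sketch:

    from pathlib import Path

    from etlplus.file import ini

    ini.write(Path('app.ini'), {
        'DEFAULT': {'retries': 3},
        'server': {'host': 'localhost', 'port': 8080},
    })
    # Values come back as strings, e.g.
    # {'DEFAULT': {'retries': '3'},
    #  'server': {'host': 'localhost', 'port': '8080'}}
    print(ini.read(Path('app.ini')))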
etlplus/file/msgpack.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.msgpack` module.

-Stub helpers for reading/writing MessagePack (MSGPACK) files (not implemented
-yet).
+Helpers for reading/writing MessagePack (MSGPACK) files.

 Notes
 -----
@@ -20,10 +19,12 @@ Notes
 from __future__ import annotations

 from pathlib import Path
+from typing import Any

 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ._imports import get_optional_module
+from ._io import coerce_record_payload
+from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #

@@ -35,12 +36,26 @@ __all__ = [
 ]


+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _get_msgpack() -> Any:
+    """Return the msgpack module, importing it on first use."""
+    return get_optional_module(
+        'msgpack',
+        error_message=(
+            'MSGPACK support requires optional dependency "msgpack".\n'
+            'Install with: pip install msgpack'
+        ),
+    )
+
+
 # SECTION: FUNCTIONS ======================================================== #


 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read MsgPack content from *path*.

@@ -51,10 +66,13 @@ def read(

     Returns
     -------
-    JSONList
-        The list of dictionaries read from the MsgPack file.
+    JSONData
+        The structured data read from the MsgPack file.
     """
-    return stub.read(path, format_name='MSGPACK')
+    msgpack = _get_msgpack()
+    with path.open('rb') as handle:
+        payload = msgpack.unpackb(handle.read(), raw=False)
+    return coerce_record_payload(payload, format_name='MSGPACK')


 def write(
@@ -77,4 +95,10 @@ def write(
     int
         The number of rows written to the MsgPack file.
     """
-    return stub.write(path, data, format_name='MSGPACK')
+    msgpack = _get_msgpack()
+    records = normalize_records(data, 'MSGPACK')
+    payload: JSONData = records if isinstance(data, list) else records[0]
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('wb') as handle:
+        handle.write(msgpack.packb(payload, use_bin_type=True))
+    return len(records)
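
write() keeps the payload shape: a list round-trips as a list, while a single dict is unwrapped back out of normalize_records before packing. A sketch (requires the optional msgpack dependency; the exact shape read() returns depends on coerce_record_payload, which is defined elsewhere in the package):

    from pathlib import Path

    from etlplus.file import msgpack

    count = msgpack.write(Path('event.msgpack'), {'id': 1, 'ok': True})  # returns 1
    print(msgpack.read(Path('event.msgpack')))  # payload decoded with raw=False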
etlplus/file/nc.py CHANGED
@@ -1,7 +1,7 @@
 """
 :mod:`etlplus.file.nc` module.

-Stub helpers for reading/writing NetCDF (NC) data files (not implemented yet).
+Helpers for reading/writing NetCDF (NC) data files.

 Notes
 -----
@@ -12,17 +12,21 @@ Notes
 - Sharing large datasets in research communities.
 - Efficient data access and manipulation.
 - Rule of thumb:
-    - If the file follows the NetCDF standard, use this module for
-      reading and writing.
+    - If the file follows the NetCDF standard, use this module for reading and
+      writing.
 """

 from __future__ import annotations

 from pathlib import Path
+from typing import Any
+from typing import cast

 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #

@@ -34,6 +38,27 @@ __all__ = [
 ]


+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _get_xarray() -> Any:
+    """Return the xarray module, importing it on first use."""
+    return get_optional_module(
+        'xarray',
+        error_message=(
+            'NC support requires optional dependency "xarray".\n'
+            'Install with: pip install xarray'
+        ),
+    )
+
+
+def _raise_engine_error(err: ImportError) -> None:
+    raise ImportError(
+        'NC support requires optional dependency "netCDF4" or "h5netcdf".\n'
+        'Install with: pip install netCDF4',
+    ) from err
+
+
 # SECTION: FUNCTIONS ======================================================== #


@@ -53,7 +78,18 @@ def read(
     JSONList
         The list of dictionaries read from the NC file.
     """
-    return stub.read(path, format_name='NC')
+    xarray = _get_xarray()
+    try:
+        dataset = xarray.open_dataset(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_engine_error(err)
+    with dataset:
+        frame = dataset.to_dataframe().reset_index()
+        if 'index' in frame.columns:
+            values = list(frame['index'])
+            if values == list(range(len(values))):
+                frame = frame.drop(columns=['index'])
+        return cast(JSONList, frame.to_dict(orient='records'))


 def write(
@@ -76,4 +112,17 @@ def write(
     int
         The number of rows written to the NC file.
     """
-    return stub.write(path, data, format_name='NC')
+    records = normalize_records(data, 'NC')
+    if not records:
+        return 0
+
+    xarray = _get_xarray()
+    pandas = get_pandas('NC')
+    frame = pandas.DataFrame.from_records(records)
+    dataset = xarray.Dataset.from_dataframe(frame)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    try:
+        dataset.to_netcdf(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_engine_error(err)
+    return len(records)
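
The NetCDF round trip goes records → pandas DataFrame → xarray Dataset → file, and back through to_dataframe().reset_index(); the trailing cleanup drops the synthetic 'index' column only when it is the trivial 0..n-1 range. A sketch (requires xarray plus a netCDF engine such as netCDF4):

    from pathlib import Path

    from etlplus.file import nc

    rows = [{'t': 0, 'temp': 21.5}, {'t': 1, 'temp': 22.1}]
    nc.write(Path('temps.nc'), rows)  # returns 2
    print(nc.read(Path('temps.nc')))  # the trivial RangeIndex column is dropped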
etlplus/file/ods.py CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.ods` module.

-Stub helpers for reading/writing OpenDocument (ODS) spreadsheet files (not
-implemented yet).
+Helpers for reading/writing OpenDocument (ODS) spreadsheet files.

 Notes
 -----
@@ -21,10 +20,12 @@ Notes
 from __future__ import annotations

 from pathlib import Path
+from typing import cast

 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ._imports import get_pandas
+from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #

@@ -54,8 +55,21 @@ def read(
     -------
     JSONList
         The list of dictionaries read from the ODS file.
+
+    Raises
+    ------
+    ImportError
+        If optional dependencies for ODS support are missing.
     """
-    return stub.read(path, format_name='ODS')
+    pandas = get_pandas('ODS')
+    try:
+        frame = pandas.read_excel(path, engine='odf')
+    except ImportError as err:  # pragma: no cover
+        raise ImportError(
+            'ODS support requires optional dependency "odfpy".\n'
+            'Install with: pip install odfpy',
+        ) from err
+    return cast(JSONList, frame.to_dict(orient='records'))


 def write(
@@ -70,12 +84,31 @@ def write(
     path : Path
         Path to the ODS file on disk.
     data : JSONData
-        Data to write as ODS file. Should be a list of dictionaries or a
+        Data to write as ODS. Should be a list of dictionaries or a
         single dictionary.

     Returns
     -------
     int
         The number of rows written to the ODS file.
+
+    Raises
+    ------
+    ImportError
+        If optional dependencies for ODS support are missing.
     """
-    return stub.write(path, data, format_name='ODS')
+    records = normalize_records(data, 'ODS')
+    if not records:
+        return 0
+
+    pandas = get_pandas('ODS')
+    path.parent.mkdir(parents=True, exist_ok=True)
+    frame = pandas.DataFrame.from_records(records)
+    try:
+        frame.to_excel(path, index=False, engine='odf')
+    except ImportError as err:  # pragma: no cover
+        raise ImportError(
+            'ODS support requires optional dependency "odfpy".\n'
+            'Install with: pip install odfpy',
+        ) from err
+    return len(records)
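
Both paths funnel through pandas' Excel machinery with engine='odf', so odfpy is the only extra needed beyond pandas itself. A round-trip sketch:

    from pathlib import Path

    from etlplus.file import ods

    ods.write(Path('report.ods'), [{'city': 'Oslo', 'pop': 709037}])
    print(ods.read(Path('report.ods')))  # [{'city': 'Oslo', 'pop': 709037}]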
etlplus/file/pb.py CHANGED
@@ -1,29 +1,24 @@
 """
 :mod:`etlplus.file.pb` module.

-Stub helpers for reading/writing Protocol Buffer (PB) files (not implemented
-yet).
+Helpers for reading/writing Protocol Buffers binary (PB) files.

 Notes
 -----
-- PB (a.k.a. Protobuff) is a binary serialization format developed by Google
-  for structured data.
+- A PB file contains Protocol Buffers (Protobuf) binary-encoded messages.
 - Common cases:
-    - Data interchange between services.
-    - Efficient storage of structured data.
-    - Communication in distributed systems.
+    - Serialized payloads emitted by services or SDKs.
+    - Binary payload dumps for debugging or transport.
 - Rule of thumb:
-    - If the file follows the Protocol Buffer specification, use this module
-      for reading and writing.
+    - Use this module when you need to store or transport raw protobuf bytes.
 """

 from __future__ import annotations

+import base64
 from pathlib import Path

 from ..types import JSONData
-from ..types import JSONList
-from . import stub

 # SECTION: EXPORTS ========================================================== #

@@ -40,7 +35,7 @@ __all__ = [

 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read PB content from *path*.

@@ -51,10 +46,12 @@ def read(

     Returns
     -------
-    JSONList
-        The list of dictionaries read from the PB file.
+    JSONData
+        The structured data read from the PB file.
     """
-    return stub.read(path, format_name='PB')
+    payload = path.read_bytes()
+    encoded = base64.b64encode(payload).decode('ascii')
+    return {'payload_base64': encoded}


 def write(
@@ -69,12 +66,28 @@ def write(
     path : Path
         Path to the PB file on disk.
     data : JSONData
-        Data to write as PB. Should be a list of dictionaries or a
-        single dictionary.
+        Data to write as PB. Should be a dictionary with ``payload_base64``.

     Returns
     -------
     int
-        The number of rows written to the PB file.
+        The number of records written to the PB file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary or missing ``payload_base64``.
     """
-    return stub.write(path, data, format_name='PB')
+    if isinstance(data, list):
+        raise TypeError('PB payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('PB payloads must be a dict')
+
+    payload_base64 = data.get('payload_base64')
+    if not isinstance(payload_base64, str):
+        raise TypeError('PB payloads must include a "payload_base64" string')
+
+    payload = base64.b64decode(payload_base64.encode('ascii'))
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_bytes(payload)
+    return 1
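
The module deliberately does not decode messages: read() wraps the raw file bytes in a base64 envelope and write() unwraps it, so schema-aware decoding stays with the caller. A round-trip sketch over arbitrary protobuf bytes:

    import base64
    from pathlib import Path

    from etlplus.file import pb

    raw = b'\x08\x96\x01'  # any serialized protobuf message; decoding is the caller's job
    pb.write(Path('msg.pb'), {'payload_base64': base64.b64encode(raw).decode('ascii')})
    assert pb.read(Path('msg.pb')) == {'payload_base64': 'CJYB'}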