PyPI - etlplus - Versions diffs - 0.16.10__py3-none-any.whl → 0.17.2__py3-none-any.whl - Mend

etlplus 0.16.10__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

etlplus/file/README.md +33 -0
etlplus/file/arrow.py +35 -5
etlplus/file/bson.py +50 -5
etlplus/file/cbor.py +35 -11
etlplus/file/dat.py +44 -6
etlplus/file/dta.py +46 -11
etlplus/file/duckdb.py +159 -5
etlplus/file/enums.py +29 -0
etlplus/file/fwf.py +37 -5
etlplus/file/hdf5.py +41 -3
etlplus/file/ini.py +62 -12
etlplus/file/msgpack.py +33 -9
etlplus/file/nc.py +55 -6
etlplus/file/ods.py +39 -6
etlplus/file/pb.py +32 -19
etlplus/file/properties.py +52 -12
etlplus/file/proto.py +24 -12
etlplus/file/psv.py +5 -5
etlplus/file/rda.py +83 -9
etlplus/file/rds.py +76 -8
etlplus/file/sas7bdat.py +41 -11
etlplus/file/sav.py +40 -11
etlplus/file/sqlite.py +123 -5
etlplus/file/tab.py +6 -7
etlplus/file/toml.py +54 -12
etlplus/file/xlsm.py +39 -5
etlplus/file/xpt.py +61 -5
{etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/METADATA +44 -26
{etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/RECORD +33 -33
{etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/WHEEL +0 -0
{etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/entry_points.txt +0 -0
{etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/licenses/LICENSE +0 -0
{etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/top_level.txt +0 -0

etlplus/file/properties.py CHANGED Viewed

@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.properties` module.
-Stub helpers for reading/writing properties (PROPERTIES) files (not implemented
-yet).
+Helpers for reading/writing properties (PROPERTIES) files.
 Notes
 -----
@@ -20,10 +19,10 @@ Notes
 from __future__ import annotations
 from pathlib import Path
+from typing import Any
 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ..types import JSONDict
 # SECTION: EXPORTS ========================================================== #
@@ -35,12 +34,22 @@ __all__ = [
 ]
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+def _stringify(value: Any) -> str:
+    """Normalize properties values into strings."""
+    if value is None:
+        return ''
+    return str(value)
 # SECTION: FUNCTIONS ======================================================== #
 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read PROPERTIES content from *path*.
@@ -51,10 +60,28 @@ def read(
     Returns
     -------
-    JSONList
-        The list of dictionaries read from the PROPERTIES file.
+    JSONData
+        The structured data read from the PROPERTIES file.
     """
-    return stub.read(path, format_name='PROPERTIES')
+    payload: JSONDict = {}
+    for line in path.read_text(encoding='utf-8').splitlines():
+        stripped = line.strip()
+        if not stripped or stripped.startswith(('#', '!')):
+            continue
+        separator_index = -1
+        for sep in ('=', ':'):
+            if sep in stripped:
+                separator_index = stripped.find(sep)
+                break
+        if separator_index == -1:
+            key = stripped
+            value = ''
+        else:
+            key = stripped[:separator_index].strip()
+            value = stripped[separator_index + 1:].strip()
+        if key:
+            payload[key] = value
+    return payload
 def write(
@@ -69,12 +96,25 @@ def write(
     path : Path
         Path to the PROPERTIES file on disk.
     data : JSONData
-        Data to write as PROPERTIES. Should be a list of dictionaries or a
-        single dictionary.
+        Data to write as PROPERTIES. Should be a dictionary.
     Returns
     -------
     int
-        The number of rows written to the PROPERTIES file.
+        The number of records written to the PROPERTIES file.
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary.
     """
-    return stub.write(path, data, format_name='PROPERTIES')
+    if isinstance(data, list):
+        raise TypeError('PROPERTIES payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('PROPERTIES payloads must be a dict')
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open('w', encoding='utf-8', newline='') as handle:
+        for key in sorted(data.keys()):
+            handle.write(f'{key}={_stringify(data[key])}\n')
+    return 1

etlplus/file/proto.py CHANGED Viewed

@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.proto` module.
-Stub helpers for reading/writing Protocol Buffers schema (PROTO) files (not
-implemented yet).
+Helpers for reading/writing Protocol Buffers schema (PROTO) files.
 Notes
 -----
@@ -21,8 +20,6 @@ from __future__ import annotations
 from pathlib import Path
 from ..types import JSONData
-from ..types import JSONList
-from . import stub
 # SECTION: EXPORTS ========================================================== #
@@ -39,7 +36,7 @@ __all__ = [
 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read PROTO content from *path*.
@@ -50,10 +47,10 @@ def read(
     Returns
     -------
-    JSONList
-        The list of dictionaries read from the PROTO file.
+    JSONData
+        The structured data read from the PROTO file.
     """
-    return stub.read(path, format_name='PROTO')
+    return {'schema': path.read_text(encoding='utf-8')}
 def write(
@@ -68,12 +65,27 @@ def write(
     path : Path
         Path to the PROTO file on disk.
     data : JSONData
-        Data to write as PROTO. Should be a list of dictionaries or a
-        single dictionary.
+        Data to write as PROTO. Should be a dictionary with ``schema``.
     Returns
     -------
     int
-        The number of rows written to the PROTO file.
+        The number of records written to the PROTO file.
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary or is missing a ``schema`` string.
     """
-    return stub.write(path, data, format_name='PROTO')
+    if isinstance(data, list):
+        raise TypeError('PROTO payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('PROTO payloads must be a dict')
+    schema = data.get('schema')
+    if not isinstance(schema, str):
+        raise TypeError('PROTO payloads must include a "schema" string')
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(schema, encoding='utf-8')
+    return 1

etlplus/file/psv.py CHANGED Viewed

@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.psv` module.
-Stub helpers for reading/writing Pipe-Separated Values (PSV) files (not
-implemented yet).
+Helpers for reading/writing Pipe-Separated Values (PSV) files.
 Notes
 -----
@@ -24,7 +23,8 @@ from pathlib import Path
 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ._io import read_delimited
+from ._io import write_delimited
 # SECTION: EXPORTS ========================================================== #
@@ -55,7 +55,7 @@ def read(
     JSONList
         The list of dictionaries read from the PSV file.
     """
-    return stub.read(path, format_name='PSV')
+    return read_delimited(path, delimiter='|')
 def write(
@@ -78,4 +78,4 @@ def write(
     int
         The number of rows written to the PSV file.
     """
-    return stub.write(path, data, format_name='PSV')
+    return write_delimited(path, data, delimiter='|')

etlplus/file/rda.py CHANGED Viewed

@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.rda` module.
-Stub helpers for reading/writing RData workspace/object bundle (RDA) files (not
-implemented yet).
+Helpers for reading/writing RData workspace/object bundle (RDA) files.
 Notes
 -----
@@ -20,10 +19,13 @@ Notes
 from __future__ import annotations
 from pathlib import Path
+from typing import Any
 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ..types import JSONDict
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records
 # SECTION: EXPORTS ========================================================== #
@@ -35,12 +37,38 @@ __all__ = [
 ]
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+def _get_pyreadr() -> Any:
+    """Return the pyreadr module, importing it on first use."""
+    return get_optional_module(
+        'pyreadr',
+        error_message=(
+            'RDA support requires optional dependency "pyreadr".\n'
+            'Install with: pip install pyreadr'
+        ),
+    )
+def _coerce_r_object(value: Any, pandas: Any) -> JSONData:
+    if isinstance(value, pandas.DataFrame):
+        return value.to_dict(orient='records')
+    if isinstance(value, dict):
+        return value
+    if isinstance(value, list) and all(
+        isinstance(item, dict) for item in value
+    ):
+        return value
+    return {'value': value}
 # SECTION: FUNCTIONS ======================================================== #
 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read RDA content from *path*.
@@ -51,10 +79,21 @@ def read(
     Returns
     -------
-    JSONList
-        The list of dictionaries read from the RDA file.
+    JSONData
+        The structured data read from the RDA file.
     """
-    return stub.read(path, format_name='RDA')
+    pyreadr = _get_pyreadr()
+    pandas = get_pandas('RDA')
+    result = pyreadr.read_r(str(path))
+    if not result:
+        return []
+    if len(result) == 1:
+        value = next(iter(result.values()))
+        return _coerce_r_object(value, pandas)
+    payload: JSONDict = {}
+    for key, value in result.items():
+        payload[str(key)] = _coerce_r_object(value, pandas)
+    return payload
 def write(
@@ -76,5 +115,40 @@ def write(
     -------
     int
         The number of rows written to the RDA file.
+    Raises
+    ------
+    ImportError
+        If "pyreadr" is not installed with write support.
+    TypeError
+        If *data* is not a dictionary or list of dictionaries.
     """
-    return stub.write(path, data, format_name='RDA')
+    pyreadr = _get_pyreadr()
+    pandas = get_pandas('RDA')
+    if isinstance(data, list):
+        records = normalize_records(data, 'RDA')
+        frame = pandas.DataFrame.from_records(records)
+        count = len(records)
+    elif isinstance(data, dict):
+        frame = pandas.DataFrame.from_records([data])
+        count = 1
+    else:
+        raise TypeError('RDA payloads must be a dict or list of dicts')
+    writer = getattr(pyreadr, 'write_rdata', None) or getattr(
+        pyreadr,
+        'write_rda',
+        None,
+    )
+    if writer is None:
+        raise ImportError(
+            'RDA write support requires "pyreadr" with write_rdata().',
+        )
+    path.parent.mkdir(parents=True, exist_ok=True)
+    try:
+        writer(str(path), frame, df_name='data')
+    except TypeError:
+        writer(str(path), frame)
+    return count

etlplus/file/rds.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 :mod:`etlplus.file.rds` module.
-Stub helpers for reading/writing R (RDS) data files (not implemented yet).
+Helpers for reading/writing R (RDS) data files.
 Notes
 -----
@@ -19,10 +19,13 @@ Notes
 from __future__ import annotations
 from pathlib import Path
+from typing import Any
 from ..types import JSONData
-from ..types import JSONList
-from . import stub
+from ..types import JSONDict
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records
 # SECTION: EXPORTS ========================================================== #
@@ -34,12 +37,38 @@ __all__ = [
 ]
+# SECTION: INTERNAL HELPERS ================================================ #
+def _get_pyreadr() -> Any:
+    """Return the pyreadr module, importing it on first use."""
+    return get_optional_module(
+        'pyreadr',
+        error_message=(
+            'RDS support requires optional dependency "pyreadr".\n'
+            'Install with: pip install pyreadr'
+        ),
+    )
+def _coerce_r_object(value: Any, pandas: Any) -> JSONData:
+    if isinstance(value, pandas.DataFrame):
+        return value.to_dict(orient='records')
+    if isinstance(value, dict):
+        return value
+    if isinstance(value, list) and all(
+        isinstance(item, dict) for item in value
+    ):
+        return value
+    return {'value': value}
 # SECTION: FUNCTIONS ======================================================== #
 def read(
     path: Path,
-) -> JSONList:
+) -> JSONData:
     """
     Read RDS content from *path*.
@@ -50,10 +79,21 @@ def read(
     Returns
     -------
-    JSONList
-        The list of dictionaries read from the RDS file.
+    JSONData
+        The structured data read from the RDS file.
     """
-    return stub.read(path, format_name='RDS')
+    pyreadr = _get_pyreadr()
+    pandas = get_pandas('RDS')
+    result = pyreadr.read_r(str(path))
+    if not result:
+        return []
+    if len(result) == 1:
+        value = next(iter(result.values()))
+        return _coerce_r_object(value, pandas)
+    payload: JSONDict = {}
+    for key, value in result.items():
+        payload[str(key)] = _coerce_r_object(value, pandas)
+    return payload
 def write(
@@ -75,5 +115,33 @@ def write(
     -------
     int
         The number of rows written to the RDS file.
+    Raises
+    ------
+    ImportError
+        If "pyreadr" is not installed with write support.
+    TypeError
+        If *data* is not a dictionary or list of dictionaries.
     """
-    return stub.write(path, data, format_name='RDS')
+    pyreadr = _get_pyreadr()
+    pandas = get_pandas('RDS')
+    if isinstance(data, list):
+        records = normalize_records(data, 'RDS')
+        frame = pandas.DataFrame.from_records(records)
+        count = len(records)
+    elif isinstance(data, dict):
+        frame = pandas.DataFrame.from_records([data])
+        count = 1
+    else:
+        raise TypeError('RDS payloads must be a dict or list of dicts')
+    writer = getattr(pyreadr, 'write_rds', None)
+    if writer is None:
+        raise ImportError(
+            'RDS write support requires "pyreadr" with write_rds().',
+        )
+    path.parent.mkdir(parents=True, exist_ok=True)
+    writer(str(path), frame)
+    return count

etlplus/file/sas7bdat.py CHANGED Viewed

@@ -1,29 +1,31 @@
 """
 :mod:`etlplus.file.sas7bdat` module.
-Stub helpers for reading/writing SAS (SAS7BDAT) data files (not implemented
-yet).
+Helpers for reading/writing SAS (SAS7BDAT) data files.
 Notes
 -----
-- A SAS7BDAT file is a binary file format used by SAS to store datasets,
-    including variables, labels, and data types.
+- A SAS7BDAT file is a proprietary binary file format created by SAS to store
+    datasets, including variables, labels, and data types.
 - Common cases:
-    - Delimited text files (e.g., CSV, TSV).
-    - Fixed-width formatted files.
-    - Custom formats specific to certain applications.
+    - Statistical analysis pipelines.
+    - Data exchange with SAS tooling.
 - Rule of thumb:
-    - If the file does not follow a specific standard format, use this module
-        for reading and writing.
+    - If the file follows the SAS7BDAT specification, use this module for
+        reading and writing.
 """
 from __future__ import annotations
 from pathlib import Path
+from typing import Any
+from typing import cast
 from ..types import JSONData
 from ..types import JSONList
 from . import stub
+from ._imports import get_optional_module
+from ._imports import get_pandas
 # SECTION: EXPORTS ========================================================== #
@@ -35,6 +37,27 @@ __all__ = [
 ]
+# SECTION: INTERNAL HELPERS ================================================ #
+def _get_pyreadstat() -> Any:
+    """Return the pyreadstat module, importing it on first use."""
+    return get_optional_module(
+        'pyreadstat',
+        error_message=(
+            'SAS7BDAT support requires optional dependency "pyreadstat".\n'
+            'Install with: pip install pyreadstat'
+        ),
+    )
+def _raise_readstat_error(err: ImportError) -> None:
+    raise ImportError(
+        'SAS7BDAT support requires optional dependency "pyreadstat".\n'
+        'Install with: pip install pyreadstat',
+    ) from err
 # SECTION: FUNCTIONS ======================================================== #
@@ -42,7 +65,7 @@ def read(
     path: Path,
 ) -> JSONList:
     """
-    Read DAT content from *path*.
+    Read SAS7BDAT content from *path*.
     Parameters
     ----------
@@ -54,7 +77,14 @@ def read(
     JSONList
         The list of dictionaries read from the SAS7BDAT file.
     """
-    return stub.read(path, format_name='SAS7BDAT')
+    pandas = get_pandas('SAS7BDAT')
+    try:
+        frame = pandas.read_sas(path, format='sas7bdat')
+    except TypeError:
+        frame = pandas.read_sas(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_readstat_error(err)
+    return cast(JSONList, frame.to_dict(orient='records'))
 def write(

etlplus/file/sav.py CHANGED Viewed

@@ -1,27 +1,31 @@
 """
 :mod:`etlplus.file.sav` module.
-Stub helpers for reading/writing SPSS (SAV) data files (not implemented yet).
+Helpers for reading/writing SPSS (SAV) files.
 Notes
 -----
-- A SAV file is a binary file format used by SPSS to store datasets, including
-    variables, labels, and data types.
+- A SAV file is a dataset created by SPSS.
 - Common cases:
-    - Reading data for analysis in Python.
-    - Writing processed data back to SPSS format.
+    - Survey and market research datasets.
+    - Statistical analysis workflows.
+    - Exchange with SPSS and compatible tools.
 - Rule of thumb:
-    - If you need to work with SPSS data files, use this module for reading
+    - If the file follows the SAV specification, use this module for reading
         and writing.
 """
 from __future__ import annotations
 from pathlib import Path
+from typing import Any
+from typing import cast
 from ..types import JSONData
 from ..types import JSONList
-from . import stub
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records
 # SECTION: EXPORTS ========================================================== #
@@ -33,6 +37,20 @@ __all__ = [
 ]
+# SECTION: INTERNAL FUNCTION ================================================ #
+def _get_pyreadstat() -> Any:
+    """Return the pyreadstat module, importing it on first use."""
+    return get_optional_module(
+        'pyreadstat',
+        error_message=(
+            'SAV support requires optional dependency "pyreadstat".\n'
+            'Install with: pip install pyreadstat'
+        ),
+    )
 # SECTION: FUNCTIONS ======================================================== #
@@ -52,7 +70,9 @@ def read(
     JSONList
         The list of dictionaries read from the SAV file.
     """
-    return stub.read(path, format_name='SAV')
+    pyreadstat = _get_pyreadstat()
+    frame, _meta = pyreadstat.read_sav(str(path))
+    return cast(JSONList, frame.to_dict(orient='records'))
 def write(
@@ -60,14 +80,14 @@ def write(
     data: JSONData,
 ) -> int:
     """
-    Write *data* to SAV file at *path* and return record count.
+    Write *data* to SAV at *path* and return record count.
     Parameters
     ----------
     path : Path
         Path to the SAV file on disk.
     data : JSONData
-        Data to write as SAV file. Should be a list of dictionaries or a
+        Data to write as SAV. Should be a list of dictionaries or a
         single dictionary.
     Returns
@@ -75,4 +95,13 @@ def write(
     int
         The number of rows written to the SAV file.
     """
-    return stub.write(path, data, format_name='SAV')
+    records = normalize_records(data, 'SAV')
+    if not records:
+        return 0
+    pyreadstat = _get_pyreadstat()
+    pandas = get_pandas('SAV')
+    path.parent.mkdir(parents=True, exist_ok=True)
+    frame = pandas.DataFrame.from_records(records)
+    pyreadstat.write_sav(frame, str(path))
+    return len(records)

etlplus 0.16.10__py3-none-any.whl → 0.17.2__py3-none-any.whl

etlplus 0.16.10__py3-none-any.whl → 0.17.2__py3-none-any.whl