PyPI - etlplus - Versions diffs - 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl - Mend

etlplus 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

etlplus/file/_imports.py +35 -20
etlplus/file/_io.py +138 -15
etlplus/file/_r.py +48 -0
etlplus/file/_sql.py +224 -0
etlplus/file/accdb.py +7 -6
etlplus/file/arrow.py +13 -24
etlplus/file/avro.py +13 -10
etlplus/file/bson.py +61 -22
etlplus/file/cbor.py +13 -25
etlplus/file/cfg.py +7 -6
etlplus/file/conf.py +7 -6
etlplus/file/core.py +1 -1
etlplus/file/csv.py +8 -7
etlplus/file/dat.py +9 -6
etlplus/file/dta.py +15 -30
etlplus/file/duckdb.py +29 -122
etlplus/file/feather.py +15 -30
etlplus/file/fwf.py +16 -14
etlplus/file/gz.py +12 -7
etlplus/file/hbs.py +7 -6
etlplus/file/hdf5.py +31 -6
etlplus/file/ini.py +17 -24
etlplus/file/ion.py +7 -6
etlplus/file/jinja2.py +7 -6
etlplus/file/json.py +10 -11
etlplus/file/log.py +7 -6
etlplus/file/mat.py +7 -6
etlplus/file/mdb.py +7 -6
etlplus/file/msgpack.py +13 -25
etlplus/file/mustache.py +7 -6
etlplus/file/nc.py +30 -21
etlplus/file/ndjson.py +10 -6
etlplus/file/numbers.py +7 -6
etlplus/file/ods.py +10 -6
etlplus/file/orc.py +15 -30
etlplus/file/parquet.py +10 -6
etlplus/file/pb.py +22 -23
etlplus/file/pbf.py +7 -6
etlplus/file/properties.py +15 -29
etlplus/file/proto.py +14 -20
etlplus/file/psv.py +8 -7
etlplus/file/rda.py +19 -51
etlplus/file/rds.py +19 -51
etlplus/file/sas7bdat.py +10 -30
etlplus/file/sav.py +13 -24
etlplus/file/sqlite.py +25 -83
etlplus/file/stub.py +8 -6
etlplus/file/sylk.py +7 -6
etlplus/file/tab.py +8 -7
etlplus/file/toml.py +14 -17
etlplus/file/tsv.py +8 -7
etlplus/file/txt.py +10 -7
etlplus/file/vm.py +7 -6
etlplus/file/wks.py +7 -6
etlplus/file/xls.py +8 -5
etlplus/file/xlsm.py +10 -6
etlplus/file/xlsx.py +10 -6
etlplus/file/xml.py +11 -9
etlplus/file/xpt.py +13 -33
etlplus/file/yaml.py +10 -11
etlplus/file/zip.py +10 -5
etlplus/file/zsav.py +7 -6
{etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/METADATA +1 -1
{etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/RECORD +68 -66
{etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
{etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
{etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
{etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0

etlplus/file/properties.py CHANGED Viewed

@@ -18,11 +18,13 @@ Notes
 from __future__ import annotations
-from pathlib import Path
-from typing import Any
 from ..types import JSONData
 from ..types import JSONDict
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import require_dict_payload
+from ._io import stringify_value
 # SECTION: EXPORTS ========================================================== #
@@ -34,28 +36,18 @@ __all__ = [
 ]
-# SECTION: INTERNAL FUNCTIONS =============================================== #
-def _stringify(value: Any) -> str:
-    """Normalize properties values into strings."""
-    if value is None:
-        return ''
-    return str(value)
 # SECTION: FUNCTIONS ======================================================== #
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONData:
     """
     Read PROPERTIES content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the PROPERTIES file on disk.
     Returns
@@ -63,6 +55,7 @@ def read(
     JSONData
         The structured data read from the PROPERTIES file.
     """
+    path = coerce_path(path)
     payload: JSONDict = {}
     for line in path.read_text(encoding='utf-8').splitlines():
         stripped = line.strip()
@@ -85,7 +78,7 @@ def read(
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -93,7 +86,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the PROPERTIES file on disk.
     data : JSONData
         Data to write as PROPERTIES. Should be a dictionary.
@@ -102,19 +95,12 @@ def write(
     -------
     int
         The number of records written to the PROPERTIES file.
-    Raises
-    ------
-    TypeError
-        If *data* is not a dictionary.
     """
-    if isinstance(data, list):
-        raise TypeError('PROPERTIES payloads must be a dict')
-    if not isinstance(data, dict):
-        raise TypeError('PROPERTIES payloads must be a dict')
+    path = coerce_path(path)
+    payload = require_dict_payload(data, format_name='PROPERTIES')
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     with path.open('w', encoding='utf-8', newline='') as handle:
-        for key in sorted(data.keys()):
-            handle.write(f'{key}={_stringify(data[key])}\n')
+        for key in sorted(payload.keys()):
+            handle.write(f'{key}={stringify_value(payload[key])}\n')
     return 1

etlplus/file/proto.py CHANGED Viewed

@@ -17,9 +17,12 @@ Notes
 from __future__ import annotations
-from pathlib import Path
 from ..types import JSONData
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import require_dict_payload
+from ._io import require_str_key
 # SECTION: EXPORTS ========================================================== #
@@ -35,14 +38,14 @@ __all__ = [
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONData:
     """
     Read PROTO content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the PROTO file on disk.
     Returns
@@ -50,11 +53,12 @@ def read(
     JSONData
         The structured data read from the PROTO file.
     """
+    path = coerce_path(path)
     return {'schema': path.read_text(encoding='utf-8')}
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -62,7 +66,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the PROTO file on disk.
     data : JSONData
         Data to write as PROTO. Should be a dictionary with ``schema``.
@@ -71,21 +75,11 @@ def write(
     -------
     int
         The number of records written to the PROTO file.
-    Raises
-    ------
-    TypeError
-        If *data* is not a dictionary or is missing a ``schema`` string.
     """
-    if isinstance(data, list):
-        raise TypeError('PROTO payloads must be a dict')
-    if not isinstance(data, dict):
-        raise TypeError('PROTO payloads must be a dict')
-    schema = data.get('schema')
-    if not isinstance(schema, str):
-        raise TypeError('PROTO payloads must include a "schema" string')
+    path = coerce_path(path)
+    payload = require_dict_payload(data, format_name='PROTO')
+    schema = require_str_key(payload, format_name='PROTO', key='schema')
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     path.write_text(schema, encoding='utf-8')
     return 1

etlplus/file/psv.py CHANGED Viewed

@@ -19,10 +19,10 @@ Notes
 from __future__ import annotations
-from pathlib import Path
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._io import coerce_path
 from ._io import read_delimited
 from ._io import write_delimited
@@ -40,14 +40,14 @@ __all__ = [
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read PSV content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the PSV file on disk.
     Returns
@@ -59,7 +59,7 @@ def read(
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -67,7 +67,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the PSV file on disk.
     data : JSONData
         Data to write as PSV file. Should be a list of dictionaries or a
@@ -78,4 +78,5 @@ def write(
     int
         The number of rows written to the PSV file.
     """
-    return write_delimited(path, data, delimiter='|')
+    path = coerce_path(path)
+    return write_delimited(path, data, delimiter='|', format_name='PSV')

etlplus/file/rda.py CHANGED Viewed

@@ -18,14 +18,15 @@ Notes
 from __future__ import annotations
-from pathlib import Path
-from typing import Any
 from ..types import JSONData
 from ..types import JSONDict
-from ._imports import get_optional_module
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
+from ._r import coerce_r_object
 # SECTION: EXPORTS ========================================================== #
@@ -37,44 +38,18 @@ __all__ = [
 ]
-# SECTION: INTERNAL FUNCTIONS =============================================== #
-def _get_pyreadr() -> Any:
-    """Return the pyreadr module, importing it on first use."""
-    return get_optional_module(
-        'pyreadr',
-        error_message=(
-            'RDA support requires optional dependency "pyreadr".\n'
-            'Install with: pip install pyreadr'
-        ),
-    )
-def _coerce_r_object(value: Any, pandas: Any) -> JSONData:
-    if isinstance(value, pandas.DataFrame):
-        return value.to_dict(orient='records')
-    if isinstance(value, dict):
-        return value
-    if isinstance(value, list) and all(
-        isinstance(item, dict) for item in value
-    ):
-        return value
-    return {'value': value}
 # SECTION: FUNCTIONS ======================================================== #
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONData:
     """
     Read RDA content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the RDA file on disk.
     Returns
@@ -82,22 +57,23 @@ def read(
     JSONData
         The structured data read from the RDA file.
     """
-    pyreadr = _get_pyreadr()
+    path = coerce_path(path)
+    pyreadr = get_dependency('pyreadr', format_name='RDA')
     pandas = get_pandas('RDA')
     result = pyreadr.read_r(str(path))
     if not result:
         return []
     if len(result) == 1:
         value = next(iter(result.values()))
-        return _coerce_r_object(value, pandas)
+        return coerce_r_object(value, pandas)
     payload: JSONDict = {}
     for key, value in result.items():
-        payload[str(key)] = _coerce_r_object(value, pandas)
+        payload[str(key)] = coerce_r_object(value, pandas)
     return payload
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -105,7 +81,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the RDA file on disk.
     data : JSONData
         Data to write as RDA file. Should be a list of dictionaries or a
@@ -120,21 +96,13 @@ def write(
     ------
     ImportError
         If "pyreadr" is not installed with write support.
-    TypeError
-        If *data* is not a dictionary or list of dictionaries.
     """
-    pyreadr = _get_pyreadr()
+    path = coerce_path(path)
+    pyreadr = get_dependency('pyreadr', format_name='RDA')
     pandas = get_pandas('RDA')
-    if isinstance(data, list):
-        records = normalize_records(data, 'RDA')
-        frame = pandas.DataFrame.from_records(records)
-        count = len(records)
-    elif isinstance(data, dict):
-        frame = pandas.DataFrame.from_records([data])
-        count = 1
-    else:
-        raise TypeError('RDA payloads must be a dict or list of dicts')
+    records = normalize_records(data, 'RDA')
+    frame = pandas.DataFrame.from_records(records)
+    count = len(records)
     writer = getattr(pyreadr, 'write_rdata', None) or getattr(
         pyreadr,
@@ -146,7 +114,7 @@ def write(
             'RDA write support requires "pyreadr" with write_rdata().',
         )
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     try:
         writer(str(path), frame, df_name='data')
     except TypeError:

etlplus/file/rds.py CHANGED Viewed

@@ -18,14 +18,15 @@ Notes
 from __future__ import annotations
-from pathlib import Path
-from typing import Any
 from ..types import JSONData
 from ..types import JSONDict
-from ._imports import get_optional_module
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
+from ._r import coerce_r_object
 # SECTION: EXPORTS ========================================================== #
@@ -37,44 +38,18 @@ __all__ = [
 ]
-# SECTION: INTERNAL HELPERS ================================================ #
-def _get_pyreadr() -> Any:
-    """Return the pyreadr module, importing it on first use."""
-    return get_optional_module(
-        'pyreadr',
-        error_message=(
-            'RDS support requires optional dependency "pyreadr".\n'
-            'Install with: pip install pyreadr'
-        ),
-    )
-def _coerce_r_object(value: Any, pandas: Any) -> JSONData:
-    if isinstance(value, pandas.DataFrame):
-        return value.to_dict(orient='records')
-    if isinstance(value, dict):
-        return value
-    if isinstance(value, list) and all(
-        isinstance(item, dict) for item in value
-    ):
-        return value
-    return {'value': value}
 # SECTION: FUNCTIONS ======================================================== #
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONData:
     """
     Read RDS content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the RDS file on disk.
     Returns
@@ -82,22 +57,23 @@ def read(
     JSONData
         The structured data read from the RDS file.
     """
-    pyreadr = _get_pyreadr()
+    path = coerce_path(path)
+    pyreadr = get_dependency('pyreadr', format_name='RDS')
     pandas = get_pandas('RDS')
     result = pyreadr.read_r(str(path))
     if not result:
         return []
     if len(result) == 1:
         value = next(iter(result.values()))
-        return _coerce_r_object(value, pandas)
+        return coerce_r_object(value, pandas)
     payload: JSONDict = {}
     for key, value in result.items():
-        payload[str(key)] = _coerce_r_object(value, pandas)
+        payload[str(key)] = coerce_r_object(value, pandas)
     return payload
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -105,7 +81,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the RDS file on disk.
     data : JSONData
         Data to write as RDS file. Should be a list of dictionaries or a
@@ -120,21 +96,13 @@ def write(
     ------
     ImportError
         If "pyreadr" is not installed with write support.
-    TypeError
-        If *data* is not a dictionary or list of dictionaries.
     """
-    pyreadr = _get_pyreadr()
+    path = coerce_path(path)
+    pyreadr = get_dependency('pyreadr', format_name='RDS')
     pandas = get_pandas('RDS')
-    if isinstance(data, list):
-        records = normalize_records(data, 'RDS')
-        frame = pandas.DataFrame.from_records(records)
-        count = len(records)
-    elif isinstance(data, dict):
-        frame = pandas.DataFrame.from_records([data])
-        count = 1
-    else:
-        raise TypeError('RDS payloads must be a dict or list of dicts')
+    records = normalize_records(data, 'RDS')
+    frame = pandas.DataFrame.from_records(records)
+    count = len(records)
     writer = getattr(pyreadr, 'write_rds', None)
     if writer is None:
@@ -142,6 +110,6 @@ def write(
             'RDS write support requires "pyreadr" with write_rds().',
         )
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     writer(str(path), frame)
     return count

etlplus/file/sas7bdat.py CHANGED Viewed

@@ -17,15 +17,15 @@ Notes
 from __future__ import annotations
-from pathlib import Path
-from typing import Any
 from typing import cast
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
-from ._imports import get_optional_module
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
 # SECTION: EXPORTS ========================================================== #
@@ -37,39 +37,18 @@ __all__ = [
 ]
-# SECTION: INTERNAL HELPERS ================================================ #
-def _get_pyreadstat() -> Any:
-    """Return the pyreadstat module, importing it on first use."""
-    return get_optional_module(
-        'pyreadstat',
-        error_message=(
-            'SAS7BDAT support requires optional dependency "pyreadstat".\n'
-            'Install with: pip install pyreadstat'
-        ),
-    )
-def _raise_readstat_error(err: ImportError) -> None:
-    raise ImportError(
-        'SAS7BDAT support requires optional dependency "pyreadstat".\n'
-        'Install with: pip install pyreadstat',
-    ) from err
 # SECTION: FUNCTIONS ======================================================== #
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read SAS7BDAT content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the SAS7BDAT file on disk.
     Returns
@@ -77,18 +56,18 @@ def read(
     JSONList
         The list of dictionaries read from the SAS7BDAT file.
     """
+    path = coerce_path(path)
+    get_dependency('pyreadstat', format_name='SAS7BDAT')
     pandas = get_pandas('SAS7BDAT')
     try:
         frame = pandas.read_sas(path, format='sas7bdat')
     except TypeError:
         frame = pandas.read_sas(path)
-    except ImportError as err:  # pragma: no cover
-        _raise_readstat_error(err)
     return cast(JSONList, frame.to_dict(orient='records'))
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -96,7 +75,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the SAS7BDAT file on disk.
     data : JSONData
         Data to write as SAS7BDAT file. Should be a list of dictionaries or a
@@ -107,4 +86,5 @@ def write(
     int
         The number of rows written to the SAS7BDAT file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='SAS7BDAT')

etlplus/file/sav.py CHANGED Viewed

@@ -17,14 +17,15 @@ Notes
 from __future__ import annotations
-from pathlib import Path
-from typing import Any
 from typing import cast
 from ..types import JSONData
 from ..types import JSONList
-from ._imports import get_optional_module
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 # SECTION: EXPORTS ========================================================== #
@@ -37,32 +38,18 @@ __all__ = [
 ]
-# SECTION: INTERNAL FUNCTION ================================================ #
-def _get_pyreadstat() -> Any:
-    """Return the pyreadstat module, importing it on first use."""
-    return get_optional_module(
-        'pyreadstat',
-        error_message=(
-            'SAV support requires optional dependency "pyreadstat".\n'
-            'Install with: pip install pyreadstat'
-        ),
-    )
 # SECTION: FUNCTIONS ======================================================== #
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read SAV content from *path*.
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the SAV file on disk.
     Returns
@@ -70,13 +57,14 @@ def read(
     JSONList
         The list of dictionaries read from the SAV file.
     """
-    pyreadstat = _get_pyreadstat()
+    path = coerce_path(path)
+    pyreadstat = get_dependency('pyreadstat', format_name='SAV')
     frame, _meta = pyreadstat.read_sav(str(path))
     return cast(JSONList, frame.to_dict(orient='records'))
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -84,7 +72,7 @@ def write(
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the SAV file on disk.
     data : JSONData
         Data to write as SAV. Should be a list of dictionaries or a
@@ -95,13 +83,14 @@ def write(
     int
         The number of rows written to the SAV file.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'SAV')
     if not records:
         return 0
-    pyreadstat = _get_pyreadstat()
+    pyreadstat = get_dependency('pyreadstat', format_name='SAV')
     pandas = get_pandas('SAV')
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
     pyreadstat.write_sav(frame, str(path))
     return len(records)

etlplus 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl

etlplus 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl