etlplus 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/_imports.py +35 -20
- etlplus/file/_io.py +138 -15
- etlplus/file/_r.py +48 -0
- etlplus/file/_sql.py +224 -0
- etlplus/file/accdb.py +7 -6
- etlplus/file/arrow.py +13 -24
- etlplus/file/avro.py +13 -10
- etlplus/file/bson.py +61 -22
- etlplus/file/cbor.py +13 -25
- etlplus/file/cfg.py +7 -6
- etlplus/file/conf.py +7 -6
- etlplus/file/core.py +1 -1
- etlplus/file/csv.py +8 -7
- etlplus/file/dat.py +9 -6
- etlplus/file/dta.py +15 -30
- etlplus/file/duckdb.py +29 -122
- etlplus/file/feather.py +15 -30
- etlplus/file/fwf.py +16 -14
- etlplus/file/gz.py +12 -7
- etlplus/file/hbs.py +7 -6
- etlplus/file/hdf5.py +31 -6
- etlplus/file/ini.py +17 -24
- etlplus/file/ion.py +7 -6
- etlplus/file/jinja2.py +7 -6
- etlplus/file/json.py +10 -11
- etlplus/file/log.py +7 -6
- etlplus/file/mat.py +7 -6
- etlplus/file/mdb.py +7 -6
- etlplus/file/msgpack.py +13 -25
- etlplus/file/mustache.py +7 -6
- etlplus/file/nc.py +30 -21
- etlplus/file/ndjson.py +10 -6
- etlplus/file/numbers.py +7 -6
- etlplus/file/ods.py +10 -6
- etlplus/file/orc.py +15 -30
- etlplus/file/parquet.py +10 -6
- etlplus/file/pb.py +22 -23
- etlplus/file/pbf.py +7 -6
- etlplus/file/properties.py +15 -29
- etlplus/file/proto.py +14 -20
- etlplus/file/psv.py +8 -7
- etlplus/file/rda.py +19 -51
- etlplus/file/rds.py +19 -51
- etlplus/file/sas7bdat.py +10 -30
- etlplus/file/sav.py +13 -24
- etlplus/file/sqlite.py +25 -83
- etlplus/file/stub.py +8 -6
- etlplus/file/sylk.py +7 -6
- etlplus/file/tab.py +8 -7
- etlplus/file/toml.py +14 -17
- etlplus/file/tsv.py +8 -7
- etlplus/file/txt.py +10 -7
- etlplus/file/vm.py +7 -6
- etlplus/file/wks.py +7 -6
- etlplus/file/xls.py +8 -5
- etlplus/file/xlsm.py +10 -6
- etlplus/file/xlsx.py +10 -6
- etlplus/file/xml.py +11 -9
- etlplus/file/xpt.py +13 -33
- etlplus/file/yaml.py +10 -11
- etlplus/file/zip.py +10 -5
- etlplus/file/zsav.py +7 -6
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/METADATA +1 -1
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/RECORD +68 -66
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
etlplus/file/dta.py
CHANGED
@@ -18,12 +18,15 @@ Notes

 from __future__ import annotations

-from pathlib import Path
 from typing import cast

 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #
@@ -40,39 +43,30 @@ __all__ = [


 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read DTA content from *path*.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the DTA file on disk.

     Returns
     -------
     JSONList
         The list of dictionaries read from the DTA file.
-
-    Raises
-    ------
-    ImportError
-        If optional dependencies for DTA support are missing.
     """
+    path = coerce_path(path)
+    get_dependency('pyreadstat', format_name='DTA')
     pandas = get_pandas('DTA')
-
-        frame = pandas.read_stata(path)
-    except ImportError as err:  # pragma: no cover
-        raise ImportError(
-            'DTA support may require optional dependency "pyreadstat".\n'
-            'Install with: pip install pyreadstat',
-        ) from err
+    frame = pandas.read_stata(path)
     return cast(JSONList, frame.to_dict(orient='records'))


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -80,7 +74,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the DTA file on disk.
     data : JSONData
         Data to write as DTA file. Should be a list of dictionaries or a single
@@ -90,24 +84,15 @@ def write(
     -------
     int
         The number of rows written to the DTA file.
-
-    Raises
-    ------
-    ImportError
-        If optional dependencies for DTA support are missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'DTA')
     if not records:
         return 0

+    get_dependency('pyreadstat', format_name='DTA')
     pandas = get_pandas('DTA')
-    path
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
-
-        frame.to_stata(path, write_index=False)
-    except ImportError as err:  # pragma: no cover
-        raise ImportError(
-            'DTA support may require optional dependency "pyreadstat".\n'
-            'Install with: pip install pyreadstat',
-        ) from err
+    frame.to_stata(path, write_index=False)
     return len(records)
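Note: the dta.py change drops the module's inline try/except ImportError wrapper in favor of shared helpers (coerce_path, ensure_parent_dir, get_dependency). The bodies of those helpers live in etlplus/file/_io.py and _imports.py and are not shown in this diff; the sketch below is only an assumption about their behavior, inferred from how dta.py now calls them.

# Hypothetical sketch of the shared helpers that dta.py now calls; the real
# implementations live in etlplus/file/_io.py, which this diff does not show.
from __future__ import annotations

from pathlib import Path

StrPath = str | Path  # assumption: mirrors etlplus.types.StrPath


def coerce_path(path: StrPath) -> Path:
    # Accept either a str or a Path and always return a Path object.
    return Path(path)


def ensure_parent_dir(path: Path) -> None:
    # Create the parent directory tree before writing, if it is missing.
    path.parent.mkdir(parents=True, exist_ok=True)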
etlplus/file/duckdb.py
CHANGED
@@ -18,14 +18,20 @@ Notes

 from __future__ import annotations

-import json
-from pathlib import Path
-from typing import Any
-
 from ..types import JSONData
 from ..types import JSONList
-from
+from ..types import StrPath
+from ._imports import get_dependency
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
+from ._sql import DEFAULT_TABLE
+from ._sql import DUCKDB_DIALECT
+from ._sql import coerce_sql_value
+from ._sql import collect_column_values
+from ._sql import infer_column_type
+from ._sql import quote_identifier
+from ._sql import resolve_table

 # SECTION: EXPORTS ========================================================== #

@@ -37,115 +43,18 @@ __all__ = [
 ]


-# SECTION: INTERNAL CONSTANTS ============================================== #
-
-
-DEFAULT_TABLE = 'data'
-
-
-# SECTION: INTERNAL FUNCTIONS =============================================== #
-
-
-def _coerce_sql_value(
-    value: Any,
-) -> Any:
-    """
-    Normalize values into DuckDB-compatible types.
-
-    Parameters
-    ----------
-    value : Any
-        The value to normalize.
-
-    Returns
-    -------
-    Any
-        The normalized value.
-    """
-    if value is None or isinstance(value, (str, int, float, bool)):
-        return value
-    return json.dumps(value, ensure_ascii=True)
-
-
-def _get_duckdb() -> Any:
-    """
-    Return the duckdb module, importing it on first use.
-
-    Returns
-    -------
-    Any
-        The duckdb module.
-    """
-    return get_optional_module(
-        'duckdb',
-        error_message=(
-            'DUCKDB support requires optional dependency "duckdb".\n'
-            'Install with: pip install duckdb'
-        ),
-    )
-
-
-def _infer_column_type(values: list[Any]) -> str:
-    """Infer a basic DuckDB column type from sample values."""
-    seen_bool = False
-    seen_int = False
-    seen_float = False
-    seen_other = False
-    for value in values:
-        if value is None:
-            continue
-        if isinstance(value, bool):
-            seen_bool = True
-        elif isinstance(value, int):
-            seen_int = True
-        elif isinstance(value, float):
-            seen_float = True
-        else:
-            seen_other = True
-            break
-    if seen_other:
-        return 'VARCHAR'
-    if seen_float:
-        return 'DOUBLE'
-    if seen_int:
-        return 'BIGINT'
-    if seen_bool:
-        return 'BOOLEAN'
-    return 'VARCHAR'
-
-
-def _quote_identifier(value: str) -> str:
-    """Return a safely quoted SQL identifier."""
-    escaped = value.replace('"', '""')
-    return f'"{escaped}"'
-
-
-def _resolve_table(tables: list[str]) -> str | None:
-    """Pick a table name for read operations."""
-    if not tables:
-        return None
-    if DEFAULT_TABLE in tables:
-        return DEFAULT_TABLE
-    if len(tables) == 1:
-        return tables[0]
-    raise ValueError(
-        'Multiple tables found in DuckDB file; expected "data" or a '
-        'single table',
-    )
-
-
 # SECTION: FUNCTIONS ======================================================== #


 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read DUCKDB content from *path*.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the DUCKDB file on disk.

     Returns
@@ -153,20 +62,21 @@ def read(
     JSONList
         The list of dictionaries read from the DUCKDB file.
     """
-
+    path = coerce_path(path)
+    duckdb = get_dependency('duckdb', format_name='DUCKDB')
     conn = duckdb.connect(str(path))
     try:
         tables = [row[0] for row in conn.execute('SHOW TABLES').fetchall()]
-        table =
+        table = resolve_table(tables, engine_name='DuckDB')
         if table is None:
             return []
-        query = f'SELECT * FROM {
+        query = f'SELECT * FROM {quote_identifier(table)}'
         cursor = conn.execute(query)
         rows = cursor.fetchall()
         columns = [desc[0] for desc in cursor.description or []]
         if not columns:
             info = conn.execute(
-                f'PRAGMA table_info({
+                f'PRAGMA table_info({quote_identifier(table)})',
             ).fetchall()
             columns = [row[1] for row in info]
         return [dict(zip(columns, row, strict=True)) for row in rows]
@@ -175,7 +85,7 @@ def read(


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -183,7 +93,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the DUCKDB file on disk.
     data : JSONData
         Data to write as DUCKDB. Should be a list of dictionaries or a
@@ -194,38 +104,35 @@ def write(
     int
         The number of rows written to the DUCKDB file.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'DUCKDB')
     if not records:
         return 0

-    columns =
+    columns, column_values = collect_column_values(records)
     if not columns:
         return 0

-    column_values: dict[str, list[Any]] = {col: [] for col in columns}
-    for row in records:
-        for column in columns:
-            column_values[column].append(row.get(column))
-
     column_defs = ', '.join(
-        f'{
+        f'{quote_identifier(column)} '
+        f'{infer_column_type(values, DUCKDB_DIALECT)}'
         for column, values in column_values.items()
     )
-    table_ident =
-    insert_columns = ', '.join(
+    table_ident = quote_identifier(DEFAULT_TABLE)
+    insert_columns = ', '.join(quote_identifier(column) for column in columns)
     placeholders = ', '.join('?' for _ in columns)
     insert_sql = (
         f'INSERT INTO {table_ident} ({insert_columns}) VALUES ({placeholders})'
     )

-    duckdb =
-    path
+    duckdb = get_dependency('duckdb', format_name='DUCKDB')
+    ensure_parent_dir(path)
     conn = duckdb.connect(str(path))
     try:
         conn.execute(f'DROP TABLE IF EXISTS {table_ident}')
         conn.execute(f'CREATE TABLE {table_ident} ({column_defs})')
         rows = [
-            tuple(
+            tuple(coerce_sql_value(row.get(column)) for column in columns)
            for row in records
         ]
         conn.executemany(insert_sql, rows)
etlplus/file/feather.py
CHANGED
@@ -18,12 +18,15 @@ Notes

 from __future__ import annotations

-from pathlib import Path
 from typing import cast

 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._imports import get_dependency
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records

 # SECTION: EXPORTS ========================================================== #
@@ -40,39 +43,30 @@ __all__ = [


 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read Feather content from *path*.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the Feather file on disk.

     Returns
     -------
     JSONList
         The list of dictionaries read from the Feather file.
-
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
+    get_dependency('pyarrow', format_name='Feather')
     pandas = get_pandas('Feather')
-
-        frame = pandas.read_feather(path)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'Feather support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame = pandas.read_feather(path)
     return cast(JSONList, frame.to_dict(orient='records'))


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -80,7 +74,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the Feather file on disk.
     data : JSONData
         Data to write.
@@ -89,24 +83,15 @@ def write(
     -------
     int
         Number of records written.
-
-    Raises
-    ------
-    ImportError
-        When optional dependency "pyarrow" is missing.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'Feather')
     if not records:
         return 0

+    get_dependency('pyarrow', format_name='Feather')
     pandas = get_pandas('Feather')
-    path
+    ensure_parent_dir(path)
     frame = pandas.DataFrame.from_records(records)
-
-        frame.to_feather(path)
-    except ImportError as e:  # pragma: no cover
-        raise ImportError(
-            'Feather support requires optional dependency "pyarrow".\n'
-            'Install with: pip install pyarrow',
-        ) from e
+    frame.to_feather(path)
     return len(records)
etlplus/file/fwf.py
CHANGED
@@ -17,14 +17,16 @@ Notes

 from __future__ import annotations

-from pathlib import Path
-from typing import Any
 from typing import cast

 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
+from ._io import stringify_value

 # SECTION: EXPORTS ========================================================== #

@@ -40,14 +42,14 @@ __all__ = [


 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read FWF content from *path*.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the FWF file on disk.

     Returns
@@ -55,13 +57,14 @@ def read(
     JSONList
         The list of dictionaries read from the FWF file.
     """
+    path = coerce_path(path)
     pandas = get_pandas('FWF')
     frame = pandas.read_fwf(path)
     return cast(JSONList, frame.to_dict(orient='records'))


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -69,7 +72,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the FWF file on disk.
     data : JSONData
         Data to write as FWF file. Should be a list of dictionaries or a
@@ -80,6 +83,7 @@ def write(
     int
         The number of rows written to the FWF file.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'FWF')
     if not records:
         return 0
@@ -88,23 +92,21 @@ def write(
     if not fieldnames:
         return 0

-    def stringify(value: Any) -> str:
-        if value is None:
-            return ''
-        return str(value)
-
     widths: dict[str, int] = {name: len(name) for name in fieldnames}
     for row in records:
         for name in fieldnames:
-            widths[name] = max(
+            widths[name] = max(
+                widths[name],
+                len(stringify_value(row.get(name))),
+            )

-    path
+    ensure_parent_dir(path)
     with path.open('w', encoding='utf-8', newline='') as handle:
         header = ' '.join(name.ljust(widths[name]) for name in fieldnames)
         handle.write(header + '\n')
         for row in records:
             line = ' '.join(
-
+                stringify_value(row.get(name)).ljust(widths[name])
                 for name in fieldnames
             )
             handle.write(line + '\n')
etlplus/file/gz.py
CHANGED
@@ -11,6 +11,9 @@ import tempfile
 from pathlib import Path

 from ..types import JSONData
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from .enums import CompressionFormat
 from .enums import FileFormat
 from .enums import infer_file_format_and_compression
@@ -29,14 +32,14 @@ __all__ = [


 def _resolve_format(
-    path:
+    path: StrPath,
 ) -> FileFormat:
     """
     Resolve the inner file format from a .gz filename.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the GZ file on disk.

     Returns
@@ -63,14 +66,14 @@ def _resolve_format(


 def read(
-    path:
+    path: StrPath,
 ) -> JSONData:
     """
     Read GZ content from *path* and parse the inner payload.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the GZ file on disk.

     Returns
@@ -78,6 +81,7 @@ def read(
     JSONData
         Parsed payload.
     """
+    path = coerce_path(path)
     fmt = _resolve_format(path)
     with gzip.open(path, 'rb') as handle:
         payload = handle.read()
@@ -91,7 +95,7 @@ def read(


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -99,7 +103,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the GZ file on disk.
     data : JSONData
         Data to write.
@@ -109,6 +113,7 @@ def write(
     int
         Number of records written.
     """
+    path = coerce_path(path)
     fmt = _resolve_format(path)
     with tempfile.TemporaryDirectory() as tmpdir:
         tmp_path = Path(tmpdir) / f'payload.{fmt.value}'
@@ -117,7 +122,7 @@ def write(
         count = File(tmp_path, fmt).write(data)
         payload = tmp_path.read_bytes()

-    path
+    ensure_parent_dir(path)
     with gzip.open(path, 'wb') as handle:
         handle.write(payload)

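gz.write keeps its round-trip strategy: infer the inner format from the filename (e.g. data.json.gz), write the payload to a temporary file with the matching File writer, then gzip the temporary file's bytes to the destination. Below is a simplified standard-library illustration of the same idea, with JSON hard-coded where the real module dispatches on the inferred format.

# Simplified stand-in for the gz round-trip: serialize to a temporary file in
# the inner format, then gzip those bytes to the destination. The real module
# delegates the inner write to etlplus' File(...) dispatcher.
import gzip
import json
import tempfile
from pathlib import Path


def write_json_gz(path: Path, data: object) -> int:
    path.parent.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir) / 'payload.json'
        tmp_path.write_text(json.dumps(data), encoding='utf-8')
        payload = tmp_path.read_bytes()
    with gzip.open(path, 'wb') as handle:
        handle.write(payload)
    return len(data) if isinstance(data, list) else 1


def read_json_gz(path: Path) -> object:
    with gzip.open(path, 'rb') as handle:
        return json.loads(handle.read().decode('utf-8'))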
etlplus/file/hbs.py
CHANGED
@@ -19,11 +19,11 @@ Notes

 from __future__ import annotations

-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path

 # SECTION: EXPORTS ========================================================== #

@@ -39,14 +39,14 @@ __all__ = [


 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read ZSAV content from *path*.

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the HBS file on disk.

     Returns
@@ -58,7 +58,7 @@ def read(


 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +66,7 @@ def write(

     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the HBS file on disk.
     data : JSONData
         Data to write as HBS file. Should be a list of dictionaries or a
@@ -77,4 +77,5 @@ def write(
     int
         The number of rows written to the HBS file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='HBS')