etlplus 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/_imports.py +35 -20
- etlplus/file/_io.py +138 -15
- etlplus/file/_r.py +48 -0
- etlplus/file/_sql.py +224 -0
- etlplus/file/accdb.py +7 -6
- etlplus/file/arrow.py +13 -24
- etlplus/file/avro.py +13 -10
- etlplus/file/bson.py +61 -22
- etlplus/file/cbor.py +13 -25
- etlplus/file/cfg.py +7 -6
- etlplus/file/conf.py +7 -6
- etlplus/file/core.py +1 -1
- etlplus/file/csv.py +8 -7
- etlplus/file/dat.py +9 -6
- etlplus/file/dta.py +15 -30
- etlplus/file/duckdb.py +29 -122
- etlplus/file/feather.py +15 -30
- etlplus/file/fwf.py +16 -14
- etlplus/file/gz.py +12 -7
- etlplus/file/hbs.py +7 -6
- etlplus/file/hdf5.py +31 -6
- etlplus/file/ini.py +17 -24
- etlplus/file/ion.py +7 -6
- etlplus/file/jinja2.py +7 -6
- etlplus/file/json.py +10 -11
- etlplus/file/log.py +7 -6
- etlplus/file/mat.py +7 -6
- etlplus/file/mdb.py +7 -6
- etlplus/file/msgpack.py +13 -25
- etlplus/file/mustache.py +7 -6
- etlplus/file/nc.py +30 -21
- etlplus/file/ndjson.py +10 -6
- etlplus/file/numbers.py +7 -6
- etlplus/file/ods.py +10 -6
- etlplus/file/orc.py +15 -30
- etlplus/file/parquet.py +10 -6
- etlplus/file/pb.py +22 -23
- etlplus/file/pbf.py +7 -6
- etlplus/file/properties.py +15 -29
- etlplus/file/proto.py +14 -20
- etlplus/file/psv.py +8 -7
- etlplus/file/rda.py +19 -51
- etlplus/file/rds.py +19 -51
- etlplus/file/sas7bdat.py +10 -30
- etlplus/file/sav.py +13 -24
- etlplus/file/sqlite.py +25 -83
- etlplus/file/stub.py +8 -6
- etlplus/file/sylk.py +7 -6
- etlplus/file/tab.py +8 -7
- etlplus/file/toml.py +14 -17
- etlplus/file/tsv.py +8 -7
- etlplus/file/txt.py +10 -7
- etlplus/file/vm.py +7 -6
- etlplus/file/wks.py +7 -6
- etlplus/file/xls.py +8 -5
- etlplus/file/xlsm.py +10 -6
- etlplus/file/xlsx.py +10 -6
- etlplus/file/xml.py +11 -9
- etlplus/file/xpt.py +13 -33
- etlplus/file/yaml.py +10 -11
- etlplus/file/zip.py +10 -5
- etlplus/file/zsav.py +7 -6
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/METADATA +1 -1
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/RECORD +68 -66
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.17.2.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
etlplus/file/sqlite.py
CHANGED
@@ -18,14 +18,21 @@ Notes
 
 from __future__ import annotations
 
-import json
 import sqlite3
-from pathlib import Path
-from typing import Any
 
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
+from ._sql import DEFAULT_TABLE
+from ._sql import SQLITE_DIALECT
+from ._sql import coerce_sql_value
+from ._sql import collect_column_values
+from ._sql import infer_column_type
+from ._sql import quote_identifier
+from ._sql import resolve_table
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -37,81 +44,18 @@ __all__ = [
 ]
 
 
-# SECTION: INTERNAL CONSTANTS ============================================== #
-
-
-DEFAULT_TABLE = 'data'
-
-
-# SECTION: INTERNAL FUNCTIONS =============================================== #
-
-
-def _quote_identifier(value: str) -> str:
-    """Return a safely quoted SQL identifier."""
-    escaped = value.replace('"', '""')
-    return f'"{escaped}"'
-
-
-def _coerce_sql_value(value: Any) -> Any:
-    """Normalize values into SQLite-compatible types."""
-    if value is None or isinstance(value, (str, int, float, bool)):
-        return value
-    return json.dumps(value, ensure_ascii=True)
-
-
-def _infer_column_type(values: list[Any]) -> str:
-    """Infer a basic SQLite column type from sample values."""
-    seen_bool = False
-    seen_int = False
-    seen_float = False
-    seen_other = False
-    for value in values:
-        if value is None:
-            continue
-        if isinstance(value, bool):
-            seen_bool = True
-        elif isinstance(value, int):
-            seen_int = True
-        elif isinstance(value, float):
-            seen_float = True
-        else:
-            seen_other = True
-            break
-    if seen_other:
-        return 'TEXT'
-    if seen_float:
-        return 'REAL'
-    if seen_int or seen_bool:
-        return 'INTEGER'
-    return 'TEXT'
-
-
-def _resolve_table(tables: list[str]) -> str | None:
-    """Pick a table name for read operations."""
-    if not tables:
-        return None
-    if DEFAULT_TABLE in tables:
-        return DEFAULT_TABLE
-    if len(tables) == 1:
-        return tables[0]
-    raise ValueError(
-        'Multiple tables found in SQLite file; expected "data" or a '
-        'single table',
-    )
-
-
 # SECTION: FUNCTIONS ======================================================== #
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read SQLITE content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the SQLITE file on disk.
 
     Returns
@@ -119,6 +63,7 @@ def read(
     JSONList
         The list of dictionaries read from the SQLITE file.
     """
+    path = coerce_path(path)
     conn = sqlite3.connect(str(path))
     try:
         conn.row_factory = sqlite3.Row
@@ -128,10 +73,10 @@ def read(
             'ORDER BY name',
         )
         tables = [row[0] for row in cursor.fetchall()]
-        table = _resolve_table(tables)
+        table = resolve_table(tables, engine_name='SQLite')
         if table is None:
             return []
-        query = f'SELECT * FROM {_quote_identifier(table)}'
+        query = f'SELECT * FROM {quote_identifier(table)}'
        rows = conn.execute(query).fetchall()
        return [dict(row) for row in rows]
    finally:
@@ -139,7 +84,7 @@ def read(
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -147,7 +92,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the SQLITE file on disk.
     data : JSONData
         Data to write as SQLITE. Should be a list of dictionaries or a
@@ -158,37 +103,34 @@ def write(
     int
         The number of rows written to the SQLITE file.
     """
+    path = coerce_path(path)
     records = normalize_records(data, 'SQLITE')
     if not records:
         return 0
 
-    columns =
+    columns, column_values = collect_column_values(records)
     if not columns:
         return 0
 
-    column_values: dict[str, list[Any]] = {col: [] for col in columns}
-    for row in records:
-        for column in columns:
-            column_values[column].append(row.get(column))
-
     column_defs = ', '.join(
-        f'{_quote_identifier(column)} {_infer_column_type(values)}'
+        f'{quote_identifier(column)} '
+        f'{infer_column_type(values, SQLITE_DIALECT)}'
         for column, values in column_values.items()
     )
-    table_ident = _quote_identifier(DEFAULT_TABLE)
-    insert_columns = ', '.join(_quote_identifier(column) for column in columns)
+    table_ident = quote_identifier(DEFAULT_TABLE)
+    insert_columns = ', '.join(quote_identifier(column) for column in columns)
     placeholders = ', '.join('?' for _ in columns)
     insert_sql = (
         f'INSERT INTO {table_ident} ({insert_columns}) VALUES ({placeholders})'
     )
 
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     conn = sqlite3.connect(str(path))
     try:
         conn.execute(f'DROP TABLE IF EXISTS {table_ident}')
         conn.execute(f'CREATE TABLE {table_ident} ({column_defs})')
         rows = [
-            tuple(_coerce_sql_value(row.get(column)) for column in columns)
+            tuple(coerce_sql_value(row.get(column)) for column in columns)
             for row in records
         ]
         conn.executemany(insert_sql, rows)
etlplus/file/stub.py
CHANGED
@@ -6,10 +6,10 @@ Helpers for reading/writing stubbed files.
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -25,7 +25,7 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
     format_name: str = 'Stubbed',
 ) -> JSONList:
     """
@@ -33,7 +33,7 @@ def read(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the stubbed file on disk.
     format_name : str
         Human-readable format name.
@@ -48,12 +48,13 @@ def read(
     NotImplementedError
         Always, since this is a stub implementation.
     """
+    path = coerce_path(path)
     _ = path
     raise NotImplementedError(f'{format_name} read is not implemented yet')
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
     format_name: str = 'Stubbed',
 ) -> int:
@@ -62,7 +63,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the stubbed file on disk.
     data : JSONData
         Data to write as stubbed file. Should be a list of dictionaries or a
@@ -80,6 +81,7 @@ def write(
     NotImplementedError
         Always, since this is a stub implementation.
     """
+    path = coerce_path(path)
     _ = path
     _ = data
     raise NotImplementedError(f'{format_name} write is not implemented yet')
etlplus/file/sylk.py
CHANGED
@@ -18,11 +18,11 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -38,14 +38,14 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read SYLK content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the SYLK file on disk.
 
     Returns
@@ -57,7 +57,7 @@ def read(
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +65,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the SYLK file on disk.
     data : JSONData
         Data to write as SYLK file. Should be a list of dictionaries or a
@@ -76,4 +76,5 @@ def write(
     int
         The number of rows written to the SYLK file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='SYLK')
etlplus/file/tab.py
CHANGED
@@ -20,10 +20,10 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._io import coerce_path
 from ._io import read_delimited
 from ._io import write_delimited
 
@@ -41,14 +41,14 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read TAB content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the TAB file on disk.
 
     Returns
@@ -60,7 +60,7 @@ def read(
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -68,7 +68,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the TAB file on disk.
     data : JSONData
         Data to write as TAB file. Should be a list of dictionaries or a
@@ -79,4 +79,5 @@ def write(
     int
         The number of rows written to the TAB file.
     """
-    return write_delimited(path, data, delimiter='\t', format_name='TAB')
+    path = coerce_path(path)
+    return write_delimited(path, data, delimiter='\t', format_name='TAB')
etlplus/file/toml.py
CHANGED
@@ -19,13 +19,16 @@ Notes
 from __future__ import annotations
 
 import tomllib
-from pathlib import Path
 from typing import Any
 from typing import cast
 
 from ..types import JSONData
 from ..types import JSONDict
+from ..types import StrPath
 from ._imports import get_optional_module
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import require_dict_payload
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -41,14 +44,14 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONData:
     """
     Read TOML content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the TOML file on disk.
 
     Returns
@@ -61,6 +64,7 @@ def read(
     TypeError
         If the TOML root is not a table (dictionary).
     """
+    path = coerce_path(path)
     payload = tomllib.loads(path.read_text(encoding='utf-8'))
     if isinstance(payload, dict):
         return payload
@@ -68,7 +72,7 @@ def read(
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -76,7 +80,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the TOML file on disk.
     data : JSONData
         Data to write as TOML. Should be a dictionary.
@@ -85,16 +89,9 @@ def write(
     -------
     int
         The number of records written to the TOML file.
-
-    Raises
-    ------
-    TypeError
-        If *data* is not a dictionary.
     """
-
-
-    if not isinstance(data, dict):
-        raise TypeError('TOML payloads must be a dict')
+    path = coerce_path(path)
+    payload = require_dict_payload(data, format_name='TOML')
 
     toml_writer: Any
     try:
@@ -105,7 +102,7 @@ def write(
                 'Install with: pip install tomli-w'
             ),
         )
-        content = toml_writer.dumps(cast(JSONDict, data))
+        content = toml_writer.dumps(cast(JSONDict, payload))
     except ImportError:
         toml = get_optional_module(
             'toml',
@@ -115,8 +112,8 @@ def write(
                 'Install with: pip install tomli-w'
             ),
         )
-        content = toml.dumps(cast(JSONDict, data))
+        content = toml.dumps(cast(JSONDict, payload))
 
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     path.write_text(content, encoding='utf-8')
     return 1
etlplus/file/tsv.py
CHANGED
@@ -19,10 +19,10 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._io import coerce_path
 from ._io import read_delimited
 from ._io import write_delimited
 
@@ -40,14 +40,14 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read TSV content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the TSV file on disk.
 
     Returns
@@ -59,7 +59,7 @@ def read(
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -67,7 +67,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the TSV file on disk.
     data : JSONData
         Data to write as TSV. Should be a list of dictionaries or a
@@ -78,4 +78,5 @@ def write(
     int
         The number of rows written to the TSV file.
     """
-    return write_delimited(path, data, delimiter='\t', format_name='TSV')
+    path = coerce_path(path)
+    return write_delimited(path, data, delimiter='\t', format_name='TSV')
etlplus/file/txt.py
CHANGED
@@ -16,11 +16,12 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from ..utils import count_records
+from ._io import coerce_path
+from ._io import ensure_parent_dir
 from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
@@ -37,14 +38,14 @@ __all__ = [
 
 
 def read(
-    path: Path,
+    path: StrPath,
 ) -> JSONList:
     """
     Read TXT content from *path*.
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the TXT file on disk.
 
     Returns
@@ -52,6 +53,7 @@ def read(
     JSONList
         The list of dictionaries read from the TXT file.
     """
+    path = coerce_path(path)
     rows: JSONList = []
     with path.open('r', encoding='utf-8') as handle:
         for line in handle:
@@ -63,7 +65,7 @@ def read(
 
 
 def write(
-    path: Path,
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -71,7 +73,7 @@ def write(
 
     Parameters
     ----------
-    path : Path
+    path : StrPath
         Path to the TXT file on disk.
     data : JSONData
         Data to write. Expects ``{'text': '...'}`` or a list of those.
@@ -87,12 +89,13 @@ def write(
         If any item in *data* is not a dictionary or if any dictionary
         does not contain a ``'text'`` key.
     """
+    path = coerce_path(path)
     rows = normalize_records(data, 'TXT')
 
     if not rows:
         return 0
 
-    path.parent.mkdir(parents=True, exist_ok=True)
+    ensure_parent_dir(path)
     with path.open('w', encoding='utf-8') as handle:
         for row in rows:
             if 'text' not in row:
CHANGED
|
@@ -19,11 +19,11 @@ Notes
|
|
|
19
19
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
|
-
from pathlib import Path
|
|
23
|
-
|
|
24
22
|
from ..types import JSONData
|
|
25
23
|
from ..types import JSONList
|
|
24
|
+
from ..types import StrPath
|
|
26
25
|
from . import stub
|
|
26
|
+
from ._io import coerce_path
|
|
27
27
|
|
|
28
28
|
# SECTION: EXPORTS ========================================================== #
|
|
29
29
|
|
|
@@ -39,14 +39,14 @@ __all__ = [
|
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
def read(
|
|
42
|
-
path:
|
|
42
|
+
path: StrPath,
|
|
43
43
|
) -> JSONList:
|
|
44
44
|
"""
|
|
45
45
|
Read VM content from *path*.
|
|
46
46
|
|
|
47
47
|
Parameters
|
|
48
48
|
----------
|
|
49
|
-
path :
|
|
49
|
+
path : StrPath
|
|
50
50
|
Path to the VM file on disk.
|
|
51
51
|
|
|
52
52
|
Returns
|
|
@@ -58,7 +58,7 @@ def read(
|
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
def write(
|
|
61
|
-
path:
|
|
61
|
+
path: StrPath,
|
|
62
62
|
data: JSONData,
|
|
63
63
|
) -> int:
|
|
64
64
|
"""
|
|
@@ -66,7 +66,7 @@ def write(
|
|
|
66
66
|
|
|
67
67
|
Parameters
|
|
68
68
|
----------
|
|
69
|
-
path :
|
|
69
|
+
path : StrPath
|
|
70
70
|
Path to the VM file on disk.
|
|
71
71
|
data : JSONData
|
|
72
72
|
Data to write as VM file. Should be a list of dictionaries or a single
|
|
@@ -77,4 +77,5 @@ def write(
|
|
|
77
77
|
int
|
|
78
78
|
The number of rows written to the VM file.
|
|
79
79
|
"""
|
|
80
|
+
path = coerce_path(path)
|
|
80
81
|
return stub.write(path, data, format_name='VM')
|
etlplus/file/wks.py
CHANGED
|
@@ -18,11 +18,11 @@ Notes
|
|
|
18
18
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
|
-
from pathlib import Path
|
|
22
|
-
|
|
23
21
|
from ..types import JSONData
|
|
24
22
|
from ..types import JSONList
|
|
23
|
+
from ..types import StrPath
|
|
25
24
|
from . import stub
|
|
25
|
+
from ._io import coerce_path
|
|
26
26
|
|
|
27
27
|
# SECTION: EXPORTS ========================================================== #
|
|
28
28
|
|
|
@@ -38,14 +38,14 @@ __all__ = [
|
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
def read(
|
|
41
|
-
path:
|
|
41
|
+
path: StrPath,
|
|
42
42
|
) -> JSONList:
|
|
43
43
|
"""
|
|
44
44
|
Read WKS content from *path*.
|
|
45
45
|
|
|
46
46
|
Parameters
|
|
47
47
|
----------
|
|
48
|
-
path :
|
|
48
|
+
path : StrPath
|
|
49
49
|
Path to the WKS file on disk.
|
|
50
50
|
|
|
51
51
|
Returns
|
|
@@ -57,7 +57,7 @@ def read(
|
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
def write(
|
|
60
|
-
path:
|
|
60
|
+
path: StrPath,
|
|
61
61
|
data: JSONData,
|
|
62
62
|
) -> int:
|
|
63
63
|
"""
|
|
@@ -65,7 +65,7 @@ def write(
|
|
|
65
65
|
|
|
66
66
|
Parameters
|
|
67
67
|
----------
|
|
68
|
-
path :
|
|
68
|
+
path : StrPath
|
|
69
69
|
Path to the WKS file on disk.
|
|
70
70
|
data : JSONData
|
|
71
71
|
Data to write as WKS file. Should be a list of dictionaries or a
|
|
@@ -76,4 +76,5 @@ def write(
|
|
|
76
76
|
int
|
|
77
77
|
The number of rows written to the WKS file.
|
|
78
78
|
"""
|
|
79
|
+
path = coerce_path(path)
|
|
79
80
|
return stub.write(path, data, format_name='WKS')
|