etlplus 0.16.10__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/README.md +33 -0
- etlplus/file/_imports.py +35 -20
- etlplus/file/_io.py +138 -15
- etlplus/file/_r.py +48 -0
- etlplus/file/_sql.py +224 -0
- etlplus/file/accdb.py +7 -6
- etlplus/file/arrow.py +29 -10
- etlplus/file/avro.py +13 -10
- etlplus/file/bson.py +94 -10
- etlplus/file/cbor.py +29 -17
- etlplus/file/cfg.py +7 -6
- etlplus/file/conf.py +7 -6
- etlplus/file/core.py +1 -1
- etlplus/file/csv.py +8 -7
- etlplus/file/dat.py +52 -11
- etlplus/file/dta.py +36 -16
- etlplus/file/duckdb.py +72 -11
- etlplus/file/enums.py +29 -0
- etlplus/file/feather.py +15 -30
- etlplus/file/fwf.py +44 -10
- etlplus/file/gz.py +12 -7
- etlplus/file/hbs.py +7 -6
- etlplus/file/hdf5.py +71 -8
- etlplus/file/ini.py +60 -17
- etlplus/file/ion.py +7 -6
- etlplus/file/jinja2.py +7 -6
- etlplus/file/json.py +10 -11
- etlplus/file/log.py +7 -6
- etlplus/file/mat.py +7 -6
- etlplus/file/mdb.py +7 -6
- etlplus/file/msgpack.py +27 -15
- etlplus/file/mustache.py +7 -6
- etlplus/file/nc.py +69 -11
- etlplus/file/ndjson.py +10 -6
- etlplus/file/numbers.py +7 -6
- etlplus/file/ods.py +48 -11
- etlplus/file/orc.py +15 -30
- etlplus/file/parquet.py +10 -6
- etlplus/file/pb.py +36 -24
- etlplus/file/pbf.py +7 -6
- etlplus/file/properties.py +44 -18
- etlplus/file/proto.py +24 -18
- etlplus/file/psv.py +12 -11
- etlplus/file/rda.py +57 -15
- etlplus/file/rds.py +50 -14
- etlplus/file/sas7bdat.py +26 -16
- etlplus/file/sav.py +34 -16
- etlplus/file/sqlite.py +70 -10
- etlplus/file/stub.py +8 -6
- etlplus/file/sylk.py +7 -6
- etlplus/file/tab.py +13 -13
- etlplus/file/toml.py +56 -17
- etlplus/file/tsv.py +8 -7
- etlplus/file/txt.py +10 -7
- etlplus/file/vm.py +7 -6
- etlplus/file/wks.py +7 -6
- etlplus/file/xls.py +8 -5
- etlplus/file/xlsm.py +48 -10
- etlplus/file/xlsx.py +10 -6
- etlplus/file/xml.py +11 -9
- etlplus/file/xpt.py +46 -10
- etlplus/file/yaml.py +10 -11
- etlplus/file/zip.py +10 -5
- etlplus/file/zsav.py +7 -6
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/METADATA +44 -26
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/RECORD +70 -68
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/WHEEL +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.3.dist-info}/top_level.txt +0 -0
etlplus/file/sas7bdat.py
CHANGED
@@ -1,29 +1,31 @@
 """
 :mod:`etlplus.file.sas7bdat` module.
 
-
-yet).
+Helpers for reading/writing SAS (SAS7BDAT) data files.
 
 Notes
 -----
-- A SAS7BDAT file is a binary file format
-  including variables, labels, and data types.
+- A SAS7BDAT file is a proprietary binary file format created by SAS to store
+  datasets, including variables, labels, and data types.
 - Common cases:
-  -
-  -
-  - Custom formats specific to certain applications.
+  - Statistical analysis pipelines.
+  - Data exchange with SAS tooling.
 - Rule of thumb:
-  - If the file
-
+  - If the file follows the SAS7BDAT specification, use this module for
+    reading and writing.
 """
 
 from __future__ import annotations
 
-from
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._imports import get_dependency
+from ._imports import get_pandas
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -39,14 +41,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
-    Read
+    Read SAS7BDAT content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SAS7BDAT file on disk.
 
     Returns
@@ -54,11 +56,18 @@ def read(
     JSONList
         The list of dictionaries read from the SAS7BDAT file.
     """
-
+    path = coerce_path(path)
+    get_dependency('pyreadstat', format_name='SAS7BDAT')
+    pandas = get_pandas('SAS7BDAT')
+    try:
+        frame = pandas.read_sas(path, format='sas7bdat')
+    except TypeError:
+        frame = pandas.read_sas(path)
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +75,7 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SAS7BDAT file on disk.
     data : JSONData
         Data to write as SAS7BDAT file. Should be a list of dictionaries or a
@@ -77,4 +86,5 @@ def write(
     int
         The number of rows written to the SAS7BDAT file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='SAS7BDAT')
etlplus/file/sav.py
CHANGED
@@ -1,27 +1,32 @@
 """
 :mod:`etlplus.file.sav` module.
 
-
+Helpers for reading/writing SPSS (SAV) files.
 
 Notes
 -----
-- A SAV file is a
-  variables, labels, and data types.
+- A SAV file is a dataset created by SPSS.
 - Common cases:
-  -
-  -
+  - Survey and market research datasets.
+  - Statistical analysis workflows.
+  - Exchange with SPSS and compatible tools.
 - Rule of thumb:
-  - If
+  - If the file follows the SAV specification, use this module for reading
     and writing.
 """
 
 from __future__ import annotations
 
-from
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from
+from ..types import StrPath
+from ._imports import get_dependency
+from ._imports import get_pandas
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -37,14 +42,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read SAV content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SAV file on disk.
 
     Returns
@@ -52,22 +57,25 @@ def read(
     JSONList
         The list of dictionaries read from the SAV file.
     """
-
+    path = coerce_path(path)
+    pyreadstat = get_dependency('pyreadstat', format_name='SAV')
+    frame, _meta = pyreadstat.read_sav(str(path))
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
-    Write *data* to SAV
+    Write *data* to SAV at *path* and return record count.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SAV file on disk.
     data : JSONData
-        Data to write as SAV
+        Data to write as SAV. Should be a list of dictionaries or a
         single dictionary.
 
     Returns
@@ -75,4 +83,14 @@ def write(
     int
         The number of rows written to the SAV file.
     """
-
+    path = coerce_path(path)
+    records = normalize_records(data, 'SAV')
+    if not records:
+        return 0
+
+    pyreadstat = get_dependency('pyreadstat', format_name='SAV')
+    pandas = get_pandas('SAV')
+    ensure_parent_dir(path)
+    frame = pandas.DataFrame.from_records(records)
+    pyreadstat.write_sav(frame, str(path))
+    return len(records)
etlplus/file/sqlite.py
CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.sqlite` module.
 
-
-implemented yet).
+Helpers for reading/writing SQLite database (SQLITE) files.
 
 Notes
 -----
@@ -19,11 +18,21 @@ Notes
 
 from __future__ import annotations
 
-
+import sqlite3
 
 from ..types import JSONData
 from ..types import JSONList
-from
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import normalize_records
+from ._sql import DEFAULT_TABLE
+from ._sql import SQLITE_DIALECT
+from ._sql import coerce_sql_value
+from ._sql import collect_column_values
+from ._sql import infer_column_type
+from ._sql import quote_identifier
+from ._sql import resolve_table
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -39,14 +48,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read SQLITE content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SQLITE file on disk.
 
     Returns
@@ -54,11 +63,28 @@ def read(
     JSONList
         The list of dictionaries read from the SQLITE file.
     """
-
+    path = coerce_path(path)
+    conn = sqlite3.connect(str(path))
+    try:
+        conn.row_factory = sqlite3.Row
+        cursor = conn.execute(
+            'SELECT name FROM sqlite_master '
+            "WHERE type='table' AND name NOT LIKE 'sqlite_%' "
+            'ORDER BY name',
+        )
+        tables = [row[0] for row in cursor.fetchall()]
+        table = resolve_table(tables, engine_name='SQLite')
+        if table is None:
+            return []
+        query = f'SELECT * FROM {quote_identifier(table)}'
+        rows = conn.execute(query).fetchall()
+        return [dict(row) for row in rows]
+    finally:
+        conn.close()
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,7 +92,7 @@ def write(
 
     Parameters
    ----------
-    path :
+    path : StrPath
         Path to the SQLITE file on disk.
     data : JSONData
         Data to write as SQLITE. Should be a list of dictionaries or a
@@ -77,4 +103,38 @@ def write(
     int
         The number of rows written to the SQLITE file.
     """
-
+    path = coerce_path(path)
+    records = normalize_records(data, 'SQLITE')
+    if not records:
+        return 0
+
+    columns, column_values = collect_column_values(records)
+    if not columns:
+        return 0
+
+    column_defs = ', '.join(
+        f'{quote_identifier(column)} '
+        f'{infer_column_type(values, SQLITE_DIALECT)}'
+        for column, values in column_values.items()
+    )
+    table_ident = quote_identifier(DEFAULT_TABLE)
+    insert_columns = ', '.join(quote_identifier(column) for column in columns)
+    placeholders = ', '.join('?' for _ in columns)
+    insert_sql = (
+        f'INSERT INTO {table_ident} ({insert_columns}) VALUES ({placeholders})'
+    )
+
+    ensure_parent_dir(path)
+    conn = sqlite3.connect(str(path))
+    try:
+        conn.execute(f'DROP TABLE IF EXISTS {table_ident}')
+        conn.execute(f'CREATE TABLE {table_ident} ({column_defs})')
+        rows = [
+            tuple(coerce_sql_value(row.get(column)) for column in columns)
+            for row in records
+        ]
+        conn.executemany(insert_sql, rows)
+        conn.commit()
+    finally:
+        conn.close()
+    return len(records)
etlplus/file/stub.py
CHANGED
@@ -6,10 +6,10 @@ Helpers for reading/writing stubbed files.
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -25,7 +25,7 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
     format_name: str = 'Stubbed',
 ) -> JSONList:
     """
@@ -33,7 +33,7 @@ def read(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the stubbed file on disk.
     format_name : str
         Human-readable format name.
@@ -48,12 +48,13 @@ def read(
     NotImplementedError
         Always, since this is a stub implementation.
     """
+    path = coerce_path(path)
     _ = path
     raise NotImplementedError(f'{format_name} read is not implemented yet')
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
     format_name: str = 'Stubbed',
 ) -> int:
@@ -62,7 +63,7 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the stubbed file on disk.
     data : JSONData
         Data to write as stubbed file. Should be a list of dictionaries or a
@@ -80,6 +81,7 @@ def write(
     NotImplementedError
         Always, since this is a stub implementation.
     """
+    path = coerce_path(path)
     _ = path
     _ = data
     raise NotImplementedError(f'{format_name} write is not implemented yet')
etlplus/file/sylk.py
CHANGED
@@ -18,11 +18,11 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
 from . import stub
+from ._io import coerce_path
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -38,14 +38,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read SYLK content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SYLK file on disk.
 
     Returns
@@ -57,7 +57,7 @@ def read(
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -65,7 +65,7 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the SYLK file on disk.
     data : JSONData
         Data to write as SYLK file. Should be a list of dictionaries or a
@@ -76,4 +76,5 @@ def write(
     int
         The number of rows written to the SYLK file.
     """
+    path = coerce_path(path)
     return stub.write(path, data, format_name='SYLK')
etlplus/file/tab.py
CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.tab` module.
 
-
-yet).
+Helpers for reading/writing "tab"-formatted (TAB) files.
 
 Notes
 -----
@@ -15,18 +14,18 @@ Notes
 - Header/metadata lines or multi-line records that break TSV assumptions.
 - Not actually tab-delimited despite the name.
 - Rule of thumb:
-  -
+  - This implementation treats TAB as tab-delimited text.
   - If the file has fixed-width fields, use :mod:`etlplus.file.fwf`.
-  - Otherwise, use :mod:`etlplus.file.tab` (i.e., this module).
 """
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
-from
+from ..types import StrPath
+from ._io import coerce_path
+from ._io import read_delimited
+from ._io import write_delimited
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -42,14 +41,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read TAB content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TAB file on disk.
 
     Returns
@@ -57,11 +56,11 @@ def read(
     JSONList
         The list of dictionaries read from the TAB file.
     """
-    return
+    return read_delimited(path, delimiter='\t')
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -69,7 +68,7 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TAB file on disk.
     data : JSONData
         Data to write as TAB file. Should be a list of dictionaries or a
@@ -80,4 +79,5 @@ def write(
     int
         The number of rows written to the TAB file.
     """
-
+    path = coerce_path(path)
+    return write_delimited(path, data, delimiter='\t', format_name='TAB')
etlplus/file/toml.py
CHANGED
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.toml` module.
 
-
-(not implemented yet).
+Helpers for reading/writing Tom's Obvious Minimal Language (TOML) files.
 
 Notes
 -----
@@ -19,11 +18,17 @@ Notes
 
 from __future__ import annotations
 
-
+import tomllib
+from typing import Any
+from typing import cast
 
 from ..types import JSONData
-from ..types import
-from
+from ..types import JSONDict
+from ..types import StrPath
+from ._imports import get_optional_module
+from ._io import coerce_path
+from ._io import ensure_parent_dir
+from ._io import require_dict_payload
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -39,26 +44,35 @@ __all__ = [
 
 
 def read(
-    path:
-) ->
+    path: StrPath,
+) -> JSONData:
     """
     Read TOML content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TOML file on disk.
 
     Returns
     -------
-
-    The
+    JSONData
+        The structured data read from the TOML file.
+
+    Raises
+    ------
+    TypeError
+        If the TOML root is not a table (dictionary).
     """
-
+    path = coerce_path(path)
+    payload = tomllib.loads(path.read_text(encoding='utf-8'))
+    if isinstance(payload, dict):
+        return payload
+    raise TypeError('TOML root must be a table (dict)')
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -66,15 +80,40 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TOML file on disk.
     data : JSONData
-        Data to write as TOML. Should be a
-        single dictionary.
+        Data to write as TOML. Should be a dictionary.
 
     Returns
     -------
     int
-        The number of
+        The number of records written to the TOML file.
     """
-
+    path = coerce_path(path)
+    payload = require_dict_payload(data, format_name='TOML')
+
+    toml_writer: Any
+    try:
+        toml_writer = get_optional_module(
+            'tomli_w',
+            error_message=(
+                'TOML write support requires optional dependency "tomli_w".\n'
+                'Install with: pip install tomli-w'
+            ),
+        )
+        content = toml_writer.dumps(cast(JSONDict, payload))
+    except ImportError:
+        toml = get_optional_module(
+            'toml',
+            error_message=(
+                'TOML write support requires optional dependency "tomli_w" '
+                'or "toml".\n'
+                'Install with: pip install tomli-w'
+            ),
+        )
+        content = toml.dumps(cast(JSONDict, payload))
+
+    ensure_parent_dir(path)
+    path.write_text(content, encoding='utf-8')
+    return 1
etlplus/file/tsv.py
CHANGED
@@ -19,10 +19,10 @@ Notes
 
 from __future__ import annotations
 
-from pathlib import Path
-
 from ..types import JSONData
 from ..types import JSONList
+from ..types import StrPath
+from ._io import coerce_path
 from ._io import read_delimited
 from ._io import write_delimited
 
@@ -40,14 +40,14 @@ __all__ = [
 
 
 def read(
-    path:
+    path: StrPath,
 ) -> JSONList:
     """
     Read TSV content from *path*.
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TSV file on disk.
 
     Returns
@@ -59,7 +59,7 @@ def read(
 
 
 def write(
-    path:
+    path: StrPath,
     data: JSONData,
 ) -> int:
     """
@@ -67,7 +67,7 @@ def write(
 
     Parameters
     ----------
-    path :
+    path : StrPath
         Path to the TSV file on disk.
     data : JSONData
         Data to write as TSV. Should be a list of dictionaries or a
@@ -78,4 +78,5 @@ def write(
     int
         The number of rows written to the TSV file.
     """
+    path = coerce_path(path)
+    return write_delimited(path, data, delimiter='\t', format_name='TSV')