etlplus 0.16.10__py3-none-any.whl → 0.17.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/file/README.md +33 -0
- etlplus/file/arrow.py +35 -5
- etlplus/file/bson.py +50 -5
- etlplus/file/cbor.py +35 -11
- etlplus/file/dat.py +44 -6
- etlplus/file/dta.py +46 -11
- etlplus/file/duckdb.py +159 -5
- etlplus/file/enums.py +29 -0
- etlplus/file/fwf.py +37 -5
- etlplus/file/hdf5.py +41 -3
- etlplus/file/ini.py +62 -12
- etlplus/file/msgpack.py +33 -9
- etlplus/file/nc.py +55 -6
- etlplus/file/ods.py +39 -6
- etlplus/file/pb.py +32 -19
- etlplus/file/properties.py +52 -12
- etlplus/file/proto.py +24 -12
- etlplus/file/psv.py +5 -5
- etlplus/file/rda.py +83 -9
- etlplus/file/rds.py +76 -8
- etlplus/file/sas7bdat.py +41 -11
- etlplus/file/sav.py +40 -11
- etlplus/file/sqlite.py +123 -5
- etlplus/file/tab.py +6 -7
- etlplus/file/toml.py +54 -12
- etlplus/file/xlsm.py +39 -5
- etlplus/file/xpt.py +61 -5
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/METADATA +44 -26
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/RECORD +33 -33
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/WHEEL +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/top_level.txt +0 -0
etlplus/file/sqlite.py
CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.sqlite` module.
 
-Helpers for reading/writing SQLite database (SQLITE) files (not
-implemented yet).
+Helpers for reading/writing SQLite database (SQLITE) files.
 
 Notes
 -----
@@ -19,11 +18,14 @@ Notes
 
 from __future__ import annotations
 
+import json
+import sqlite3
 from pathlib import Path
+from typing import Any
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -35,6 +37,69 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+DEFAULT_TABLE = 'data'
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _quote_identifier(value: str) -> str:
+    """Return a safely quoted SQL identifier."""
+    escaped = value.replace('"', '""')
+    return f'"{escaped}"'
+
+
+def _coerce_sql_value(value: Any) -> Any:
+    """Normalize values into SQLite-compatible types."""
+    if value is None or isinstance(value, (str, int, float, bool)):
+        return value
+    return json.dumps(value, ensure_ascii=True)
+
+
+def _infer_column_type(values: list[Any]) -> str:
+    """Infer a basic SQLite column type from sample values."""
+    seen_bool = False
+    seen_int = False
+    seen_float = False
+    seen_other = False
+    for value in values:
+        if value is None:
+            continue
+        if isinstance(value, bool):
+            seen_bool = True
+        elif isinstance(value, int):
+            seen_int = True
+        elif isinstance(value, float):
+            seen_float = True
+        else:
+            seen_other = True
+            break
+    if seen_other:
+        return 'TEXT'
+    if seen_float:
+        return 'REAL'
+    if seen_int or seen_bool:
+        return 'INTEGER'
+    return 'TEXT'
+
+
+def _resolve_table(tables: list[str]) -> str | None:
+    """Pick a table name for read operations."""
+    if not tables:
+        return None
+    if DEFAULT_TABLE in tables:
+        return DEFAULT_TABLE
+    if len(tables) == 1:
+        return tables[0]
+    raise ValueError(
+        'Multiple tables found in SQLite file; expected "data" or a '
+        'single table',
+    )
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
@@ -54,7 +119,23 @@ def read(
     JSONList
         The list of dictionaries read from the SQLITE file.
     """
-
+    conn = sqlite3.connect(str(path))
+    try:
+        conn.row_factory = sqlite3.Row
+        cursor = conn.execute(
+            'SELECT name FROM sqlite_master '
+            "WHERE type='table' AND name NOT LIKE 'sqlite_%' "
+            'ORDER BY name',
+        )
+        tables = [row[0] for row in cursor.fetchall()]
+        table = _resolve_table(tables)
+        if table is None:
+            return []
+        query = f'SELECT * FROM {_quote_identifier(table)}'
+        rows = conn.execute(query).fetchall()
+        return [dict(row) for row in rows]
+    finally:
+        conn.close()
 
 
 def write(
@@ -77,4 +158,41 @@ def write(
     int
         The number of rows written to the SQLITE file.
     """
-
+    records = normalize_records(data, 'SQLITE')
+    if not records:
+        return 0
+
+    columns = sorted({key for row in records for key in row})
+    if not columns:
+        return 0
+
+    column_values: dict[str, list[Any]] = {col: [] for col in columns}
+    for row in records:
+        for column in columns:
+            column_values[column].append(row.get(column))
+
+    column_defs = ', '.join(
+        f'{_quote_identifier(column)} {_infer_column_type(values)}'
+        for column, values in column_values.items()
+    )
+    table_ident = _quote_identifier(DEFAULT_TABLE)
+    insert_columns = ', '.join(_quote_identifier(column) for column in columns)
+    placeholders = ', '.join('?' for _ in columns)
+    insert_sql = (
+        f'INSERT INTO {table_ident} ({insert_columns}) VALUES ({placeholders})'
+    )
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    conn = sqlite3.connect(str(path))
+    try:
+        conn.execute(f'DROP TABLE IF EXISTS {table_ident}')
+        conn.execute(f'CREATE TABLE {table_ident} ({column_defs})')
+        rows = [
+            tuple(_coerce_sql_value(row.get(column)) for column in columns)
+            for row in records
+        ]
+        conn.executemany(insert_sql, rows)
+        conn.commit()
+    finally:
+        conn.close()
+    return len(records)
```
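The new `write()` always rebuilds a single table named `data`, inferring one column type per key and JSON-encoding any non-scalar value; `read()` reverses this. A minimal stdlib-only sketch of that round trip, with hypothetical records and file name standing in for the etlplus-internal `normalize_records` plumbing:

```python
import json
import sqlite3

records = [
    {'id': 1, 'name': 'ada', 'tags': ['x', 'y']},  # nested value -> JSON text
    {'id': 2, 'name': 'bob', 'tags': None},
]

conn = sqlite3.connect('demo.sqlite')
try:
    # Mirrors write(): fixed table name 'data', dropped and recreated on
    # every call, with column types inferred from values (INTEGER/TEXT here).
    conn.execute('DROP TABLE IF EXISTS "data"')
    conn.execute('CREATE TABLE "data" ("id" INTEGER, "name" TEXT, "tags" TEXT)')
    conn.executemany(
        'INSERT INTO "data" ("id", "name", "tags") VALUES (?, ?, ?)',
        [
            (r['id'], r['name'], None if r['tags'] is None else json.dumps(r['tags']))
            for r in records
        ],
    )
    conn.commit()

    # Mirrors read(): rows come back as plain dictionaries.
    conn.row_factory = sqlite3.Row
    print([dict(row) for row in conn.execute('SELECT * FROM "data"')])
finally:
    conn.close()
```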
etlplus/file/tab.py
CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.tab` module.
 
-Helpers for reading/writing "tab"-formatted (TAB) files (not implemented
-yet).
+Helpers for reading/writing "tab"-formatted (TAB) files.
 
 Notes
 -----
@@ -15,9 +14,8 @@ Notes
 - Header/metadata lines or multi-line records that break TSV assumptions.
 - Not actually tab-delimited despite the name.
 - Rule of thumb:
-  -
+  - This implementation treats TAB as tab-delimited text.
   - If the file has fixed-width fields, use :mod:`etlplus.file.fwf`.
-  - Otherwise, use :mod:`etlplus.file.tab` (i.e., this module).
 """
 
 from __future__ import annotations
@@ -26,7 +24,8 @@ from pathlib import Path
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._io import read_delimited
+from ._io import write_delimited
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -57,7 +56,7 @@ def read(
     JSONList
         The list of dictionaries read from the TAB file.
     """
-    return
+    return read_delimited(path, delimiter='\t')
 
 
 def write(
@@ -80,4 +79,4 @@ def write(
     int
         The number of rows written to the TAB file.
     """
-    return
+    return write_delimited(path, data, delimiter='\t')
```
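`read()` and `write()` now delegate to the shared `_io.read_delimited`/`_io.write_delimited` helpers with a tab delimiter. Those helpers are not shown in this diff; a plausible stdlib approximation of the behavior (header row, one dict per record, hypothetical `read_tab`/`write_tab` names) would be:

```python
import csv
from pathlib import Path

def read_tab(path: Path) -> list[dict]:
    # Tab-delimited text with a header row, matching the module's assumption.
    with path.open(newline='', encoding='utf-8') as handle:
        return list(csv.DictReader(handle, delimiter='\t'))

def write_tab(path: Path, rows: list[dict]) -> int:
    # Union of keys across records becomes the header, as in the SQLite writer.
    fieldnames = sorted({key for row in rows for key in row})
    with path.open('w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames, delimiter='\t')
        writer.writeheader()
        writer.writerows(rows)
    return len(rows)
```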
etlplus/file/toml.py
CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.toml` module.
 
-Helpers for reading/writing Tom's Obvious Minimal Language (TOML) files
-(not implemented yet).
+Helpers for reading/writing Tom's Obvious Minimal Language (TOML) files.
 
 Notes
 -----
@@ -19,11 +18,14 @@ Notes
 
 from __future__ import annotations
 
+import tomllib
 from pathlib import Path
+from typing import Any
+from typing import cast
 
 from ..types import JSONData
-from ..types import
-from . import
+from ..types import JSONDict
+from ._imports import get_optional_module
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -40,7 +42,7 @@ __all__ = [
 
 def read(
     path: Path,
-) ->
+) -> JSONData:
     """
     Read TOML content from *path*.
 
@@ -51,10 +53,18 @@
 
     Returns
     -------
-
-    The
+    JSONData
+        The structured data read from the TOML file.
+
+    Raises
+    ------
+    TypeError
+        If the TOML root is not a table (dictionary).
     """
-
+    payload = tomllib.loads(path.read_text(encoding='utf-8'))
+    if isinstance(payload, dict):
+        return payload
+    raise TypeError('TOML root must be a table (dict)')
 
 
 def write(
@@ -69,12 +79,44 @@ def write(
     path : Path
         Path to the TOML file on disk.
     data : JSONData
-        Data to write as TOML. Should be a
-        single dictionary.
+        Data to write as TOML. Should be a dictionary.
 
     Returns
     -------
     int
-        The number of
+        The number of records written to the TOML file.
+
+    Raises
+    ------
+    TypeError
+        If *data* is not a dictionary.
     """
-
+    if isinstance(data, list):
+        raise TypeError('TOML payloads must be a dict')
+    if not isinstance(data, dict):
+        raise TypeError('TOML payloads must be a dict')
+
+    toml_writer: Any
+    try:
+        toml_writer = get_optional_module(
+            'tomli_w',
+            error_message=(
+                'TOML write support requires optional dependency "tomli_w".\n'
+                'Install with: pip install tomli-w'
+            ),
+        )
+        content = toml_writer.dumps(cast(JSONDict, data))
+    except ImportError:
+        toml = get_optional_module(
+            'toml',
+            error_message=(
+                'TOML write support requires optional dependency "tomli_w" '
+                'or "toml".\n'
+                'Install with: pip install tomli-w'
+            ),
+        )
+        content = toml.dumps(cast(JSONDict, data))
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(content, encoding='utf-8')
+    return 1
```
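Reads use the stdlib `tomllib` (Python 3.11+); writes prefer the optional `tomli_w` package and fall back to `toml`. A short usage sketch of the resulting round trip, assuming `tomli-w` is installed and using a hypothetical payload and path:

```python
import tomllib  # stdlib since Python 3.11 (read-only)
from pathlib import Path

import tomli_w  # optional dependency preferred by write()

payload = {'title': 'demo', 'owner': {'name': 'ada'}}  # root must be a table

# Mirrors write(): serialize the dict and report one record written.
Path('demo.toml').write_text(tomli_w.dumps(payload), encoding='utf-8')

# Mirrors read(): always a dict; a non-table root raises TypeError.
assert tomllib.loads(Path('demo.toml').read_text(encoding='utf-8')) == payload
```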
etlplus/file/xlsm.py
CHANGED

```diff
@@ -1,8 +1,8 @@
 """
 :mod:`etlplus.file.xlsm` module.
 
-Helpers for reading/writing Microsoft Excel Macro-Enabled (XLSM)
-spreadsheet files
+Helpers for reading/writing Microsoft Excel Macro-Enabled (XLSM)
+spreadsheet files.
 
 Notes
 -----
@@ -20,10 +20,12 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._imports import get_pandas
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -53,8 +55,21 @@ def read(
     -------
     JSONList
         The list of dictionaries read from the XLSM file.
+
+    Raises
+    ------
+    ImportError
+        If optional dependencies for XLSM support are missing.
     """
-
+    pandas = get_pandas('XLSM')
+    try:
+        frame = pandas.read_excel(path)
+    except ImportError as e:  # pragma: no cover
+        raise ImportError(
+            'XLSM support requires optional dependency "openpyxl".\n'
+            'Install with: pip install openpyxl',
+        ) from e
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
@@ -76,5 +91,24 @@ def write(
     -------
     int
         The number of rows written to the XLSM file.
+
+    Raises
+    ------
+    ImportError
+        If optional dependencies for XLSM support are missing.
     """
-
+    records = normalize_records(data, 'XLSM')
+    if not records:
+        return 0
+
+    pandas = get_pandas('XLSM')
+    path.parent.mkdir(parents=True, exist_ok=True)
+    frame = pandas.DataFrame.from_records(records)
+    try:
+        frame.to_excel(path, index=False)
+    except ImportError as e:  # pragma: no cover
+        raise ImportError(
+            'XLSM support requires optional dependency "openpyxl".\n'
+            'Install with: pip install openpyxl',
+        ) from e
+    return len(records)
```
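Both XLSM paths go through pandas, with `openpyxl` doing the Open XML work underneath. A minimal round-trip sketch under that assumption (hypothetical records and file name; note that writing a fresh DataFrame this way produces a workbook without macros):

```python
import pandas as pd  # needs the optional "openpyxl" dependency for .xlsm

records = [{'id': 1, 'name': 'ada'}, {'id': 2, 'name': 'bob'}]

# Mirrors write(): one sheet, no index column.
pd.DataFrame.from_records(records).to_excel('demo.xlsm', index=False)

# Mirrors read(): the first sheet back as a list of dicts.
print(pd.read_excel('demo.xlsm').to_dict(orient='records'))
```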
etlplus/file/xpt.py
CHANGED

```diff
@@ -1,8 +1,7 @@
 """
 :mod:`etlplus.file.xpt` module.
 
-Helpers for reading/writing SAS Transport (XPT) files (not implemented
-yet).
+Helpers for reading/writing SAS Transport (XPT) files.
 
 Notes
 -----
@@ -20,10 +19,14 @@ Notes
 from __future__ import annotations
 
 from pathlib import Path
+from typing import Any
+from typing import cast
 
 from ..types import JSONData
 from ..types import JSONList
-from . import
+from ._imports import get_optional_module
+from ._imports import get_pandas
+from ._io import normalize_records
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -35,6 +38,27 @@ __all__ = [
 ]
 
 
+# SECTION: INTERNAL HELPERS ================================================ #
+
+
+def _get_pyreadstat() -> Any:
+    """Return the pyreadstat module, importing it on first use."""
+    return get_optional_module(
+        'pyreadstat',
+        error_message=(
+            'XPT support requires optional dependency "pyreadstat".\n'
+            'Install with: pip install pyreadstat'
+        ),
+    )
+
+
+def _raise_readstat_error(err: ImportError) -> None:
+    raise ImportError(
+        'XPT support requires optional dependency "pyreadstat".\n'
+        'Install with: pip install pyreadstat',
+    ) from err
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
@@ -54,7 +78,19 @@ def read(
     JSONList
         The list of dictionaries read from the XPT file.
     """
-
+    pandas = get_pandas('XPT')
+    pyreadstat = _get_pyreadstat()
+    reader = getattr(pyreadstat, 'read_xport', None)
+    if reader is not None:
+        frame, _meta = reader(str(path))
+        return cast(JSONList, frame.to_dict(orient='records'))
+    try:
+        frame = pandas.read_sas(path, format='xport')
+    except TypeError:
+        frame = pandas.read_sas(path)
+    except ImportError as err:  # pragma: no cover
+        _raise_readstat_error(err)
+    return cast(JSONList, frame.to_dict(orient='records'))
 
 
 def write(
@@ -76,5 +112,25 @@ def write(
     -------
     int
         The number of rows written to the XPT file.
+
+    Raises
+    ------
+    ImportError
+        If "pyreadstat" is not installed with write support.
     """
-
+    records = normalize_records(data, 'XPT')
+    if not records:
+        return 0
+
+    pandas = get_pandas('XPT')
+    pyreadstat = _get_pyreadstat()
+    writer = getattr(pyreadstat, 'write_xport', None)
+    if writer is None:
+        raise ImportError(
+            'XPT write support requires "pyreadstat" with write_xport().',
+        )
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    frame = pandas.DataFrame.from_records(records)
+    writer(frame, str(path))
+    return len(records)
```
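The module probes `pyreadstat` with `getattr` so builds lacking `read_xport`/`write_xport` fall back to `pandas.read_sas`. Assuming a pyreadstat version that ships both entry points, the happy path looks like this (hypothetical frame and file name):

```python
import pandas as pd
import pyreadstat  # optional dependency

frame = pd.DataFrame.from_records([{'id': 1, 'score': 3.5}])

# Mirrors write(): delegates to pyreadstat.write_xport() when it exists.
pyreadstat.write_xport(frame, 'demo.xpt')

# Mirrors read(): pyreadstat returns (DataFrame, metadata); only the
# frame is converted to records.
frame2, _meta = pyreadstat.read_xport('demo.xpt')
print(frame2.to_dict(orient='records'))
```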
{etlplus-0.16.10.dist-info → etlplus-0.17.2.dist-info}/METADATA
CHANGED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: etlplus
-Version: 0.16.10
+Version: 0.17.2
 Summary: A Swiss Army knife for simple ETL operations
 Home-page: https://github.com/Dagitali/ETLPlus
 Author: ETLPlus Team
@@ -32,6 +32,7 @@ Requires-Dist: typer>=0.21.0
 Requires-Dist: xlrd>=2.0.2
 Requires-Dist: xlwt>=1.3.0
 Provides-Extra: dev
+Requires-Dist: autopep8>=2.3.2; extra == "dev"
 Requires-Dist: black>=25.9.0; extra == "dev"
 Requires-Dist: build>=1.2.2; extra == "dev"
 Requires-Dist: flake8>=7.3.0; extra == "dev"
@@ -44,6 +45,17 @@ Requires-Dist: ruff>=0.14.4; extra == "dev"
 Provides-Extra: docs
 Requires-Dist: sphinx>=4.0.0; extra == "docs"
 Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
+Provides-Extra: file
+Requires-Dist: pymongo>=4.9.1; extra == "file"
+Requires-Dist: cbor2>=5.6.4; extra == "file"
+Requires-Dist: duckdb>=1.1.0; extra == "file"
+Requires-Dist: msgpack>=1.0.8; extra == "file"
+Requires-Dist: netCDF4>=1.7.2; extra == "file"
+Requires-Dist: odfpy>=1.4.1; extra == "file"
+Requires-Dist: pyreadr>=0.5.2; extra == "file"
+Requires-Dist: pyreadstat>=1.3.3; extra == "file"
+Requires-Dist: tomli-w>=1.2.0; extra == "file"
+Requires-Dist: xarray>=2024.9.0; extra == "file"
 Dynamic: home-page
 Dynamic: license-file
 Dynamic: requires-python
@@ -176,6 +188,12 @@ For development:
 pip install -e ".[dev]"
 ```
 
+For full file-format support (optional extras):
+
+```bash
+pip install -e ".[file]"
+```
+
 ## Quickstart
 
 Get up and running in under a minute.
@@ -240,10 +258,10 @@ Recognized file formats are listed in the tables below.
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
 | `csv` | Y | Y | Comma-Separated Values |
-| `dat` |
-| `fwf` |
-| `psv` |
-| `tab` |
+| `dat` | Y | Y | Generic data file, often delimited or fixed-width |
+| `fwf` | Y | Y | Fixed-Width Fields |
+| `psv` | Y | Y | Pipe-Separated Values |
+| `tab` | Y | Y | Often synonymous with TSV |
 | `tsv` | Y | Y | Tab-Separated Values |
 | `txt` | Y | Y | Plain text, often delimited or fixed-width |
 
@@ -253,11 +271,11 @@ Recognized file formats are listed in the tables below.
 | --- | --- | --- | --- |
 | `cfg` | N | N | Config-style key-value pairs |
 | `conf` | N | N | Config-style key-value pairs |
-| `ini` |
+| `ini` | Y | Y | Config-style key-value pairs |
 | `json` | Y | Y | JavaScript Object Notation |
 | `ndjson` | Y | Y | Newline-Delimited JSON |
-| `properties` |
-| `toml` |
+| `properties` | Y | Y | Java-style key-value pairs |
+| `toml` | Y | Y | Tom's Obvious Minimal Language |
 | `xml` | Y | Y | Extensible Markup Language |
 | `yaml` | Y | Y | YAML Ain't Markup Language |
 
@@ -265,7 +283,7 @@ Recognized file formats are listed in the tables below.
 
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
-| `arrow` |
+| `arrow` | Y | Y | Apache Arrow IPC |
 | `feather` | Y | Y | Apache Arrow Feather |
 | `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
 | `parquet` | Y | Y | Apache Parquet; common in Big Data |
@@ -275,48 +293,48 @@ Recognized file formats are listed in the tables below.
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
 | `avro` | Y | Y | Apache Avro |
-| `bson` |
-| `cbor` |
+| `bson` | Y | Y | Binary JSON; common with MongoDB exports/dumps |
+| `cbor` | Y | Y | Concise Binary Object Representation |
 | `ion` | N | N | Amazon Ion |
-| `msgpack` |
-| `pb` |
+| `msgpack` | Y | Y | MessagePack |
+| `pb` | Y | Y | Protocol Buffers (Google Protobuf) |
 | `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
-| `proto` |
+| `proto` | Y | Y | Protocol Buffers schema; often in .pb / .bin |
 
 #### Databases and Embedded Storage
 
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
 | `accdb` | N | N | Microsoft Access (newer format) |
-| `duckdb` |
+| `duckdb` | Y | Y | DuckDB |
 | `mdb` | N | N | Microsoft Access (older format) |
-| `sqlite` |
+| `sqlite` | Y | Y | SQLite |
 
 #### Spreadsheets
 
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
 | `numbers` | N | N | Apple Numbers |
-| `ods` |
+| `ods` | Y | Y | OpenDocument |
 | `wks` | N | N | Lotus 1-2-3 |
 | `xls` | Y | N | Microsoft Excel (BIFF; read-only) |
-| `xlsm` |
+| `xlsm` | Y | Y | Microsoft Excel Macro-Enabled (Open XML) |
 | `xlsx` | Y | Y | Microsoft Excel (Open XML) |
 
 #### Statistical / Scientific / Numeric Computing
 
 | Format | Read | Write | Description |
 | --- | --- | --- | --- |
-| `dta` |
-| `hdf5` |
+| `dta` | Y | Y | Stata |
+| `hdf5` | Y | N | Hierarchical Data Format |
 | `mat` | N | N | MATLAB |
-| `nc` |
-| `rda` |
-| `rds` |
-| `sas7bdat` |
-| `sav` |
+| `nc` | Y | Y | NetCDF |
+| `rda` | Y | Y | RData workspace/object |
+| `rds` | Y | Y | R data |
+| `sas7bdat` | Y | N | SAS data |
+| `sav` | Y | Y | SPSS data |
 | `sylk` | N | N | Symbolic Link |
-| `xpt` |
+| `xpt` | Y | Y | SAS Transport |
 | `zsav` | N | N | Compressed SPSS data |
 
 #### Logs and Event Streams
````
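The new `file` extra bundles the optional format dependencies added above. A small, hypothetical sketch for checking which of them are importable in the current environment (module names taken from the new `Requires-Dist` entries; `tomli-w` imports as `tomli_w`):

```python
from importlib import import_module

# A sample of the "file" extra's dependencies, by import name.
for name in ('cbor2', 'duckdb', 'msgpack', 'pyreadstat', 'tomli_w'):
    try:
        import_module(name)
        print(f'{name}: available')
    except ImportError:
        print(f'{name}: missing (pip install "etlplus[file]")')
```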