PyPI - etlplus - Versions diffs - 0.12.10__py3-none-any.whl → 0.14.3__py3-none-any.whl - Mend

etlplus 0.12.10__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57)

etlplus/README.md +1 -1
etlplus/__init__.py +1 -26
etlplus/api/__init__.py +10 -0
etlplus/api/config.py +36 -20
etlplus/api/endpoint_client.py +3 -3
etlplus/api/enums.py +51 -0
etlplus/api/pagination/client.py +1 -1
etlplus/api/rate_limiting/config.py +13 -1
etlplus/api/rate_limiting/rate_limiter.py +8 -11
etlplus/api/request_manager.py +11 -6
etlplus/api/transport.py +14 -2
etlplus/api/types.py +7 -6
etlplus/{run_helpers.py → api/utils.py} +205 -153
etlplus/cli/handlers.py +17 -7
etlplus/config/jobs.py +14 -4
etlplus/dag.py +103 -0
etlplus/enums.py +0 -32
etlplus/file/cfg.py +2 -2
etlplus/file/conf.py +2 -2
etlplus/file/dta.py +77 -0
etlplus/file/enums.py +10 -4
etlplus/file/hbs.py +78 -0
etlplus/file/hdf5.py +78 -0
etlplus/file/jinja2.py +78 -0
etlplus/file/mat.py +78 -0
etlplus/file/mustache.py +78 -0
etlplus/file/nc.py +78 -0
etlplus/file/numbers.py +75 -0
etlplus/file/ods.py +79 -0
etlplus/file/properties.py +13 -13
etlplus/file/rda.py +78 -0
etlplus/file/rds.py +78 -0
etlplus/file/sas7bdat.py +78 -0
etlplus/file/sav.py +77 -0
etlplus/file/sylk.py +77 -0
etlplus/file/toml.py +1 -1
etlplus/file/vm.py +78 -0
etlplus/file/wks.py +77 -0
etlplus/file/xlsm.py +79 -0
etlplus/file/xpt.py +78 -0
etlplus/file/zsav.py +77 -0
etlplus/{validation → ops}/README.md +2 -2
etlplus/ops/__init__.py +61 -0
etlplus/{extract.py → ops/extract.py} +78 -94
etlplus/{load.py → ops/load.py} +73 -93
etlplus/{run.py → ops/run.py} +140 -110
etlplus/{transform.py → ops/transform.py} +75 -68
etlplus/{validation → ops}/utils.py +80 -15
etlplus/{validate.py → ops/validate.py} +19 -9
etlplus/types.py +2 -2
{etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/METADATA +91 -60
{etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/RECORD +56 -35
etlplus/validation/__init__.py +0 -44
{etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/WHEEL +0 -0
{etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/entry_points.txt +0 -0
{etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/licenses/LICENSE +0 -0
{etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/top_level.txt +0 -0

etlplus/file/vm.py ADDED Viewed

@@ -0,0 +1,78 @@
+"""
+:mod:`etlplus.file.vm` module.
+Helpers for reading/writing Apache Velocity (VM) template files.
+Notes
+-----
+- A VM file is a text file used for generating HTML or other text formats
+    by combining templates with data.
+- Common cases:
+    - HTML templates.
+    - Email templates.
+    - Configuration files.
+- Rule of thumb:
+    - If you need to work with Apache Velocity template files, use this module
+        for reading and writing.
+"""
+from __future__ import annotations
+from pathlib import Path
+from ..types import JSONData
+from ..types import JSONList
+from . import stub
+# SECTION: EXPORTS ========================================================== #
+__all__ = [
+    'read',
+    'write',
+]
+# SECTION: FUNCTIONS ======================================================== #
+def read(
+    path: Path,
+) -> JSONList:
+    """
+    Read VM content from ``path``.
+    Parameters
+    ----------
+    path : Path
+        Path to the VM file on disk.
+    Returns
+    -------
+    JSONList
+        The list of dictionaries read from the VM file.
+    """
+    return stub.read(path, format_name='VM')
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` to VM file at ``path`` and return record count.
+    Parameters
+    ----------
+    path : Path
+        Path to the VM file on disk.
+    data : JSONData
+        Data to write as VM file. Should be a list of dictionaries or a single
+        dictionary.
+    Returns
+    -------
+    int
+        The number of rows written to the VM file.
+    """
+    return stub.write(path, data, format_name='VM')

etlplus/file/wks.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""
+:mod:`etlplus.file.wks` module.
+Helpers for reading/writing Lotus 1-2-3 (WKS) spreadsheet files.
+Notes
+-----
+- A WKS file is a spreadsheet file created using the Lotus 1-2-3 format.
+- Common cases:
+    - Reading data from legacy Lotus 1-2-3 spreadsheets.
+    - Writing data to Lotus 1-2-3 format for compatibility.
+    - Converting WKS files to more modern formats.
+- Rule of thumb:
+    - If you need to work with Lotus 1-2-3 spreadsheet files, use this module
+        for reading and writing.
+"""
+from __future__ import annotations
+from pathlib import Path
+from ..types import JSONData
+from ..types import JSONList
+from . import stub
+# SECTION: EXPORTS ========================================================== #
+__all__ = [
+    'read',
+    'write',
+]
+# SECTION: FUNCTIONS ======================================================== #
+def read(
+    path: Path,
+) -> JSONList:
+    """
+    Read WKS content from ``path``.
+    Parameters
+    ----------
+    path : Path
+        Path to the WKS file on disk.
+    Returns
+    -------
+    JSONList
+        The list of dictionaries read from the WKS file.
+    """
+    return stub.read(path, format_name='WKS')
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` to WKS file at ``path`` and return record count.
+    Parameters
+    ----------
+    path : Path
+        Path to the WKS file on disk.
+    data : JSONData
+        Data to write as WKS file. Should be a list of dictionaries or a
+        single dictionary.
+    Returns
+    -------
+    int
+        The number of rows written to the WKS file.
+    """
+    return stub.write(path, data, format_name='WKS')

etlplus/file/xlsm.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""
+:mod:`etlplus.file.xlsm` module.
+Helpers for reading/writing Microsoft Excel Macro-Enabled (XLSM) spreadsheet
+files.
+Notes
+-----
+- An XLSM file is a spreadsheet file created using the Microsoft Excel Macro-
+    Enabled (Open XML) format.
+- Common cases:
+    - Reading data from Excel Macro-Enabled spreadsheets.
+    - Writing data to Excel Macro-Enabled format for compatibility.
+    - Converting XLSM files to more modern formats.
+- Rule of thumb:
+    - If you need to work with Excel Macro-Enabled spreadsheet files, use this
+        module for reading and writing.
+"""
+from __future__ import annotations
+from pathlib import Path
+from ..types import JSONData
+from ..types import JSONList
+from . import stub
+# SECTION: EXPORTS ========================================================== #
+__all__ = [
+    'read',
+    'write',
+]
+# SECTION: FUNCTIONS ======================================================== #
+def read(
+    path: Path,
+) -> JSONList:
+    """
+    Read XLSM content from ``path``.
+    Parameters
+    ----------
+    path : Path
+        Path to the XLSM file on disk.
+    Returns
+    -------
+    JSONList
+        The list of dictionaries read from the XLSM file.
+    """
+    return stub.read(path, format_name='XLSM')
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` to XLSM file at ``path`` and return record count.
+    Parameters
+    ----------
+    path : Path
+        Path to the XLSM file on disk.
+    data : JSONData
+        Data to write as XLSM file. Should be a list of dictionaries or a
+        single dictionary.
+    Returns
+    -------
+    int
+        The number of rows written to the XLSM file.
+    """
+    return stub.write(path, data, format_name='XLSM')

etlplus/file/xpt.py ADDED Viewed

@@ -0,0 +1,78 @@
+"""
+:mod:`etlplus.file.xpt` module.
+Helpers for reading/writing SAS Transport (XPT) files.
+Notes
+-----
+- A SAS Transport (XPT) file is a standardized file format used to transfer
+    SAS datasets between different systems.
+- Common cases:
+    - Sharing datasets between different SAS installations.
+    - Archiving datasets in a platform-independent format.
+    - Importing/exporting data to/from statistical software that supports XPT.
+- Rule of thumb:
+    - If you need to work with XPT files, use this module for reading
+        and writing.
+"""
+from __future__ import annotations
+from pathlib import Path
+from ..types import JSONData
+from ..types import JSONList
+from . import stub
+# SECTION: EXPORTS ========================================================== #
+__all__ = [
+    'read',
+    'write',
+]
+# SECTION: FUNCTIONS ======================================================== #
+def read(
+    path: Path,
+) -> JSONList:
+    """
+    Read XPT content from ``path``.
+    Parameters
+    ----------
+    path : Path
+        Path to the XPT file on disk.
+    Returns
+    -------
+    JSONList
+        The list of dictionaries read from the XPT file.
+    """
+    return stub.read(path, format_name='XPT')
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` to XPT file at ``path`` and return record count.
+    Parameters
+    ----------
+    path : Path
+        Path to the XPT file on disk.
+    data : JSONData
+        Data to write as XPT file. Should be a list of dictionaries or a
+        single dictionary.
+    Returns
+    -------
+    int
+        The number of rows written to the XPT file.
+    """
+    return stub.write(path, data, format_name='XPT')

etlplus/file/zsav.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""
+:mod:`etlplus.file.zsav` module.
+Helpers for reading/writing compressed SPSS (ZSAV) data files.
+Notes
+-----
+- A ZSAV file is a compressed binary file format used by SPSS to store
+    datasets, including variables, labels, and data types.
+- Common cases:
+    - Reading compressed data for analysis in Python.
+    - Writing processed data back to compressed SPSS format.
+- Rule of thumb:
+    - If you need to work with compressed SPSS data files, use this module for
+        reading and writing.
+"""
+from __future__ import annotations
+from pathlib import Path
+from ..types import JSONData
+from ..types import JSONList
+from . import stub
+# SECTION: EXPORTS ========================================================== #
+__all__ = [
+    'read',
+    'write',
+]
+# SECTION: FUNCTIONS ======================================================== #
+def read(
+    path: Path,
+) -> JSONList:
+    """
+    Read ZSAV content from ``path``.
+    Parameters
+    ----------
+    path : Path
+        Path to the ZSAV file on disk.
+    Returns
+    -------
+    JSONList
+        The list of dictionaries read from the ZSAV file.
+    """
+    return stub.read(path, format_name='ZSAV')
+def write(
+    path: Path,
+    data: JSONData,
+) -> int:
+    """
+    Write ``data`` to ZSAV file at ``path`` and return record count.
+    Parameters
+    ----------
+    path : Path
+        Path to the ZSAV file on disk.
+    data : JSONData
+        Data to write as ZSAV file. Should be a list of dictionaries or a
+        single dictionary.
+    Returns
+    -------
+    int
+        The number of rows written to the ZSAV file.
+    """
+    return stub.write(path, data, format_name='ZSAV')

etlplus/{validation → ops}/README.md RENAMED Viewed

@@ -1,4 +1,4 @@
-# etlplus.validation subpackage
+# etlplus.ops subpackage
 Documentation for the `etlplus.validation` subpackage: data validation utilities and helpers.
@@ -8,7 +8,7 @@ Documentation for the `etlplus.validation` subpackage: data validation utilities
 Back to project overview: see the top-level [README](../../README.md).
-- [etlplus.validation subpackage](#etlplusvalidation-subpackage)
+- [etlplus.ops subpackage](#etlplusops-subpackage)
   - [Validation Features](#validation-features)
   - [Defining Validation Rules](#defining-validation-rules)
   - [Example: Validating Data](#example-validating-data)

etlplus/ops/__init__.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""
+:mod:`etlplus.ops` package.
+Data operations helpers.
+Importing :mod:`etlplus.ops` exposes the coarse-grained helpers most users care
+about: ``extract``, ``transform``, ``load``, ``validate``, ``run``, and
+``run_pipeline``. Each helper delegates to the richer modules under
+``etlplus.ops.*`` while presenting a compact public API surface. Conditional
+validation orchestration is available via
+:func:`etlplus.ops.utils.maybe_validate`. The legacy compatibility module
+:mod:`etlplus.ops.__init__validation` is deprecated in favor of this package.
+Examples
+--------
+>>> from etlplus.ops import extract, transform
+>>> raw = extract('file', 'input.json')
+>>> curated = transform(raw, {'select': ['id', 'name']})
+>>> from etlplus.ops.utils import maybe_validate
+>>> payload = {'name': 'Alice'}
+>>> rules = {'required': ['name']}
+>>> def validator(data, config):
+...     missing = [field for field in config['required'] if field not in data]
+...     return {'valid': not missing, 'errors': missing, 'data': data}
+>>> maybe_validate(
+...     payload,
+...     when='both',
+...     enabled=True,
+...     rules=rules,
+...     phase='before_transform',
+...     severity='warn',
+...     validate_fn=validator,
+...     print_json_fn=lambda message: message,
+... )
+{'name': 'Alice'}
+See Also
+--------
+:mod:`etlplus.ops.run`
+:mod:`etlplus.ops.utils`
+"""
+from .extract import extract
+from .load import load
+from .run import run
+from .run import run_pipeline
+from .transform import transform
+from .validate import validate
+# SECTION: EXPORTS ========================================================== #
+__all__ = [
+    'extract',
+    'load',
+    'run',
+    'run_pipeline',
+    'transform',
+    'validate',
+]

etlplus/{extract.py → ops/extract.py} RENAMED Viewed

@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.extract` module.
+:mod:`etlplus.ops.extract` module.
 Helpers to extract data from files, databases, and REST APIs.
 """
@@ -10,56 +10,81 @@ from pathlib import Path
 from typing import Any
 from typing import cast
-import requests  # type: ignore[import]
-from .enums import DataConnectorType
-from .enums import HttpMethod
-from .file import File
-from .file import FileFormat
-from .types import JSONData
-from .types import JSONDict
-from .types import JSONList
-from .types import StrPath
+from ..api import HttpMethod
+from ..api.utils import resolve_request
+from ..enums import DataConnectorType
+from ..file import File
+from ..file import FileFormat
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import JSONList
+from ..types import StrPath
 # SECTION: FUNCTIONS ======================================================== #
-# -- File Extraction -- #
-def extract_from_file(
-    file_path: StrPath,
-    file_format: FileFormat | str | None = FileFormat.JSON,
+def extract_from_api(
+    url: str,
+    method: HttpMethod | str = HttpMethod.GET,
+    **kwargs: Any,
 ) -> JSONData:
     """
-    Extract (semi-)structured data from a local file.
+    Extract data from a REST API.
     Parameters
     ----------
-    file_path : StrPath
-        Source file path.
-    file_format : FileFormat | str | None, optional
-        File format to parse. If ``None``, infer from the filename
-        extension. Defaults to `'json'` for backward compatibility when
-        explicitly provided.
+    url : str
+        API endpoint URL.
+    method : HttpMethod | str, optional
+        HTTP method to use. Defaults to ``GET``.
+    **kwargs : Any
+        Extra arguments forwarded to the underlying ``requests`` call
+        (for example, ``timeout``). To use a pre-configured
+        :class:`requests.Session`, provide it via ``session``.
+        When omitted, ``timeout`` defaults to 10 seconds.
     Returns
     -------
     JSONData
-        Parsed data as a mapping or a list of mappings.
-    """
-    path = Path(file_path)
-    # If no explicit format is provided, let File infer from extension.
-    if file_format is None:
-        return File(path, None).read()
-    fmt = FileFormat.coerce(file_format)
+        Parsed JSON payload, or a fallback object with raw text.
-    # Let file module perform existence and format validation.
-    return File(path, fmt).read()
+    Raises
+    ------
+    TypeError
+        If a provided ``session`` does not expose the required HTTP
+        method (for example, ``get``).
+    """
+    timeout = kwargs.pop('timeout', None)
+    session = kwargs.pop('session', None)
+    request_callable, timeout, _ = resolve_request(
+        method,
+        session=session,
+        timeout=timeout,
+    )
+    response = request_callable(url, timeout=timeout, **kwargs)
+    response.raise_for_status()
+    content_type = response.headers.get('content-type', '').lower()
+    if 'application/json' in content_type:
+        try:
+            payload: Any = response.json()
+        except ValueError:
+            # Malformed JSON despite content-type; fall back to text
+            return {
+                'content': response.text,
+                'content_type': content_type,
+            }
+        if isinstance(payload, dict):
+            return cast(JSONDict, payload)
+        if isinstance(payload, list):
+            if all(isinstance(x, dict) for x in payload):
+                return cast(JSONList, payload)
+            # Coerce non-dict array items into objects for consistency
+            return [{'value': x} for x in payload]
+        # Fallback: wrap scalar JSON
+        return {'value': payload}
-# -- Database Extraction (Placeholder) -- #
+    return {'content': response.text, 'content_type': content_type}
 def extract_from_database(
@@ -94,77 +119,36 @@ def extract_from_database(
     ]
-# -- REST API Extraction -- #
-def extract_from_api(
-    url: str,
-    method: HttpMethod | str = HttpMethod.GET,
-    **kwargs: Any,
+def extract_from_file(
+    file_path: StrPath,
+    file_format: FileFormat | str | None = FileFormat.JSON,
 ) -> JSONData:
     """
-    Extract data from a REST API.
+    Extract (semi-)structured data from a local file.
     Parameters
     ----------
-    url : str
-        API endpoint URL.
-    method : HttpMethod | str, optional
-        HTTP method to use. Defaults to ``GET``.
-    **kwargs : Any
-        Extra arguments forwarded to the underlying ``requests`` call
-        (for example, ``timeout``). To use a pre-configured
-        :class:`requests.Session`, provide it via ``session``.
+    file_path : StrPath
+        Source file path.
+    file_format : FileFormat | str | None, optional
+        File format to parse. If ``None``, infer from the filename
+        extension. Defaults to `'json'` for backward compatibility when
+        explicitly provided.
     Returns
     -------
     JSONData
-        Parsed JSON payload, or a fallback object with raw text.
-    Raises
-    ------
-    TypeError
-        If a provided ``session`` does not expose the required HTTP
-        method (for example, ``get``).
+        Parsed data as a mapping or a list of mappings.
     """
-    http_method = HttpMethod.coerce(method)
-    # Apply a conservative timeout to guard against hanging requests.
-    timeout = kwargs.pop('timeout', 10.0)
-    session = kwargs.pop('session', None)
-    requester = session or requests
-    request_callable = getattr(requester, http_method.value, None)
-    if not callable(request_callable):
-        raise TypeError(
-            'Session object must supply a callable'
-            f'"{http_method.value}" method',
-        )
-    response = request_callable(url, timeout=timeout, **kwargs)
-    response.raise_for_status()
+    path = Path(file_path)
-    content_type = response.headers.get('content-type', '').lower()
-    if 'application/json' in content_type:
-        try:
-            payload: Any = response.json()
-        except ValueError:
-            # Malformed JSON despite content-type; fall back to text
-            return {
-                'content': response.text,
-                'content_type': content_type,
-            }
-        if isinstance(payload, dict):
-            return cast(JSONDict, payload)
-        if isinstance(payload, list):
-            if all(isinstance(x, dict) for x in payload):
-                return cast(JSONList, payload)
-            # Coerce non-dict array items into objects for consistency
-            return [{'value': x} for x in payload]
-        # Fallback: wrap scalar JSON
-        return {'value': payload}
+    # If no explicit format is provided, let File infer from extension.
+    if file_format is None:
+        return File(path, None).read()
+    fmt = FileFormat.coerce(file_format)
-    return {'content': response.text, 'content_type': content_type}
+    # Let file module perform existence and format validation.
+    return File(path, fmt).read()
 # -- Orchestration -- #

etlplus 0.12.10__py3-none-any.whl → 0.14.3__py3-none-any.whl

etlplus 0.12.10__py3-none-any.whl → 0.14.3__py3-none-any.whl