etlplus 0.12.12__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. etlplus/README.md +2 -2
  2. etlplus/__init__.py +1 -26
  3. etlplus/api/README.md +2 -2
  4. etlplus/api/__init__.py +10 -0
  5. etlplus/api/config.py +36 -20
  6. etlplus/api/endpoint_client.py +3 -3
  7. etlplus/api/enums.py +51 -0
  8. etlplus/api/pagination/client.py +1 -1
  9. etlplus/api/rate_limiting/config.py +13 -1
  10. etlplus/api/rate_limiting/rate_limiter.py +8 -11
  11. etlplus/api/request_manager.py +11 -6
  12. etlplus/api/transport.py +14 -2
  13. etlplus/api/types.py +7 -6
  14. etlplus/{run_helpers.py → api/utils.py} +209 -153
  15. etlplus/cli/README.md +2 -2
  16. etlplus/cli/handlers.py +19 -9
  17. etlplus/config/README.md +31 -33
  18. etlplus/config/__init__.py +9 -32
  19. etlplus/config/types.py +0 -64
  20. etlplus/dag.py +103 -0
  21. etlplus/database/README.md +2 -2
  22. etlplus/enums.py +0 -32
  23. etlplus/file/README.md +2 -2
  24. etlplus/file/enums.py +1 -1
  25. etlplus/{validation → ops}/README.md +2 -2
  26. etlplus/ops/__init__.py +61 -0
  27. etlplus/{extract.py → ops/extract.py} +78 -94
  28. etlplus/{load.py → ops/load.py} +73 -93
  29. etlplus/{run.py → ops/run.py} +153 -118
  30. etlplus/{transform.py → ops/transform.py} +75 -68
  31. etlplus/{validation → ops}/utils.py +80 -15
  32. etlplus/{validate.py → ops/validate.py} +19 -9
  33. etlplus/templates/README.md +2 -2
  34. etlplus/types.py +2 -2
  35. etlplus/workflow/README.md +52 -0
  36. etlplus/workflow/__init__.py +43 -0
  37. etlplus/{config → workflow}/connector.py +17 -16
  38. etlplus/workflow/dag.py +105 -0
  39. etlplus/{config → workflow}/jobs.py +31 -15
  40. etlplus/{config → workflow}/pipeline.py +11 -3
  41. etlplus/{config → workflow}/profile.py +8 -5
  42. etlplus/workflow/types.py +115 -0
  43. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/METADATA +91 -60
  44. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/RECORD +49 -43
  45. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/WHEEL +1 -1
  46. etlplus/validation/__init__.py +0 -44
  47. etlplus/{config → workflow}/utils.py +0 -0
  48. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/entry_points.txt +0 -0
  49. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/licenses/LICENSE +0 -0
  50. {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/top_level.txt +0 -0
etlplus/{run.py → ops/run.py}
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.run` module.
+:mod:`etlplus.ops.run` module.
 
 A module for running ETL jobs defined in YAML configurations.
 """
@@ -9,126 +9,78 @@ from __future__ import annotations
 from collections.abc import Mapping
 from typing import Any
 from typing import Final
-from typing import TypedDict
 from typing import cast
 from urllib.parse import urlsplit
 from urllib.parse import urlunsplit
 
-import requests  # type: ignore[import]
-
-from .api import EndpointClient  # noqa: F401 (re-exported for tests)
-from .api import PaginationConfigMap
-from .api import RequestOptions
-from .api import RetryPolicy
-from .api import Url
-from .config import load_pipeline_config
-from .enums import DataConnectorType
+from ..api import EndpointClient  # noqa: F401 (re-exported for tests)
+from ..api import HttpMethod
+from ..api import PaginationConfigMap
+from ..api import RequestOptions
+from ..api import compose_api_request_env
+from ..api import compose_api_target_env
+from ..api import paginate_with_client
+from ..enums import DataConnectorType
+from ..file import FileFormat
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import PipelineConfig
+from ..types import StrPath
+from ..types import Timeout
+from ..utils import print_json
+from ..workflow import load_pipeline_config
 from .extract import extract
 from .load import load
-from .run_helpers import compose_api_request_env
-from .run_helpers import compose_api_target_env
-from .run_helpers import paginate_with_client
 from .transform import transform
-from .types import JSONDict
-from .types import Timeout
-from .utils import print_json
+from .utils import maybe_validate
 from .validate import validate
-from .validation.utils import maybe_validate
 
 
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = ['run']
-
-
-# SECTION: TYPED DICTS ====================================================== #
-
+__all__ = [
+    # Functions
+    'run',
+    'run_pipeline',
+]
 
-class BaseApiHttpEnv(TypedDict, total=False):
-    """
-    Common HTTP request environment for API interactions.
-
-    Fields shared by both source-side and target-side API operations.
-    """
 
-    # Request details
-    url: Url | None
-    headers: dict[str, str]
-    timeout: Timeout
-
-    # Session
-    session: requests.Session | None
-
-
-class ApiRequestEnv(BaseApiHttpEnv, total=False):
-    """
-    Composed request environment for API sources.
+# SECTION: CONSTANTS ======================================================== #
 
-    Returned by ``compose_api_request_env`` (run_helpers) and consumed by the
-    API extract branch. Values are fully merged with endpoint/API defaults and
-    job-level overrides, preserving the original precedence and behavior.
-    """
 
-    # Client
-    use_endpoints: bool
-    base_url: str | None
-    base_path: str | None
-    endpoints_map: dict[str, str] | None
-    endpoint_key: str | None
+DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'
 
-    # Request
-    params: dict[str, Any]
-    pagination: PaginationConfigMap | None
-    sleep_seconds: float
 
-    # Reliability
-    retry: RetryPolicy | None
-    retry_network_errors: bool
+# SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
-class ApiTargetEnv(BaseApiHttpEnv, total=False):
-    """
-    Composed request environment for API targets.
-
-    Returned by ``compose_api_target_env`` (run_helpers) and consumed by the
-    API load branch. Values are merged from the target object, optional
-    API/endpoint reference, and job-level overrides, preserving original
-    precedence and behavior.
-
-    Notes
-    -----
-    - Precedence for inherited values matches original logic:
-      overrides -> target -> API profile defaults.
-    - Target composition does not include pagination/rate-limit/retry since
-      loads are single-request operations; only headers/timeout/session
-      apply.
+def _resolve_validation_config(
+    job_obj: Any,
+    cfg: Any,
+) -> tuple[bool, dict[str, Any], str, str]:
     """
+    Resolve validation settings for a job with safe defaults.
 
-    # Request
-    method: str | None
-
-
-class SessionConfig(TypedDict, total=False):
-    """
-    Minimal session configuration schema accepted by this runner.
+    Parameters
+    ----------
+    job_obj : Any
+        Job configuration object.
+    cfg : Any
+        Pipeline configuration object with validations.
 
-    Keys mirror common requests.Session options; all are optional.
+    Returns
+    -------
+    tuple[bool, dict[str, Any], str, str]
+        Tuple of (enabled, rules, severity, phase).
     """
+    val_ref = job_obj.validate
+    if val_ref is None:
+        return False, {}, 'error', 'before_transform'
 
-    headers: Mapping[str, Any]
-    params: Mapping[str, Any]
-    auth: Any  # (user, pass) tuple or requests-compatible auth object
-    verify: bool | str
-    cert: Any  # str or (cert, key)
-    proxies: Mapping[str, Any]
-    cookies: Mapping[str, Any]
-    trust_env: bool
-
-
-# SECTION: CONSTANTS ======================================================== #
-
-
-DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'
+    rules = cfg.validations.get(val_ref.ruleset, {})
+    severity = (val_ref.severity or 'error').lower()
+    phase = (val_ref.phase or 'before_transform').lower()
+    return True, rules, severity, phase
 
 
 # SECTION: FUNCTIONS ======================================================== #
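
For downstream code, the hunk above amounts to a package re-layout: the runner moved from etlplus.run to etlplus.ops.run, the run_helpers utilities now live under etlplus.api, and pipeline-config loading moved from etlplus.config to etlplus.workflow. A minimal caller-side migration sketch, assuming these names remain importable at the new locations shown in the imports above::

    # Before (0.12.12)
    from etlplus.run import run
    from etlplus.config import load_pipeline_config
    from etlplus.run_helpers import paginate_with_client

    # After (0.15.0)
    from etlplus.ops.run import run
    from etlplus.workflow import load_pipeline_config
    from etlplus.api import paginate_with_client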
@@ -207,12 +159,15 @@ def run(
             and env.get('endpoint_key')
         ):
             # Construct client using module-level EndpointClient so tests
-            # can monkeypatch this class on etlplus.run.
+            # can monkeypatch this class on etlplus.ops.run.
             ClientClass = EndpointClient  # noqa: N806
             client = ClientClass(
-                base_url=cast(str, env['base_url']),
+                base_url=cast(str, env.get('base_url')),
                 base_path=cast(str | None, env.get('base_path')),
-                endpoints=cast(dict[str, str], env['endpoints_map']),
+                endpoints=cast(
+                    dict[str, str],
+                    env.get('endpoints_map', {}),
+                ),
                 retry=env.get('retry'),
                 retry_network_errors=bool(
                     env.get('retry_network_errors', False),
@@ -221,7 +176,7 @@
             )
             data = paginate_with_client(
                 client,
-                cast(str, env['endpoint_key']),
+                cast(str, env.get('endpoint_key')),
                 env.get('params'),
                 env.get('headers'),
                 env.get('timeout'),
@@ -263,19 +218,10 @@ def run(
         # keep explicit guard for defensive programming.
         raise ValueError(f'Unsupported source type: {stype_raw}')
 
-    # DRY: unified validation helper (pre/post transform)
-    val_ref = job_obj.validate
-    enabled_validation = val_ref is not None
-    if enabled_validation:
-        # Type narrowing for static checkers
-        assert val_ref is not None
-        rules = cfg.validations.get(val_ref.ruleset, {})
-        severity = (val_ref.severity or 'error').lower()
-        phase = (val_ref.phase or 'before_transform').lower()
-    else:
-        rules = {}
-        severity = 'error'
-        phase = 'before_transform'
+    enabled_validation, rules, severity, phase = _resolve_validation_config(
+        job_obj,
+        cfg,
+    )
 
     # Pre-transform validation (if configured).
     data = maybe_validate(
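
With this refactor, the inline branch collapses into one tuple unpack. Per the _resolve_validation_config helper added earlier in this file, a job whose validate reference is unset resolves to disabled validation with safe defaults::

    enabled, rules, severity, phase = _resolve_validation_config(job_obj, cfg)
    # job_obj.validate is None  ->  (False, {}, 'error', 'before_transform')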
@@ -333,12 +279,14 @@ def run(
         if not url_t:
             raise ValueError('API target missing "url"')
         kwargs_t: dict[str, Any] = {}
-        if env_t.get('headers'):
-            kwargs_t['headers'] = cast(dict[str, str], env_t['headers'])
+        headers = env_t.get('headers')
+        if headers:
+            kwargs_t['headers'] = cast(dict[str, str], headers)
         if env_t.get('timeout') is not None:
-            kwargs_t['timeout'] = env_t['timeout']
-        if env_t.get('session') is not None:
-            kwargs_t['session'] = env_t['session']
+            kwargs_t['timeout'] = env_t.get('timeout')
+        session = env_t.get('session')
+        if session is not None:
+            kwargs_t['session'] = session
         result = load(
             data,
             'api',
@@ -361,3 +309,90 @@
     # Return the terminal load result directly; callers (e.g., CLI) can wrap
     # it in their own envelope when needed.
     return cast(JSONDict, result)
+
+
+def run_pipeline(
+    *,
+    source_type: DataConnectorType | str | None = None,
+    source: StrPath | JSONData | None = None,
+    operations: PipelineConfig | None = None,
+    target_type: DataConnectorType | str | None = None,
+    target: StrPath | None = None,
+    file_format: FileFormat | str | None = None,
+    method: HttpMethod | str | None = None,
+    **kwargs: Any,
+) -> JSONData:
+    """
+    Run a single extract-transform-load flow without a YAML config.
+
+    Parameters
+    ----------
+    source_type : DataConnectorType | str | None, optional
+        Connector type for extraction. When ``None``, ``source`` is assumed
+        to be pre-loaded data and extraction is skipped.
+    source : StrPath | JSONData | None, optional
+        Data source for extraction or the pre-loaded payload when
+        ``source_type`` is ``None``.
+    operations : PipelineConfig | None, optional
+        Transform configuration passed to :func:`etlplus.ops.transform`.
+    target_type : DataConnectorType | str | None, optional
+        Connector type for loading. When ``None``, load is skipped and the
+        transformed data is returned.
+    target : StrPath | None, optional
+        Target for loading (file path, connection string, or API URL).
+    file_format : FileFormat | str | None, optional
+        File format for file sources/targets (forwarded to extract/load).
+    method : HttpMethod | str | None, optional
+        HTTP method for API loads (forwarded to :func:`etlplus.ops.load`).
+    **kwargs : Any
+        Extra keyword arguments forwarded to extract/load for API options
+        (headers, timeout, session, etc.).
+
+    Returns
+    -------
+    JSONData
+        Transformed data or the load result payload.
+
+    Raises
+    ------
+    TypeError
+        Raised when extracted data is not a dict or list of dicts and no
+        target is specified.
+    ValueError
+        Raised when required source/target inputs are missing.
+    """
+    if source_type is None:
+        if source is None:
+            raise ValueError('source or source_type is required')
+        data = source
+    else:
+        if source is None:
+            raise ValueError('source is required when source_type is set')
+        data = extract(
+            source_type,
+            cast(StrPath, source),
+            file_format=file_format,
+            **kwargs,
+        )
+
+    if operations:
+        data = transform(data, operations)
+
+    if target_type is None:
+        if not isinstance(data, (dict, list)):
+            raise TypeError(
+                f'Expected data to be dict or list of dicts, '
+                f'got {type(data).__name__}',
+            )
+        return data
+    if target is None:
+        raise ValueError('target is required when target_type is set')
+
+    return load(
+        data,
+        target_type,
+        target,
+        file_format=file_format,
+        method=method,
+        **kwargs,
+    )
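
A minimal usage sketch for the new run_pipeline entry point, assuming in-memory records and no target (so the transformed data is returned directly); the operations spec follows the string-keyed form shown in the transform docstring::

    from etlplus.ops.run import run_pipeline

    records = [{'name': 'Ada', 'age': 36}, {'name': 'Bob', 'age': 17}]
    adults = run_pipeline(
        # source_type is None, so source is treated as pre-loaded data
        source=records,
        operations={'filter': {'field': 'age', 'op': 'gte', 'value': 18}},
    )
    # expected: the records with age >= 18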
etlplus/{transform.py → ops/transform.py}
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.transform` module.
+:mod:`etlplus.ops.transform` module.
 
 Helpers to filter, map/rename, select, sort, aggregate, and otherwise
 transform JSON-like records (dicts and lists of dicts).
@@ -24,7 +24,7 @@ Basic pipeline with strings::
 
 Using enums for keys and functions::
 
-    from .enums import PipelineStep, OperatorName, AggregateName
+    from etlplus.enums import PipelineStep, OperatorName, AggregateName
     ops = {
         PipelineStep.FILTER: {
             'field': 'age', 'op': OperatorName.GTE, 'value': 18
@@ -44,28 +44,28 @@ from collections.abc import Sequence
 from typing import Any
 from typing import cast
 
-from .enums import AggregateName
-from .enums import OperatorName
-from .enums import PipelineStep
+from ..enums import AggregateName
+from ..enums import OperatorName
+from ..enums import PipelineStep
+from ..types import AggregateFunc
+from ..types import AggregateSpec
+from ..types import FieldName
+from ..types import Fields
+from ..types import FilterSpec
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import JSONList
+from ..types import MapSpec
+from ..types import OperatorFunc
+from ..types import PipelineConfig
+from ..types import PipelineStepName
+from ..types import SortKey
+from ..types import StepApplier
+from ..types import StepOrSteps
+from ..types import StepSpec
+from ..types import StrPath
+from ..utils import to_number
 from .load import load_data
-from .types import AggregateFunc
-from .types import AggregateSpec
-from .types import FieldName
-from .types import Fields
-from .types import FilterSpec
-from .types import JSONData
-from .types import JSONDict
-from .types import JSONList
-from .types import MapSpec
-from .types import OperatorFunc
-from .types import PipelineConfig
-from .types import PipelineStepName
-from .types import SortKey
-from .types import StepApplier
-from .types import StepOrSteps
-from .types import StepSpec
-from .types import StrPath
-from .utils import to_number
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -730,15 +730,16 @@ def _is_plain_fields_list(obj: Any) -> bool:
 
 
 _PIPELINE_STEPS: tuple[PipelineStepName, ...] = (
+    'aggregate',
     'filter',
     'map',
     'select',
     'sort',
-    'aggregate',
 )
 
 
 _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
+    'aggregate': _apply_aggregate_step,
     'filter': _apply_filter_step,
     'map': _apply_map_step,
     'select': _apply_select_step,
@@ -746,7 +747,54 @@ _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
 }
 
 
-# SECTION: EXPORTS ========================================================== #
+# SECTION: FUNCTIONS ======================================================== #
+
+
+# -- Helpers -- #
+
+
+def apply_aggregate(
+    records: JSONList,
+    operation: AggregateSpec,
+) -> JSONDict:
+    """
+    Aggregate a numeric field or count presence.
+
+    Parameters
+    ----------
+    records : JSONList
+        Records to aggregate.
+    operation : AggregateSpec
+        Dict with keys ``field`` and ``func``. ``func`` is one of
+        ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
+        A callable may also be supplied for ``func``. Optionally, set
+        ``alias`` to control the output key name.
+
+    Returns
+    -------
+    JSONDict
+        A single-row result like ``{"sum_age": 42}``.
+
+    Notes
+    -----
+    Numeric operations ignore non-numeric values but count their presence
+    for ``'count'``.
+    """
+    field = operation.get('field')
+    func = operation.get('func')
+    alias = operation.get('alias')
+
+    if not field or func is None:
+        return {'error': 'Invalid aggregation operation'}
+
+    try:
+        aggregator = _resolve_aggregator(func)
+    except TypeError:
+        return {'error': f'Unknown aggregation function: {func}'}
+
+    nums, present = _collect_numeric_and_presence(records, field)
+    key_name = _derive_agg_key(func, field, alias)
+    return {key_name: aggregator(nums, present)}
 
 
 def apply_filter(
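
Per the docstring above, apply_aggregate reduces a record list to a single-row dict whose key defaults to a func_field combination (as in {"sum_age": 42}) unless an alias overrides it::

    rows = [{'age': 30}, {'age': 12}, {'age': 'n/a'}]
    apply_aggregate(rows, {'field': 'age', 'func': 'sum'})
    # -> {'sum_age': 42}  (the non-numeric 'n/a' is ignored)
    apply_aggregate(rows, {'field': 'age', 'func': 'count', 'alias': 'n'})
    # -> {'n': 3}  (count tallies presence, including 'n/a')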
@@ -894,48 +942,7 @@ def apply_sort(
     )
 
 
-def apply_aggregate(
-    records: JSONList,
-    operation: AggregateSpec,
-) -> JSONDict:
-    """
-    Aggregate a numeric field or count presence.
-
-    Parameters
-    ----------
-    records : JSONList
-        Records to aggregate.
-    operation : AggregateSpec
-        Dict with keys ``field`` and ``func``. ``func`` is one of
-        ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
-        A callable may also be supplied for ``func``. Optionally, set
-        ``alias`` to control the output key name.
-
-    Returns
-    -------
-    JSONDict
-        A single-row result like ``{"sum_age": 42}``.
-
-    Notes
-    -----
-    Numeric operations ignore non-numeric values but count their presence
-    for ``'count'``.
-    """
-    field = operation.get('field')
-    func = operation.get('func')
-    alias = operation.get('alias')
-
-    if not field or func is None:
-        return {'error': 'Invalid aggregation operation'}
-
-    try:
-        aggregator = _resolve_aggregator(func)
-    except TypeError:
-        return {'error': f'Unknown aggregation function: {func}'}
-
-    nums, present = _collect_numeric_and_presence(records, field)
-    key_name = _derive_agg_key(func, field, alias)
-    return {key_name: aggregator(nums, present)}
+# -- Orchestration -- #
 
 
 def transform(
@@ -982,7 +989,7 @@ def transform(
 
     Using enums for keys and functions::
 
-        from .enums import PipelineStep, OperatorName, AggregateName
+        from etlplus.enums import PipelineStep, OperatorName, AggregateName
         ops = {
             PipelineStep.FILTER: {
                 'field': 'age', 'op': OperatorName.GTE, 'value': 18
etlplus/{validation → ops}/utils.py
@@ -1,7 +1,7 @@
 """
-:mod:`etlplus.validation.utils` module.
+:mod:`etlplus.ops.utils` module.
 
-Utility helpers for conditional validation orchestration.
+Utility helpers for conditional data ops orchestration.
 
 The helpers defined here embrace a "high cohesion, low coupling" design by
 isolating normalization, configuration, and logging responsibilities. The
@@ -13,11 +13,14 @@ offloading ancillary concerns to composable helpers.
 from __future__ import annotations
 
 from collections.abc import Callable
+from collections.abc import Mapping
 from dataclasses import dataclass
+from types import MappingProxyType
 from typing import Any
 from typing import Literal
 from typing import Self
 from typing import TypedDict
+from typing import cast
 
 from ..types import StrAnyMap
 from ..utils import normalized_str
@@ -47,6 +50,30 @@ type ValidateFn = Callable[[Any, Ruleset], ValidationResult]
 type PrintFn = Callable[[Any], None]
 
 
+# SECTION: INTERNAL CONSTANTS ============================================== #
+
+
+_PHASE_CHOICES = MappingProxyType(
+    {
+        'before_transform': 'before_transform',
+        'after_transform': 'after_transform',
+    },
+)
+_SEVERITY_CHOICES = MappingProxyType(
+    {
+        'warn': 'warn',
+        'error': 'error',
+    },
+)
+_WINDOW_CHOICES = MappingProxyType(
+    {
+        'before_transform': 'before_transform',
+        'after_transform': 'after_transform',
+        'both': 'both',
+    },
+)
+
+
 # SECTION: DATA CLASSES ===================================================== #
 
@@ -291,11 +318,14 @@ def _normalize_phase(
         Normalized validation phase. Defaults to ``"before_transform"`` when
         unspecified.
     """
-    match normalized_str(value):
-        case 'after_transform':
-            return 'after_transform'
-        case _:
-            return 'before_transform'
+    return cast(
+        ValidationPhase,
+        _normalize_choice(
+            value,
+            mapping=_PHASE_CHOICES,
+            default='before_transform',
+        ),
+    )
 
 
 def _normalize_severity(
@@ -314,7 +344,14 @@ def _normalize_severity(
     ValidationSeverity
         Normalized severity. Defaults to ``"error"`` when unspecified.
     """
-    return 'warn' if normalized_str(value) == 'warn' else 'error'
+    return cast(
+        ValidationSeverity,
+        _normalize_choice(
+            value,
+            mapping=_SEVERITY_CHOICES,
+            default='error',
+        ),
+    )
 
 
 def _normalize_window(
@@ -333,13 +370,41 @@ def _normalize_window(
     ValidationWindow
         Normalized validation window. Defaults to ``"both"`` when unspecified.
     """
-    match normalized_str(value):
-        case 'before_transform':
-            return 'before_transform'
-        case 'after_transform':
-            return 'after_transform'
-        case _:
-            return 'both'
+    return cast(
+        ValidationWindow,
+        _normalize_choice(
+            value,
+            mapping=_WINDOW_CHOICES,
+            default='both',
+        ),
+    )
+
+
+def _normalize_choice(
+    value: str | None,
+    *,
+    mapping: Mapping[str, str],
+    default: str,
+) -> str:
+    """
+    Normalize a text value against a mapping with a default fallback.
+
+    Parameters
+    ----------
+    value : str | None
+        Input text to normalize.
+    mapping : Mapping[str, str]
+        Mapping of accepted values to normalized outputs.
+    default : str
+        Default to return when input is missing or unrecognized.
+
+    Returns
+    -------
+    str
+        Normalized value.
+    """
+    normalized = normalized_str(value)
+    return mapping.get(normalized, default)
 
 
 def _rule_name(
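
The three normalizers above are now table-driven via _normalize_choice. A behavior sketch, per the defaults wired in each wrapper and assuming normalized_str lowercases and strips its input::

    _normalize_choice(
        ' AFTER_TRANSFORM ',
        mapping=_PHASE_CHOICES,
        default='before_transform',
    )
    # -> 'after_transform'
    _normalize_choice(None, mapping=_SEVERITY_CHOICES, default='error')
    # -> 'error'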