etlplus 0.9.2__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +26 -1
- etlplus/api/README.md +3 -51
- etlplus/api/__init__.py +0 -10
- etlplus/api/config.py +28 -39
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +1 -13
- etlplus/api/rate_limiting/rate_limiter.py +11 -8
- etlplus/api/request_manager.py +6 -11
- etlplus/api/transport.py +2 -14
- etlplus/api/types.py +6 -96
- etlplus/cli/commands.py +43 -76
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +12 -40
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +7 -4
- etlplus/{workflow → config}/__init__.py +23 -10
- etlplus/{workflow → config}/connector.py +44 -58
- etlplus/{workflow → config}/jobs.py +32 -105
- etlplus/{workflow → config}/pipeline.py +51 -59
- etlplus/{workflow → config}/profile.py +5 -8
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +3 -19
- etlplus/database/orm.py +0 -2
- etlplus/database/schema.py +1 -1
- etlplus/enums.py +266 -0
- etlplus/{ops/extract.py → extract.py} +99 -81
- etlplus/file.py +652 -0
- etlplus/{ops/load.py → load.py} +101 -78
- etlplus/{ops/run.py → run.py} +127 -159
- etlplus/{api/utils.py → run_helpers.py} +153 -209
- etlplus/{ops/transform.py → transform.py} +68 -75
- etlplus/types.py +4 -5
- etlplus/utils.py +2 -136
- etlplus/{ops/validate.py → validate.py} +12 -22
- etlplus/validation/__init__.py +44 -0
- etlplus/{ops → validation}/utils.py +17 -53
- {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/METADATA +17 -210
- etlplus-0.10.1.dist-info/RECORD +65 -0
- {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/WHEEL +1 -1
- etlplus/README.md +0 -37
- etlplus/api/enums.py +0 -51
- etlplus/cli/README.md +0 -40
- etlplus/database/README.md +0 -48
- etlplus/file/README.md +0 -105
- etlplus/file/__init__.py +0 -25
- etlplus/file/_imports.py +0 -141
- etlplus/file/_io.py +0 -160
- etlplus/file/accdb.py +0 -78
- etlplus/file/arrow.py +0 -78
- etlplus/file/avro.py +0 -176
- etlplus/file/bson.py +0 -77
- etlplus/file/cbor.py +0 -78
- etlplus/file/cfg.py +0 -79
- etlplus/file/conf.py +0 -80
- etlplus/file/core.py +0 -322
- etlplus/file/csv.py +0 -79
- etlplus/file/dat.py +0 -78
- etlplus/file/dta.py +0 -77
- etlplus/file/duckdb.py +0 -78
- etlplus/file/enums.py +0 -343
- etlplus/file/feather.py +0 -111
- etlplus/file/fwf.py +0 -77
- etlplus/file/gz.py +0 -123
- etlplus/file/hbs.py +0 -78
- etlplus/file/hdf5.py +0 -78
- etlplus/file/ini.py +0 -79
- etlplus/file/ion.py +0 -78
- etlplus/file/jinja2.py +0 -78
- etlplus/file/json.py +0 -98
- etlplus/file/log.py +0 -78
- etlplus/file/mat.py +0 -78
- etlplus/file/mdb.py +0 -78
- etlplus/file/msgpack.py +0 -78
- etlplus/file/mustache.py +0 -78
- etlplus/file/nc.py +0 -78
- etlplus/file/ndjson.py +0 -108
- etlplus/file/numbers.py +0 -75
- etlplus/file/ods.py +0 -79
- etlplus/file/orc.py +0 -111
- etlplus/file/parquet.py +0 -113
- etlplus/file/pb.py +0 -78
- etlplus/file/pbf.py +0 -77
- etlplus/file/properties.py +0 -78
- etlplus/file/proto.py +0 -77
- etlplus/file/psv.py +0 -79
- etlplus/file/rda.py +0 -78
- etlplus/file/rds.py +0 -78
- etlplus/file/sas7bdat.py +0 -78
- etlplus/file/sav.py +0 -77
- etlplus/file/sqlite.py +0 -78
- etlplus/file/stub.py +0 -84
- etlplus/file/sylk.py +0 -77
- etlplus/file/tab.py +0 -81
- etlplus/file/toml.py +0 -78
- etlplus/file/tsv.py +0 -80
- etlplus/file/txt.py +0 -102
- etlplus/file/vm.py +0 -78
- etlplus/file/wks.py +0 -77
- etlplus/file/xls.py +0 -88
- etlplus/file/xlsm.py +0 -79
- etlplus/file/xlsx.py +0 -99
- etlplus/file/xml.py +0 -185
- etlplus/file/xpt.py +0 -78
- etlplus/file/yaml.py +0 -95
- etlplus/file/zip.py +0 -175
- etlplus/file/zsav.py +0 -77
- etlplus/ops/README.md +0 -50
- etlplus/ops/__init__.py +0 -61
- etlplus/templates/README.md +0 -46
- etlplus/workflow/README.md +0 -52
- etlplus/workflow/dag.py +0 -105
- etlplus/workflow/types.py +0 -115
- etlplus-0.9.2.dist-info/RECORD +0 -134
- {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.2.dist-info → etlplus-0.10.1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.transform` module.
 
 Helpers to filter, map/rename, select, sort, aggregate, and otherwise
 transform JSON-like records (dicts and lists of dicts).
@@ -24,7 +24,7 @@ Basic pipeline with strings::
 
 Using enums for keys and functions::
 
-    from
+    from .enums import PipelineStep, OperatorName, AggregateName
     ops = {
        PipelineStep.FILTER: {
            'field': 'age', 'op': OperatorName.GTE, 'value': 18
@@ -44,28 +44,28 @@ from collections.abc import Sequence
 from typing import Any
 from typing import cast
 
-from
-from
-from
-from ..types import AggregateFunc
-from ..types import AggregateSpec
-from ..types import FieldName
-from ..types import Fields
-from ..types import FilterSpec
-from ..types import JSONData
-from ..types import JSONDict
-from ..types import JSONList
-from ..types import MapSpec
-from ..types import OperatorFunc
-from ..types import PipelineConfig
-from ..types import PipelineStepName
-from ..types import SortKey
-from ..types import StepApplier
-from ..types import StepOrSteps
-from ..types import StepSpec
-from ..types import StrPath
-from ..utils import to_number
+from .enums import AggregateName
+from .enums import OperatorName
+from .enums import PipelineStep
 from .load import load_data
+from .types import AggregateFunc
+from .types import AggregateSpec
+from .types import FieldName
+from .types import Fields
+from .types import FilterSpec
+from .types import JSONData
+from .types import JSONDict
+from .types import JSONList
+from .types import MapSpec
+from .types import OperatorFunc
+from .types import PipelineConfig
+from .types import PipelineStepName
+from .types import SortKey
+from .types import StepApplier
+from .types import StepOrSteps
+from .types import StepSpec
+from .types import StrPath
+from .utils import to_number
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -730,16 +730,15 @@ def _is_plain_fields_list(obj: Any) -> bool:
 
 
 _PIPELINE_STEPS: tuple[PipelineStepName, ...] = (
-    'aggregate',
     'filter',
     'map',
     'select',
     'sort',
+    'aggregate',
 )
 
 
 _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
-    'aggregate': _apply_aggregate_step,
     'filter': _apply_filter_step,
     'map': _apply_map_step,
     'select': _apply_select_step,
@@ -747,54 +746,7 @@ _STEP_APPLIERS: dict[PipelineStepName, StepApplier] = {
 }
 
 
-# SECTION:
-
-
-# -- Helpers -- #
-
-
-def apply_aggregate(
-    records: JSONList,
-    operation: AggregateSpec,
-) -> JSONDict:
-    """
-    Aggregate a numeric field or count presence.
-
-    Parameters
-    ----------
-    records : JSONList
-        Records to aggregate.
-    operation : AggregateSpec
-        Dict with keys ``field`` and ``func``. ``func`` is one of
-        ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
-        A callable may also be supplied for ``func``. Optionally, set
-        ``alias`` to control the output key name.
-
-    Returns
-    -------
-    JSONDict
-        A single-row result like ``{"sum_age": 42}``.
-
-    Notes
-    -----
-    Numeric operations ignore non-numeric values but count their presence
-    for ``'count'``.
-    """
-    field = operation.get('field')
-    func = operation.get('func')
-    alias = operation.get('alias')
-
-    if not field or func is None:
-        return {'error': 'Invalid aggregation operation'}
-
-    try:
-        aggregator = _resolve_aggregator(func)
-    except TypeError:
-        return {'error': f'Unknown aggregation function: {func}'}
-
-    nums, present = _collect_numeric_and_presence(records, field)
-    key_name = _derive_agg_key(func, field, alias)
-    return {key_name: aggregator(nums, present)}
+# SECTION: EXPORTS ========================================================== #
 
 
 def apply_filter(
@@ -942,7 +894,48 @@ def apply_sort(
     )
 
 
-
+def apply_aggregate(
+    records: JSONList,
+    operation: AggregateSpec,
+) -> JSONDict:
+    """
+    Aggregate a numeric field or count presence.
+
+    Parameters
+    ----------
+    records : JSONList
+        Records to aggregate.
+    operation : AggregateSpec
+        Dict with keys ``field`` and ``func``. ``func`` is one of
+        ``'sum'``, ``'avg'``, ``'min'``, ``'max'``, or ``'count'``.
+        A callable may also be supplied for ``func``. Optionally, set
+        ``alias`` to control the output key name.
+
+    Returns
+    -------
+    JSONDict
+        A single-row result like ``{"sum_age": 42}``.
+
+    Notes
+    -----
+    Numeric operations ignore non-numeric values but count their presence
+    for ``'count'``.
+    """
+    field = operation.get('field')
+    func = operation.get('func')
+    alias = operation.get('alias')
+
+    if not field or func is None:
+        return {'error': 'Invalid aggregation operation'}
+
+    try:
+        aggregator = _resolve_aggregator(func)
+    except TypeError:
+        return {'error': f'Unknown aggregation function: {func}'}
+
+    nums, present = _collect_numeric_and_presence(records, field)
+    key_name = _derive_agg_key(func, field, alias)
+    return {key_name: aggregator(nums, present)}
 
 
 def transform(
@@ -989,7 +982,7 @@ def transform(
 
 Using enums for keys and functions::
 
-    from
+    from .enums import PipelineStep, OperatorName, AggregateName
     ops = {
        PipelineStep.FILTER: {
            'field': 'age', 'op': OperatorName.GTE, 'value': 18
etlplus/types.py CHANGED
@@ -11,9 +11,8 @@ Notes
 
 See Also
 --------
-- :mod:`etlplus.api.types` for HTTP-specific aliases
-- :mod:`etlplus.
-  surfaces
+- :mod:`etlplus.api.types` for HTTP-specific aliases
+- :mod:`etlplus.config.types` for TypedDict surfaces
 
 Examples
 --------
@@ -194,8 +193,8 @@ type AggregateSpec = StrAnyMap
 
 # -- Pipelines-- #
 
-# Unified pipeline step spec consumed by :mod:`etlplus.
-type StepSpec =
+# Unified pipeline step spec consumed by :mod:`etlplus.transform`.
+type StepSpec = FilterSpec | MapSpec | SelectSpec | SortSpec | AggregateSpec
 
 # Collections of steps
 
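For context, the widened ``StepSpec`` alias accepts any of the per-step mapping shapes, so plain dicts qualify. A hypothetical sketch (the concrete key and operator strings such as ``'gte'`` are assumptions, not taken from this diff):

    from etlplus.types import StepSpec

    filter_step: StepSpec = {'field': 'age', 'op': 'gte', 'value': 18}
    aggregate_step: StepSpec = {'field': 'age', 'func': 'avg', 'alias': 'avg_age'}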
etlplus/utils.py CHANGED
@@ -8,7 +8,6 @@ from __future__ import annotations
 
 import json
 from collections.abc import Callable
-from collections.abc import Iterable
 from collections.abc import Mapping
 from typing import Any
 from typing import TypeVar
@@ -26,7 +25,6 @@ __all__ = [
     # Mapping utilities
     'cast_str_dict',
     'coerce_dict',
-    'deep_substitute',
     'maybe_mapping',
     # Float coercion
     'to_float',
@@ -41,8 +39,7 @@ __all__ = [
     # Generic number coercion
     'to_number',
     # Text processing
-    '
-    'normalize_str',
+    'normalized_str',
 ]
 
 
@@ -59,52 +56,6 @@ Num = TypeVar('Num', int, float)
 # -- Data Utilities -- #
 
 
-def deep_substitute(
-    value: Any,
-    vars_map: StrAnyMap | None,
-    env_map: Mapping[str, str] | None,
-) -> Any:
-    """
-    Recursively substitute ``${VAR}`` tokens in nested structures.
-
-    Only strings are substituted; other types are returned as-is.
-
-    Parameters
-    ----------
-    value : Any
-        The value to perform substitutions on.
-    vars_map : StrAnyMap | None
-        Mapping of variable names to replacement values (lower precedence).
-    env_map : Mapping[str, str] | None
-        Mapping of environment variables overriding ``vars_map`` values
-        (higher precedence).
-
-    Returns
-    -------
-    Any
-        New structure with substitutions applied where tokens were found.
-    """
-    substitutions = _prepare_substitutions(vars_map, env_map)
-
-    def _apply(node: Any) -> Any:
-        match node:
-            case str():
-                return _replace_tokens(node, substitutions)
-            case Mapping():
-                return {k: _apply(v) for k, v in node.items()}
-            case list() | tuple() as seq:
-                apply = [_apply(item) for item in seq]
-                return apply if isinstance(seq, list) else tuple(apply)
-            case set():
-                return {_apply(item) for item in node}
-            case frozenset():
-                return frozenset(_apply(item) for item in node)
-            case _:
-                return node
-
-    return _apply(value)
-
-
 def cast_str_dict(
     mapping: StrAnyMap | None,
 ) -> dict[str, str]:
@@ -421,7 +372,7 @@ def to_number(
 # -- Text Processing -- #
 
 
-def
+def normalized_str(
     value: str | None,
 ) -> str:
     """
@@ -441,36 +392,6 @@ def normalize_str(
     return (value or '').strip().lower()
 
 
-def normalize_choice(
-    value: str | None,
-    *,
-    mapping: Mapping[str, str],
-    default: str,
-    normalize: Callable[[str | None], str] = normalize_str,
-) -> str:
-    """
-    Normalize a string choice using a mapping and fallback.
-
-    Parameters
-    ----------
-    value : str | None
-        Input value to normalize.
-    mapping : Mapping[str, str]
-        Mapping of acceptable normalized inputs to output values.
-    default : str
-        Default return value when input is missing or unrecognized.
-    normalize : Callable[[str | None], str], optional
-        Normalization function applied to *value*. Defaults to
-        :func:`normalize_str`.
-
-    Returns
-    -------
-    str
-        Normalized mapped value or ``default``.
-    """
-    return mapping.get(normalize(value), default)
-
-
 # SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
@@ -504,61 +425,6 @@ def _clamp(
     return value
 
 
-def _prepare_substitutions(
-    vars_map: StrAnyMap | None,
-    env_map: Mapping[str, Any] | None,
-) -> tuple[tuple[str, Any], ...]:
-    """
-    Merge variable and environment maps into an ordered substitutions list.
-
-    Parameters
-    ----------
-    vars_map : StrAnyMap | None
-        Mapping of variable names to replacement values (lower precedence).
-    env_map : Mapping[str, Any] | None
-        Environment-backed values that override entries from ``vars_map``.
-
-    Returns
-    -------
-    tuple[tuple[str, Any], ...]
-        Immutable sequence of ``(name, value)`` pairs suitable for token
-        replacement.
-    """
-    if not vars_map and not env_map:
-        return ()
-    merged: dict[str, Any] = {**(vars_map or {}), **(env_map or {})}
-    return tuple(merged.items())
-
-
-def _replace_tokens(
-    text: str,
-    substitutions: Iterable[tuple[str, Any]],
-) -> str:
-    """
-    Replace ``${VAR}`` tokens in ``text`` using ``substitutions``.
-
-    Parameters
-    ----------
-    text : str
-        Input string that may contain ``${VAR}`` tokens.
-    substitutions : Iterable[tuple[str, Any]]
-        Sequence of ``(name, value)`` pairs used for token replacement.
-
-    Returns
-    -------
-    str
-        Updated text with replacements applied.
-    """
-    if not substitutions:
-        return text
-    out = text
-    for name, replacement in substitutions:
-        token = f'${{{name}}}'
-        if token in out:
-            out = out.replace(token, str(replacement))
-    return out
-
-
 def _coerce_float(
     value: object,
 ) -> float | None:
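The renamed ``normalized_str`` keeps the trim-and-lowercase behavior shown above, and the removed ``normalize_choice`` pattern can be reproduced inline where a mapping-with-fallback is still wanted. A small sketch (``level`` is a hypothetical input variable):

    from etlplus.utils import normalized_str

    normalized_str('  Before_Transform  ')  # -> 'before_transform'
    normalized_str(None)                    # -> ''

    # Inline stand-in for the removed normalize_choice(value, mapping=..., default=...):
    level = 'WARN'
    severity = {'warn': 'warn', 'error': 'error'}.get(normalized_str(level), 'error')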
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.validation` module.
 
 Validate dicts and lists of dicts using simple, schema-like rules.
 
@@ -11,8 +11,8 @@ Highlights
 ----------
 - Centralized type map and helpers for clarity and reuse.
 - Consistent error wording; field and item paths like ``[2].email``.
-- Small, focused public API with
-
+- Small, focused public API with ``load_data``, ``validate_field``,
+  ``validate``.
 
 Examples
 --------
@@ -34,11 +34,11 @@ from typing import Final
 from typing import Literal
 from typing import TypedDict
 
-from ..types import JSONData
-from ..types import Record
-from ..types import StrAnyMap
-from ..types import StrPath
 from .load import load_data
+from .types import JSONData
+from .types import Record
+from .types import StrAnyMap
+from .types import StrPath
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -66,7 +66,7 @@ TYPE_MAP: Final[dict[str, type | tuple[type, ...]]] = {
 }
 
 
-# SECTION:
+# SECTION: CLASSES ========================================================== #
 
 
 class FieldRules(TypedDict, total=False):
@@ -279,15 +279,11 @@ def _type_matches(
     bool
         ``True`` if the value matches the expected type; ``False`` if not.
     """
-    if expected == 'number':
-        return _is_number(value)
-    if expected == 'integer':
-        return isinstance(value, int) and not isinstance(value, bool)
-    if expected == 'boolean':
-        return isinstance(value, bool)
-
     py_type = TYPE_MAP.get(expected)
-
+    if py_type:
+        return isinstance(value, py_type)
+
+    return False
 
 
 def _validate_record(
@@ -334,9 +330,6 @@ def _validate_record(
 # SECTION: FUNCTIONS ======================================================== #
 
 
-# -- Helpers -- #
-
-
 def validate_field(
     value: Any,
     rules: StrAnyMap | FieldRules,
@@ -432,9 +425,6 @@ def validate_field(
     return {'valid': len(errors) == 0, 'errors': errors}
 
 
-# -- Orchestration -- #
-
-
 def validate(
     source: StrPath | JSONData,
     rules: RulesMap | None = None,
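The ``_type_matches`` change above drops the special cases for ``'number'``, ``'integer'``, and ``'boolean'`` in favor of a single ``TYPE_MAP`` lookup. A standalone sketch of that lookup-only check (the ``TYPE_MAP`` contents here are assumed for illustration and are not taken from this diff):

    from typing import Any

    TYPE_MAP: dict[str, type | tuple[type, ...]] = {
        'string': str,
        'integer': int,
        'number': (int, float),
        'boolean': bool,
    }

    def type_matches(value: Any, expected: str) -> bool:
        # Unknown type names simply fail to match.
        py_type = TYPE_MAP.get(expected)
        if py_type:
            return isinstance(value, py_type)
        return False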
@@ -0,0 +1,44 @@
+"""
+:mod:`etlplus.validation` package.
+
+Conditional validation utilities used across the ETL pipeline.
+
+The package intentionally exposes a single helper, :func:`maybe_validate`, to
+keep the public API compact and predictable. Supporting logic lives in
+``etlplus.validation.utils`` where validation configuration is normalized,
+reducing the likelihood of phase/option mismatches.
+
+Examples
+--------
+>>> from etlplus.validation import maybe_validate
+>>> payload = {'name': 'Alice'}
+>>> rules = {'required': ['name']}
+>>> def validator(data, config):
+...     missing = [field for field in config['required'] if field not in data]
+...     return {'valid': not missing, 'errors': missing, 'data': data}
+>>> maybe_validate(
+...     payload,
+...     when='both',
+...     enabled=True,
+...     rules=rules,
+...     phase='before_transform',
+...     severity='warn',
+...     validate_fn=validator,
+...     print_json_fn=lambda message: message,
+... )
+{'name': 'Alice'}
+
+See Also
+--------
+- :mod:`etlplus.validation.utils` for implementation details and helper
+  utilities.
+"""
+
+from __future__ import annotations
+
+from .utils import maybe_validate
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = ['maybe_validate']
@@ -1,27 +1,26 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.validation.utils` module.
 
-Utility helpers for conditional
+Utility helpers for conditional validation orchestration.
 
 The helpers defined here embrace a "high cohesion, low coupling" design by
 isolating normalization, configuration, and logging responsibilities. The
 resulting surface keeps ``maybe_validate`` focused on orchestration while
 offloading ancillary concerns to composable helpers.
+
 """
 
 from __future__ import annotations
 
 from collections.abc import Callable
 from dataclasses import dataclass
-from types import MappingProxyType
 from typing import Any
 from typing import Literal
 from typing import Self
 from typing import TypedDict
-from typing import cast
 
 from ..types import StrAnyMap
-from ..utils import
+from ..utils import normalized_str
 
 # SECTION: TYPED DICTIONARIES =============================================== #
 
@@ -48,30 +47,6 @@ type ValidateFn = Callable[[Any, Ruleset], ValidationResult]
 type PrintFn = Callable[[Any], None]
 
 
-# SECTION: INTERNAL CONSTANTS ============================================== #
-
-
-_PHASE_CHOICES = MappingProxyType(
-    {
-        'before_transform': 'before_transform',
-        'after_transform': 'after_transform',
-    },
-)
-_SEVERITY_CHOICES = MappingProxyType(
-    {
-        'warn': 'warn',
-        'error': 'error',
-    },
-)
-_WINDOW_CHOICES = MappingProxyType(
-    {
-        'before_transform': 'before_transform',
-        'after_transform': 'after_transform',
-        'both': 'both',
-    },
-)
-
-
 # SECTION: DATA CLASSES ===================================================== #
 
 
@@ -316,14 +291,11 @@ def _normalize_phase(
        Normalized validation phase. Defaults to ``"before_transform"`` when
        unspecified.
    """
-
-
-
-
-
-            default='before_transform',
-        ),
-    )
+    match normalized_str(value):
+        case 'after_transform':
+            return 'after_transform'
+        case _:
+            return 'before_transform'
 
 
 def _normalize_severity(
@@ -342,14 +314,7 @@ def _normalize_severity(
    ValidationSeverity
        Normalized severity. Defaults to ``"error"`` when unspecified.
    """
-    return
-        ValidationSeverity,
-        normalize_choice(
-            value,
-            mapping=_SEVERITY_CHOICES,
-            default='error',
-        ),
-    )
+    return 'warn' if normalized_str(value) == 'warn' else 'error'
 
 
 def _normalize_window(
@@ -368,14 +333,13 @@ def _normalize_window(
    ValidationWindow
        Normalized validation window. Defaults to ``"both"`` when unspecified.
    """
-
-
-
-
-
-
-
-    )
+    match normalized_str(value):
+        case 'before_transform':
+            return 'before_transform'
+        case 'after_transform':
+            return 'after_transform'
+        case _:
+            return 'both'
 
 
 def _rule_name(