etlplus 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +37 -0
- etlplus/__init__.py +1 -26
- etlplus/api/README.md +51 -3
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +39 -28
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +96 -6
- etlplus/{run_helpers.py → api/utils.py} +209 -153
- etlplus/cli/README.md +40 -0
- etlplus/cli/commands.py +76 -43
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +40 -12
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +4 -7
- etlplus/database/README.md +48 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/database/schema.py +1 -1
- etlplus/enums.py +1 -157
- etlplus/file/README.md +105 -0
- etlplus/file/__init__.py +25 -0
- etlplus/file/_imports.py +141 -0
- etlplus/file/_io.py +160 -0
- etlplus/file/accdb.py +78 -0
- etlplus/file/arrow.py +78 -0
- etlplus/file/avro.py +176 -0
- etlplus/file/bson.py +77 -0
- etlplus/file/cbor.py +78 -0
- etlplus/file/cfg.py +79 -0
- etlplus/file/conf.py +80 -0
- etlplus/file/core.py +322 -0
- etlplus/file/csv.py +79 -0
- etlplus/file/dat.py +78 -0
- etlplus/file/dta.py +77 -0
- etlplus/file/duckdb.py +78 -0
- etlplus/file/enums.py +343 -0
- etlplus/file/feather.py +111 -0
- etlplus/file/fwf.py +77 -0
- etlplus/file/gz.py +123 -0
- etlplus/file/hbs.py +78 -0
- etlplus/file/hdf5.py +78 -0
- etlplus/file/ini.py +79 -0
- etlplus/file/ion.py +78 -0
- etlplus/file/jinja2.py +78 -0
- etlplus/file/json.py +98 -0
- etlplus/file/log.py +78 -0
- etlplus/file/mat.py +78 -0
- etlplus/file/mdb.py +78 -0
- etlplus/file/msgpack.py +78 -0
- etlplus/file/mustache.py +78 -0
- etlplus/file/nc.py +78 -0
- etlplus/file/ndjson.py +108 -0
- etlplus/file/numbers.py +75 -0
- etlplus/file/ods.py +79 -0
- etlplus/file/orc.py +111 -0
- etlplus/file/parquet.py +113 -0
- etlplus/file/pb.py +78 -0
- etlplus/file/pbf.py +77 -0
- etlplus/file/properties.py +78 -0
- etlplus/file/proto.py +77 -0
- etlplus/file/psv.py +79 -0
- etlplus/file/rda.py +78 -0
- etlplus/file/rds.py +78 -0
- etlplus/file/sas7bdat.py +78 -0
- etlplus/file/sav.py +77 -0
- etlplus/file/sqlite.py +78 -0
- etlplus/file/stub.py +84 -0
- etlplus/file/sylk.py +77 -0
- etlplus/file/tab.py +81 -0
- etlplus/file/toml.py +78 -0
- etlplus/file/tsv.py +80 -0
- etlplus/file/txt.py +102 -0
- etlplus/file/vm.py +78 -0
- etlplus/file/wks.py +77 -0
- etlplus/file/xls.py +88 -0
- etlplus/file/xlsm.py +79 -0
- etlplus/file/xlsx.py +99 -0
- etlplus/file/xml.py +185 -0
- etlplus/file/xpt.py +78 -0
- etlplus/file/yaml.py +95 -0
- etlplus/file/zip.py +175 -0
- etlplus/file/zsav.py +77 -0
- etlplus/ops/README.md +50 -0
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +81 -99
- etlplus/{load.py → ops/load.py} +78 -101
- etlplus/{run.py → ops/run.py} +159 -127
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +53 -17
- etlplus/{validate.py → ops/validate.py} +22 -12
- etlplus/templates/README.md +46 -0
- etlplus/types.py +5 -4
- etlplus/utils.py +136 -2
- etlplus/workflow/README.md +52 -0
- etlplus/{config → workflow}/__init__.py +10 -23
- etlplus/{config → workflow}/connector.py +58 -44
- etlplus/workflow/dag.py +105 -0
- etlplus/{config → workflow}/jobs.py +105 -32
- etlplus/{config → workflow}/pipeline.py +59 -51
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/METADATA +210 -17
- etlplus-0.9.2.dist-info/RECORD +134 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/WHEEL +1 -1
- etlplus/config/types.py +0 -204
- etlplus/config/utils.py +0 -120
- etlplus/file.py +0 -657
- etlplus/validation/__init__.py +0 -44
- etlplus-0.9.1.dist-info/RECORD +0 -65
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/top_level.txt +0 -0
etlplus/workflow/dag.py
ADDED
```diff
@@ -0,0 +1,105 @@
+"""
+:mod:`etlplus.workflow.dag` module.
+
+Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
+:attr:`depends_on`.
+"""
+
+from __future__ import annotations
+
+from collections import deque
+from dataclasses import dataclass
+
+from .jobs import JobConfig
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Errors
+    'DagError',
+    # Functions
+    'topological_sort_jobs',
+]
+
+
+# SECTION: ERRORS =========================================================== #
+
+
+@dataclass(slots=True)
+class DagError(ValueError):
+    """
+    Raised when the job dependency graph is invalid.
+
+    Attributes
+    ----------
+    message : str
+        Error message.
+    """
+
+    # -- Attributes -- #
+
+    message: str
+
+    # -- Magic Methods (Object Representation) -- #
+
+    def __str__(self) -> str:
+        return self.message
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def topological_sort_jobs(
+    jobs: list[JobConfig],
+) -> list[JobConfig]:
+    """
+    Return jobs in topological order based on :attr:`depends_on`.
+
+    Parameters
+    ----------
+    jobs : list[JobConfig]
+        List of job configurations to sort.
+
+    Returns
+    -------
+    list[JobConfig]
+        Jobs sorted in topological order.
+
+    Raises
+    ------
+    DagError
+        If a dependency is missing, self-referential, or when a cycle is
+        detected.
+    """
+    index = {job.name: job for job in jobs}
+    edges: dict[str, set[str]] = {name: set() for name in index}
+    indegree: dict[str, int] = {name: 0 for name in index}
+
+    for job in jobs:
+        for dep in job.depends_on:
+            if dep not in index:
+                raise DagError(
+                    f'Unknown dependency "{dep}" in job "{job.name}"',
+                )
+            if dep == job.name:
+                raise DagError(f'Job "{job.name}" depends on itself')
+            if job.name not in edges[dep]:
+                edges[dep].add(job.name)
+                indegree[job.name] += 1
+
+    queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
+    ordered: list[str] = []
+
+    while queue:
+        name = queue.popleft()
+        ordered.append(name)
+        for child in sorted(edges[name]):
+            indegree[child] -= 1
+            if indegree[child] == 0:
+                queue.append(child)
+
+    if len(ordered) != len(jobs):
+        raise DagError('Dependency cycle detected')
+
+    return [index[name] for name in ordered]
```
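The new module is Kahn's algorithm with alphabetically sorted queues, so the output order is deterministic for a given job set. A minimal usage sketch (job names are hypothetical; `JobConfig` is keyword-only per the `jobs.py` diff below):

```python
from etlplus.workflow.dag import DagError
from etlplus.workflow.dag import topological_sort_jobs
from etlplus.workflow.jobs import JobConfig

jobs = [
    JobConfig(name='load', depends_on=['transform']),
    JobConfig(name='extract'),
    JobConfig(name='transform', depends_on=['extract']),
]

# Dependencies always sort ahead of dependents; ties break alphabetically.
print([job.name for job in topological_sort_jobs(jobs)])
# -> ['extract', 'transform', 'load']

# Invalid graphs raise DagError rather than looping or returning partials.
try:
    topological_sort_jobs([JobConfig(name='a', depends_on=['a'])])
except DagError as exc:
    print(exc)  # Job "a" depends on itself
```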
etlplus/{config → workflow}/jobs.py

```diff
@@ -1,12 +1,12 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.jobs` module.
 
 Data classes modeling job orchestration references (extract, validate,
 transform, load).
 
 Notes
 -----
-- Lightweight references used inside
+- Lightweight references used inside :class:`PipelineConfig` to avoid storing
   large nested structures.
 - All attributes are simple and optional where appropriate, keeping parsing
   tolerant.
@@ -19,6 +19,7 @@ from dataclasses import field
 from typing import Any
 from typing import Self
 
+from ..types import StrAnyMap
 from ..utils import coerce_dict
 from ..utils import maybe_mapping
 
@@ -26,6 +27,7 @@ from ..utils import maybe_mapping
 
 
 __all__ = [
+    # Data Classes
     'ExtractRef',
     'JobConfig',
     'LoadRef',
@@ -34,10 +36,76 @@ __all__ = [
 ]
 
 
-# SECTION:
+# SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
-
+def _coerce_optional_str(value: Any) -> str | None:
+    """
+    Normalize optional string values, coercing non-strings when needed.
+
+    Parameters
+    ----------
+    value : Any
+        Optional value to normalize.
+
+    Returns
+    -------
+    str | None
+        ``None`` when ``value`` is ``None``; otherwise a string value.
+    """
+    if value is None:
+        return None
+    return value if isinstance(value, str) else str(value)
+
+
+def _parse_depends_on(
+    value: Any,
+) -> list[str]:
+    """
+    Normalize dependency declarations into a string list.
+
+    Parameters
+    ----------
+    value : Any
+        Input dependency specification (string or list of strings).
+
+    Returns
+    -------
+    list[str]
+        Normalized dependency list.
+    """
+    if isinstance(value, str):
+        return [value]
+    if isinstance(value, list):
+        return [entry for entry in value if isinstance(entry, str)]
+    return []
+
+
+def _require_str(
+    # data: dict[str, Any],
+    data: StrAnyMap,
+    key: str,
+) -> str | None:
+    """
+    Extract a required string field from a mapping.
+
+    Parameters
+    ----------
+    data : StrAnyMap
+        Mapping containing the target field.
+    key : str
+        Field name to extract.
+
+    Returns
+    -------
+    str | None
+        The string value when present and valid; otherwise ``None``.
+    """
+    value = data.get(key)
+    return value if isinstance(value, str) else None
+
+
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
@@ -65,12 +133,13 @@ class ExtractRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into an :class:`ExtractRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`source` and optional :attr:`options`.
 
         Returns
         -------
@@ -80,8 +149,8 @@ class ExtractRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        source = data
-        if
+        source = _require_str(data, 'source')
+        if source is None:
             return None
         return cls(
             source=source,
@@ -100,6 +169,8 @@ class JobConfig:
         Unique job name.
     description : str | None
         Optional human-friendly description.
+    depends_on : list[str]
+        Optional job dependency list. Dependencies must refer to other jobs.
     extract : ExtractRef | None
         Extraction reference.
     validate : ValidationRef | None
@@ -114,6 +185,7 @@ class JobConfig:
 
     name: str
     description: str | None = None
+    depends_on: list[str] = field(default_factory=list)
     extract: ExtractRef | None = None
     validate: ValidationRef | None = None
     transform: TransformRef | None = None
@@ -126,7 +198,8 @@ class JobConfig:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`JobConfig` instance.
 
         Parameters
         ----------
@@ -141,17 +214,18 @@ class JobConfig:
         data = maybe_mapping(obj)
         if not data:
             return None
-        name = data
-        if
+        name = _require_str(data, 'name')
+        if name is None:
             return None
 
-        description = data.get('description')
-
-
+        description = _coerce_optional_str(data.get('description'))
+
+        depends_on = _parse_depends_on(data.get('depends_on'))
 
         return cls(
             name=name,
             description=description,
+            depends_on=depends_on,
             extract=ExtractRef.from_obj(data.get('extract')),
             validate=ValidationRef.from_obj(data.get('validate')),
             transform=TransformRef.from_obj(data.get('transform')),
@@ -184,12 +258,13 @@ class LoadRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`LoadRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`target` and optional :attr:`overrides`.
 
         Returns
         -------
@@ -199,8 +274,8 @@ class LoadRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        target = data
-        if
+        target = _require_str(data, 'target')
+        if target is None:
             return None
         return cls(
             target=target,
@@ -230,12 +305,13 @@ class TransformRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`TransformRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`pipeline`.
 
         Returns
         -------
@@ -245,8 +321,8 @@ class TransformRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        pipeline = data
-        if
+        pipeline = _require_str(data, 'pipeline')
+        if pipeline is None:
             return None
         return cls(pipeline=pipeline)
 
@@ -280,12 +356,13 @@ class ValidationRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`ValidationRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`ruleset` plus optional metadata.
 
         Returns
         -------
@@ -295,15 +372,11 @@ class ValidationRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        ruleset = data
-        if
+        ruleset = _require_str(data, 'ruleset')
+        if ruleset is None:
             return None
-        severity = data.get('severity')
-
-            severity = str(severity)
-        phase = data.get('phase')
-        if phase is not None and not isinstance(phase, str):
-            phase = str(phase)
+        severity = _coerce_optional_str(data.get('severity'))
+        phase = _coerce_optional_str(data.get('phase'))
         return cls(
             ruleset=ruleset,
             severity=severity,
```
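The new helpers make `from_obj` uniformly tolerant: a missing or malformed required field yields `None` instead of an exception, and `depends_on` accepts either a scalar string or a list. A sketch of the implied semantics (inputs here are hypothetical):

```python
from etlplus.workflow.jobs import JobConfig

# A scalar depends_on is promoted to a one-element list.
job = JobConfig.from_obj({'name': 'load', 'depends_on': 'transform'})
assert job is not None and job.depends_on == ['transform']

# Non-string entries are silently dropped from list declarations.
job = JobConfig.from_obj({'name': 'load', 'depends_on': ['transform', 42]})
assert job is not None and job.depends_on == ['transform']

# A missing (or non-string) 'name' makes from_obj return None, not raise.
assert JobConfig.from_obj({'depends_on': ['transform']}) is None
```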
etlplus/{config → workflow}/pipeline.py

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.pipeline` module.
 
 Pipeline configuration model and helpers for job orchestration.
 
@@ -16,6 +16,7 @@ Notes
 from __future__ import annotations
 
 import os
+from collections.abc import Callable
 from collections.abc import Mapping
 from dataclasses import dataclass
 from dataclasses import field
@@ -24,72 +25,90 @@ from typing import Any
 from typing import Self
 
 from ..api import ApiConfig
-from ..enums import FileFormat
 from ..file import File
+from ..file import FileFormat
 from ..types import StrAnyMap
 from ..utils import coerce_dict
+from ..utils import deep_substitute
 from ..utils import maybe_mapping
 from .connector import Connector
 from .connector import parse_connector
 from .jobs import JobConfig
 from .profile import ProfileConfig
-from .utils import deep_substitute
 
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = [
+__all__ = [
+    # Data Classes
+    'PipelineConfig',
+    # Functions
+    'load_pipeline_config',
+]
 
 
-
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _collect_parsed[T](
     raw: StrAnyMap,
-
+    key: str,
+    parser: Callable[[Any], T | None],
+) -> list[T]:
     """
-
+    Collect parsed items from ``raw[key]`` using a tolerant parser.
 
     Parameters
     ----------
     raw : StrAnyMap
         Raw pipeline mapping.
+    key : str
+        Key pointing to a list-like payload.
+    parser : Callable[[Any], T | None]
+        Parser that returns an instance or ``None`` for invalid entries.
 
     Returns
     -------
-    list[
-        Parsed
+    list[T]
+        Parsed items, excluding invalid entries.
     """
-
-    for
-
-    if
-
-
-    return jobs
+    items: list[T] = []
+    for entry in raw.get(key, []) or []:
+        parsed = parser(entry)
+        if parsed is not None:
+            items.append(parsed)
+    return items
 
 
-def
-
-) ->
+def _parse_connector_entry(
+    obj: Any,
+) -> Connector | None:
     """
-
+    Parse a connector mapping into a concrete connector instance.
 
     Parameters
     ----------
-
-
+    obj : Any
+        Candidate connector mapping.
 
     Returns
     -------
-
-        Parsed
+    Connector | None
+        Parsed connector instance or ``None`` when invalid.
     """
-
+    if not (entry := maybe_mapping(obj)):
+        return None
+    try:
+        return parse_connector(entry)
+    except TypeError:
+        return None
 
 
-def
+def _build_sources(
     raw: StrAnyMap,
 ) -> list[Connector]:
     """
-    Return a list of
+    Return a list of source connectors parsed from the mapping.
 
     Parameters
     ----------
@@ -99,43 +118,32 @@ def _build_targets(
     Returns
     -------
     list[Connector]
-        Parsed
+        Parsed source connectors.
     """
-    return
+    return list(
+        _collect_parsed(raw, 'sources', _parse_connector_entry),
+    )
 
 
-def
+def _build_targets(
     raw: StrAnyMap,
-    key: str,
 ) -> list[Connector]:
     """
-    Return
-
-    Unknown or malformed entries are skipped to preserve permissiveness.
+    Return a list of target connectors parsed from the mapping.
 
     Parameters
     ----------
     raw : StrAnyMap
         Raw pipeline mapping.
-    key : str
-        List-containing top-level key ("sources" or "targets").
 
     Returns
     -------
     list[Connector]
-
+        Parsed target connectors.
     """
-
-
-
-            continue
-        try:
-            items.append(parse_connector(entry))
-        except TypeError:
-            # Skip unsupported types or malformed entries
-            continue
-
-    return items
+    return list(
+        _collect_parsed(raw, 'targets', _parse_connector_entry),
+    )
 
 
 # SECTION: FUNCTIONS ======================================================== #
@@ -156,7 +164,7 @@ def load_pipeline_config(
     return PipelineConfig.from_yaml(path, substitute=substitute, env=env)
 
 
-# SECTION: CLASSES
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
@@ -246,7 +254,7 @@ class PipelineConfig:
         TypeError
             If the YAML root is not a mapping/object.
         """
-        raw = File(Path(path), FileFormat.YAML).
+        raw = File(Path(path), FileFormat.YAML).read()
         if not isinstance(raw, dict):
             raise TypeError('Pipeline YAML must have a mapping/object root')
 
@@ -313,7 +321,7 @@ class PipelineConfig:
         targets = _build_targets(raw)
 
         # Jobs
-        jobs =
+        jobs = _collect_parsed(raw, 'jobs', JobConfig.from_obj)
 
         # Table schemas (optional, tolerant pass-through structures).
         table_schemas: list[dict[str, Any]] = []
```
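`_collect_parsed` uses PEP 695 type-parameter syntax (`def _collect_parsed[T](...)`, so Python 3.12+), letting one tolerant helper back `sources`, `targets`, and `jobs` alike. A self-contained sketch of the pattern (names are illustrative, not etlplus API):

```python
from collections.abc import Callable
from typing import Any


def collect_parsed[T](
    raw: dict[str, Any],
    key: str,
    parser: Callable[[Any], T | None],
) -> list[T]:
    # Missing keys and None payloads both degrade to an empty iteration.
    items: list[T] = []
    for entry in raw.get(key, []) or []:
        parsed = parser(entry)
        if parsed is not None:
            items.append(parsed)
    return items


raw = {'jobs': [{'name': 'extract'}, 'not-a-mapping', {'id': 3}]}
names = collect_parsed(
    raw,
    'jobs',
    lambda obj: obj.get('name') if isinstance(obj, dict) else None,
)
print(names)  # ['extract'] -- invalid entries are skipped, never raised
```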
etlplus/{config → workflow}/profile.py

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.profile` module.
 
 Profile model for pipeline-level defaults and environment.
 
@@ -22,10 +22,13 @@ from ..utils import cast_str_dict
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = [
+__all__ = [
+    # Data Classes
+    'ProfileConfig',
+]
 
 
-# SECTION: CLASSES
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
@@ -53,7 +56,7 @@ class ProfileConfig:
         cls,
         obj: StrAnyMap | None,
     ) -> Self:
-        """Parse a mapping into a
+        """Parse a mapping into a :class:`ProfileConfig` instance.
 
         Parameters
         ----------
@@ -64,7 +67,7 @@ class ProfileConfig:
         -------
         Self
             Parsed profile configuration; non-mapping input yields a default
-            instance. All
+            instance. All :attr:`env` values are coerced to strings.
 
         """
        if not isinstance(obj, Mapping):
            return cls()
```